# -*- coding: utf-8 -*-
"""
Created on Wed Mar 17 21:12:44 2021

Simple example to demonstrate feature extraction and preprocessing

@author: Maurice
"""

from helper_code import *
import numpy as np, os, sys, joblib
import scipy


import neurokit2 as nk

import matplotlib.pyplot as plt
from ecg_processing import process_records
from ecg_recording import create_ecg_recording

from sklearn.preprocessing import MinMaxScaler

import time

def drop_sparse_rows(feature_vectors, label_vectors, missing_value=-1):
    """Drop samples in which half or more of the features are missing.

    Parameters
    ----------
    feature_vectors : array-like, 2-D (samples x features)
        Feature matrix in which ``missing_value`` marks a missing entry.
    label_vectors : array-like
        Labels; the first axis must line up with ``feature_vectors``.
    missing_value : scalar, optional
        Sentinel that marks a missing feature (default ``-1``).

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The rows of ``feature_vectors`` and ``label_vectors`` whose number of
        missing features is strictly smaller than half the feature count.
    """
    feature_vectors = np.asarray(feature_vectors)
    label_vectors = np.asarray(label_vectors)
    missing_per_row = np.count_nonzero(feature_vectors == missing_value, axis=1)
    keep_mask = missing_per_row < feature_vectors.shape[1] / 2
    return feature_vectors[keep_mask], label_vectors[keep_mask]


def impute_missing_with_median(feature_vectors, missing_value=-1):
    """Replace missing entries by the median of the valid values per feature.

    The median is computed over the non-missing entries of each column only,
    so the ``missing_value`` sentinel does not bias the imputed value.
    A column without any valid entry keeps ``missing_value``.

    Parameters
    ----------
    feature_vectors : array-like, 2-D (samples x features)
        Feature matrix in which ``missing_value`` marks a missing entry.
    missing_value : scalar, optional
        Sentinel that marks a missing feature (default ``-1``).

    Returns
    -------
    numpy.ndarray
        Float array of the same shape with the missing entries filled in.
    """
    features = np.asarray(feature_vectors, dtype=float)
    if features.shape[0] == 0:
        # nothing to impute; also avoids taking a median over an empty axis
        return features.copy()
    missing = features == missing_value
    valid_only = np.where(missing, np.nan, features)
    # Columns without a single valid value would make nanmedian warn and
    # return nan; park a placeholder there and restore the sentinel afterwards.
    all_missing = missing.all(axis=0)
    valid_only[:, all_missing] = 0.0
    column_median = np.nanmedian(valid_only, axis=0)
    column_median[all_missing] = missing_value
    return np.where(missing, column_median, features)


if __name__ == '__main__': # needed at begin of each script, at least on windows for multiprocessing

    base_dir = "C:/Users/Maurice/CINC_2021/"
    data_dir = "C:/Users/Maurice/CINC_2021/Data/"
    database_names = ('WFDB_CPSC2018','WFDB_ga','WFDB_PTB','WFDB_PTBXL','WFDB_StPetersburg','WFDB_CPSC2018_20','error_repro')

    # Debugging alternative: process a single recording instead of a database
    #t_headers,t_recordings=find_challenge_files(data_dir+database_names[1])
    #record = create_ecg_recording(t_headers[0],t_recordings[0],True,True)

    start_time = time.time()
    records = process_records(data_dir+database_names[5],multiproc=False)
    print("Runtime in seconds: " + str(time.time()-start_time))

    feature_vectors = np.array([record.ecg_features for record in records])
    label_vectors = np.array([record.labels_OH for record in records])
    if len(feature_vectors) == 0:
        # Nothing to preprocess; stop here instead of failing further below.
        raise SystemExit("no feature vectors")

    # delete entries (rows) in which half or more of the values are -1
    feature_vectors, label_vectors = drop_sparse_rows(feature_vectors, label_vectors)
    if len(feature_vectors) == 0:
        # the scaler cannot be fitted on zero samples
        raise SystemExit("no feature vectors left after removing sparse rows")

    # set remaining -1 entries to the median of the valid values of that feature
    feature_cleaned = impute_missing_with_median(feature_vectors)

    # The feature layout is read from the second record, as before; fall back
    # to the only record when just one was processed.
    # NOTE(review): the fallback assumes all records share the same layout - confirm.
    layout_record = records[1] if len(records) > 1 else records[0]
    feature_indices = layout_record.get_lead_indices()

    # scale data for training
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(feature_cleaned)
    Y_train = label_vectors

    scaler2 = MinMaxScaler()
    # parameters of [age,sex,lead II,lead V5]
    selected_columns = np.r_[0:feature_indices[0],
                             feature_indices[1]:feature_indices[2],
                             feature_indices[10]:feature_indices[11]]
    scaler2.min_ = scaler.min_[selected_columns]
    scaler2.scale_ = scaler.scale_[selected_columns]

    #---------------saving features-----------------------------------------------
    feature_file = base_dir + "Data_XY/feats_test2.npz"
    np.savez(feature_file, X=X_train_scaled, Y=Y_train)
    # read the file back as a check; the context manager closes the archive
    with np.load(feature_file) as npzfile:
        print(npzfile.files)
        X = npzfile['X']
        Y = npzfile['Y']

