%% Initial cleanup
% Clear the command window and close every open figure. Workspace variables
% are not cleared here, so anything defined before this script runs remains.
clc;
close all;

% Load the input MAT-files into the current workspace. The rest of the script
% reads names it never defines (grps, database, db_senyales, entRR, lzcrr,
% meanRR, ...), so they presumably come from these files -- TODO confirm
% which file provides which variable.
load('BuenosDatabase.mat');
load('reetiquetado_database.mat');
% NOTE(review): absolute, machine-specific path; the script only runs on a
% machine where this drive and folder exist.
load('F:\matlab_workspace\data\CINC2017_Challenge\db_senyales.mat')

%% Variable setup
num_features = 12;
n            = length(grps); % number of samples

% Containers filled in below
headers = {'entRR', 'lzcrr', 'meanRR', 'pNN50', 'ppc', 'RMSSD', 'skewRR', 'sqdev', 'stdRR', 'VAI', 'VLI', 'omega'};
dataset = zeros(n, num_features); % one row per sample, one column per feature
classi  = zeros(n, 1);            % numeric class index, assigned further down
class   = cell(n, 1);             % class label of each sample

%% Feature table
% Columns 1-11: the first n entries of each precomputed feature vector, in
% the same order as the names in headers.
dataset(:,  1) = entRR(1:n);
dataset(:,  2) = lzcrr(1:n);
dataset(:,  3) = meanRR(1:n);
dataset(:,  4) = pNN50(1:n);
dataset(:,  5) = ppc(1:n);
dataset(:,  6) = RMSSD(1:n);
dataset(:,  7) = skewRR(1:n);
dataset(:,  8) = sqdev(1:n);
dataset(:,  9) = stdRR(1:n);
dataset(:, 10) = VAI(1:n);
dataset(:, 11) = VLI(1:n);

% Per-sample work: the class label comes from database, and column 12 is
% computed by getOmegaAvg from the ECG signal stored in db_senyales.
for i = 1 : n
    Sx = db_senyales{i};
    Y  = Sx.Senyales.ECG;

    class{i}       = database{i}.Info.Clase;
    dataset(i, 12) = getOmegaAvg(Y);
end

% Half-width, in standard deviations, of the acceptance band used by the
% outlier replacement below. That code is commented out, so this is the only
% visible use of nsg.
nsg = 3;

% --- Disabled: outlier replacement ---------------------------------------
% When enabled, every feature column has the values at or beyond
% mean -/+ nsg * std set to NaN and then replaced by the mean of the
% remaining values (nanmean).
% NOTE(review): the while condition tests for NaNs in dataset, not for
% remaining outliers -- confirm the intended stopping rule before
% re-enabling this block.
%while  sum(sum(isnan(dataset))) > 0
    
%     mu = mean(dataset);
%     sg = std(dataset);
%     ic1 = mu - nsg .* sg;
%     ic2 = mu + nsg .* sg;
%     
%     for i = 1 : num_features
%         v = dataset(:, i);
%         v(v <= ic1(i) | v >= ic2(i)) = NaN;
%         med = nanmean(v);
%         v(isnan(v)) = med;
%         dataset(:, i) = v;
%     end
%end


%% Numeric class vector
% Each distinct label is mapped to its position in the list returned by
% unique; classi receives that position for every sample with the label.
colors = {'r*', 'c*', 'b+', 'go'};
clases = unique(class);
for i = 1 : numel(clases)
    idx = strcmp(clases{i}, class);
    classi(idx) = i;
end

% One box plot per feature, grouped by class label, laid out on a 3-by-4 grid.
figure;
for i = 1 : num_features
    subplot(3, 4, i);
    boxplot(dataset(:, i), class.');
    title(headers{i}, 'Interpreter', 'none');
end



%% Response matrix
% One row per sample, one column per class index 1..4; the column matching
% the sample's class index is set to 1 and the others stay 0.
test = zeros(n, 4);
test(classi == 1, 1) = 1;
test(classi == 2, 2) = 1;
test(classi == 3, 3) = 1;
test(classi == 4, 4) = 1;

%% Drop samples with missing features
hay_nan = sum(isnan(dataset), 2); % number of NaN features in each row
filtro  = hay_nan > 0;            % true for rows that will be removed
fprintf('Clases filtradas: \n');

if any(filtro)
    % Left unterminated on purpose: echo the class index and label of every
    % row that is about to be dropped.
    classi(filtro)
    class(filtro)

    % Remove the rows from every per-sample array so they stay aligned.
    dataset(filtro, :) = [];
    test(filtro, :)    = [];
    classi(filtro)     = [];
    % Fix: class was previously left unfiltered, so after this step it was
    % longer than, and misaligned with, dataset / test / classi.
    class(filtro)      = [];
end

% n now counts only the rows that survived the filter.
[n, ~] = size(dataset);
fprintf('Nmero filas filtradas: %d \n', sum(filtro));

%% Select the split to use
% Split50per50per is an external script. The code below uses train_x, test_x,
% train_y and test_y, which this script never defines -- presumably the split
% script creates them; TODO confirm.
Split50per50per; % for initial experiments
%SplitFinalTraining;

%% Normalise the features and cast the targets
% Standardise the training features and keep the per-column mean / std so the
% test features can be transformed with the same parameters.
[train_x, mu, sigma] = zscore(train_x);

% zscore leaves constant columns at 0 (it divides them by 1 internally) but
% still reports sigma = 0 for them. Dividing test_x by that raw sigma would
% give NaN/Inf in those columns, so use a copy with the zeros replaced by 1.
% sigma itself is left exactly as zscore returned it.
sigma_safe = sigma;
sigma_safe(sigma_safe == 0) = 1;

test_x = bsxfun(@minus,   test_x, mu);
test_x = bsxfun(@rdivide, test_x, sigma_safe);

train_y = double(train_y);
test_y  = double(test_y);


