import os
import pickle
import sys

import yaml

# Append the current working directory to the module search path
# (presumably so the project-local imports that follow resolve when the
# script is launched from the repository root -- confirm against layout).
path = os.getcwd()
sys.path.append(path)

from helper_code import *
from src.data_preparation.dataset_func import get_data_info_dict
from src.setup import get_configs

# Directory holding the raw input data; it is scanned by
# `find_challenge_files` and passed to `get_data_info_dict` in the main section.
RAW_DATA_DIR = "/data1/ecg2/L12_Dataset_Train_2021"
# Directory the per-lead preprocessed pickle files are written to.
# Alternative relative output location, currently disabled:
# SAVE_DATA_DIR = "./preprocessed_data"
SAVE_DATA_DIR = "/data1/ecg2/added_hr_210807/"

if __name__ == "__main__":
    # Script entry point: for every lead configuration, build the data-info
    # dictionary from the raw data and pickle a selected subset of its entries
    # into SAVE_DATA_DIR (one file per lead configuration).

    # Create the output directory together with any missing parents;
    # `exist_ok=True` makes this a no-op when the directory already exists.
    os.makedirs(SAVE_DATA_DIR, exist_ok=True)

    # Only the recording list is needed here, to verify that there is input.
    _, recording_files = find_challenge_files(RAW_DATA_DIR)

    if not len(recording_files):
        raise Exception("No data was provided.")

    lead_list = ["TWELVE", "SIX", "FOUR", "THREE", "TWO"]

    # Keys of the data-info dictionary that are persisted to disk.
    save_list = ["headers", "recordings", "features", "sample_ids", "labels", "mean_per_lead", "classes"]

    # save original preprocessed data
    for lead in lead_list:
        # Configs are reloaded for every lead so that each run starts from a
        # fresh copy, in case `get_data_info_dict` modifies them.
        configs = get_configs("./configs")
        configs["DEVELOP"] = False
        data_info_dict = get_data_info_dict(lead, RAW_DATA_DIR, configs)

        # A missing key raises KeyError, exactly as a plain lookup would.
        preprocessed_dataset = {k: data_info_dict[k] for k in save_list}

        # `os.path.join` avoids a doubled separator when SAVE_DATA_DIR
        # already ends with "/".
        save_path = os.path.join(SAVE_DATA_DIR, f"{lead}_simple_preprocessed_train_data_2021.pkl")
        with open(save_path, "wb") as fw:
            pickle.dump(preprocessed_dataset, fw)
