% function FFTs = preprocessAudio(x,fs,verbose,wavelet)
%
% Pre-processes an audio file for classification.
%
% The first step of pre-processing is segmenting the audio using Springer's
% state-of-the-art segmentation algorithm. 
%
% After segmentation, the N-point FFT of each state is calculated. The 'N'
% for each state was determined based on the distribution of the lengths of
% each state. This pre-calculated data is recorded in FFT_LENGTHS.mat.
%
%% Inputs:
% x:  A time-domain audio signal
% fs: The sampling rate of the audio signal
% verbose (optional): Boolean defining if useful print statements are
%                     desired. (default = false)
% wavelet (optional): Boolean defining if audio will be smoothed with a
%                     Haar wavelet transform before computing the FFT.
%                     (default = false)
%
%% Outputs:
% FFTs: A specially-formatted MATLAB struct with 5 matrix fields
%           1. FFT.S1
%           2. FFT.systole
%           3. FFT.S2
%           4. FFT.diastole
%           5. FFT.cycle
%
%       Each field is a (N/2) x M matrix. N is the length of the N-point
%       FFT used for each cycle (or sub-cycle). M is the number of complete
%       cycles (or sub-cycles) in the audio file. M is not the same for
%       each struct field. N was determined by analyzing the distribution
%       of sample lengths of each sub-cycle.
%
%       For example, it was found that in 1,000 audio files, the longest S1
%       sub-cycle was 361 samples long (180 milliseconds at a 2 kHz
%       sampling rate). Therefore, N_S1 was set as 362, and shorter S1
%       cycles are zero-padded to produce a 362-point FFT. Since the input
%       signal is real, the FFT is symmetric, and we discard half of the
%       data. Each column of FFT.S1 is the 'half-FFT' of each S1 cycle in
%       the audio file.
%
%% Copyright (C) 2016  Bradley Whitaker
% b.whitaker@gatech.edu
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program.  If not, see <http://www.gnu.org/licenses/>.

function [FFTs] = preprocessAudio(x,fs,verbose,wavelet)

%% Default the optional flags to false
% An omitted or empty ([]) flag is treated as false, so the wavelet option
% can be selected without choosing a verbosity: preprocessAudio(x,fs,[],true)
if nargin < 3 || isempty(verbose)
    verbose = false;
end
if nargin < 4 || isempty(wavelet)
    wavelet = false;
end

%% Work on a column vector
% Every state is sliced out of x and stacked column-wise into the output
% matrices (and row-wise into the running cycle), so a row-vector input
% must be reshaped first.
x = x(:);

%% Load the trained parameter matrices for Springer's HSMM model.
% The parameters were trained using 409 heart sounds from MIT heart
% sound database, i.e., recordings a0001-a0409.
% Loading into structs keeps the function workspace free of variables that
% appear "out of nowhere" and fails loudly if a file lacks the expected
% variable.
hsmm_B   = load('Springer_B_matrix');
hsmm_pi  = load('Springer_pi_vector');
hsmm_obs = load('Springer_total_obs_distribution');

%% Load the pre-determined FFT lengths
% NFFT is indexed by state number: 1 = S1, 2 = systole, 3 = S2, 4 = diastole.
fft_len = load('FFT_LENGTHS');
NFFT = [fft_len.NFFT_S1,fft_len.NFFT_systole,fft_len.NFFT_S2,fft_len.NFFT_diastole];
NFFT_cycle = fft_len.NFFT_cycle;

%% Declare variables
S1_FFT = [];
systole_FFT = [];
S2_FFT = [];
diastole_FFT = [];
cycle_FFT = [];
cycle_audio = [];   % audio of the cycle in progress; empty until an S1 is seen

%% Segment the file

if verbose
    % Use a private timer handle so nested/outer tic-toc pairs are untouched.
    timer_start = tic;
    fprintf('Pre-processing audio file...');
end

file_states = runSpringerSegmentationAlgorithm(x,fs,hsmm_B.Springer_B_matrix,hsmm_pi.Springer_pi_vector,hsmm_obs.Springer_total_obs_distribution,false);

%% Split the per-sample state labels into runs of constant state
% run_first(k):run_last(k) are the sample indices of the k-th run. The
% labels are assumed to be one per sample of x (TODO confirm against
% runSpringerSegmentationAlgorithm).
file_states = file_states(:);
last_of_run = find(diff(file_states) ~= 0);
run_first = [1; last_of_run + 1];
run_last  = [last_of_run; numel(file_states)];

%% Loop through the states, ignoring the first and the last
% The first and last runs are skipped because the recording may start or
% stop part-way through them. Fewer than three runs leaves every output
% field empty.
for r = 2:numel(run_first)-1

    %   1. Extract the audio corresponding to the current state.
    %   2. Take the N-point fft, where N is determined by the current
    %      state. S1 is shorter than diastole, so we do not need a long
    %      FFT to get all of the relevant information.
    %   3. Add the first half of the FFT magnitude to the respective
    %      output matrix. (We only need half; since our input signal is
    %      real, the magnitude of its FFT is symmetric.)
    %   4. (Keep track of full cycles as well)

    cur_state = file_states(run_first(r));
    cur_audio = x(run_first(r):run_last(r));

    if wavelet
        % Keep only the Haar approximation coefficients (a smoothed,
        % half-length version of the segment).
        [cur_audio,~] = dwt(cur_audio,'haar');
    end

    cur_FFT = halfMagnitudeFFT(cur_audio,NFFT(cur_state));

    switch cur_state
        case 1 % S1 starts a new cycle
            S1_FFT = [S1_FFT,cur_FFT]; %#ok<AGROW>
            cycle_audio = cur_audio;
        case 2 % systole
            systole_FFT = [systole_FFT,cur_FFT]; %#ok<AGROW>
            if (~isempty(cycle_audio))
                cycle_audio = [cycle_audio;cur_audio]; %#ok<AGROW>
            end
        case 3 % S2
            S2_FFT = [S2_FFT,cur_FFT]; %#ok<AGROW>
            if (~isempty(cycle_audio))
                cycle_audio = [cycle_audio;cur_audio]; %#ok<AGROW>
            end
        case 4 % diastole ends a full cycle
            diastole_FFT = [diastole_FFT,cur_FFT]; %#ok<AGROW>
            if (~isempty(cycle_audio))
                % Only cycles that began with an S1 are complete; states
                % seen before the first S1 never reach cycle_FFT.
                cycle_audio = [cycle_audio;cur_audio]; %#ok<AGROW>
                cycle_FFT = [cycle_FFT,halfMagnitudeFFT(cycle_audio,NFFT_cycle)]; %#ok<AGROW>
                cycle_audio = [];
            end
    end

end % for each interior state run

%% Package the output
FFTs.S1 = S1_FFT;
FFTs.systole = systole_FFT;
FFTs.S2 = S2_FFT;
FFTs.diastole = diastole_FFT;
FFTs.cycle = cycle_FFT;

if verbose
    % toc returns fractional seconds, so print with a floating-point format.
    fprintf('\tCalculation took %.2f seconds\n',toc(timer_start));
end

end

function half_mag = halfMagnitudeFFT(signal,n_fft)
% Magnitude of the first floor(n_fft/2) bins of the n_fft-point FFT.
% fft zero-pads signals shorter than n_fft and truncates longer ones.
% floor() keeps the index range valid when n_fft is odd.
spectrum = fft(signal,n_fft);
half_mag = abs(spectrum(1:floor(n_fft/2)));
end