function collate_wesad_dataset
% COLLATE_WESAD_DATASET  Collates individual subjects' data files from the
% WESAD dataset into one MATLAB file per activity for analysis.
%
%  Inputs:
%
%    files - a MATLAB file for each individual subject in the WESAD
%    dataset. Details of how to obtain these MATLAB files are
%    provided at: https://peterhcharlton.github.io/resources/datasets
%
%  Outputs:
%
%    files - a MATLAB file for each activity in the protocol (at baseline
%    and during amusement, stress and meditation), named
%    'wesad_<activity>_data.mat' and written to 'up.paths.save_folder'.
%
%  Preparation:
%
%    Modify the MATLAB script by inserting the 'up.paths.root_folder' and
%    'up.paths.save_folder' into the setup_up function.
%
%  Further information:
%
%    https://peterhcharlton.github.io/resources/datasets
%
%  Licence:
%    Available under the MIT License - please see the accompanying
%    file named "MIT_LICENSE.txt"
%
% Author: Peter H. Charlton, May 2021.

fprintf('\n ~~~ Collating WESAD dataset ~~~')

up = setup_up;

collate_data(up);

end

function up = setup_up
% SETUP_UP  Returns the structure of universal parameters ('up') used by
% the rest of the script: folder paths, subject numbers and the sampling
% frequency assigned to each imported signal.

fprintf('\n - Setting up parameters')

% The following root folder contains subfolders for each subject (name
% 'S#' where # is the subject number)
up.paths.root_folder = '/Users/petercharlton/Documents/Data/WESAD/raw_data/';
up.paths.save_folder = '/Users/petercharlton/Documents/Data/WESAD/conv_data/';

% Specify the subject numbers
up.subjs = [2:11,13:17]; % 15 subjects are listed here (numbers 1 and 12 are not used)

% Specify sampling frequencies
% NOTE(review): values should be confirmed against the readme document
% which accompanies the WESAD dataset.
up.fs.ppg = 64;        % in Hz
up.fs.acc_w = 32;      % in Hz
up.fs.eda_w = 4;       % in Hz
up.fs.temp_w = 4;      % in Hz
up.fs.ecg = 700;       % in Hz
up.fs.resp = 700;      % in Hz
up.fs.activity = 700;  % in Hz

end

function collate_data(up)
% COLLATE_DATA  Imports each subject's signals and fixed (demographic)
% data, then saves one file per defined activity containing, for each
% subject, the portion of every signal recorded during the first period of
% that activity.
%
%  Inputs:
%    up - universal parameters structure returned by setup_up (uses
%         up.paths.root_folder, up.paths.save_folder, up.subjs and up.fs).
%
%  Outputs:
%    none returned. For each activity a MAT-file containing the variables
%    'data' (struct array, one element per subject with data for that
%    activity) and 'source' (details of the conversion) is saved.

%% import data from each subject in turn
fprintf('\n - Importing data');
for subj_no = 1:length(up.subjs)
    curr_subj = up.subjs(subj_no);
    fprintf('\n - subject %d', curr_subj);

    % Load data (this Matlab file has been created by converting the .pkl
    % file to .mat format).
    subj_id = ['S', num2str(curr_subj)];
    subj_path = fullfile(up.paths.root_folder, subj_id, [subj_id, '.mat']);
    % load into a struct rather than directly into the workspace, so that a
    % missing variable is reported clearly
    loaded = load(subj_path, 'pickle_data');
    if ~isfield(loaded, 'pickle_data')
        error('collate_wesad_dataset:missingVariable', ...
            'The file %s does not contain a variable called ''pickle_data''.', subj_path);
    end
    pickle_data = loaded.pickle_data;

    % - import PPG
    master_data(subj_no).ppg.v = pickle_data.signal.wrist.BVP;
    master_data(subj_no).ppg.fs = up.fs.ppg;

    % - import ECG
    master_data(subj_no).ecg.v = pickle_data.signal.chest.ECG;
    master_data(subj_no).ecg.fs = up.fs.ecg;

    % - Import Resp
    master_data(subj_no).ref.ind.v = pickle_data.signal.chest.Resp;
    master_data(subj_no).ref.ind.fs = up.fs.resp;

    % - Import wrist acc
    master_data(subj_no).acc_w.v = pickle_data.signal.wrist.ACC;
    master_data(subj_no).acc_w.fs = up.fs.acc_w;

    % - Import wrist EDA
    master_data(subj_no).eda_w.v = pickle_data.signal.wrist.EDA;
    master_data(subj_no).eda_w.fs = up.fs.eda_w;

    % - Import wrist temp
    master_data(subj_no).temp_w.v = pickle_data.signal.wrist.TEMP;
    master_data(subj_no).temp_w.fs = up.fs.temp_w;

    % - Import activity labels
    master_data(subj_no).ref.activity.v = pickle_data.label;
    master_data(subj_no).ref.activity.fs = up.fs.activity; % in Hz
    master_data(subj_no).ref.activity.key.txt = {'not defined', 'baseline', 'stress', 'amusement', 'meditation', 'not defined', 'not defined', 'not defined'};
    master_data(subj_no).ref.activity.key.num = 0:7;

    % - Import fixed data
    file_path = fullfile(up.paths.root_folder, subj_id, [subj_id, '_readme.txt']);
    readme_data = importfile(file_path);
    master_data(subj_no).fix.subj_name = pickle_data.subject;
    master_data(subj_no).fix.age = readme_data.age;
    master_data(subj_no).fix.weight = readme_data.weight;
    master_data(subj_no).fix.height = readme_data.height;
    master_data(subj_no).fix.gender = readme_data.gender;
    master_data(subj_no).fix.units.age = 'years';
    master_data(subj_no).fix.units.weight = 'kg';
    master_data(subj_no).fix.units.height = 'cm';

end

%% split data into different activities
fprintf('\n - Splitting data into different activities')
acts.num = master_data(1).ref.activity.key.num;
acts.txt = master_data(1).ref.activity.key.txt;

% signals to be cropped to each activity (dots denote nested fields)
sigs = {'ppg', 'ecg', 'ref.ind', 'acc_w', 'eda_w', 'temp_w'};

% make sure there is somewhere to save the results
if exist(up.paths.save_folder, 'dir') ~= 7
    mkdir(up.paths.save_folder);
end

% - cycle through activities
for act_no = 1 : length(acts.txt)
    curr_act_no = acts.num(act_no);
    curr_act_txt = acts.txt{act_no};
    if strcmp(curr_act_txt, 'not defined')
        continue
    end

    % copy master data
    data = master_data;

    % eliminate data for each subject in turn which weren't recorded during
    % this activity
    subjs_to_keep = false(1, length(data));
    for subj_no = 1 : length(data)

        act_fs = data(subj_no).ref.activity.fs;

        % identify periods of this activity (as first and last sample
        % indices of each period in the label vector)
        [period_starts, period_stops] = find_activity_periods(data(subj_no).ref.activity.v, curr_act_no);

        % check there is at least one period of this activity, otherwise skip
        if isempty(period_starts)
            fprintf('\n - No %s data found for subj %s', curr_act_txt, data(subj_no).fix.subj_name);
            continue
        end
        subjs_to_keep(subj_no) = true;

        % check there is no more than one period of this activity
        if length(period_starts) ~= 1
            fprintf('\n - There were %d periods of %s for subj %s, so used first one (lasting %.1f secs)', ...
                length(period_starts), curr_act_txt, data(subj_no).fix.subj_name, ...
                (period_stops(1)-period_starts(1)+1)/act_fs)
        end

        % - times of the first and last samples of the first period. Time
        %   starts at zero, so sample k occurs at (k-1)/fs.
        t_deb = (period_starts(1)-1)/act_fs;
        t_fin = (period_stops(1)-1)/act_fs;

        % - go through signals, keeping only the samples in this period
        for sig_no = 1 : length(sigs)
            field_path = strsplit(sigs{sig_no}, '.');
            curr_sig_data = getfield(data(subj_no), field_path{:});
            curr_sig_data.v = extract_period(curr_sig_data.v, curr_sig_data.fs, t_deb, t_fin);
            data(subj_no) = setfield(data(subj_no), field_path{:}, curr_sig_data);
        end

        % - remove irrelevant data
        data(subj_no).ref = rmfield(data(subj_no).ref, 'activity');
        data(subj_no).fix.activity = curr_act_txt;

    end

    % eliminate subjects without data for this period
    data = data(subjs_to_keep);

    % add source details
    source.date_of_conversion = datetime('now');
    source.matlab_conversion_script = [mfilename('fullpath'), '.m'];
    source.raw_data_path = up.paths.root_folder;

    % save data for this activity
    fprintf('\n - Saving data for %s', curr_act_txt)
    save(fullfile(up.paths.save_folder, ['wesad_', strrep(curr_act_txt, ' ', '_'), '_data']), 'data', 'source')

    clear data
end

fprintf('\n\n - NB: this dataset also contains additional variables measured by the chest sensor which have not been imported');

fprintf('\n\n ~~~ DONE ~~~')

end

function [period_starts, period_stops] = find_activity_periods(labels, act_no)
% FIND_ACTIVITY_PERIODS  Finds each contiguous run of 'act_no' in 'labels'.
%
%  Outputs:
%    period_starts - index of the first sample of each run
%    period_stops  - index of the last sample of each run
%
%  The label vector is padded with a non-matching value at each end so that
%  runs which touch the first or last sample are still detected, and so that
%  the two outputs always have the same length.

is_act = double(labels(:) == act_no);
changes = diff([0; is_act; 0]);
period_starts = find(changes == 1);
period_stops = find(changes == -1) - 1;

end

function v = extract_period(v, fs, t_deb, t_fin)
% EXTRACT_PERIOD  Returns the samples of signal 'v' (sampled at 'fs' Hz,
% with the first sample at time zero) which occur between times 't_deb' and
% 't_fin' inclusive.
%
%  For a matrix, all channels are retained: the longer dimension is taken
%  to be the sample dimension (assumes there are more samples than
%  channels - e.g. an N-by-3 accelerometry signal).

t = (0:length(v)-1)./fs;
rel_t = t >= t_deb & t <= t_fin;

if isvector(v)
    v = v(rel_t);
elseif size(v,1) >= size(v,2)
    % samples in rows (linear indexing here would only return the first column)
    v = v(rel_t, :);
else
    % samples in columns
    v = v(:, rel_t);
end

end

function readme_data = importfile(filename, dataLines)
%IMPORTFILE Import subject details from a subject's readme text file
%  README_DATA = IMPORTFILE(FILENAME) reads data from text file FILENAME
%  for the default selection (line 2 onwards). Returns a structure with
%  the fields 'age', 'height', 'weight' and 'gender'. Values which can be
%  converted to numbers are returned as doubles, and the remainder are
%  returned as strings.
%
%  README_DATA = IMPORTFILE(FILE, DATALINES) reads data for the specified
%  row interval(s) of text file FILENAME. Specify DATALINES as a
%  positive scalar integer or a N-by-2 array of positive scalar integers
%  for dis-contiguous row intervals.
%
%  Example:
%  readme_data = importfile("/Users/petercharlton/Documents/Data/WESAD/raw_data/S2/S2_readme.txt", [2, 6]);
%
%  See also READTABLE.
%
% Adapted from code auto-generated by MATLAB on 21-May-2021 06:45:20

%% Input handling

% If dataLines is not specified, define defaults
if nargin < 2
    dataLines = [2, Inf];
end

%% Setup the Import Options and import the data
opts = delimitedTextImportOptions("NumVariables", 2);

% Specify range and delimiter
opts.DataLines = dataLines;
opts.Delimiter = ":";

% Specify column names and types
opts.VariableNames = ["Personalinformation", "VarName2"];
opts.VariableTypes = ["string", "string"];

% Specify file level properties
opts.ExtraColumnsRule = "ignore";
opts.EmptyLineRule = "read";

% Specify variable properties
opts = setvaropts(opts, "Personalinformation", "WhitespaceRule", "preserve");
opts = setvaropts(opts, "Personalinformation", "EmptyFieldRule", "auto");

% Import the data
readme_txt = readtable(filename, opts);

%% convert to variables for output
vars = {'Age', 'Height', 'Weight', 'Gender'};
for var_no = 1 : length(vars)
    curr_var = vars{var_no};
    % find the row(s) whose label (the text before the colon) mentions this variable
    rel_row = find(contains(readme_txt.Personalinformation, curr_var));
    temp = readme_txt.VarName2(rel_row);
    % use the numeric value where the text can be converted to a number
    temp2 = str2double(temp);
    if ~isempty(temp2) && all(~isnan(temp2))
        temp = temp2;
    end
    % dynamic field name (instead of eval)
    readme_data.(lower(curr_var)) = temp;
end

end