% This script is the main file to process all assays, as listed in the file
% whose name is assigned to the variable AssayListName in this script.
% Processed data is stored in .mat and .xlsx format (the latter in summary
% form). Intermediate information on fits and significance testing using
% [1,2] is displayed in the command window.
% If plot_fits is set to true, a .pdf and a .tiff file are stored with the
% fit plotted in the folder ./Figures
%
% Date: 18-01-2024
%
% Bibliography
% [1]   Ruxton, G. D. The unequal variance t-test is an underused
%       alternative to Student's t-test and the Mann-Whitney U test.
%       Behavioral Ecology 17, 688-690 (2006).
% [2]   Holm, S. A simple sequentially rejective multiple test procedure.
%       Scandinavian Journal of Statistics 6, 65-70 (1979).

% Preparatory steps: close all figures, empty the workspace and clear the
% command window before anything else runs
close('all')
clearvars
clc
% Put the folder with the model functions on the MATLAB path
addpath('./Additional functions/')

%% Define conditions for analyzing the data - this section can be adapted -
% Exactly one of the examples below should be active (uncommented). Each
% example sets:
%   AssayListName    name of the file that lists the assays to process
%   GTPases          protein names that are classified as GTPase
%   LinFitEffector   protein names that are classified as linear effector
%   QuadFitEffector  protein names that are classified as quadratic effector

% Example 1 (active)
AssayListName   = 'Assaylist-example1.xlsx';
GTPases         = {'Ras'};
LinFitEffector  = {''};
QuadFitEffector = {''};

% Example 2
% AssayListName   = 'Assaylist-example2.xlsx';
% GTPases         = {'Cdc42'};
% LinFitEffector  = {'Rga2'};
% QuadFitEffector = {'Cdc24'};

% Example 1 & 2 combined
% AssayListName   = 'Assaylist-example1and2.xlsx';
% GTPases         = {'Ras'; 'Cdc42'};
% LinFitEffector  = {'Rga2'};
% QuadFitEffector = {'Cdc24'};

% Logical: correct the terms in the rate equation that depend on [GTPase]
% for run-specific GTPase activity/concentration differences
act_corr         = true;

% Number of random draws from the distribution of rate parameters k
num_draws        = 100000;

% Logical: output the fits as .pdf and .tiff (false is faster)
plot_fits        = true;

% Logical: print the fit results in the command window
print_fits       = true;

% Logical: disregard data points with 0.00 - 0.05 % GTP remaining in the
% fits
GTP_filt         = true;

% Two-element vector [lower upper] with the bounds on the GTPase
% concentration factor of points that are included in pooling of estimates
conc_corr_bounds = [0.5, 1.5];

% Per-GTPase logical (one field per GTPase name): discard runs from pooling
% when they have very low standard errors (1e-10 or lower) on the k values.
% GTPases without a field here get the default true further below.
k_low_err_filt   = struct('Cdc42', true);

%% Loop across assays: (1) extract data and (2) fit Crocodile model
% GTPases without an explicit entry in k_low_err_filt default to true
for i = 1 : numel(GTPases)
    if ~isfield(k_low_err_filt, GTPases{i})
        k_low_err_filt.(GTPases{i}) = true(1);
    end
end

Assays                          = Read_assay_list(AssayListName);
% Start from an empty result structure so that reference assays can be
% validated with isfield, also while processing the very first assay
Assays_processed                = struct();
for assay = 1  : size(Assays, 1)
    % Extract the assay properties from the list
    Label                       = Assays.Label{assay};
    Data_file                   = Assays.File{assay};
    Tabs                        = Assays.Tabs{assay};
    GTPase_assay                = Assays.GTPase_ref{assay};

    % A reference assay counts as specified when the entry is not NaN. It
    % must name an assay that was already processed in an earlier
    % iteration, because its results are read below.
    has_ref                     = ~all(isnan(GTPase_assay));
    if has_ref && ~(ischar(GTPase_assay) && isfield(Assays_processed, GTPase_assay))
        error('Error: GTPase ref. assay specified in assay number %d is not a previously processed assay', assay)
    end

    % Classify the proteins of this assay as GTPase / linear effector /
    % quadratic effector (each stored as a column)
    Proteins_tot                = Assays.Proteins{assay};
    Proteins.GTPase             = reshape(intersect(Proteins_tot, GTPases), [], 1);
    if isempty(Proteins.GTPase) && has_ref
        % No GTPase listed for this assay: take the first GTPase of the
        % reference assay (only possible when a reference is specified)
        Proteins.GTPase         = Assays_processed.(GTPase_assay).Proteins.GTPase(1);
    end
    Proteins.LinEff             = reshape(intersect(Proteins_tot, LinFitEffector), [], 1);
    Proteins.QuaEff             = reshape(intersect(Proteins_tot, QuadFitEffector), [], 1);

    if isempty(Proteins.GTPase)
        error('Error: No GTPase or incorrect GTPase ref. assay specified in assay number %0.f', assay)
    end
    % Every protein of the assay must fall in one of the three classes
    unclassified                = setdiff(Proteins_tot, cat(1, Proteins.GTPase, Proteins.LinEff, Proteins.QuaEff));
    if ~isempty(unclassified)
        error('Error: Proteins in assay number %d could not be classified as GTPase, linear or quadratic effector: %s', ...
            assay, strjoin(reshape(cellstr(unclassified), 1, []), ', '))
    end

    % (1) Extract the data into a standardized table from the .xls(x) files
    Assays_processed.(Label).Data = extract_activity_data(Proteins, Data_file, Tabs);
    % Define the draws of distributions GTPase rate parameters (k1, k2)
    % First the default NaN values
    Rate_par_draws              = struct('k1', NaN(num_draws, 1), 'k2', NaN(num_draws, 1));
    % In the case that a reference assay was specified overwrite the default
    % with the first two columns of the k_draws of that assay
    if has_ref
        Rate_par_draws.k1       = Assays_processed.(GTPase_assay).k_draws(:, 1);
        Rate_par_draws.k2       = Assays_processed.(GTPase_assay).k_draws(:, 2);
        Assays_processed.(Label).GTPase_assay     = GTPase_assay;
    end
    % Store proteins of this assay in end result structure Assays_processed
    Assays_processed.(Label).Proteins               = Proteins;
    % (2) Fit Crocodile model and store results in Assays_processed; the
    % low-standard-error filter setting of the first GTPase is passed on
    [Assays_processed.(Label).k_tot, Assays_processed.(Label).conc_corr, ...
        Assays_processed.(Label).k_runs, Assays_processed.(Label).k_draws] = ...
        Crocodile_model(Proteins, Rate_par_draws, Assays_processed.(Label).Data, ...
        act_corr, plot_fits, print_fits, GTP_filt, conc_corr_bounds, k_low_err_filt.(Proteins.GTPase{1}));
end

%% Store data in .mat and .xlsx format
clearvars -except Assays_processed
% Make a folder 'Data' for the results if it does not exist yet
if exist('./Data', 'dir') == 0
    mkdir('./Data/')
end
save('./Data/Data_assays.mat', 'Assays_processed')

assays                              = fieldnames(Assays_processed)';
% Row offsets per assay: every assay takes one row per run plus one row for
% the pooled result; the extra + 1 reserves row 1 for the column labels
ind_tot                             = [0; cumsum(structfun(@(x) numel(x.k_runs.name) + 1, Assays_processed))] + 1;
% Table for putting a summary of results in .xlsx format; cells that are
% not filled below keep the placeholder '-'
Table_out                           = cell(ind_tot(end), 25);
[Table_out{:}]                      = deal('-');
% All labels in the first row
Table_out(1, :)                     = [{'Run'} {'Varied Protein 1'} {'Varied Protein 2'} ...
                                        {'Parameter 1'} {'Estimate'} {'95% lower bound'} {'95% upper bound'} {'std. err.'} ...
                                        {'Parameter 2'} {'Estimate'} {'95% lower bound'} {'95% upper bound'} {'std. err.'} ...
                                        {'Parameter 3'} {'Estimate'} {'95% lower bound'} {'95% upper bound'} {'std. err.'} ...
                                        {'Parameter 4'} {'Estimate'} {'95% lower bound'} {'95% upper bound'} {'std. err.'} ...
                                        {'Adj. R2'} {'Used for pooling'}];

% Loop over all assays
for assay = 1 : numel(assays)
    % Shorter notations for lengthy variable names
    A                               = Assays_processed.(assays{assay});
    P                               = A.Proteins;
    GTPase                          = P.GTPase{1};
    % Varied proteins: the effectors, or the GTPase(s) if there are none
    P                               = cat(1, P.QuaEff, P.LinEff);
    if isempty(P)
        P                           = A.Proteins.GTPase;
    end
    runs                            = A.k_runs.name;
    % Row indices for table output (excluding/including pooled results row
    % after each assay respectively)
    ind_s1                          = ind_tot(assay) + 1 : ind_tot(assay + 1) - 1;
    ind_s2                          = ind_tot(assay) + 1 : ind_tot(assay + 1);
    % First column: run name
    [Table_out{ind_s1, 1}]          = deal(runs{:});
    Table_out{ind_s2(end), 1}       = 'pooled';
    % Next columns: protein names
    for p = 1 : numel(P)
        [Table_out{ind_s2, 1 + p}]  = deal(P{p});
    end
    % Extract data from Assays_processed to be placed in output table: the
    % per-run values followed by the pooled values as last row
    point_est                       = num2cell([cell2mat({A.k_runs.est}');      A.k_tot.est]);
    conf_int                        = num2cell([cell2mat({A.k_runs.conf_int}'); A.k_tot.conf_int]);
    std_err                         = num2cell([cell2mat({A.k_runs.std_err}');  A.k_tot.std_err]);
    c_corr_est                      = [num2cell(A.conc_corr); [{'-'} {'-'} {'-'} {'-'}]];
    % Depending on assay type, store k1/k2/activity correction factor/k3.
    % The GTPase name is compared exactly: a pattern match would also hit
    % effectors whose name merely contains the GTPase name.
    if any(strcmp(P, GTPase))
        [Table_out{ind_s2, 4}]      = deal('k1');
        val                         = [point_est(:, 1) conf_int(:, [1 2]) std_err(:, 1)];
        [Table_out{ind_s2, 5 : 8}]  = deal(val{:});
        [Table_out{ind_s2, 9}]      = deal('k2');
        val                         = [point_est(:, 2) conf_int(:, [3 4]) std_err(:, 2)];
        [Table_out{ind_s2, 10 : 13}]= deal(val{:});
        if numel(P) > 1
            [Table_out{ind_s2, 14}] = deal(sprintf('k3_%s_%s', GTPase, P{2}));
            val                     = [point_est(:, 3) conf_int(:, [5 6]) std_err(:, 3)];
            [Table_out{ind_s2, 15 : 18}]  = deal(val{:});
        end
    else
        [Table_out{ind_s2, 4}]      = deal('c_corr');
        [Table_out{ind_s2, 5 : 8}]  = deal(c_corr_est{:});
        [Table_out{ind_s2, 9}]      = deal(sprintf('k3_%s_%s', GTPase, P{1}));
        val                         = [point_est(:, 2) conf_int(:, [3 4]) std_err(:, 2)];
        [Table_out{ind_s2, 10 : 13}]= deal(val{:});
        if numel(P) > 1
            [Table_out{ind_s2, 14}] = deal(sprintf('k3_%s_%s', GTPase, P{2}));
            val                     = [point_est(:, 3) conf_int(:, [5 6]) std_err(:, 3)];
            [Table_out{ind_s2, 15 : 18}]  = deal(val{:});
            [Table_out{ind_s2, 19}] = deal(sprintf('k3_%s_%s_%s', GTPase, P{1}, P{2}));
            val                     = [point_est(:, 4) conf_int(:, [7 8]) std_err(:, 4)];
            [Table_out{ind_s2, 20 : 23}]  = deal(val{:});
        end
    end
    % Next columns: Adjusted R-squared (set to N/A if there are not enough
    % degrees of freedom, i.e. deg_free <= 0)
    val_Adj_R_sq                    = {A.k_runs.Adj_R_sq}';
    val_deg_free                    = cell2mat({A.k_runs.deg_free}');
    val_Adj_R_sq2                   = cell(numel(val_deg_free), 1);
    for i = 1 : numel(val_deg_free)
        if val_deg_free(i) <= 0
            val_Adj_R_sq2{i}        = '#N/A';
        else
            val_Adj_R_sq2{i}        = val_Adj_R_sq{1}(i);
        end
    end
    [Table_out{ind_s1, 24}]         = deal(val_Adj_R_sq2{:});
    % Last column: whether the run was used for pooling
    val_pool                        = num2cell(A.k_runs.pool);
    [Table_out{ind_s1, 25}]         = deal(val_pool{:});
end

% xlswrite does not work on Mac, so the cell array is converted to a table
% and written with writetable instead.
Table_cell   = cell2table(Table_out);
summary_file = fullfile(pwd, 'Data_summary.xlsx');
% Spreadsheets are updated in place, so rows of a longer summary from an
% earlier run would otherwise survive below the new content
if exist(summary_file, 'file') == 2
    delete(summary_file)
end
% The column labels are already in row 1 of Table_out; suppress the
% auto-generated variable names (Table_out1, ...) so no extra header row
% is written above them
writetable(Table_cell, summary_file, 'WriteVariableNames', false)

clearvars -except Assays_processed