function [ffInfo,masterList,numT,chosenInfo,probCount] = getNLDLGts(DLGstats)
%%% DLGstats is the ouput of running DLGstatsHOLD.m, then DLGstatsPLACE.m
%%%   -> DLGstats holds the info from each RCWT realization about whether
%%%   the resulting surrogate process maxima cluster
%%% ffInfo is a struct which gives the different possible maxima configurations
%%%   -> each field is a possible maxima config. (e.g. z12345 means surrogate 
%%%   processes 1-5 cluster together; z1z2345 means surrogate process 1
%%%   occurs separately over an exposure while surrogates 2-5 occur
%%%   clustered etc.)
%%%   -> within each field, we have the probability of experiencing that
%%%   maxima cluster (pz...), the info on the time series which contain 
%%%   those clusters (z... - organized the same way as DLGstats: marker zA
%%%   ZB ...), and the chosen indices based on numT, the probability of
%%%   experiencing that maxima config., and the distribution of those
%%%   maxima clusters
%%% masterList gives the indices of all chosen DLG time series based on
%%%   numT (note length(masterList)>=numT because some time series contain
%%%   un-clustered maxima, and each index of MasterList is a time series
%%%   which may excite clustered or un-clustered surrogate maxima
%%% numT is the max number of NL-DLG time series which can be assembled
%%%   without oversampling the maxima cluster distributions
%%% chosenInfo is a struct which gives similar info as ffInfo, but removes
%%%   maxima configurations where the probability of experiencing that maxima
%%%   config. is so low that we wouldn't sample any of these time series

nSP = 2;

%%% find time series markers with a value of z1, z2, z3, ... (or any combo of)
%%% if you want specific surrogates, not including previously numbered
%%% surrogates, change kz,kb,... below so ka is the first surrogate you
%%% want (e.g. if you want to look at wave groups of 2,3, and 4 waves, set
%%% nSP = 3, ka=2,kb=3,kc=4)

% right now defined to consider all 6 surrogate processes
ka = 1; kb = 2; kc = 3; kd = 4; ke = 5; kf = 6;

a = DLGstats.ziHAT(:,(ka+1))>0;
b = DLGstats.ziHAT(:,(kb+1))>0;
c = DLGstats.ziHAT(:,(kc+1))>0;
d = DLGstats.ziHAT(:,(kd+1))>0;
e = DLGstats.ziHAT(:,(ke+1))>0;
f = DLGstats.ziHAT(:,(kf+1))>0;

ziHOLD = [a, b, c, d, e, f];
loc = [1 1000; 1001 2000; 2001 3000; 3001 4000; 4001 5000; 5001 6000; 6001 7000; 7001 8000; 8001 9000; 9001 10000];
loc = [loc(ka,:); loc(kb,:); loc(kc,:); loc(kd,:); loc(ke,:); loc(kf,:)];

Pstats = failureProbALL(DLGstats,nSP,ziHOLD,loc);

nsim = 1000; %num of DLG TS for each surrogate process
allSP = 1:nSP;
[Bn, ~] = Bell(nSP);

%% Initial struct set-up
%%% create a struct to hold maxima configuration probabilities, then later
%%% hold indices of DLG time series which fit into the configurations
counter = 1;
for k = 1:nSP
    list = SetPartition(nSP,k);
    num = length(list); %number of configurations given the # of groups

    %%% go through all configurations given group number k
    for h = 1:num
        configurationNAMEp = '';
        %%% index through k parts of group
        for g = 1:k
            members = list{h,1}{1,g};
            numMembers = length(members);

            %%% put in indices of DLG TS which fit maxima config. criteria
            nameHOLDonP = 'z';
            for ii = 1:numMembers
                nameHOLDonP = strcat(nameHOLDonP,num2str(members(ii)));
            end           

            nameStrP = strcat(nameHOLDonP);
            configurationNAMEp = strcat(configurationNAMEp,nameStrP);
        end
        
        newField = configurationNAMEp;
        newFieldp = strcat('p',configurationNAMEp);
        
        ffInfo.(newField).(newFieldp) = Pstats.pConfig(counter);
        counter = counter + 1;
    end
end

%% another struct for later to hold total probabilities for similar maxima 
% groups in different maxima configs so we don't repeat DLG TS
counter = 1;
for k = 1:nSP
    list = SetPartition(nSP,k);
    num = length(list); %number of configurations given the # of groups

    %%% go through all configurations given group number k
    for h = 1:num
        %%% index through k parts of group
        for g = 1:k
            members = list{h,1}{1,g};
            numMembers = length(members);

            %%% put in indices of DLG TS which fit maxima config. criteria
            nameHOLDonP = 'z';
            for ii = 1:numMembers
                nameHOLDonP = strcat(nameHOLDonP,num2str(members(ii)));
            end           

            nameStr44 = strcat('probCount',nameHOLDonP);
            probCount.(nameStr44) = 0;
        end
        counter = counter + 1;
    end
end

%% populate struct with indices of DLG 
fields = fieldnames(ffInfo);
counter = 1;

%%% now go through 1-n groups for maxima configurations
for k = 1:nSP
    list = SetPartition(nSP,k);
    num = length(list); %number of configurations given the # of groups

    %%% go through all configurations given group number k
    for h = 1:num
        %%% index through k parts of group
        for g = 1:k
            members = list{h,1}{1,g};
            numMembers = length(members);

            %%% to concatenate all ziHAT of same "type" i.e. z123,
            %%% z213, z312 are all the "same"
            ziHOLDconcat = zeros(1,length(members)+1);
            nameHOLDon = 'z';

            for iii = 1:numMembers % k members of maxima config.   
                range = (loc(members(iii),1):loc(members(iii),2))'; 
                part = members(ismember(members,allSP));
                notPart = allSP(not(ismember(allSP,members)));
                partMat = ziHOLD(range,part);

                %%% which time series have all ziHAT in group
                mult = ones(nsim,1);
                    for y = 1:numMembers
                        mult = mult .* partMat(:,y);
                    end

                %%% minus any ziHAT not part of group
                if isempty(notPart) == 0
                    notPartMat = ziHOLD(range,notPart);

                    for z = 1:length(notPart)
                        mult = mult - notPartMat(:,z);
                    end    
                end

                %%% find time series which satisfy ziHAT condition-
                %%% 1 or 0
                haveZiHat1 = mult;
                haveZiHat1(haveZiHat1<0)=0;

                %%% find p(safety|config & design) for ziHAT 

                %%% find time series which satisfy ziHAT condition-
                %%% keep time series marker to compare with ff
                %%% also keep ziHAT value for when we do histogram later to
                %%% choose TS based on prob of occurrence
    
                haveZiHat = mult .* DLGstats.ziHAT(range,[1,members+1]);
                haveZiHat(haveZiHat<0)=0;
                haveZiHat(all(haveZiHat==0,2),:)=[];

                %%% concatenate all ziHAT of same "type"                 
                ziHOLDconcat = [ziHOLDconcat; haveZiHat];
                nameHOLDon = strcat(nameHOLDon,num2str(members(iii)));
            end

            %%% get rid of original placeholder (first row)
            ziHOLDconcat = ziHOLDconcat(2:end,:);
            nameStr = strcat(nameHOLDon);
            ffInfo.(fields{counter}).(nameStr) = ziHOLDconcat;
            
            %%% put TS into a different struct to use later 
            TSinfo.(strcat('TS',nameHOLDon)) = ziHOLDconcat;
            
            fNamesInside = fieldnames(ffInfo.(fields{counter}));
            pConfigCount = ffInfo.(fields{counter}).(fNamesInside{1});
            
            probCount.(strcat('probCount',nameHOLDon)) = probCount.(strcat('probCount',nameHOLDon)) + pConfigCount;
        end
        counter = counter + 1;
    end
end

%% now find the max number of MCS we can approximate without repeating TS

%%% now get number of MCS we are trying to replicate i.e. what would most-
%%% likely failure distribution look like if we did 'numTS' # of MCS

%%% also want to make sure that for maxima categories that are in more
%%% than 1 config, we don't repeat TS
%%% use struct probCount

count = 1;
innercount = 1;
fieldsPC = fieldnames(probCount);
gMin = zeros(length(fieldsPC),1);

%%% num TS based on maxima configuration probabilities
for k = 1:nSP
    list = SetPartition(nSP,k);
    num = length(list); %number of configurations given the # of groups
    for e = 1:num
        for h = 1:k %clusters in maxima configuration
            fieldsIN = fieldnames(ffInfo.(fields{count}));
            str = strcat('probCount',fieldsIN(h+1));
            pc = probCount.(str{:});
            
            if pc == 0
                gMin(nSP) = 0;
            else
                minimum = 100000; % artifically high for comparison
                val = length(ffInfo.(fields{count}).(fieldsIN{h+1}))/pc;
                    if val < minimum
                        minimum = val;
                    end 
                gMin(innercount) = minimum;
            end
            innercount = innercount + 1;
        end
        count = count + 1;
    end 
end
gMin(all(gMin==0,2),:)=[];

%%% number of MCS we can approximate to satisfy p(config) probabilities
%%% without repeating trials of DLG TS
numT = floor(min(gMin));

%% now get a list of DLG TS for each config which will approximate the
%%% results of "numTS" MCS

%%% use struct TSinfo so we don't repeat DLG TS over different maxima
%%% configurations that share the same maxima group type
masterList = [];
addedN = 0;

for nSP = 1:Bn
    fieldsIN = fieldnames(ffInfo.(fields{nSP}));
    pc = ffInfo.(fields{nSP}).(fieldsIN{1});
    fracInv = 1;    
    if pc == 0
        %%% move on
    else
        %%% pick DLG TS
        %%% num of TS to choose from each category within a maxima config.
        
        num = floor(numT*pc/fracInv);
        for k = 1:length(fieldsIN)-1 %1st field holds prob config, next fields hold clusters
           if num > 1 
            TSname = strcat('TS',fieldsIN{1+k});
            TS = TSinfo.(TSname)(:,1); % DLG TS indices; length(TS) >= num
            ziVal = TSinfo.(TSname)(:,2:end);           
            numZ = size(ziVal,2); % dim of our hist
            orderHist = [];
            
            %%% may need to adjust histogram bin width depending on the
            %%% number of surrogate processes, n
            %%% want this bin size to be small, but for large n this leads
            %%% to too big a histogram, and matlab cannot handle it :(
            
            binW = 0.5;
            
            if numZ == 1
                minVal = min(TSinfo.(TSname)(:,2));
                maxVal = max(TSinfo.(TSname)(:,2));
                edges = {minVal-binW:binW:maxVal+binW};
            else
                minVal = min(TSinfo.(TSname)(:,2));
                maxVal = max(TSinfo.(TSname)(:,2));
                edges = {minVal-binW:binW:maxVal+binW};            
                for h = 2:numZ
                    minVal = min(TSinfo.(TSname)(:,1+h));
                    maxVal = max(TSinfo.(TSname)(:,1+h));
                    edges{end+1} = minVal-binW:binW:maxVal+binW;
                end
                  
            end
           
            [ NN, bin ] = histcnd(ziVal,edges);
            [maxHit, ~] = max(NN(:));

            if size(bin,2) == 1
                bin = [ones(size(bin)) bin];
            end
            
            for m = maxHit:-1:1
                %%% find bins with 'm' number of occurrences
                hits = find(NN == m);

                hitsVec = hits(:);
                if max(hitsVec) > 0
                    %%% get locations of hits
                    if size(bin,2) == 2 
                        [row, col] = ind2sub(size(NN),hits);
                        if size(row,2) > 1
                            row = row';
                        end
                        
                        if  size(col,2) > 1   
                            col = col';
                        end

                        locations = [row col];     
                    else
                        %%% need to account for n-dimensions!
                        add = size(bin,2);
                        mycell = cell(1,add);

                        [mycell{1:add}] = ind2sub(size(NN),hits);
                        rowsCell = size(mycell{1},1);
                        locations = zeros(rowsCell,add);

                        for z = 1:add
                            locations(:,z) = mycell{z};
                        end 
                    end
                    
                    numSpots = size(locations,1);
                    for p = 1:numSpots
                        bin2 = bin - locations(p,:);
                        %%% row indices of TS which fit this bin
                        orderHist = [orderHist; TS(find(all(bin2==0,2)))];
                    end
                else
                    %%% do nothing
                end
            end
            nameSTR1 = strcat('chosen',fieldsIN{1+k});
            ffInfo.(fields{nSP}).(nameSTR1) = orderHist(1:num);
            chosenInfo.(fields{nSP}).(nameSTR1) = orderHist(1:num);
            masterList = [masterList; orderHist(1:num)];
            
            %%% we don't want to repeat TS
            TSinfo.(TSname) = TSinfo.(TSname)(num+1:end,:);
            addedN = addedN + num;
           end
        end   
    end
end

masterList = sort(masterList, 'ascend');

%% function to get histogram counts of n-dim ziHAT time series
    function [ n, bin ] = histcnd( x, edges )
        %HISTCND Histogram count for n dimensional data.
        % N = HISTCND(X,EDGES), for row vectors X, counts the number of values in
        % X that fall between the grid defined by the cell array of EDGES, each
        % of whose element is a vector that contain monotonically non-decreasing
        % values. N is an N-D array each of whose dimension corresponds to
        % LENGTH(EDGES{j}) and each element contains a count of data that falls
        % into the edge.
        %
        % EDGES must have the same length to the number of columns of X.
        % Alternatively, EDGES can be a numeric vector which gives a uniform
        % grid for all dimensions of X.
        % 
        % N(k1,k2,...) will count the vector X(i,:) if for each dimension
        % j = 1,2,..., EDGES{j}(kj) <= X(i,j) < EDGES{j}(kj+1). The last bin will
        % count any values of X that match EDGES(end).  Values outside the values
        % in EDGES are not counted. Use -inf and inf in EDGES to include all
        % non-NaN values.
        % 
        % [N,BIN] = HISTCND(X,EDGES) also returns subscript indices BIN.
        % BIN is zero for out of range values.
        % 
        % Example:
        %     >> X = randn(100,2);          % 100-by-2 row vectors
        %     >> edges = {-2:.4:2,-2:.5:2}; % ranges for each dimension
        %     >> histcnd(X,edges)
        % 
        %     ans =
        % 
        %          0     0     0     1     1     1     0     0     0
        %          0     1     1     2     1     1     0     0     0
        %          1     0     3     4     0     3     0     0     0
        %          0     1     2     1     3     0     1     0     0
        %          0     3     1     4     2     1     3     1     0
        %          1     1     2     3     3     4     1     0     0
        %          0     1     1     2     1     1     4     0     0
        %          0     1     2     2     2     1     0     0     0
        %          1     2     0     3     2     0     0     1     0
        %          0     1     1     0     0     0     0     0     0
        %          0     0     0     0     0     0     0     0     0
        % 
        % Class support for inputs X:
        %    float: double, single
        % 
        % See also histc.
        % Validation
        if ~isnumeric(x)
            error('Input array must be numeric.');
        end
        if ~iscell(edges)
            if isnumeric(edges) && isvector(edges)
                if isempty(x)
                    n = [];
                    bin = [];
                    return;
                end
                tmp = cell(1,size(x,2));
                tmp(:) = {edges};
                edges = tmp;
            else
                error('Edges must be cell array.');
            end
        end

        if isempty(x)
            x = reshape(x,[0 length(edges)]);
        end

        if length(edges)~=size(x,2)
            error('Invalid cell array.');
        end

        % Compute dimension of the output histogram
        dims = cellfun(@(c) length(c), edges(:)');
        if length(dims)==1
            dims = [1 dims];
        end
        % Quantize input along each dimension
        bin = cell(1,size(x,2));
        outlier = false(size(x,1),1);
        for i = 1:size(x,2)
            [tmp,bin{i}] = histc(x(:,i),edges{i});
            outlier = outlier | (bin{i}==0);
        end

        % Remove outlier before computing a linear index
        ind = bin;
        for i = 1:size(x,2)
            ind{i} = ind{i}(~outlier);
        end
        % Compute count of the index
        n = histc(sub2ind(dims,ind{:}),1:prod(dims));
        % Format the output
        n = reshape(n,dims);
        bin = cat(2,bin{:});
    end
end

