function out = bootstrap2groups (FuncIn, gp1, gp2, iterate) % % HELPFILE for 'bootstrap2groups.m' % % function out = bootstrap2groups (FuncIn, gp1, gp2, iterate) % % 'FuncIn' is the function used to calculate desired descriptor and % must be called with function handle or in single quotes % at command line e.g. bootstrap2groups (@mean, gp1, gp2) or % bootstrap2groups ('mean', gp1, gp2) % % 'gp1' and 'gp2' are 2 independent (unpaired) datasets and should % have the same dimensionality, ideally both are column vectors % % 'iterate' is scalar defining number of resampling iterations, % defaults to 10000 % % returns struct 'out' containing the following fields: % gp1desc: value of descriptor calculated by applying 'FuncIn' % to gp1 % % gp2desc: value of descriptor calculated by applying 'FuncIn' % to gp2 % % testStat: test statistic, defined here as % testStat = gp1desc - gp2desc % % distMean: mean of pseudo test stat distribution obtained from % resampling % % reflectLeft: value of testStat when reflected across distMean % OR % reflectRight % % pvalRight: number of pseudo test stats greater than positive % test stat, divided by number of resampling iterations % % pvalLeft: number of pseudo test stats less than negative % test stat, divided by number of resampling iterations % % pval: pval = pvalRight + pvalLeft % % groups: original data stored in m x 2 array, where m is size % of groups (if group sizes are equal), or size of % larger of the two groups (if group sizes are unequal) % --- CHECK INPUTS --- % First, check if user entered data, if yes % check that data are stored in vectors. % Then, check that data are column vectors, if not, transpose if nargin >= 3 gp1size = size(gp1); gp2size = size(gp2); if ~isvector(gp1) || ~isvector(gp2) error('Datasets should be vectors!') end if gp1size(1)-gp1size(2) ~= gp1size(1)-1 warning('gp1 was not a column vector') %#ok gp1 = gp1'; end if gp2size(1)-gp2size(2) ~= gp2size(1)-1 warning('gp2 was not a column vector') %#ok gp2 = gp2'; end end % if number of iterations is not defined, default to 10000 if nargin < 4, iterate = 10000; end % if user did not enter any data... % randomly generate groups for demo purposes if nargin < 3, gp2 = 5+randn(25,1); end if nargin < 2, gp1 = 5.5+randn(25,1); end % --- DONE CHECKING INPUT, DEFINE TERMS FOR RESAMPLING --- % calculate data descriptors, store in ouptut structure out.gp1desc = feval(FuncIn,gp1); out.gp2desc = feval(FuncIn,gp2); % calculate actual test stat, store in ouptut structure out.testStat = out.gp1desc - out.gp2desc; % combine data for resampling box = [gp1; gp2]; % used to be concat(gp1,gp2); % initialize vector to hold pseudo test stats pseudoStatDist = zeros(1,iterate); % --- DO RESAMPLING --- for trials = 1:iterate % generate pseudo groups pseudo_gp1 = sample(length(gp1), box); pseudo_gp2 = sample(length(gp2), box); % calculate pseudo group descriptors pgp1desc = feval(FuncIn, pseudo_gp1); pgp2desc = feval(FuncIn, pseudo_gp2); % calculate pseudo test stat and store it in vector pseudoStat = pgp1desc - pgp2desc; pseudoStatDist(trials) = pseudoStat; end % --- DONE RESAMPLING, CALCULATE P-VALS --- % calculate p-values, store in output structure out.distMean = mean(pseudoStatDist); % mean of pseudo test stats % If actual test stat is positive (or 0) reflect it to the left % back over the mean of pseudo test stat distribution. % This way we get p-vals in each tail, and can add them together if out.testStat > 0 out.reflectLeft = out.distMean - (out.testStat - out.distMean); out.pvalRight = proportion(pseudoStatDist > out.testStat); out.pvalLeft = proportion(pseudoStatDist < out.reflectLeft); out.pval = out.pvalRight + out.pvalLeft; % If actual test stat is negative, reflect it to the right over the % mean of pseudo test stat distribution else out.reflectRight = out.distMean - (out.testStat - out.distMean); out.pvalLeft = proportion(pseudoStatDist < out.testStat); out.pvalRight = proportion(pseudoStatDist > out.reflectRight); out.pval = out.pvalRight + out.pvalLeft; end % --- ARRANGE RAW DATA TO PUT IN OUTPUT STRUCT AND GRAPH --- % Check if groups have different number of datapoints % If yes, fill smaller one with NaNs so they can be stored % together in 'out.groups' % Do this here (rather than in beginning) so as not to include NaNs % in the resampling. Purely cosmetic anyway, just to store original % data in output struct and plot. if length(gp1) ~= length(gp2) sizeDiff = abs(length(gp1) - length(gp2)); fill = NaN(sizeDiff, 1); if length(gp1) > length(gp2), gp2 = [gp2; fill]; else gp1 = [gp1; fill]; end end out.groups = [gp1 gp2]; % --- GRAPHIC OUTPUT --- % Plot histogram of pseudo test stat distribution and show boxplot figure subplot(1,2,1), hist(pseudoStatDist), title('pseudoStatDist') subplot(1,2,2), boxplot(out.groups), title('gp1 vs. gp2')