%% Fig. 1.1. Illustration of data fitting with increasingly complex polynomial models
% Fits polynomials of order 0, 1, 3 and 10 to the same 11 data points and
% draws each fitted curve, together with the data, in its own panel.
clc
close all
clear variables

xData   = 0:.1:1;
yData   = [0 -.6 -.9 -.9 -.4 0 .1 .6 .7 .6 -.2];
xGrid   = -2:.01:2;                        % grid on which the fits are evaluated
orders  = [0 1 3 10];                      % polynomial orders, one per panel
nOrders = numel(orders);
yFit    = NaN(nOrders, numel(xGrid));      % one fitted curve per row

figure('Name','Figure 1.1','NumberTitle','off')
for k = 1:nOrders
    coeffs    = polyfit(xData, yData, orders(k));
    yFit(k,:) = polyval(coeffs, xGrid);

    ax = subplot(2,2,k);
    plot(xGrid, yFit(k,:), 'Linewidth',2)
    hold on
    plot(xData, yData, 'ko','Markerfacecolor','k')
    set(ax, 'xtick',0:.2:1, 'ytick',-1:.5:1, ...
        'xlim',[-.05 1.05], 'ylim',[-1.05 1.05], 'FontSize',18)
    xlabel('\itx')
    ylabel('\ity')

    lgd = legend(['\rmOrder = ' num2str(orders(k))],'location','northwest');
    set(lgd,'color','none')
    set(ax,'color', 'None')
end

%% Fig. 2.2. Within-subject versus between-subjects design
% Simulates N paired scores: standard-normal scores for the "off" condition
% and scores for the "on" condition built as r*off + sqrt(1-r^2)*noise.
% Both are rescaled by 10 and shifted to 100 (off) and 97 (on). The same data
% are shown as a paired scatter plot (left) and as two box plots (right).
clear variables
rng('default')

N = 30;
r = 0.9;
zOff = randn(N,1);
zOn  = r*zOff + sqrt(1-r^2)*randn(N,1);
Xoff = zOff*10 + 100;
Xon  = zOn*10 + 97;

% Descriptive statistics of the simulated sample
meanOff = mean(Xoff);
meanOn  = mean(Xon);
sdOff   = std(Xoff);
sdOn    = std(Xon);
rObs    = corr(Xoff,Xon);
nSlower = sum(Xon<Xoff);
disp(['Mean off = ' sprintf('%6.2f',meanOff)])
disp(['Mean on = ' sprintf('%6.2f',meanOn)])
disp(['SD off = ' sprintf('%6.2f',sdOff)])
disp(['SD on = ' sprintf('%6.2f',sdOn)])
disp(['r = ' sprintf('%5.2f',rObs)])
disp(['Number of datapoints for which Off > On = ' sprintf('%1.0f',nSlower)])

figure('Name','Figure 2.2','NumberTitle','off')

% Left panel: paired observations with the line of equality
axWithin = subplot(1,2,1);
hold on
title('\rmWithin-subject design')
plot(Xoff, Xon, 'ko','markerfacecolor','k','Markersize',10);
plot([70 130], [70 130], '--','Linewidth',3)
set(axWithin, 'xtick',0:10:200, 'ytick',0:10:200, 'xlim',[70 130], 'ylim',[70 130])
xlabel('Speed with system off (km/h)')
ylabel('Speed with system on (km/h)')
box on
set(axWithin,'color', 'None')

% Right panel: the same scores as two separate box plots
axBetween = subplot(1,2,2);
hold on
title('\rmBetween-subjects design')
boxLines = boxplot([Xoff Xon],'color','k');
set(boxLines,'LineWidth',1,'Markersize',12);
set(axBetween, 'Xtick',1:2, 'ylim',[70 130], 'Xticklabel',{'System off' 'System on'})
ylabel('Speed (km/h)')
set(axBetween,'color', 'None')

% Uniform font size for every text-bearing object in the figure
fig = gcf;
textObjs = findall(fig,'-property','FontSize');
set(textObjs,'FontSize',20)

%% Fig. 2.5. Weight judgement: Wisdom of the crowd (Gordon, 1924; Eysenck, 1939)
% Plots the observed correlations with the true weights for groups of
% n1 participants, together with the prediction
%   R(n) = sqrt(n*r / (1 + (n-1)*r)),
% where r is the squared correlation observed for a single participant
% (so that R(1) equals robs(1) = .41).
clear variables
n1=[1 5 10 20 50];              % group sizes for which a correlation was observed
robs=[.41 .68 .79 .86 .94];     % observed correlation for each group size in n1
r=robs(1)^2;                    % squared single-participant correlation (.41^2)
n2=1:200;                       % group sizes for the predicted curve
R=sqrt((n2.*r)./(1+(n2-1).*r));
figure('Name','Figure 2.5','NumberTitle','off');hold on
plot(n1,robs,'ko','Markersize',14,'Markerfacecolor','k')
plot(n2,R,'Linewidth',2)
set(gca,'color', 'None');grid on;box on
set(gca,'xlim',[0 100],'ylim',[0 1])
% Legend text fixed: the original label contained a stray hyphen ("correla-tion")
legend('Observed correlation','Predicted correlation','location','southeast')
xlabel('Number of participants per group');
ylabel('Correlation with true weights')
fig=gcf;set(findall(fig,'-property','FontSize'),'FontSize',20)

%% Fig. 3.1. Probability density functions for four different distributions
% Main axes: the four densities named in the legend. A second, smaller axes
% repeats the curves zoomed in on x = 3..5 (the region marked by the
% rectangle in the main axes).
clear variables
% Root-level legend default; objects drawn after legend() is called (the
% rectangle and connector line below) should not become legend entries.
set(0,'DefaultLegendAutoUpdate','off');

x          = -50:.001:50;
pdfT5      = tpdf(x,5);
pdfNormal  = tpdf(x,inf);               % labelled "Normal" in the legend
pdfExp     = exppdf(x,1);
tScale     = sqrt(5/3);                 % rescales the t curve (legend: variance 5/3 -> 1)
orange     = [255 165 0]/255;
tightInset = [0.01 0.01 0.01 0.01];

% ---- Main axes ----
figure('Name','Figure 3.1','NumberTitle','off');
hold on
set(gca, 'LooseInset', tightInset);
plot(x, pdfNormal, 'k','Linewidth',2)
plot(x, pdfT5, 'g','Linewidth',2)
plot(x./tScale, pdfT5.*tScale, '-','color',orange,'Linewidth',2)
plot(x, pdfExp, 'm:','Linewidth',2)
lgd = legend('(1) Normal : variance = 1, skewness = 0, kurtosis = 3','(2) \it{t}\rm : variance = 5/3, skewness = 0, kurtosis = 9','(3) \it{t}\rm (scaled) : variance = 1, skewness = 0, kurtosis = 9','(4) Exponential : variance = 1, skewness = 2, kurtosis = 9','location','northeast','orientation','vertical');
set(lgd,'color','none')
xlabel('Value')
ylabel('Density')
set(gca,'color', 'None')
set(gca, 'xlim',[-5 10], 'ylim',[0 1.01], 'xtick',-10:1:10, 'FontSize',24);

% Mark the zoomed region and draw a connector line from it
zoomBox = rectangle('position',[3 0 2 .06],'facecolor','none','Linewidth',1);
pan on
set(zoomBox,'Clipping','off')
plot([3.7 4], [.06 .23], 'k-','Linewidth',1)

% ---- Inset axes (becomes the current axes) ----
ah = axes('position',[.5465 .30 .35 .35]);
hold on
box on
plot(x, pdfT5, 'g','Linewidth',2)
plot(x./tScale, pdfT5.*tScale, '-','color',orange,'Linewidth',2)
plot(x, pdfNormal, 'k','Linewidth',2)
plot(x, pdfExp, 'm:','Linewidth',2)

% Set every font in the figure to 20 first, then shrink the inset to 16
fig = gcf;
set(findall(fig,'-property','FontSize'),'FontSize',20)
set(gca, 'xlim',[3 5], 'ylim',[0 .06], 'FontSize',16, 'color','none')
set(gca, 'LooseInset', tightInset);

%% Fig. 3.2. Illustration of correlations
% One panel per value in RR: N points are generated as
% y = R*x + sqrt(1-R^2)*noise with x and noise standard normal, and are
% shown together with the dashed line y = R*x.
clear variables
rng('default')
% Root-level legend default; the scatter drawn after legend() should not
% become a legend entry.
set(0,'DefaultLegendAutoUpdate','off');

N  = 1000;
RR = [0 .2 .4 .6 .8 .9];

figure('Name','Figure 3.2','NumberTitle','off');
hold on
for k = 1:length(RR)
    rho = RR(k);

    subplot(2,3,k);
    hold on
    set(gca,'color','none')

    % Dashed reference line; the legend is created while it is the only plot
    plot([-10 10], [-10 10]*rho, 'm--','Linewidth',2)
    lgd = legend(['\rm\it{R}\rm = ' num2str(rho)],'location','southeast');
    set(lgd,'color','none')

    % Simulated sample (x is drawn before the noise term of y)
    x = randn(N,1);
    y = rho*x + sqrt((1-rho^2))*randn(N,1);
    plot(x, y, 'ko');

    xlabel('\itx')
    ylabel('\ity')
    axis equal
    set(gca, 'xlim',[-5 5], 'ylim',[-5 5], 'xtick',[-5 0 5], 'ytick',[-5 0 5])
    box on
end

fig = gcf;
set(findall(fig,'-property','FontSize'),'FontSize',18)

%% Fig. 3.3. Anscombe's quartet
% Plots four (x, y) data sets in a 2-by-2 grid and prints their column
% means, variances and x-y correlations.
% Columns of d are x/y pairs: [x1 y1 x2 y2 x3 y3 x4 y4].
clear variables
d=[10.0     8.04    10.0    9.14    10.0     7.46     8.0   6.58
    8.0     6.95     8.0    8.14     8.0     6.77     8.0   5.76
    13.0     7.58    13.0    8.74    13.0     12.74    8.0   7.71
    9.0     8.81     9.0    8.77     9.0     7.11     8.0   8.84
    11.0     8.33    11.0    9.26    11.0     7.81     8.0   8.47
    14.0     9.96    14.0    8.10    14.0     8.84     8.0   7.04
    6.0     7.24     6.0    6.13     6.0     6.08     8.0   5.25
    4.0     4.26     4.0    3.10     4.0     5.39    19.0  12.50
    12.0    10.84    12.0    9.13    12.0     8.15     8.0   5.56
    7.0     4.82     7.0    7.26     7.0     6.42     8.0   7.91
    5.0     5.68     5.0    4.74     5.0     5.73     8.0   6.89];
figure('Name','Figure 3.3','NumberTitle','off');hold on
for i=1:4
    subplot(2,2,i)
    plot(d(:,i*2-1),d(:,i*2),'ko','Markersize',10,'Markerfacecolor','k')
    xlabel('\itx');ylabel('\ity');
    % The lower y limit is 2 (not 4): the second data set contains the point
    % (4, 3.10), which a lower limit of 4 would cut out of the panel.
    set(gca,'xlim',[3 20],'ylim',[2 14]);
    set(gca,'color','none')
end
% Summary statistics: first row of each pair is for the x columns, second for the y columns
disp('Means')
fprintf('%8.3f',mean(d(:,1:2:end)));fprintf('\n')
fprintf('%8.3f',mean(d(:,2:2:end)));fprintf('\n')
disp('Variances')
fprintf('%8.3f',var(d(:,1:2:end)));fprintf('\n')
fprintf('%8.3f',var(d(:,2:2:end)));fprintf('\n')
disp('Correlations')
fprintf('%8.3f',[corr(d(:,1),d(:,2)) corr(d(:,3),d(:,4)) corr(d(:,5),d(:,6)) corr(d(:,7),d(:,8))]);fprintf('\n')
fig=gcf;set(findall(fig,'-property','FontSize'),'FontSize',24)
set(gca, 'LooseInset', [0.01 0.01 0.01 0.01]);

%% Fig. 3.4. Central limit theorem - normal distribution
% For each sample size in nn, draws reps samples of standard-normal values,
% computes the sample means, and plots their empirical density. Also prints
% 1/sqrt(n) next to the observed standard deviation of the sample mean.
clear variables;rng('default')
reps=10^6;nn=[1 2 5 20 50];V=-100:0.01:100;SE=NaN(length(nn),1);
dV=mean(diff(V)); % bin width (V holds the left bin edges)
figure('Name','Figure 3.4','NumberTitle','off');
for i=1:length(nn) % loop over 5 sample sizes
    M=mean(randn(nn(i),reps),1); % sample mean (vector of length reps)
    D=histc(M,V); % D(k) counts the means with V(k) <= M < V(k+1)
    Dnorm=D./sum(D)/dV; % counts -> density (integrates to 1)
    SE(i)=std(M);
    % Plot each count at its bin centre. The original used V+dV (the right
    % bin edge), which shifted every curve half a bin to the right.
    plot(V+dV/2,Dnorm,'-o','linewidth',2);hold on
end
h=legend('\it{n}\rm = 1','\it{n}\rm = 2','\it{n}\rm = 5','\it{n}\rm = 20','\it{n}\rm = 50');
set(h,'color','none')
set(gca,'xlim',[-3 3])
xlabel('Sample mean')
ylabel('Density')
set(gca,'color','None')
fig=gcf;set(findall(fig,'-property','FontSize'),'FontSize',24)
set(gca, 'LooseInset', [0.01 0.01 0.01 0.01]);
% One value per line: the format is reused for every element of the vector
disp('1/sqrt(n)')
fprintf('%8.3f\n',1./sqrt(nn));
disp('Standard deviation of the sample mean')
fprintf('%8.3f\n',SE);

%% Fig. 3.5. Central limit theorem - exponential distribution
% For each sample size in nn, draws reps samples from exprnd(1,...),
% computes the sample means, and plots their empirical density. Also prints
% the observed standard deviation of the sample mean for each sample size.
clear variables;rng('default')
reps=10^6;nn=[1 2 5 20 50];V=-100:0.01:100;SE=NaN(length(nn),1);
dV=mean(diff(V)); % bin width (V holds the left bin edges)
figure('Name','Figure 3.5','NumberTitle','off')
for i=1:length(nn)
    M=mean(exprnd(1,nn(i),reps),1); % sample mean (vector of length reps)
    D=histc(M,V); % D(k) counts the means with V(k) <= M < V(k+1)
    Dnorm=D./sum(D)/dV; % counts -> density (integrates to 1)
    SE(i)=std(M);
    % Plot each count at its bin centre. The original used V+dV (the right
    % bin edge), which shifted every curve half a bin to the right.
    plot(V+dV/2,Dnorm,'-o','linewidth',2);hold on
end
h=legend('\it{n}\rm = 1','\it{n}\rm = 2','\it{n}\rm = 5','\it{n}\rm = 20','\it{n}\rm = 50');
set(h,'color','none')
set(gca,'xlim',[-.1 3])
xlabel('Sample mean')
ylabel('Density')
fig=gcf;set(findall(fig,'-property','FontSize'),'FontSize',24)
set(gca,'color','none','looseInset', [0.01 0.01 0.01 0.01])
% One value per line: the format is reused for every element of the vector
disp('Standard deviation of the sample mean')
fprintf('%8.3f\n',SE);

%% Fig. 3.7. Probability density function of population distribution of males and females
% Two normal densities with the same standard deviation (7.1) and means of
% 182.5 (males) and 168.7 (females), evaluated on a height grid in cm.
clear variables

heightGrid  = 0:.1:300;
sdHeight    = 7.1;
pdfMales    = normpdf(heightGrid, 182.5, sdHeight);
pdfFemales  = normpdf(heightGrid, 168.7, sdHeight);
femaleColor = [216 82 24]./255;

figure('Name','Figure 3.7','NumberTitle','off');
hold on
plot(heightGrid, pdfMales, 'Linewidth',3)
plot(heightGrid, pdfFemales, '--','color',femaleColor,'Linewidth',3)
box on
xlabel('Height (cm)')
ylabel('Density')
set(gca, 'xlim',[130 230], 'FontSize',24)

lgd = legend('Males','Females');
set(lgd,'color','none')
set(gca, 'color','none', 'looseInset',[0.01 0.01 0.01 0.01])

%% Figs. 3.8 & 3.9. p values for two normal distributions with unequal means, and equal means, respectively
% For each per-group sample size in nn, repeats reps times: draw one sample
% with mean 182.5 and two comparison samples (mean 168.7 and mean 182.5, all
% with SD 7.1) and store the two-sample t-test p values.
%   pu(:,i) - p values for unequal population means, sample size nn(i)
%   pe(:,i) - p values for equal population means,   sample size nn(i)
% Figure 3.8 shows the sorted pu for all sample sizes; Figure 3.9 shows the
% sorted pe for the largest sample size only.
clear variables;rng('default')
reps=10000;nn=[3 6 10];
% One column per sample size. The original used length(nn(1)), which is 1,
% so the arrays had to grow inside the loop.
pu=NaN(reps,length(nn));
pe=NaN(reps,length(nn));
for i=1:length(nn)
    n=nn(i);
    for i2=1:reps
        % Progress report every 1000 tests (instead of one line per test)
        if mod(i2,1000)==0;fprintf('n = %d: %d of %d tests completed\n',n,i2,reps);end
        height_pollm=randn(n,1)*7.1+182.5;
        height_pollw=randn(n,1)*7.1+168.7;
        height_pollw2=randn(n,1)*7.1+182.5; % now assume that men and women have equal height
        [~,pu(i2,i)]=ttest2(height_pollm,height_pollw);
        [~,pe(i2,i)]=ttest2(height_pollm,height_pollw2);
    end
end
figure('Name','Figure 3.8','NumberTitle','off');hold on
plot(sort(pu),'o','Linewidth',2,'Markersize',4);hold on % sort() orders each column separately
plot([0 reps],[.05 .05], 'k--','Linewidth',2)
h=legend('{\itn_m_e_n} = {\itn_w_o_m_e_n} = 3','{\itn_m_e_n} = {\itn_w_o_m_e_n} = 6', '{\itn_m_e_n} = {\itn_w_o_m_e_n} = 10','location','northwest');
set(h,'color','none')
xlabel('Test number (sorted on {\itp} value)')
ylabel('{\itp} value')
box on
fig=gcf;set(findall(fig,'-property','FontSize'),'FontSize',20)
set(gca,'color','none','looseInset', [0.01 0.01 0.01 0.01])

figure('Name','Figure 3.9','NumberTitle','off');hold on
plot(sort(pe(:,3)),'o','Linewidth',2,'Markersize',4)
plot([0 reps],[.05 .05], 'k--','Linewidth',2)
xlabel('Test number (sorted on {\itp} value)')
ylabel('{\itp} value')
box on
fig=gcf;set(findall(fig,'-property','FontSize'),'FontSize',20)
set(gca,'color','none','looseInset', [0.01 0.01 0.01 0.01])

%% Fig. 3.10. p values in original studies and replication studies in the Open Science Collaboration Project
% Reads two columns (DH and DT, rows 2-168) from RPPdataConverted.xlsx and
% plots them against each other as original versus replication p values,
% with dashed reference lines at p = .05 on both axes.
clear variables

dataFile = 'RPPdataConverted.xlsx';
pO = xlsread(dataFile,'DH2:DH168');   % plotted as original-study p values
pR = xlsread(dataFile,'DT2:DT168');   % plotted as replication-study p values
alphaLevel = .05;

figure('Name','Figure 3.10','NumberTitle','off');
hold on
plot(pO, pR, 'kx','Linewidth',2)
plot([0 1], [alphaLevel alphaLevel], 'm--','Linewidth',2)
plot([0.05 0.05], [0 1], 'm--','Linewidth',2)
set(gca, 'xlim',[0 0.06], 'ylim',[0 1])
box on
xlabel('Original study {\itp} value')
ylabel('Replication study {\itp} value')

fig = gcf;
set(findall(fig,'-property','FontSize'),'FontSize',24)
set(gca, 'color','none', 'looseInset',[0.01 0.01 0.01 0.01])

%% Fig. 3.12. p-hacking. This simulation takes a few minutes to complete
% Repeats reps times: draw two independent standard-normal samples of size n
% and run a two-sample t test. If that p value exceeds .05 but the rank-sum
% test on the same data gives p < .05, the rank-sum p value is stored
% instead. The density of the stored p values is then plotted.
clear variables;rng('default')
reps=10^6;n=25;
pp=NaN(reps,1);
V=0:0.005:1; % left bin edges for the p-value histogram
dV=mean(diff(V)); % bin width
for i=1:reps
    if mod(i,1000)==0;fprintf('Percentage completed = %5.3f',100*i/reps);fprintf('\n');end
    x=randn(n,1);y=randn(n,1);
    [~,pp(i)]=ttest2(x,y);
    % The rank-sum test is only needed when the t test is not significant;
    % skipping it otherwise leaves pp unchanged.
    if pp(i)>.05
        p2=ranksum(x,y);
        if p2 < .05
            pp(i)=p2;
        end
    end
end
figure('Name','Figure 3.12','NumberTitle','off');hold on
D=histc(pp,V);Dnorm=D./sum(D)/dV; % D(k) counts the p values with V(k) <= p < V(k+1)
% Plot each count at its bin centre. The original used V+dV (the right bin
% edge), which shifted the whole curve by half a bin.
plot(V+dV/2,Dnorm,'k-o','Linewidth',2)
box on
xlabel('\itp\rm value');ylabel('Density')
set(gca,'xlim',[0 .4])
fig=gcf;set(findall(fig,'-property','FontSize'),'FontSize',20)
set(gca,'color','none','looseInset', [0.01 0.01 0.01 0.01])
