% Start from a clean session: clear the workspace, close figures, clear the
% command window.
clear all;close all;clc

% Read the complete response log into a single character vector T.
% The path is assembled with fullfile so that the platform's own file
% separator is used; a hard-coded '\' only works on Windows.
T = fileread(fullfile('Exam 2024 responses', 'Exam 2024 - Q9 - 250 times.txt'));

% Extract the token counts per question.
% For every occurrence of 'TOKEN' in T, all integers found in a fixed window
% (20 to 70 characters past the start of the marker) become one row of TOKEN.
idx = strfind(T, 'TOKEN');
for i = 1:numel(idx)
    % Clamp the window to the end of the text so that a marker close to the
    % end of the file does not raise an index-out-of-bounds error.
    str = T(idx(i)+20 : min(idx(i)+70, numel(T)));
    numbers = str2double(regexp(str, '\d+', 'match'));
    if i == 1
        % Allocate once, using the width of the first row, instead of
        % growing the matrix on every iteration.
        TOKEN = nan(numel(idx), numel(numbers));
    end
    % Errors if this marker yields a different number of integers than the
    % first one, i.e. if the log layout is not uniform.
    TOKEN(i,:) = numbers;
end

% Extract the time taken per question.
% For every occurrence of 'TIME TAKEN' in T, the decimal numbers found in a
% fixed window (10 to 30 characters past the start of the marker) become one
% row of TIME.
idx = strfind(T, 'TIME TAKEN');
for i = 1:numel(idx)
    % Clamp the window to the end of the text so that a marker close to the
    % end of the file does not raise an index-out-of-bounds error.
    str = T(idx(i)+10 : min(idx(i)+30, numel(T)));
    % NOTE(review): the pattern only matches numbers containing a decimal
    % point; confirm the log never prints an integer time.
    number = str2double(regexp(str, '\d+\.\d+', 'match'));
    if i == 1
        % Allocate once, using the width of the first row, instead of
        % growing the matrix on every iteration.
        TIME = nan(numel(idx), numel(number));
    end
    TIME(i,:) = number;
end

% Display the answer per question.
% For every 'QUESTION NUMBER' marker except the first, print the (up to) 100
% characters that precede it, flattened onto one line and numbered i-1.
% NOTE(review): the text following the last marker is never printed; this
% presumably relies on the log layout -- confirm.
idx = strfind(T, 'QUESTION NUMBER');
for i = 2:numel(idx)
    % Clamp the start of the window to the first character so that a marker
    % within 100 characters of the start of the file does not produce a
    % non-positive index.
    str = T(max(idx(i)-100, 1) : idx(i)-1);
    str = strrep(str, newline, ' ');
    disp([num2str(i-1) '. ' str])
end

% Load the correctness values from cells A1:A250 of the spreadsheet.
% The path is assembled with fullfile so that the platform's own file
% separator is used; a hard-coded '\' only works on Windows.
CORRECT = xlsread(fullfile('Exam 2024 responses', 'Exam 2024 - Q9 - 250 times - correctness.xlsx'), 'A1:A250'); %#ok<XLSRD>

% The TOKEN matrix consists of
% 1. Prompt tokens
% 2. Completion tokens
% 3. Total tokens (= Prompt tokens + Completion tokens)
% 4. Reasoning tokens

% 5. Add fifth column: Visible completion tokens
%    (completion tokens minus reasoning tokens)
TOKEN(:,5) = TOKEN(:,2) - TOKEN(:,4);
TOKEN(:,1) = []; % remove prompt tokens since it is always the same

% One row per response: token columns, completion time, correctness.
X = [TOKEN TIME CORRECT];

% Remove outputs with an extremely long completion time (more than 400 s).
idx = find(TIME(:,1) > 400);
X(idx,:) = [];

% Columns of X:
% 1. Completion tokens
% 2. Total tokens (= Prompt tokens + Completion tokens)
% 3. Reasoning tokens
% 4. Visible completion tokens
% 5. Completion time (s)
% 6. Answer correct (0: no, 1: yes)

% Print the column-wise means for each group.
% The dimension is passed explicitly: without it, mean() of a group that
% contains exactly one row would average across the columns of that row and
% return a single scalar instead of one mean per column.
disp('Mean for correct answers')
disp(mean(X(X(:,6)==1,:), 1))
disp('Mean for incorrect answers')
disp(mean(X(X(:,6)==0,:), 1))

%%
% Scatter plot: completion time against reasoning tokens, one marker per row
% of X. Incorrect answers are red squares, all other rows are green circles.
figure
set(gcf,'renderer','painters')
hold on

reasoningK = X(:,3)/1000;   % reasoning tokens, in thousands
seconds    = X(:,5);        % completion time
isWrong    = X(:,6)==0;     % rows flagged as incorrect

% Markers are drawn one by one, in row order, so that the stacking of
% overlapping semi-transparent markers follows the order of the data.
for k = 1:size(X,1)
    if isWrong(k)
        shape = 's'; faceColor = 'r';
    else
        shape = 'o'; faceColor = 'g';   % 'o' is scatter's default marker
    end
    scatter(reasoningK(k),seconds(k),200,shape, ...
        'markerfacecolor',faceColor,'markeredgecolor','k', ...
        'LineWidth',1.5,'MarkerFaceAlpha',0.5);
end

% Invisible (NaN) points that only serve as opaque legend entries.
p1 = plot(nan, nan, 's', 'MarkerFaceColor', 'r', 'MarkerEdgeColor', 'k', 'MarkerSize', 15, 'LineWidth', 1.5);
p2 = plot(nan, nan, 'o', 'MarkerFaceColor', 'g', 'MarkerEdgeColor', 'k', 'MarkerSize', 15, 'LineWidth', 1.5);

% Axes styling
grid on
ax = gca;
ax.LineWidth = 1.5;
legend([p1,p2],'Incorrect answer','Correct answer','location','northwest')
box on
set(gca, 'LooseInset',[0.01 0.01 0.01 0.01])
set(gca,'pos',[0.07 0.103 0.55 0.88])
xlabel('Number of reasoning tokens (in thousands)')
ylabel('Completion time (s)')

% Apply one font to every object in the figure that has a FontName property.
fig = gcf;
set(findall(fig,'-property','FontName'),'FontName','Arial','Fontsize',32)

