#########################################################################################################
# Selecting 24 representative items for the short version of the questionnaire in research:
# Siska Fitrianie, Merijn Bruijnes, Fengxiang Li, Amal Abdulrahman, Willem-Paul Brinkman. 2022. 
# The Artificial-Social-Agent Questionnaire: Establishing the long and short questionnaire versions. 
# In ACM International Conference on Intelligent Virtual Agents (IVA’22), September, 2022, Faro, Portugal. 
# ACM, New York, NY, USA. https://doi.org/10.1145/3514197.3549612
#
# Calculate the mean difference between the long and short versions of the ASA Questionnaire
# Input:  result_all.csv (raw observed data)
#########################################################################################################

## Load the raw observed data (semicolon-separated file with a header row)
d_results_all <- read.csv2(
  file = "../raw data/result_all.csv",
  header = TRUE,
  sep = ";"
)

###################################### 24 constructs/dimension ##########################################
## Build the long version of the ASA Questionnaire (#items = 90) from the raw observed data.
## Every column named in drop90 is removed: the bookkeeping fields on the first row, followed by
## the questionnaire items that are left out of the long version (grouped per construct/dimension).
## GROUP is not listed here, so a GROUP column in the raw data stays in d_90items.
drop90 <- c(
  "PRID", "RID", "STARTDATE", "ENDDATE", "RECORDDATE", "CheckScore",
  "C01D01Q16",
  "C01D04Q8", "C01D04Q9",
  "C01D05Q11", "C01D05Q12",
  "R_C02D00Q11", "R_C02D00Q15",
  "C03D01Q1", "C03D01Q5", "C03D01Q6", "C03D02Q0", "C03D02Q5", "R_C03D02Q13", "R_C03D02Q1",
  "R_C05D00Q3", "C05D00Q18",
  "R_C06D01Q13", "C06D01Q8",
  "C07D00Q13", "C07D00Q14",
  "R_C08D00Q10",
  "C09D00Q5", "C09D00Q3", "R_C09D00Q9",
  "C10D00Q1", "C10D00Q10",
  "C11D01Q6", "C11D02Q1", "C11D02Q3", "C11D02Q8",
  "C12D00Q8", "R_C12D00Q6",
  "C13D00Q7",
  "C14D00Q15",
  "C15D00Q8", "C15D00Q12",
  "C16D00Q11", "C16D00Q16",
  "C17D00Q8",
  "C18D03Q9",
  "C19D00Q3"
)
# keep only the columns whose name does not occur in drop90
d_90items <- d_results_all[, !is.element(names(d_results_all), drop90)]

# Labels of the 24 constructs/dimensions (long version, prefix "90_")
n_90constructs <- c(
  "90_C01D01", "90_C01D02", "90_C01D03", "90_C01D04", "90_C01D05", "90_C02", "90_C03", "90_C04",
  "90_C05", "90_C06D01", "90_C07", "90_C08", "90_C09", "90_C10", "90_C11", "90_C12", "90_C13",
  "90_C14", "90_C15", "90_C16", "90_C17", "90_C18D01", "90_C18D03", "90_C19"
)
# Number of items of each construct/dimension, in the same order as n_90constructs
# (the counts add up to 90)
numcols <- c(
  4, 5, 5, 3, 3, 3, 3, 5,     # 90_C01D01 .. 90_C04
  3, 3, 3, 4, 3, 3, 6, 3, 4,  # 90_C05 .. 90_C13
  4, 3, 3, 4, 5, 4, 4         # 90_C14 .. 90_C19
)

# Get the items of one construct/dimension as numeric columns.
#   data  : data frame that holds the item columns
#   begin : index of the first column to extract
#   end   : index of the last column to extract
# Returns a data frame with columns begin..end of `data`, each converted with as.numeric().
getData <- function(data, begin, end) {
  dat <- data[begin:end]
  # lapply() always yields exactly one vector per column, so the replacement has the same
  # shape as `dat` for any number of rows; sapply() would return a vector, a matrix or a
  # list depending on the row count.
  dat[] <- lapply(dat, as.numeric)
  dat
}

# Calculate, for every respondent (row), the mean of the items of each construct/dimension.
# The item columns of d_90items are consumed by position: construct i owns the next
# numcols[i] columns, starting at column 2. Column 1 is skipped -- presumably the GROUP
# column, which drop90 does not remove; verify against the layout of the raw CSV.
n_constructs <- length(numcols)
begin <- 2
last_needed_col <- begin + sum(numcols) - 1
if (ncol(d_90items) < last_needed_col) {
  stop(
    "d_90items has ", ncol(d_90items), " columns, but ", last_needed_col,
    " are needed to cover the items of all constructs/dimensions",
    call. = FALSE
  )
}

# Preallocate one numeric column per construct and fill it in a single assignment,
# instead of growing the data frame one cell at a time.
d_index <- data.frame(matrix(NA_real_, nrow = nrow(d_90items), ncol = n_constructs))
for (i in seq_len(n_constructs)) {
  n_items <- numcols[i]
  d <- getData(d_90items, begin, begin + n_items - 1)
  # unname(): rowMeans() labels its result with the row names, which are not wanted in a column
  d_index[[i]] <- unname(rowMeans(d))
  begin <- begin + n_items
}
colnames(d_index) <- n_90constructs

###################################### 24 representative items ##########################################
## Build the short version of the questionnaire: every column named in drop24 is removed from the
## raw observed data, i.e. the bookkeeping fields and GROUP on the first row, followed by all items
## that are not a representative item (grouped per construct/dimension).
drop24 <- c(
  "PRID", "RID", "STARTDATE", "ENDDATE", "RECORDDATE", "CheckScore", "GROUP",
  "C01D01Q3", "C01D01Q13", "C01D01Q15", "C01D01Q16",
  "C01D02Q7", "C01D02Q10", "C01D02Q0", "C01D02Q1",
  "C01D03Q7", "C01D03Q9", "C01D03Q10", "C01D03Q12",
  "C01D04Q0", "C01D04Q13", "C01D04Q8", "C01D04Q9",
  "C01D05Q9", "C01D05Q0", "C01D05Q11", "C01D05Q12",
  "C02D00Q2", "C02D00Q8", "R_C02D00Q11", "R_C02D00Q15",
  "C03D01Q1", "C03D01Q5", "C03D01Q6", "C03D01Q12",
  "C03D02Q0", "C03D02Q5", "C03D02Q10", "R_C03D02Q13", "R_C03D02Q1",
  "C04D00Q1", "R_C04D00Q11", "C04D00Q4", "C04D00Q12",
  "R_C05D00Q3", "C05D00Q7", "C05D00Q1", "C05D00Q18",
  "R_C06D01Q6", "C06D01Q11", "R_C06D01Q13", "C06D01Q8",
  "C07D00Q11", "R_C07D00Q15", "C07D00Q13", "C07D00Q14",
  "C08D00Q4", "C08D00Q0", "R_C08D00Q9", "R_C08D00Q10",
  "C09D00Q4", "C09D00Q1", "C09D00Q5", "C09D00Q3", "R_C09D00Q9",
  "C10D00Q1", "C10D00Q6", "C10D00Q16", "C10D00Q10",
  "C11D01Q6", "C11D01Q10", "C11D01Q11", "C11D01Q14",
  "C11D02Q4", "C11D02Q7", "C11D02Q1", "C11D02Q3", "C11D02Q8",
  "C12D00Q0", "C12D00Q1", "C12D00Q8", "R_C12D00Q6",
  "C13D00Q7", "R_C13D00Q11", "R_C13D00Q6", "R_C13D00Q15",
  "C14D00Q0", "C14D00Q10", "C14D00Q16", "C14D00Q15",
  "C15D00Q11", "R_C15D00Q15", "C15D00Q8", "C15D00Q12",
  "C16D00Q11", "C16D00Q12", "C16D00Q17", "C16D00Q16",
  "C17D00Q2", "C17D00Q9", "C17D00Q10", "C17D00Q8",
  "C18D01Q2", "C18D01Q3", "C18D01Q8", "R_C18D01Q14",
  "C18D03Q0", "C18D03Q1", "C18D03Q12", "C18D03Q9",
  "C19D00Q3", "C19D00Q7", "C19D00Q14", "C19D00Q26"
)
# keep only the columns whose name does not occur in drop24
d_24items <- d_results_all[, !is.element(names(d_results_all), drop24)]

# Labels of the 24 constructs/dimensions that are represented by the 24 representative items
# (short version, prefix "24_"). The labels are applied by position, so the columns left in
# d_24items are assumed to be in the same construct order -- verify against the raw CSV.
n_24constructs <- c(
  "24_C01D01", "24_C01D02", "24_C01D03", "24_C01D04", "24_C01D05", "24_C02", "24_C03", "24_C04",
  "24_C05", "24_C06D01", "24_C07", "24_C08", "24_C09", "24_C10", "24_C11", "24_C12", "24_C13",
  "24_C14", "24_C15", "24_C16", "24_C17", "24_C18D01", "24_C18D03", "24_C19"
)
# Fail loudly when the selection did not leave exactly one column per label: assigning fewer
# names than there are columns would otherwise leave the surplus columns with NA names.
if (NCOL(d_24items) != length(n_24constructs)) {
  stop(
    "expected ", length(n_24constructs), " representative item columns, found ",
    NCOL(d_24items),
    call. = FALSE
  )
}
colnames(d_24items) <- n_24constructs
# convert every item column to numeric; lapply() keeps one vector per column for any row count
d_24items[] <- lapply(d_24items, as.numeric)

######## Calculate the absolute mean difference between the long and the short version questionnaire #######
# Column 1: mean of each construct/dimension score of the long version (d_index)
# Column 2: mean of the matching representative item of the short version (d_24items)
# Column 3: absolute difference between column 1 and column 2
d_means <- data.frame(
  cbind(
    colMeans(d_index),
    colMeans(d_24items)
  )
)
d_means[, 3] <- abs(d_means[[1]] - d_means[[2]])

