############################################################################### #R programs that evaluate the distributions of coins size estimation # reported in Judgment Decision Making journal article by Wang et al (2012) # #Written by Uri Simonsohn, August, 2013 ############################################################################### #OUTLINE OF THE R PROGRAMS BELOW #I first create two functions. # #FUnction 1/ c_table(y1,y2) # it takes two vectors, y1 and y2, and generates a contingency table. This is needed # becuase R's chi.test() program does not deal with vectors of data, but with frequencies directly. # So for example, if y1=c(5,5) and y2=c(5,8) then a contingency table # 5 8 # === # y1 2 0 # y2 1 1 # is generated # # (the returned value/table is just the frequencies, with no headings: # so that c_table(y1,y2)=2,0 # 1,1 # #Funtion 2. bootchi(simtot,y1,y2) # This function does the bootstrapping, it takes vectors y1 and y2, concatenates them # and draws with replacement simtot samples from it, apply the c_table() function from above # to the bootstrapped sample and saves into a vector chi() the resulting chi-values # ################################################################ #FUNCTION 1. CREATES CONTINGENCY TABLE FOR CHI2 TEST FROM TWO VECTORS OF DATA c_table=function(y1,y2) { #Find max and min for y to see the range of values to consider maxy=max(y1,y2) miny=min(y1,y2) #Create columns from every possible value, all integers between min and max y1f=factor(y1, levels=miny:maxy) y2f=factor(y2,levels =miny:maxy) #combine then into a table m=matrix(c(table(y1f),table(y2f)),nrow=2,ncol=(maxy-miny+1),byrow=TRUE) #drop columns from table with only 0s as they make the chi.test() program crash. m=m[,colSums(m^2) !=0] #got this code from http://stackoverflow.com/questions/6632018/delete-all-columns-with-0-from-matrix return(m) } #FUNCTION 2. BOOTSTRAP CHI2 TEST bootchi=function(simtot,y1,y2) { #SIMTOT IS THE NUMBER OF SIMULATIONS (200K BELOW) chi=c() #initialize vector with set of results yb=c() #initialize bootstrap samples vector #pool data y=c(y1,y2) for (i in 1:simtot) { #draw random sample yb=sample(y,size=length(y),replace=TRUE) #split the data into two experiments y1b=yb[1:length(y1)] #takes the first n1 elements, calls for Exp1, the rest for Exp2 y2b=yb[(length(y1)+1):length(y)] #compute the chi-square on the bootstrapped sample chi[i]=chisq.test(c_table(y1b,y2b))$statistic } return(chi) } ########################################################################## #COIN SIZE AND SHAME #Shame shame1=c(2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6,7) shame2=c(2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,5,6,6,6,6,6,6,6,7) #Control control1=c(2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,5,5,6,6,6) control2=c(2,2,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,4,5,5,5,6,6,6) #x2s that's observed (.0565) chi_obs1=chisq.test(c_table(shame1,shame2))$statistic #x2 for the shame condition chi_obs2=chisq.test(c_table(control1,control2))$statistic #x2 for the control condition chi_obs=chi_obs1+chi_obs2 #summing both x2s #note: this code generates the warning that the chi-square approximation may be incorrect. # that is not a problem since we do not really use the x2 value as a chi-square. #RESULTS set.seed(126) #set seed to get same results every time simtot=200000 #200k simulations time1=Sys.time() #to time how long it takes to execute chi_r1=bootchi(simtot=simtot,y1=shame1,y2=shame2) #bootstrap the shame chi_r2=bootchi(simtot=simtot,y1=control1,y2=control2) #now the control chir_r=chi_r1+chi_r2 print(Sys.time()-time1) #print how much time it took cat("\n p for shame: ",sum(chi_r1<=chi_obs1)/simtot) #count # of simulations <=observed, turn into p-value cat("\n p for control: ",sum(chi_r2<=chi_obs2)/simtot) cat("\n p for both: ",sum(chi_r<=chi_obs)/simtot) #Save results for graphing in Excel chi_obs write(sort(round(chi_r,1)),"output.txt",sep="\n") #note: the chi.test() function in R has a built it montecarlo but it has two prolems. # the most important is that it reports the p-value for a more extreme difference including the observed # we want a less extreme one also including the observed, so we cannot use the reported p-value for this purposes # for instance, for the shame condition, the observed x2 is the lowest possible, so the p-value=1, this would mean # it is impossible to et such similar results, but this excludes the probability of exactly teh same # results. # another problem is that the montecarlo samples without replacement (a la Fisher's exact test). # i simulated that also and leads to very similar results.