#DataColada 88 - Computing an approximate CI around the hot hand estimate of miller and sanjurjo rm(list = ls()) library('rio') GVT.Study4=import("D:/Dropbox/Papers/6301-6400/6378 - Mller Sanjuanjo - Supplement-Data_Code_Statistics/0-RAWDATA/GilovichValloneTversky--CognitivePsychology--1985_CornellData.xls") #Spreadsheet original found in the supplement from the Econometrica paper, posted to http://datacolada.org/appendix/88 #number of players N.players=length(unique(GVT.Study4$sid)) #Number of players #1. Function 1 - compute outcomes after 3 shots compute.hot3=function(shots) { n=length(shots) #hot many shots total shot.next=shots[4:n] #take the last n-3 shots as the DV shot.p1=shots[3:(n-1)] #predict with 1 lag shot.p2=shots[2:(n-2)] #2 lags shot.p3=shots[1:(n-3)] #3 lag HHH=(shot.p1==shot.p2 & shot.p2==shot.p3 & shot.p1==1) #1/0: converted after HHH MMM=(shot.p1==shot.p2 & shot.p2==shot.p3 & shot.p1==0) #1/0: converted after MMM mean.HHH=mean(shot.next[HHH==T]) #Average conversion after HHH mean.MMM=mean(shot.next[MMM==T]) #Average conversion after MMM hot3=mean.HHH-mean.MMM count.HHH=sum(HHH) count.MMM=sum(MMM) c(n,mean(shots),mean.HHH,mean.MMM,count.HHH,count.MMM,hot3) } #2. Example of Function 1: player 1 in Table II in Miller & Sanjurjo, has .5 and .44, with 12 and 9 such streaks, reproduced here playerk=subset(GVT.Study4,sid==101) compute.hot3(playerk$make) # shots - phit phit|HHH phit|MMM hot.hand #3 Function that computes Table II in Miller & Sanjurjo compute.table.II=function(datak) { #Empty table first table.II=matrix(nrow=N.players,ncol=7) #Set of IDs to compute sid.all=unique(datak$sid) #Counter k=1 #loop over ids for (sidk in unique(datak$sid)) { playerk=subset(datak,sid==sidk) table.II[k,]=compute.hot3(playerk$make) k=k+1 } #Output the table table.II=data.frame(table.II) names(table.II)=c("shots",'p','p.HHH','p.MMM','N.HHH','N.MMM','hothand.3') return(table.II) } #4 Reproduce Table II with observed data #Table t2=compute.table.II(GVT.Study4) t2 #Summary row round(colMeans(t2,na.rm=T),3) #5 Bootstrap the table under the null df=GVT.Study4 #Copy the data #function to bootstrap under the null compute.summary.row.boot=function(df,replace,btot) { #Make empty results table table.boot=matrix(nrow=btot, ncol=7) #Get all IDs sid.all=unique(GVT.Study4$sid) #set of Player IDS #Bootstrap loop for (bk in 1:btot) { #Copy dataset df.null=df #Make data under null for each player for (sidk in sid.all) { #Subset the player from real data playerk=subset(df,sid==sidk) #Shuffle their shots playerk$make.shuffle=sample(playerk$make,replace=replace) #Copy paste to null data df.null$make[df.null$sid==sidk]=playerk$make.shuffle } #End loop #Compute the table for the bootstrapped sample t2.boot=compute.table.II(df.null) if (bk%%100==0) cat("...",bk) #Save summary row table.boot[bk,]=colMeans(t2.boot,na.rm=T) }#End bootstrap loop return(table.boot) }#End function #Summary of bootstrapped results under null of P(H|HHH)=P(H) set.seed(123) summary.row.boot=compute.summary.row.boot(df=GVT.Study4,replace=F,btot=1000) hh.boot=summary.row.boot[,7] mean(hh.boot) #Confidence interval: 0-19% q1=round(quantile(hh.boot,.975),3) q2=round(quantile(hh.boot,.025),3) cat(paste0("The boostrapped confidence interval under null of 0 is [",q1,",",q2,"]\n", "In words, when P(H)=P(H|HHH), hot hand artifact leads P(H|HHH)-P(H|MMM)=-.09 with this CI around it", "]\nwidth of ",q2-q1,"\nso estiamtes is observed +-", (q2-q1)/2))