#This code starts with the code written by Blake McShane for McShane, Böckenholt, & Hansen (PoPS 2016).
#There, the data from Klein et al. for Many Labs was analyzed by focusing on the 1000 observations coming from
#MTurk, and looking at the heterogeneity across days for those observations.
#The paper reports I2=21% on average, for the 9 studies that have I2>0.
#Here I, Uri Simonsohn, put Blake's code inside a function and then assess how it performs under the null,
#by shuffling the date column and re-running the analyses. I find that on average, under the null, I2=24%; thus the observed
#21% is slightly below what's expected when there is no heterogeneity.
#
#Last update: 2019 04 16
##########################################################################################

  rm(list = ls())


# Read in data (written by Blake) 
  library(foreign)
  library(metafor)
  
  # # #Shrinking datafile size
  #    dat <- data.frame(read.spss("C:/Dropbox/DataColada/76 Heterogeneity/R - Hansen - McShane/CleanedDataset.sav"))
  # # #Subset to keep only data from MTurk
  #    dat <- dat[dat$sample=="mturk",]
  #    save(dat,file="C:/dropbox/DataColada/76 Heterogeneity/supplement/ManyLabs1.just.Mturk.Rda")

#<<<----->>>> IMPORTANT--> Need to download the .Rda file: http://datacolada.org/appendix/76/Colada76.III.ManyLabs1.just.Mturk.Rda
  setwd("C:/Dropbox/DataColada/76 Heterogeneity/supplement/")
  load("ManyLabs1.just.Mturk.Rda")
  
  
########################## This code is from Blake McShane, he shared with Uri upon request
  
  #Format date as factor
    tmp <- substr(dat$last_update_date,1,7)
    sel <- substr(tmp,7,7)==" "
    tmp[sel] <- substr(tmp[sel], 1, 6)
    dat$fdate.obs <- factor(tmp, levels=c("8/29/13", "9/3/13", "9/4/13", "9/5/13", "9/9/13", "9/10/13", "9/11/13"))
  #Note: here Uri modified the name of the variable, it is now fdtae.obs (added .obs), so that I can shuffle it later on
    # and use the name used by Blake for it, without "obs"

#Create function with all of the code Blake wrote inside of it:
  run.blake=function() {

# Replicate meta-analyses
  mmeta <- list(NULL)
  sel <- !is.na(dat$sunkDV)
  as.vector(by(!sel, dat$fdate, sum))
  mm <- by(dat$sunkDV[sel], list(dat$fdate[sel], dat$sunkgroup[sel]), mean)
  ss <- by(dat$sunkDV[sel], list(dat$fdate[sel], dat$sunkgroup[sel]), sd)
  nn <- by(dat$sunkDV[sel], list(dat$fdate[sel], dat$sunkgroup[sel]), length)
  mmeta[[1]] <- rma(measure="SMD", m1i=mm[,1], m2i=mm[,2], sd1i=ss[,1], sd2i=ss[,2], n1i=nn[,1], n2i=nn[,2])


sel <- !is.na(dat$gainlossDV)
as.vector(by(!sel, dat$fdate, sum))
tmp <- by(dat[sel,], dat$fdate[sel], function(x){table(x$gainlossgroup, x$gainlossDV)})	# Die = Gain
aa <- sapply(tmp, function(x){x[1,2]})
bb <- sapply(tmp, function(x){x[1,1]})
cc <- sapply(tmp, function(x){x[2,2]})
dd <- sapply(tmp, function(x){x[2,1]})
mmeta[[2]] <- rma(measure="OR2D", ai=aa, bi=bb, ci=cc, di=dd, n1i=aa+bb, n2i=cc+dd)


sel <- !is.na(dat$Ranch1)
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$Ranch1[sel], list(dat$fdate[sel], dat$anch1group[sel]), mean)
ss <- by(dat$Ranch1[sel], list(dat$fdate[sel], dat$anch1group[sel]), sd)
nn <- by(dat$Ranch1[sel], list(dat$fdate[sel], dat$anch1group[sel]), length)
mmeta[[3]] <- rma(measure="SMD", m1i=mm[,2], m2i=mm[,1], sd1i=ss[,2], sd2i=ss[,1], n1i=nn[,2], n2i=nn[,1])


sel <- !is.na(dat$Ranch2)
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$Ranch2[sel], list(dat$fdate[sel], dat$anch2group[sel]), mean)
ss <- by(dat$Ranch2[sel], list(dat$fdate[sel], dat$anch2group[sel]), sd)
nn <- by(dat$Ranch2[sel], list(dat$fdate[sel], dat$anch2group[sel]), length)
mmeta[[4]] <- rma(measure="SMD", m1i=mm[,2], m2i=mm[,1], sd1i=ss[,2], sd2i=ss[,1], n1i=nn[,2], n2i=nn[,1])


sel <- !is.na(dat$Ranch3)
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$Ranch3[sel], list(dat$fdate[sel], dat$anch3group[sel]), mean)
ss <- by(dat$Ranch3[sel], list(dat$fdate[sel], dat$anch3group[sel]), sd)
nn <- by(dat$Ranch3[sel], list(dat$fdate[sel], dat$anch3group[sel]), length)
mmeta[[5]] <- rma(measure="SMD", m1i=mm[,2], m2i=mm[,1], sd1i=ss[,2], sd2i=ss[,1], n1i=nn[,2], n2i=nn[,1])


sel <- !is.na(dat$Ranch4)
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$Ranch4[sel], list(dat$fdate[sel], dat$anch4group[sel]), mean)
ss <- by(dat$Ranch4[sel], list(dat$fdate[sel], dat$anch4group[sel]), sd)
nn <- by(dat$Ranch4[sel], list(dat$fdate[sel], dat$anch4group[sel]), length)
mmeta[[6]] <- rma(measure="SMD", m1i=mm[,2], m2i=mm[,1], sd1i=ss[,2], sd2i=ss[,1], n1i=nn[,2], n2i=nn[,1])


sel <- !is.na(dat$gambfalDV)
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$gambfalDV[sel], list(dat$fdate[sel], dat$gambfalgroup[sel]), mean)
ss <- by(dat$gambfalDV[sel], list(dat$fdate[sel], dat$gambfalgroup[sel]), sd)
nn <- by(dat$gambfalDV[sel], list(dat$fdate[sel], dat$gambfalgroup[sel]), length)
mmeta[[7]] <- rma(measure="SMD", m1i=mm[,2], m2i=mm[,1], sd1i=ss[,2], sd2i=ss[,1], n1i=nn[,2], n2i=nn[,1])


sel <- !is.na(dat$scales)
as.vector(by(!sel, dat$fdate, sum))
tmp <- by(dat[sel,], dat$fdate[sel], function(x){table(x$scalesgroup, x$scales)})
aa <- sapply(tmp, function(x){x[1,1]})
bb <- sapply(tmp, function(x){x[1,2]})
cc <- sapply(tmp, function(x){x[2,1]})
dd <- sapply(tmp, function(x){x[2,2]})
mmeta[[8]] <- rma(measure="OR2D", ai=aa, bi=bb, ci=cc, di=dd, n1i=aa+bb, n2i=cc+dd)


sel <- !is.na(dat$reciprocityus)
as.vector(by(!sel, dat$fdate, sum))
tmp <- by(dat[sel,], dat$fdate[sel], function(x){table(x$reciprocitygroup, x$reciprocityus)})
aa <- sapply(tmp, function(x){x[1,2]})
bb <- sapply(tmp, function(x){x[1,1]})
cc <- sapply(tmp, function(x){x[2,2]})
dd <- sapply(tmp, function(x){x[2,1]})
mmeta[[9]] <- rma(measure="OR2D", ai=aa, bi=bb, ci=cc, di=dd, n1i=aa+bb, n2i=cc+dd)


sel <- !is.na(dat$allowedforbidden)
as.vector(by(!sel, dat$fdate, sum))
tmp <- by(dat[sel,], dat$fdate[sel], function(x){table(x$allowedforbiddenGroup, x$allowedforbidden)})
aa <- sapply(tmp, function(x){x[1,2]})
bb <- sapply(tmp, function(x){x[1,1]})
cc <- sapply(tmp, function(x){x[2,2]})
dd <- sapply(tmp, function(x){x[2,1]})
mmeta[[10]] <- rma(measure="OR2D", ai=aa, bi=bb, ci=cc, di=dd, n1i=aa+bb, n2i=cc+dd)


sel <- !is.na(dat$quote)
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$quote[sel], list(dat$fdate[sel], dat$quoteGroup[sel]), mean)
ss <- by(dat$quote[sel], list(dat$fdate[sel], dat$quoteGroup[sel]), sd)
nn <- by(dat$quote[sel], list(dat$fdate[sel], dat$quoteGroup[sel]), length)
mmeta[[11]] <- rma(measure="SMD", m1i=mm[,2], m2i=mm[,1], sd1i=ss[,2], sd2i=ss[,1], n1i=nn[,2], n2i=nn[,1])


sel <- !is.na(dat$flagdv)				# 0=control; 1=flag; 2=excluded
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$flagdv[sel], list(dat$fdate[sel], dat$flagGroup[sel]), mean)
ss <- by(dat$flagdv[sel], list(dat$fdate[sel], dat$flagGroup[sel]), sd)
nn <- by(dat$flagdv[sel], list(dat$fdate[sel], dat$flagGroup[sel]), length)
mmeta[[12]] <- rma(measure="SMD", m1i=mm[,2], m2i=mm[,1], sd1i=ss[,2], sd2i=ss[,1], n1i=nn[,2], n2i=nn[,1])


sel <- !is.na(dat$Sysjust)	
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$Sysjust[sel], list(dat$fdate[sel], dat$MoneyGroup[sel]), mean)
ss <- by(dat$Sysjust[sel], list(dat$fdate[sel], dat$MoneyGroup[sel]), sd)
nn <- by(dat$Sysjust[sel], list(dat$fdate[sel], dat$MoneyGroup[sel]), length)
mmeta[[13]] <- rma(measure="SMD", m1i=mm[,2], m2i=mm[,1], sd1i=ss[,2], sd2i=ss[,1], n1i=nn[,2], n2i=nn[,1])


sel <- !is.na(dat$Imagineddv)	
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$Imagineddv[sel], list(dat$fdate[sel], dat$ContactGroup[sel]), mean)
ss <- by(dat$Imagineddv[sel], list(dat$fdate[sel], dat$ContactGroup[sel]), sd)
nn <- by(dat$Imagineddv[sel], list(dat$fdate[sel], dat$ContactGroup[sel]), length)
mmeta[[14]] <- rma(measure="SMD", m1i=mm[,2], m2i=mm[,1], sd1i=ss[,2], sd2i=ss[,1], n1i=nn[,2], n2i=nn[,1])


sel <- !is.na(dat$d_art) & dat$sample!="qccuny2"
as.vector(by(!sel, dat$fdate, sum))
mm <- by(dat$d_art[sel], list(dat$fdate[sel], dat$partgender[sel]), mean)
ss <- by(dat$d_art[sel], list(dat$fdate[sel], dat$partgender[sel]), sd)
nn <- by(dat$d_art[sel], list(dat$fdate[sel], dat$partgender[sel]), length)
mmeta[[15]] <- rma(measure="SMD", 		m1i=mm[,2], m2i=mm[,3], sd1i=ss[,2], sd2i=ss[,3], n1i=nn[,2], n2i=nn[,3])


sel <- dat$IATEXPfilter==1 & dat$sample!="qccuny2"
as.vector(by(!sel, dat$fdate, sum))
rr <- by(dat[sel,], dat$fdate[sel], function(x){cor(x$d_art, x$IATexp.overall,use="pair")})
nn <- by(dat[sel,], dat$fdate[sel], function(x){sum(!is.na(x$d_art & !is.na(x$IATexp.overall)))})
mmeta[[16]] <- rma(measure="COR", ri=as.vector(rr), ni=as.vector(nn))

corr.to.d <- function(r){ 2*r / sqrt(1-r^2) }
tmpr <- as.vector(rr)
tmpn <- as.vector(nn)
tmpd <- corr.to.d(tmpr)
tmpv <- 4/((1-tmpr^2)^3) * ((1-tmpr^2)^2)/(tmpn-1)		# Conversion factor * Variance(rhat)
tmpm <- rma.uni(yi=tmpd, vi=tmpv)


study.names <- c(study1="Sunk Costs", study2="Gain vs loss framing", study3a="Anchoring - Babies Born",
				 study3b="Anchoring - Mt. Everest", study3c="Anchoring - Chicago",
				 study3d="Anchoring - Distance to NYC",study4="Retrospective gambler fallacy",
				 study5="Low vs high category scales", study6="Norm of reciprocity",
				 study7="Allowed/Forbidden",study8="Quote Attribution", study9="Flag Priming",
				 study10="Currency Priming", study11="Imagined contact",
				 study12a="Sex differences in implicit math attitudes",
				 study12b="Relations between impl. and expl. math attitudes")


tmp <- data.frame( study=study.names,
			tau2=sapply(mmeta, function(x){x$tau2}),
			tau=sapply(mmeta, function(x){sqrt(x$tau2)}),
			I2=sapply(mmeta, function(x){x$I2}),
			H2=sapply(mmeta, function(x){x$H2}),
      QEp=sapply(mmeta, function(x){x$QEp}),
			null=sapply(mmeta, function(x){x$pval > 0.05}))

#Output results sorted by I2
  tmp[order(tmp$I2),]

  }#End of run.blake() function

  
#########################################################
  #For verification purposes, reproduce results by Blake, using the formula
    dat$fdate=dat$fdate.obs  #the function analyzes dat$fdate, in the loop i shuffle .obs, but here i keep as is

    #Execute all 16 meta-analyses, with code written by Blake McShane
      a=run.blake()
      mean(a$I2[8:16])  #Reproduce the 21% estimate reported in their papers
  
#Bootstrapping  under the null

    set.seed(1122)
    simtot=500
    
    #Create empty matrix to save results
    #  The I2 for each of the studies, for each loop (all that's relevant for current purposes)
      i2=matrix(nrow=simtot,ncol=16) #16 studies, so 16 meta-analyses per loop, keep the I2 of each
  
  #Run the loop
  for (simk in 1:simtot)
  {
    #Shuffle dates
      dat$fdate=sample(dat$fdate.obs)  #I changed the name of this column in the data $.obs means "observed" the actual date
                                       #I shuffle it to run under the nul
      
    #Execute all 16 meta-analyses, with code written by Blake McShane
      metas.k=run.blake()
    #Keep track of the I2
      i2[simk,]=metas.k$I2
    #Counter  
      if (simk%%50==0) cat("...",simk)
  }
      
      
  #Compute the average I2 of each study, for the bootstrapped under the null studies 
    round(colMeans(i2),1)
  #Compute the average I2 of the highest 9 I2s
    mean(round(colMeans(i2),1)[8:16]) 
  #Compute the p-value for the observed I2 of 21
    mean(colMeans(i2)[8:16] >= 21)  
  
    hist(i2)  
  
