#R Code for DataColada[58] "The Funnel Plot is Invalid Because of this Crazy Assumption: r(n,d)=0
#Written by Uri Simonsohn (urisohn@gmail.com)
#This version: 2017 03 21
#
##############################################

library(pwr)     #library to carry out power calculations
library(metafor) #library to carry out meta-analysis (funel plot and trim & fill)


#Function 1.  generate variable x correlated r with y
    #Source: http://stats.stackexchange.com/questions/38856/how-to-generate-correlated-random-numbers-given-means-variances-and-degree-of
    rx = function(x, r){
      r2 = r**2
      ve = 1-r2
      SD = sqrt(ve)
      e  = rnorm(length(x), mean=0, sd=SD)
      y  = r*x + e
      return(y)
    }
    
#Function 2: simplified power calculator 
  getn=function(d,power) pwr.t.test(d=d,power=power)$n
  
#Function 3: Simulate meta-analysis and funnel plot
  colada58=function(study.tot, power, d.mean, d.sd, r, graph=0, multi=0, ...)
  {
  #SYntax:
    #study.tot: number of studies
    #power:     power researchers use for original studies
    #d.mean     average effect size
    #d.sd       SD of effect size across studies
    #r          how correlated the guess of effect size is with the true effect size
    #graph      =1 to show the funnel plot, 0 to not show it
	#multi      =1 to show the funnel plot along side two PEESE graphs (added following feedback by Will Gervais after posting the blogpost)
        
  #Gen effect size, real and beliefs
    d=rnorm(study.tot)  #vector with effect size for each study, start with N(0,1) then change mean and SD below
    d.guess=rx(d,r)     #vector with researchers' guess of the effect size they examine, correlated r with the true d

  #Adjust mean and SD of effect size
    d=d*d.sd+d.mean                #true effect sizes now have desirved Mean and SD
    d.guess=d.guess*d.sd+d.mean    #guesed effects sizes now have desired mean and SD
    
    d=pmax(d,.05)
    d.guess=pmax(d.guess,.05)

  #Sample size of each study
    n=round(mapply(getn,d=d.guess,power=power),0)   #sample size is set based on beliefs of effect size, and desired power
    n.needed=round(mapply(getn,d=d,power=power),0)  #sample size one would actually need
        
  #Run studies
    t.obs=mapply(rt,n=1,df=2*n-2,ncp=sqrt(n/2)*d)   #use non central t to get random draw of studytresults 
    d.obs=2*t.obs/sqrt(2*n-2)                       #convert t to d
    vard.obs= 2/n +(d.obs**2)/(2*(n-3.94))          #formula to get var(d) from d and n
    sed.obs=sqrt(vard.obs)                          #sqrt to get SE(d)
 
  #Meta analysis
    meta1=rma(y=d.obs,vi=vard.obs,method="HE")
  
  #Trim-and-fill
    tf=trimfill(meta1)
  
  #PET-PEESE
    lm.pet=lm(d.obs~vard.obs,weight=1/vard.obs)
    lm.peese=lm(d.obs~sed.obs,weight=1/vard.obs)
  #Run Pet-peese not weighting by var to see if eyeball test is failing for this reason (eyeballing the funnel plot it seems the line crosses 0 for negative d)
    lm.pet.noweights=lm(d.obs~vard.obs)
    lm.peese.noweights=lm(d.obs~sed.obs)
     
 
  #Plot if graph=1  
    if (graph==1) funnel(meta1, ...)
    
    
  #Plot multigraph, funnelplot alongside PEESE graph, if multi=1 -- This is to address comments by Will Gervais after seeing post
    if (multi==1)
      {
       par(mfrow=c(1,3))
      
      #Graph 1-  funnel plot
        funnel(meta1,main="Funnel Plot")
      #Graph 2- peese with weights
        plot(sed.obs,d.obs,xlim=c(0,.4),ylim=c(-.1,1.8),main="PEESE weighting by 1/var",xlab='SE(d)',ylab='d')
        abline(lm.peese)
        abline(h=0,col='gray77')
      #Graph 3- peese without weights
        plot(sed.obs,d.obs,xlim=c(0,.4),ylim=c(-.1,1.8),main="PEESE without weights",xlab='SE(d)',ylab='d') 
        abline(lm.peese.noweights)
        abline(h=0,col='gray77')
      }
      
  
  #Report results as list
    #LIST: Simple meta average, trim and fill, PET, PEESE, sample size used, sample size needed
      list(metab=meta1$b, tfb=tf$b, 
           pet=lm.pet$coefficients[1],peese=lm.peese$coefficients[1],
           pet.noweights=lm.pet.noweights$coefficients[1],
           peese.noweights=lm.peese.noweights$coefficients[1],
          n=n,n.needed=n.needed)
    }
    
  
  #########################################
  #1) Figure with 1 funnel plot
   
    set.seed(1)
    dev.off()
    f1=colada58(graph=1,multi=0,power=.8,study.tot=100, d.mean=.6, d.sd=.15, r=.6,
             main='Asymetric Funnel Plot Without Publication Bias\nAll 100 conducted studies reported',
             cex.main=1.5,cex=1.5)
  
    
  #1.5) After posting Will Gervais wondered if PEESE wouldn't give a negative estimate for the example,
    #  I modified the code to create a graph with the PEESE estimate, with and wihtout weighting by variance
        
     set.seed(1)
     f1=colada58(graph=0,multi=1,power=.8,study.tot=100, d.mean=.6, d.sd=.15, r=.6,
             main='Asymetric Funnel Plot Without Publication Bias\nAll 100 conducted studies reported',
             cex.main=1.5,cex=1.5)
    
    
  #2) How good is the guess? Run same analysis on 500 studies
     set.seed(2)
      f2=colada58(graph=0,power=.8,study.tot=500, d.mean=.6, d.sd=.15, r=.6)
      
    #Turn to vectors
      n=f2$n
      n.needed=f2$n.needed
      
    #2.2 Plot it.
      plot(n.needed,n,main="Sample size needed vs run",xlab="Neded n",ylab="run")
    
    #2.3 Compute average error
      e=abs(n-n.needed)/n.needed
      hist(e,breaks=50,xlab="% error in n guess",main="HIstogram of % error in n guess")
      mean(e)
      median(e)
      
      
    
  #Figure with 100 funnel plots (not included in the blog post, to assess generalizability of pattern)
      set.seed(2)
      par(mfrow=c(10,10),mar=c(1,1,1,1))
      for (k in 1:100)   fx=colada58(graph=1,power=.8,study.tot=60, d.mean=.6, d.sd=.15, r=.6)
  
  #Simulations with 500 meta-analyses
    set.seed(4)
    simtot=500
    res=matrix(nrow=simtot,ncol=4)
    for (k in 1:simtot)  {
      fx=colada58(graph=0,study.tot=100, d.mean=.6, d.sd=.15, r=.6,power=.8)
      res[k,]=c(fx$metab, fx$tfb, fx$pet, fx$peese)
      if (k%%10==0) cat("...",k)
      }

    colMeans(res)
    
    
    
  
  #3 Response to L.J. Zigerell that my results supposedly depend on unreasonably high power
  # his tweet: https://twitter.com/LJZigerell/status/844176121231003649
  #here is how with lower power the funnel plot is still asymmetric
    par(mfrow=c(1,3))
    set.seed(1)
    f3.a=colada58(graph=1,multi=0,power=.8,study.tot=100, d.mean=.6, d.sd=.15, r=.6,
             main='Power=80%',
             cex.main=1.5,cex=1.5)
    f3.b=colada58(graph=1,multi=0,power=.5,study.tot=100, d.mean=.6, d.sd=.15, r=.6,
             main='Power=50%',
             cex.main=1.5,cex=1.5)
    f3.c=colada58(graph=1,multi=0,power=.33,study.tot=100, d.mean=.6, d.sd=.15, r=.6,
             main='Power=33%',
             cex.main=1.5,cex=1.5)
	#my response: https://twitter.com/uri_sohn/status/844189506593013760