#R code for calculations of actual power in replications claiming a given level of power
#Published in DataColada[4], www.datacolada.org
#Written by Uri Simonsohn during the summer of 2013 for paper "Evaluating Replication Results" http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2259879
#Revised for blogpost in October, 2013
library(pwr) #Need to install this package
#Syntax of is a bit convoluted, so I create simpler function off it
#Function getn() gets sample size for a given effect size and desired power
getn = function(d,power) {
n=pwr.t.test(d=d,power=power)$n
return(round(n,digits=0))
}
#Function getpower() for given sample and effect size
getpower = function(d,n) {return(pwr.t.test(d=d,n=n)$power)}
#THIS IS THE KEY FUNCTION, GOES FROM TRUE EFFECT SIZE, ORIGINAL SAMPLE SIZE AND CLAIMED POWER, TO ACTUAL POWER OF REPLCIATION
realpower=function(nori,powerori,powerrep) {
#SYNTAX:
#nori: per cell sample size of original study (comparing two means)
#powerori: true power of original study
#powerrep: claimed power of repliation
#compute df, ncp, tc, true effect size
dfori=2*nori-2 #degrees of freedom
dori=pwr.t.test(n=nori,power=powerori)$d #true effect size leading to the set power for the original study given its sample size
ncpori=sqrt(nori)*dori #noncentrality parameter for student distribution given true effect and original sample size
tc=qt(.975,dfori) #critical t-value for p=.05 given original sample size
#generate original studies
#the way I do this is to get the distribution of possible t-values obtained, I start creating a vector with percentiles between 1-power and 99.9%
rp=seq(from=1-powerori+.001, to=.999,by=.001)
#I then find the noncentral t-values associated with each of those percentiles, effectively the noncentral distribution every 1/1000 point
t_publish=qt(p=rp,df=dfori,ncp=sqrt(nori/2)*dori)
#conver that distribution of t-values into effect sizes
d_publish=(2*t_publish)/sqrt(dfori)
#find nrep for powerrep
#for each of the possible values obtained, I compute the sample size a replicator would set seeking the claimed level of power
nrep=mapply(getn,d=d_publish,power=powerrep) #nrep: sample size of replication
#find actual power
#the true level of power is different from the published, that's why actual power is not claimed power, i here compute actual power
real=mapply(getpower,d=dori,n=nrep) #for every sample size run by the replicator, i compute the effective power
m=mean(real) #every sample size in the vector is equally likely (by construction, becuase of line 38 of code above) so the simple mean
#is the expected value of power
p33=sum(real>1/3)/length(real) #this is not reported in the blog, it gives the % of simulations with more power than different thresholds
p40=sum(real>.4)/length(real)
p50=sum(real>.5)/length(real)
p60=sum(real>.6)/length(real)
p80=sum(real>.8)/length(real)
p90=sum(real>.9)/length(real)
cat("Mean power: ",m,"\n")
cat("Prob(power>33%):",p33,"\n")
cat("Prob(power>50%):",p50,"\n")
cat("Prob(power>80%):",p80,"\n")
cat("Prob(power>90%):",p90,"\n")
#return(c(nori,powerori,powerrep,m,p33,p40,p50,p60,p80,p90))
return(real) #so the output is a vector with K elements, where K=(1-power)*999, so if 50% power, there are 499 elements
}
#First figure in post (i don't use the vector r1,r2,r3 I just read the results off the log as the code runs)
r1=realpower(nori=20,powerori=.5,powerrep=.80)
r2=realpower(nori=20,powerori=.5,powerrep=.90)
r3=realpower(nori=20,powerori=.5,powerrep=.95)
#Second figure in post
r4=realpower(nori=20,powerori=.35,powerrep=.80)
r5=realpower(nori=20,powerori=.35,powerrep=.90)
r6=realpower(nori=20,powerori=.35,powerrep=.95)
#Point 3 answering the self-righteous replicator
r7=realpower(nori=20,powerori=.8,powerrep=.80)
#percentage of replications with less than 50% power
sum(r7<.50)/length(r7)