I am running a power simulation similar to the one described in this paper using the following code: .
It involves simulating a 2-arm randomised controlled trial to determine whether the sample size and power needed to do the trial is less when using the 'Fast Tap', 'Hold', 'Type' and 'Walk' tests is less than when using the 'mJOA' test.
# Initialize covariance matrix and results storage
Sigma <- array(0, dim=rep(n_timepoints, 2))
result <- matrix(NA, nrow=n_correls*n_timepoints, ncol=6)
colnames(result) <- c(
'rho',
'endpoint',
'test',
'power',
'n_critical',
''
)
# Define simulation function
sim <- function(o_mean, o_sd, o_effect_sizes, N, test=factor(c("t","ANCOVA"))) {
while (
if (test=="t") { all(power1>beta) }
else if (test=="ANCOVA") { all(power2>beta) }
) {
# Initialise empty objects to store summary stats for each simu
C0_mean <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control means
I0_mean <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment means
C0_sd <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control sd's
I0_sd <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment sd's
C0_ci.lb <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control CI_lb
I0_ci.lb <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment CI_lb
C0_ci.ub <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control CI_ub
I0_ci.ub <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment CI_ub
p <- matrix(NA, nrow=simunum, ncol=n_timepoints) # p values
for (simu in 1:simunum) {
# Simulate all endpoints of a trial
mu_C0 <- rep(o_mean, n_timepoints) # control arm intercepts
mu_I0 <- mu_C0 + o_effect_sizes # treatment arm intercepts
C0 <- mvrnorm(N-m, mu_C0, Sigma) # control arm simulated outcomes
I0 <- mvrnorm(N-m, mu_I0, Sigma) # treatment arm simulated outcomes
for (j in 1:n_timepoints) {
# Method 1: T-score
if (test=="t") {
df <- (N-m) + (N-m) - 2
S_pooled <- ( sd(C0[,j])^2*(N-m-1) + sd(I0[,j])^2*(N-m-1) ) / df
sd_pooled <- sqrt(S_pooled)
diff <- mean(C0[,j]) - mean(I0[,j])
t <- diff / sqrt( sd_pooled*(1/(N-m)+1/(N-m)) )
P <- pt(-abs(t), df) + (1 - pt(abs(t), df))
}
# Method 2: Linear model (ANCOVA)
else if (test=="ANCOVA") {
I0.bl <- mvrnorm(N-m, o_mean, o_sd)
C0.bl <- mvrnorm(N-m, o_mean, o_sd)
endpoint <- c( I0[,j], C0[,j] )
baseline <- c( I0.bl[,1], C0.bl[,1] )
group <- factor(rep( c("Treatment","Control"), each=(N-m)) )
model <- lm(endpoint ~ baseline + group)
P <- anova(model)["group", "Pr(>F)"]
}
# Store trial stats
C0_mean[simu, j] <- mean( C0[,j] )
I0_mean[simu, j] <- mean( I0[,j] )
C0_sd[simu, j] <- sd( C0[,j] )
I0_sd[simu, j] <- sd( I0[,j] )
C0_ci.lb[simu, j] <- quantile( C0[,j], probs=0.025 )
I0_ci.lb[simu, j] <- quantile( I0[,j], probs=0.025 )
C0_ci.ub[simu, j] <- quantile( C0[,j], probs=0.975 )
I0_ci.ub[simu, j] <- quantile( I0[,j], probs=0.975 )
p[simu, j] <- P
}
}
# Calculate power
print(N-m)
if (test=="t") {
# Multiple primary endpoints
power1 <- colSums(p < alpha) / simunum
if (all(power1>beta)) { m <- m+1 ; power <- power1 }
print(power1)
}
else if (test=="ANCOVA") {
# Multiple primary endpoints
power2 <- colSums(p < alpha) / simunum
if (all(power2>beta)) { m <- m+1 ; power <- power2 }
print(power2)
}
}
# Store global simulation stats
for (j in 1:n_timepoints) {
result[3*(k-1)+j,] <- c(
rho,
timepoints[j],
test,
power[j],
N-m+1,
NA
)
}
}
result = as.data.frame(result)
print(result)
))
}
# Now I am calling the simulation function for each of the tests and want to store the resulting sample size and power values
# T-score: Head-on comparisons not adjusting for baseline or repeated measures
mJOA_result2 <- sim(mJOA_BL_mean, mJOA_BL_sd, mJOA_effect_sizes, 18, "t") # Nc=17
I would like to now do the following: add modifications to the code so that all the power1 or power2 values (depending on whether test == "t" or "ANCOVA") up until beta exceeds 0.8 (the while loop is broken) are stored as a list in a column called 'power_values' in the 'result' dataframe (along with all the other existing outputs). I would like to store the corresponding sample size (N-m) values as a list in a column called 'sample_sizes' in the result dataframe.
At the moment, the simulation function only stores the last power and corresponding sample size, but I would like all the powers and sample sizes to be stored as lists or vectors in 2 additional columns called 'power_values' and 'sample_sizes', respectively.