3. Performing Monitored Analyses with `impart` • impart

library(impart)

This vignette demonstrates how to use impart to conduct interim and final analyses in group sequential or information-monitored designs. It builds upon the terminology and concepts introduced in the vignettes on study design and information monitoring, which can be viewed as follows:

# Background reading: study design first, then information monitoring
vignette(topic = "impart_study_design", package = "impart")
vignette(topic = "impart_monitoring", package = "impart")

Setting Up a Monitored Design

The design parameters in this vignette will be the same as in the previous vignettes:

# Universal Study Design Parameters
minimum_difference <- 5 # Effect size: difference in means of 5 or greater
alpha <- 0.05           # Type I error rate
power <- 0.9            # Statistical power
test_sides <- 2         # Direction of alternatives

# Information a single-stage design needs to reach the desired power at the
# fixed Type I error rate.
# NOTE(review): `test_sides` is not passed to this call -- presumably the
# function defaults to a two-sided test; confirm against its documentation.
information_single_stage <- impart::required_information_single_stage(
  delta = minimum_difference,
  alpha = alpha,
  power = power
)

# Group Sequential Design Parameters
# Analyses take place at 50%, 75%, and 100% of the total information
information_rates <- c(0.50, 0.75, 1.00)
# O'Brien-Fleming alpha spending
type_of_design <- "asOF"
# O'Brien-Fleming beta spending
type_beta_spending <- "bsOF"

# Set up group sequential testing procedure.
# The number of sides comes from `test_sides` (defined with the universal
# study design parameters) instead of a repeated literal, so the direction of
# the alternative is specified in exactly one place.
trial_design <-
  rpact::getDesignGroupSequential(
    alpha = alpha,
    beta = 1 - power, # Type II error rate implied by the target power
    sided = test_sides,
    informationRates = information_rates,
    typeOfDesign = type_of_design,
    typeBetaSpending = type_beta_spending,
    bindingFutility = FALSE
  )

# Scale up the single-stage information requirement to account for the
# multiple testing built into the sequential design
information_adaptive <- impart::required_information_sequential(
  information_single_stage = information_single_stage,
  trial_design = trial_design
)

# Create the monitored design from the sequential design and the inflated
# information target
monitored_design <- impart::initialize_monitored_design(
  trial_design = trial_design,
  null_value = 0,
  maximum_sample_size = 280,
  information_target = information_adaptive,
  orthogonalize = TRUE,
  rng_seed_analysis = 54321
)

The data used in this example are also the same as in the previous vignettes: impart::example_1. Specifically, the data have been reverted to the points in the study at which each information threshold was met:

example_1_ia_1: Data for Interim Analysis 1
example_1_ia_2: Data for Interim Analysis 2
example_1_final: Data for Final Analysis

# Snapshot of the study data used for Interim Analysis 1
example_1_ia_1 <- impart::example_1_ia_1

# Preview the first six rows
head(example_1_ia_1, n = 6L)
#>   .id        x_1        x_2        x_3        x_4 tx       .e .r_1     .t_1
#> 1   1  2.0742970  0.1971432 -0.8425884  0.2794844  0  2.24846    1 25.01538
#> 2   2  0.2165473 -0.7384296  0.1315016 -1.2419134  1 11.05565    0 55.05565
#> 3   3  0.8294726  0.4997821  1.6932555 -0.4063889  0 16.96591    0 60.96591
#> 4   4 -1.0206893 -0.2189937 -1.7719120  0.1936013  1 25.13396    1 59.84544
#> 5   5 -0.0417332  0.9282685  0.8078133  0.9317145  0 50.07301    1 75.94952
#> 6   6  0.7275778  1.1756811  0.0226265 -0.2556343  1 50.93935    1 80.29181
#>        y_1 .r_2      .t_2       y_2 .r_3      .t_3      y_3 .r_4     .t_4
#> 1 1.591873    1  56.62636 -4.535711    1  98.51499 13.98543    1 133.0050
#> 2       NA    0  85.05565        NA    0 115.05565       NA    0 145.0556
#> 3       NA    0  90.96591        NA    0 120.96591       NA    0 150.9659
#> 4 1.212620    1  81.58577 -4.533776    1 127.78659 11.17615    1 154.3419
#> 5 8.655326    1 111.21967  6.970372    1 143.00338 17.62329    1 181.4162
#> 6 6.902055    1 114.96781 17.381316    1 151.34249 -2.42570    1 177.3846
#>         y_4
#> 1 -1.320242
#> 2        NA
#> 3        NA
#> 4 -6.629545
#> 5  9.126240
#> 6  3.549977

Interim Analysis 1

Before conducting the first interim analysis, confirm that the information level is above the pre-specified threshold. Because the estimated information level is subject to random variation, a smoothed trajectory can be used to make this assessment.

Analysts will need to specify estimation_function, the function used to compute the estimate, and estimation_arguments, a list of arguments aside from the data needed for this computation. Estimators may have a variance correction factor that can be computed from the analysis parameters: this can be specified using the correction_function argument.

For the standardization estimator computed by standardization(), the corresponding small-sample variance correction is standardization_correction(): this adjusts the variance according to the sample size in each arm and the number of parameters in each regression model.

# Study time for this analysis: the latest enrollment or outcome time in the
# interim dataset, ignoring missing values and rounded up
last_event <-
  ceiling(
    max(
      unlist(example_1_ia_1[, c(".e", ".t_1", ".t_2", ".t_3", ".t_4")]),
      na.rm = TRUE
    )
  )

# Prepare the interim dataset for information monitoring
example_1_ia_1_prepared <- prepare_monitored_study_data(
  data = example_1_ia_1,
  study_time = last_event,
  id_variable = ".id",
  covariates_variables = c("x_1", "x_2", "x_3", "x_4"),
  enrollment_time_variable = ".e",
  treatment_variable = "tx",
  outcome_variables = c("y_1", "y_2", "y_3", "y_4"),
  outcome_time_variables = c(".t_1", ".t_2", ".t_3", ".t_4"),
  # Missingness is observed 1 week after each target study visit
  observe_missing_times = c(30, 60, 90, 120) + 7
)

# Compute the information trajectory from the prepared interim data, using the
# standardization estimator and its variance correction.
# NOTE(review): when this vignette was rendered, this call failed with the
# "replacement has length zero" error shown below, so `data_ia_1_trajectory`
# was never created and the plotting calls that use it fail as well. The cause
# is not visible from this file -- investigate and re-render before publishing.
data_ia_1_trajectory <- 
  information_trajectory(
    prepared_data = example_1_ia_1_prepared,
    monitored_design = monitored_design,
    estimation_function = standardization,
    estimation_arguments =
      list(
        estimand = "difference",
        y0_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
        y1_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
        family = gaussian,
        treatment_column = "tx",
        outcome_indicator_column = ".r_4"
      ),
    correction_function = standardization_correction,
    # NOTE(review): `orthogonalize` is also set when the monitored design is
    # initialized; the failing assignment branches on it -- confirm whether
    # both settings are needed.
    orthogonalize = TRUE,
    n_min = 50,
    n_increment = 3,
    rng_seed = 23456,
    control = monitored_analysis_control()
  )
#> Error in information$information[i] <- if (orthogonalize) {: replacement has length zero

# Print the trajectory (fails here because the object was not created above)
data_ia_1_trajectory
#> Error in eval(expr, envir, enclos): object 'data_ia_1_trajectory' not found

Once the trajectory has been computed, it can be smoothed and plotted:

# Information plotted against `y_4` from the trajectory
plot(
  information ~ y_4,
  data = data_ia_1_trajectory
)
#> Error in eval(m$data, eframe): object 'data_ia_1_trajectory' not found

# Least-squares trend line (solid)
abline(
  lm(
    formula = information ~ y_4,
    data = data_ia_1_trajectory
  ),
  lty = 1
)
#> Error in eval(mf, parent.frame()): object 'data_ia_1_trajectory' not found

# Theil-Sen trend line (dotted). `deming` is an optional package, so the line
# is drawn only when it is installed; otherwise this step is skipped and the
# rest of the plot is still produced.
if (requireNamespace("deming", quietly = TRUE)) {
  abline(
    deming::theilsen(
      formula = information ~ y_4,
      data = data_ia_1_trajectory
    ),
    lty = 3
  )
}
#> Error in eval(temp, parent.frame()): object 'data_ia_1_trajectory' not found

# Information thresholds from the original design (dashed horizontal lines)
abline(
  h = monitored_design$original_design$information_thresholds,
  lty = 2
)
#> Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet

Once the information level has been confirmed to be above the threshold, conducting the analysis is similar to computing information:

# First interim analysis on the interim dataset, starting from the freshly
# initialized monitored design
interim_analysis_1 <- monitored_analysis(
  data = example_1_ia_1,
  monitored_design = monitored_design,
  estimation_function = standardization,
  estimation_arguments = list(
    estimand = "difference",
    y0_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    y1_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    family = gaussian,
    treatment_column = "tx",
    outcome_indicator_column = ".r_4"
  ),
  correction_function = standardization_correction
)

# Decision reached at this analysis
interim_analysis_1$interim_analysis_1$decision
#> [1] "Continue"
# Test statistic alongside the efficacy and futility boundaries
interim_analysis_1$interim_analysis_1$decision_data
#>   test_statistic efficacy  futility
#> 1       2.044012 2.930993 0.4236436
#> 2             NA 2.361025 1.2803287
#> 3             NA 2.014278        NA
# Information fraction at this analysis
interim_analysis_1$interim_analysis_1$information_fraction_orthogonal
#>           estimates
#> estimates 0.5095383
Interim Analysis 2

All subsequent analyses are identical in syntax: a new dataset is provided, and the result of the previous analysis is passed using the monitored_design argument.

# Snapshot of the study data used for Interim Analysis 2
example_1_ia_2 <- impart::example_1_ia_2

# Second interim analysis: the result of the first analysis is supplied as the
# monitored design
interim_analysis_2 <- monitored_analysis(
  data = example_1_ia_2,
  monitored_design = interim_analysis_1,
  estimation_function = standardization,
  estimation_arguments = list(
    estimand = "difference",
    y0_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    y1_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    family = gaussian,
    treatment_column = "tx",
    outcome_indicator_column = ".r_4"
  ),
  correction_function = standardization_correction
)

# Decision reached at this analysis
interim_analysis_2$interim_analysis_2$decision
#> [1] "Continue"
# Test statistics so far alongside the efficacy and futility boundaries
interim_analysis_2$interim_analysis_2$decision_data
#>   test_statistic efficacy  futility
#> 1       2.044012 2.930993 0.4311741
#> 2       2.190967 2.281878 1.4392127
#> 3             NA 2.025798        NA
# Information fractions at the analyses conducted so far
interim_analysis_2$interim_analysis_2$information_fraction_orthogonal
#> [1] 0.5095383 0.7921346

Final Analysis

The syntax is identical for the final analysis:

# Snapshot of the study data used for the Final Analysis
example_1_final <- impart::example_1_final

# Final analysis: the result of the second interim analysis is supplied as the
# monitored design
final_analysis <- monitored_analysis(
  data = example_1_final,
  monitored_design = interim_analysis_2,
  estimation_function = standardization,
  estimation_arguments = list(
    estimand = "difference",
    y0_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    y1_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    family = gaussian,
    treatment_column = "tx",
    outcome_indicator_column = ".r_4"
  ),
  correction_function = standardization_correction
)

# Decision reached at the final analysis
final_analysis$final_analysis$decision
#> [1] "Efficacy: Upper"
# Test statistics from all analyses alongside the efficacy and futility boundaries
final_analysis$final_analysis$decision_data
#>   test_statistic efficacy futility
#> 1       2.044012 2.930993 0.271853
#> 2       2.190967 2.281878 1.199716
#> 3       2.199126 2.048468       NA
# Information fractions at all three analyses
final_analysis$final_analysis$information_fraction_orthogonal
#> [1] 0.5095383 0.7921346 1.0956921