Skip to contents

This vignette demonstrates how to use impart to conduct interim and final analyses in group sequential or information monitored designs. It builds upon the terminology and concepts introduced in the vignettes on study design and information monitoring. To see all available vignettes in impart, use the vignette() function:

# List the vignettes available in the impart package
vignette(package = "impart")
Title Item

Setting Up a Monitored Design:

The design parameters in this vignette will be the same as in the previous vignettes:

# Universal study design parameters

# Effect size: difference in means of 5 or greater
minimum_difference <- 5
# Type I error rate
alpha <- 0.05
# Statistical power
power <- 0.9
# Direction of alternatives (number of sides of the test)
test_sides <- 2

# Information a single-stage design requires to reach the desired power at
# the fixed Type I error rate, for the minimum difference of interest
information_single_stage <- impart::required_information_single_stage(
  delta = minimum_difference,
  alpha = alpha,
  power = power
)

# Group sequential design parameters

# Analyses at 50%, 75%, and 100% of the total information
information_rates <- c(0.50, 0.75, 1.00)
# O'Brien-Fleming alpha spending
type_of_design <- "asOF"
# O'Brien-Fleming beta spending
type_beta_spending <- "bsOF"

# Set up the group sequential testing procedure.
# `sided` is taken from `test_sides` rather than repeated as a literal, so the
# direction of the alternative is specified in one place together with the
# other universal design parameters.
trial_design <-
  rpact::getDesignGroupSequential(
    alpha = alpha,
    beta = 1 - power, # Type II error rate implied by the desired power
    sided = test_sides,
    informationRates = information_rates,
    typeOfDesign = type_of_design,
    typeBetaSpending = type_beta_spending,
    bindingFutility = FALSE
  )

# Inflate the single-stage information level to account for testing at
# multiple analyses under the group sequential design
information_adaptive <- impart::required_information_sequential(
  information_single_stage = information_single_stage,
  trial_design = trial_design
)

# Initialize the monitored design.
# The call is namespace-qualified, like the other package calls in this setup
# code, so that the setup runs without first attaching impart.
monitored_design <-
  impart::initialize_monitored_design(
    trial_design = trial_design,
    null_value = 0,
    maximum_sample_size = 280,
    information_target = information_adaptive,
    orthogonalize = TRUE,
    rng_seed_analysis = 54321
  )

The data used in this example will also be the same: impart::example_1. Specifically, the data has been reverted to particular points in the study at which the information thresholds have been met:

  • example_1_ia_1: Data for Interim Analysis 1
  • example_1_ia_2: Data for Interim Analysis 2
  • example_1_final: Data for Final Analysis
# Snapshot of the example data used for interim analysis 1
example_1_ia_1 <- impart::example_1_ia_1

# Preview the first six rows
head(x = example_1_ia_1, n = 6L)
#>   .id        x_1        x_2        x_3        x_4 tx       .e .r_1     .t_1
#> 1   1  2.0742970  0.1971432 -0.8425884  0.2794844  0  2.24846    1 25.01538
#> 2   2  0.2165473 -0.7384296  0.1315016 -1.2419134  1 11.05565    0 55.05565
#> 3   3  0.8294726  0.4997821  1.6932555 -0.4063889  0 16.96591    0 60.96591
#> 4   4 -1.0206893 -0.2189937 -1.7719120  0.1936013  1 25.13396    1 59.84544
#> 5   5 -0.0417332  0.9282685  0.8078133  0.9317145  0 50.07301    1 75.94952
#> 6   6  0.7275778  1.1756811  0.0226265 -0.2556343  1 50.93935    1 80.29181
#>        y_1 .r_2      .t_2       y_2 .r_3      .t_3      y_3 .r_4     .t_4
#> 1 1.591873    1  56.62636 -4.535711    1  98.51499 13.98543    1 133.0050
#> 2       NA    0  85.05565        NA    0 115.05565       NA    0 145.0556
#> 3       NA    0  90.96591        NA    0 120.96591       NA    0 150.9659
#> 4 1.212620    1  81.58577 -4.533776    1 127.78659 11.17615    1 154.3419
#> 5 8.655326    1 111.21967  6.970372    1 143.00338 17.62329    1 181.4162
#> 6 6.902055    1 114.96781 17.381316    1 151.34249 -2.42570    1 177.3846
#>         y_4
#> 1 -1.320242
#> 2        NA
#> 3        NA
#> 4 -6.629545
#> 5  9.126240
#> 6  3.549977

Interim Analysis 1

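Before conducting the first interim analysis, the information level should be above the pre-specified threshold. Because the estimated information level is subject to random variation as data accrue, a smoothed trajectory can be used to mitigate this variation when judging whether the threshold has been reached.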

Analysts will need to specify estimation_function, the function used to compute the estimate, and estimation_arguments, a list of arguments aside from the data needed for this computation. Estimators may have a variance correction factor that can be computed from the analysis parameters: this can be specified using the correction_function argument.

For the standardization estimator computed by standardization(), the corresponding small-sample variance correction is standardization_correction(): this adjusts the variance according to the sample size in each arm and the number of parameters in each regression model.

# Time of the last event: the largest enrollment or outcome time in the data,
# ignoring missing values, rounded up to a whole number
last_event <-
  ceiling(
    max(
      unlist(example_1_ia_1[, c(".e", ".t_1", ".t_2", ".t_3", ".t_4")]),
      na.rm = TRUE
    )
  )

# Prepare the interim data for monitoring, as of study time `last_event`
example_1_ia_1_prepared <- prepare_monitored_study_data(
  data = example_1_ia_1,
  study_time = last_event,
  id_variable = ".id",
  covariates_variables = c("x_1", "x_2", "x_3", "x_4"),
  enrollment_time_variable = ".e",
  treatment_variable = "tx",
  outcome_variables = c("y_1", "y_2", "y_3", "y_4"),
  outcome_time_variables = c(".t_1", ".t_2", ".t_3", ".t_4"),
  # Observe missingness 1 week after target study visit
  observe_missing_times = 7 + c(30, 60, 90, 120)
)

# Compute the trajectory of the information level from the prepared interim
# data, using the standardization estimator and its variance correction
data_ia_1_trajectory <- information_trajectory(
  prepared_data = example_1_ia_1_prepared,
  monitored_design = monitored_design,
  estimation_function = standardization,
  estimation_arguments = list(
    estimand = "difference",
    y0_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    y1_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    family = gaussian,
    treatment_column = "tx",
    outcome_indicator_column = ".r_4"
  ),
  correction_function = standardization_correction,
  orthogonalize = TRUE,
  n_min = 50,
  n_increment = 3,
  rng_seed = 23456,
  control = monitored_analysis_control()
)

# Print the computed trajectory
data_ia_1_trajectory
#>       times randomization y_1 y_2 y_3 y_4 information information_lag_1
#> 1  551.8682            90  69  57  53  50   0.1380683                NA
#> 2  555.9573            92  70  60  54  50   0.1339839         0.1380683
#> 3  595.6751            96  77  63  58  53   0.1425721         0.1339839
#> 4  630.7461           100  79  70  64  56   0.1482429         0.1425721
#> 5  634.3194           100  79  70  64  59   0.1711911         0.1482429
#> 6  658.6659           109  82  72  69  62   0.1696971         0.1711911
#> 7  662.6718           110  82  72  69  65   0.1665782         0.1696971
#> 8  673.9776           113  87  74  70  65   0.1633559         0.1665782
#> 9  701.1256           117  96  80  71  68   0.1805331         0.1633559
#> 10 717.5104           119  96  82  73  71   0.2098995         0.1805331
#> 11 753.7782           126 101  91  83  74   0.2179450         0.2098995
#> 12 758.3298           128 101  91  85  77   0.2021433         0.2179450
#> 13 778.4102           131 105  94  88  80   0.2070840         0.2021433
#> 14 781.6646           131 106  94  89  83   0.2181654         0.2070840
#>    information_change information_pct_change information_fraction
#> 1                  NA                     NA            0.3028952
#> 2        -0.004084420             -3.0484414            0.2939348
#> 3         0.008588275              6.0238099            0.3127758
#> 4         0.005670759              3.8253158            0.3252163
#> 5         0.022948176             13.4050071            0.3755602
#> 6        -0.001493985             -0.8803831            0.3722827
#> 7        -0.003118900             -1.8723338            0.3654404
#> 8        -0.003222324             -1.9725794            0.3583713
#> 9         0.017177195              9.5147089            0.3960547
#> 10        0.029366413             13.9907033            0.4604790
#> 11        0.008045561              3.6915549            0.4781294
#> 12       -0.015801736             -7.8170963            0.4434634
#> 13        0.004940738              2.3858613            0.4543024
#> 14        0.011081351              5.0793349            0.4786128

Once the trajectory has been computed, it can be smoothed and plotted:

# Scatter plot of the information level against the count of observed
# y_4 outcomes in the trajectory
plot(
  information ~ y_4,
  data = data_ia_1_trajectory
)

# Solid line: ordinary least squares fit
abline(
  lm(
    formula = information ~ y_4,
    data = data_ia_1_trajectory
  ),
  lty = 1
)

# Dotted line: Theil-Sen fit. The `deming` package is optional, so the line is
# skipped with a message, instead of stopping with an error, when `deming` is
# not installed.
if (requireNamespace("deming", quietly = TRUE)) {
  abline(
    deming::theilsen(
      formula = information ~ y_4,
      data = data_ia_1_trajectory
    ),
    lty = 3
  )
} else {
  message("Package 'deming' is not installed: skipping the Theil-Sen line.")
}

# Dashed horizontal lines: information thresholds stored in the design
abline(
  h = monitored_design$original_design$information_thresholds,
  lty = 2
)

Once the information level has been confirmed to be above the threshold, conducting the analysis is similar to computing information:

# Interim analysis 1: same estimator, estimator arguments, and variance
# correction as used for the information trajectory
interim_analysis_1 <- monitored_analysis(
  data = example_1_ia_1,
  monitored_design = monitored_design,
  estimation_function = standardization,
  estimation_arguments = list(
    estimand = "difference",
    y0_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    y1_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    family = gaussian,
    treatment_column = "tx",
    outcome_indicator_column = ".r_4"
  ),
  correction_function = standardization_correction
)

# Decision recorded for interim analysis 1
interim_analysis_1$interim_analysis_1$decision
#> [1] "Continue"
# Test statistic with the efficacy and futility columns; rows for analyses
# not yet conducted have an NA test statistic
interim_analysis_1$interim_analysis_1$decision_data
#>   test_statistic efficacy  futility
#> 1       2.044012 2.930993 0.4236436
#> 2             NA 2.361025 1.2803287
#> 3             NA 2.014278        NA
# Information fraction stored under `information_fraction_orthogonal`
interim_analysis_1$interim_analysis_1$information_fraction_orthogonal
#>           estimates
#> estimates 0.5095383

Interim Analysis 2

All subsequent analyses are identical in syntax: a new dataset is provided, and the result of the previous analysis is passed using the monitored_design argument.

# Snapshot of the example data used for interim analysis 2
example_1_ia_2 <- impart::example_1_ia_2

# Interim analysis 2: the result of interim analysis 1 is supplied as the
# monitored design
interim_analysis_2 <- monitored_analysis(
  data = example_1_ia_2,
  monitored_design = interim_analysis_1,
  estimation_function = standardization,
  estimation_arguments = list(
    estimand = "difference",
    y0_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    y1_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    family = gaussian,
    treatment_column = "tx",
    outcome_indicator_column = ".r_4"
  ),
  correction_function = standardization_correction
)

# Decision recorded for interim analysis 2
interim_analysis_2$interim_analysis_2$decision
#> [1] "Continue"
# Test statistic with the efficacy and futility columns; the final analysis
# has not been conducted yet, so its test statistic is NA
interim_analysis_2$interim_analysis_2$decision_data
#>   test_statistic efficacy  futility
#> 1       2.044012 2.930993 0.4311741
#> 2       2.190967 2.281878 1.4392127
#> 3             NA 2.025798        NA
# Information fractions stored under `information_fraction_orthogonal`,
# one value per analysis conducted so far
interim_analysis_2$interim_analysis_2$information_fraction_orthogonal
#> [1] 0.5095383 0.7921346

Final Analysis

The syntax is identical for the final analysis:

# Snapshot of the example data used for the final analysis
example_1_final <- impart::example_1_final

# Final analysis: the result of interim analysis 2 is supplied as the
# monitored design
final_analysis <- monitored_analysis(
  data = example_1_final,
  monitored_design = interim_analysis_2,
  estimation_function = standardization,
  estimation_arguments = list(
    estimand = "difference",
    y0_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    y1_formula = y_4 ~ x_1 + x_2 + x_3 + x_4,
    family = gaussian,
    treatment_column = "tx",
    outcome_indicator_column = ".r_4"
  ),
  correction_function = standardization_correction
)

# Decision recorded for the final analysis
final_analysis$final_analysis$decision
#> [1] "Efficacy: Upper"
# Test statistic with the efficacy and futility columns for all three analyses
final_analysis$final_analysis$decision_data
#>   test_statistic efficacy futility
#> 1       2.044012 2.930993 0.271853
#> 2       2.190967 2.281878 1.199716
#> 3       2.199126 2.048468       NA
# Information fractions stored under `information_fraction_orthogonal`,
# one value per analysis
final_analysis$final_analysis$information_fraction_orthogonal
#> [1] 0.5095383 0.7921346 1.0956921