3  Compute weights for NCC without full cohort

KM-type weights can be computed for the NCC sample as long as the time of event/censoring for each subject is available, and the number of subjects at risk can be obtained (or approximated) elsewhere.

Examples in this section use cohort_2 as the underlying cohort, but consider a more realistic scenario where the cohort is no longer available, and subjects at risk are approximated using the number of subjects at risk in a year.

3.1 Load packages and data

library(SamplingDesignTools)
library(survival)
library(Epi) # To draw (non-counter-matched) nested case-control sample
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
# Load the example cohort used throughout this section.
data("cohort_2")

# Number of controls sampled per case (1:2 matching).
n_per_case <- 2

# Draw the nested case-control sample: controls are matched to cases on age
# category and gender, and the covariates x, age and z are carried along.
ncc_2 <- ccwc(
  exit = t, fail = y, controls = n_per_case,
  match = list(age_cat, gender),
  include = list(x, age, z),
  data = cohort_2, silent = TRUE
)

# Label every column after the first four (Set, Map, Time, Fail) with the
# matching variables followed by the included covariates.
names(ncc_2)[-(1:4)] <- c("age_cat", "gender", "x", "age", "z")

# Preview the first four matched sets (1 case + 2 controls each).
head(ncc_2, 12)
##    Set   Map       Time Fail age_cat gender x age z
## 1    1    36  0.2016047    1 (35,45]      1 1 -16 0
## 2    1 14705  0.2016047    0 (35,45]      1 1 -14 1
## 3    1  8990  0.2016047    0 (35,45]      1 1 -12 0
## 4    2   888  4.9218685    1 (35,45]      1 1 -20 1
## 5    2 49875  4.9218685    0 (35,45]      1 1 -15 0
## 6    2 17845  4.9218685    0 (35,45]      1 1 -14 0
## 7    3  1037 21.4651890    1 (35,45]      1 1 -15 1
## 8    3 17756 21.4651890    0 (35,45]      1 1 -11 0
## 9    3 93612 21.4651890    0 (35,45]      1 0 -11 0
## 10   4  1200 15.7674384    1 (35,45]      1 1 -15 1
## 11   4 91281 15.7674384    0 (35,45]      1 0 -11 1
## 12   4 51485 15.7674384    0 (35,45]      1 1 -15 0

3.2 Coarsen time

To match the NCC sample to the coarsened time frame, create a variable t_yr by rounding the exact event/censoring time, t, up to the next integer, and use this coarsened time to compute the number at risk:

# Attach each sampled subject's own event/censoring time, using Map as the
# row index into cohort_2.
ncc_2[["t"]] <- cohort_2[["t"]][ncc_2[["Map"]]]

# Coarsen the exact times to whole years by rounding up, in both data sets.
cohort_2[["t_yr"]] <- ceiling(cohort_2[["t"]])
ncc_2[["t_yr"]] <- ceiling(ncc_2[["t"]])

# Compute the number at risk on the coarsened time scale, within each
# age_cat x gender stratum.
risk_table_coarse <- compute_risk_table(
  cohort = cohort_2, t_name = "t_yr", y_name = "y",
  match_var_names = c("age_cat", "gender")
)
## Start time is 0 for all subjects. Event/censoring time is given by variable t_yr.
## Joining, by = c("age_cat", "gender")
## Joining, by = "strata"
head(risk_table_coarse)
##   t_event n_event n_at_risk   age_cat gender
## 1       1       3      1165 (-Inf,35]      0
## 2       3       2      1045 (-Inf,35]      0
## 3       4       2       983 (-Inf,35]      0
## 4       5       1       928 (-Inf,35]      0
## 5       8       1       806 (-Inf,35]      0
## 6       9       3       772 (-Inf,35]      0

3.3 1:2 NCC based on coarsened time

In reality, the number at risk at each event time may be approximated by, e.g., the size of the relevant sub-population at mid-year. In such cases, users may use the following function to generate a template for risk_table_coarse to fill in:

# Build an empty risk-table template from the NCC sample: one row per
# coarsened event time and gender x age_cat stratum, with n_at_risk left as
# NA to be filled in from external sources.
# csv_file = NULL leaves the CSV path unspecified.
risk_table_template <- prepare_risk_table(
  ncc = ncc_2, t_match_name = "t_yr", y_name = "Fail",
  match_var_names = c("gender", "age_cat"),
  csv_file = NULL
)

# Preview the template.
head(risk_table_template)
##   t_event gender   age_cat n_at_risk
## 1       1      0 (-Inf,35]        NA
## 2       1      0   (35,45]        NA
## 3       1      0   (45,55]        NA
## 4       1      0   (55,65]        NA
## 5       1      0   (65,75]        NA
## 6       1      0 (75, Inf]        NA

This template will be written to a csv if specified by csv_file, making it easier to supply information regarding the cohort that is required for computing the KM-type weights.

Assuming that risk_table_coarse is the approximated risk table obtained from external sources, the KM-type weights for ncc_2 generated in the previous section can be computed using the same compute_km_weights() function and subsequently analyzed using a weighted Cox approach:

# Compute KM-type weights for the NCC sample from the (approximated) coarse
# risk table instead of the full cohort.
# - ncc_2[, -1] drops the first column (Set, the matched-set ID), which must
#   not be passed to compute_km_weights().
# - t is the exact event/censoring time; t_yr is the coarsened time on which
#   the risk table is indexed.
# - n_per_case must equal the number of controls actually drawn per case, so
#   reuse the n_per_case variable that was passed to ccwc() rather than a
#   hard-coded value.
ncc_nodup2 <- compute_km_weights(ncc = ncc_2[, -1], 
                                 risk_table_manual = risk_table_coarse, 
                                 t_name = "t", y_name = "Fail", 
                                 t_match_name = "t_yr",
                                 id_name = "Map", 
                                 match_var_names = c("age_cat", "gender"), 
                                 n_per_case = n_per_case)
## Make sure input ncc does not include ID of matched sets. Failing to do so results in Errors.
## Joining, by = c("age_cat", "gender")
## Joining, by = c(".t_event", "age_cat", "gender", "n_event")
## Joining, by = c("age_cat", "gender")
## There are 8003 unique subjects (identified by Map) in the input ncc with 8319
## rows, therefore the return data only has 8003 rows.
# Weighted Cox model on the de-duplicated NCC data, weighting each subject by
# km_weight and requesting robust standard errors.
# NOTE(review): km_weight is presumably the weight column added by
# compute_km_weights() -- confirm.
m_cox_ncc_2_v2 <- coxph(
  Surv(t, Fail) ~ x * z + age + gender,
  data = ncc_nodup2,
  weights = km_weight,
  robust = TRUE
)

3.4 Compare results

Compare with results when the full cohort is available:

# Stack the coefficient tables of the three fits (full cohort, NCC weighted
# Cox, NCC weighted Cox with approximated weights).
# `$coefficients` is spelled out in full instead of relying on `$` partial
# matching of `coef`.
# Column 3 is dropped from the two NCC tables so that their columns line up
# with the full-cohort table; rbind() keeps the column names of the first
# table.
# NOTE(review): for a robust fit, column 3 is presumably the naive se(coef),
# so the robust SE ends up under the "se(coef)" header -- confirm against
# summary.coxph output.
results_3 <- rbind(summary(m_cox_cohort_2)$coefficients, 
                   summary(m_cox_ncc_2)$coefficients[, -3], 
                   summary(m_cox_ncc_2_v2)$coefficients[, -3])

# Move the term names from row names into an ordinary column.
results_3 <- data.frame(Variable = rownames(results_3), results_3, 
                        check.names = FALSE)
rownames(results_3) <- NULL

# Render the comparison table. Each fit contributes five rows; only the first
# row of each group carries the data label.
# The true HRs are listed in the order x, z, age, gender, x:z. Age and gender
# share the same true HR, so the order of these two rows within a fit does
# not affect the column.
kable(data.frame(
  Data = c("Full cohort", rep("", 4), 
           "NCC (weighted Cox)", rep("", 4),
           "NCC (weighted Cox, approximated weights)", rep("", 4)), 
  Variable = results_3$Variable, 
  `True HR` = rep(c(1.5, 4, 1.01, 1.01, 2), 3),
  `Estimated HR` = results_3[, "exp(coef)"], 
  `SE of log(HR)` = results_3[, "se(coef)"], 
  `p-value` = results_3[, "Pr(>|z|)"], check.names = FALSE
), digits = c(0, 0, 2, 2, 3, 3))
Data Variable True HR Estimated HR SE of log(HR) p-value
Full cohort x 1.50 1.47 0.110 0.001
z 4.00 4.46 0.126 0.000
age 1.01 1.01 0.002 0.000
gender 1.01 0.92 0.038 0.024
x:z 2.00 1.90 0.135 0.000
NCC (weighted Cox) x 1.50 1.36 0.117 0.009
z 4.00 4.68 0.143 0.000
gender 1.01 0.91 0.054 0.081
age 1.01 1.01 0.003 0.003
x:z 2.00 1.80 0.154 0.000
NCC (weighted Cox, approximated weights) x 1.50 1.31 0.110 0.014
z 4.00 3.85 0.127 0.000
age 1.01 1.00 0.002 0.698
gender 1.01 1.00 0.038 0.979
x:z 2.00 1.41 0.136 0.012