3 Compute weights for NCC without full cohort

KM-type weights can be computed for the NCC sample as long the time of event/censoring for each subject is available, and the number of subjects at risk can be obtained (or approximated) elsewhere.

Examples in this section use cohort_2 as the underlying cohort, but consider a more realistic scenario where the cohort is no longer available, and subjects at risk are approximated using the number of subjects at risk in a year.

3.1 Load packages and data

library(SamplingDesignTools)
library(survival)
library(Epi) # To draw (non-counter-matched) nested case-control sample
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
data("cohort_2")
n_per_case <- 2
ncc_2 <- ccwc(exit = t, fail = y, controls = n_per_case, 
              match = list(age_cat, gender), include = list(x, age, z), 
              data = cohort_2, silent = TRUE)
names(ncc_2)[-(1:4)] <- c("age_cat", "gender", "x", "age", "z")
head(ncc_2, 12)
##    Set   Map       Time Fail age_cat gender x age z
## 1    1    36  0.2016047    1 (35,45]      1 1 -16 0
## 2    1 14705  0.2016047    0 (35,45]      1 1 -14 1
## 3    1  8990  0.2016047    0 (35,45]      1 1 -12 0
## 4    2   888  4.9218685    1 (35,45]      1 1 -20 1
## 5    2 49875  4.9218685    0 (35,45]      1 1 -15 0
## 6    2 17845  4.9218685    0 (35,45]      1 1 -14 0
## 7    3  1037 21.4651890    1 (35,45]      1 1 -15 1
## 8    3 17756 21.4651890    0 (35,45]      1 1 -11 0
## 9    3 93612 21.4651890    0 (35,45]      1 0 -11 0
## 10   4  1200 15.7674384    1 (35,45]      1 1 -15 1
## 11   4 91281 15.7674384    0 (35,45]      1 0 -11 1
## 12   4 51485 15.7674384    0 (35,45]      1 1 -15 0

3.2 Coarsen time

To match NCC sample to the coarsened time frame, create a variable t_yr by rounding the exact event/censoring time, t to the next integer, and use this coarsened time to compute the number at risk:

ncc_2$t <- cohort_2$t[ncc_2$Map]
ncc_2$t_yr <- ceiling(ncc_2$t)
cohort_2$t_yr <- ceiling(cohort_2$t)
risk_table_coarse <- compute_risk_table(cohort = cohort_2, t_name = "t_yr", 
                                        y_name = "y", 
                                        match_var_names = c("age_cat", "gender"))
## Start time is 0 for all subjects. Event/censoring time is given by variable t_yr.
## Joining, by = c("age_cat", "gender")
## Joining, by = "strata"
head(risk_table_coarse)
##   t_event n_event n_at_risk   age_cat gender
## 1       1       3      1165 (-Inf,35]      0
## 2       3       2      1045 (-Inf,35]      0
## 3       4       2       983 (-Inf,35]      0
## 4       5       1       928 (-Inf,35]      0
## 5       8       1       806 (-Inf,35]      0
## 6       9       3       772 (-Inf,35]      0

3.3 1:2 NCC based on coarsened time

In reality, number at risk at each event time may be approximated by, e.g., size of the relevant sub-population at mid-year. In such case, user may use the following function to generate a template for risk_table_coarse to fill in:

risk_table_template <- prepare_risk_table(ncc = ncc_2, t_match_name = "t_yr", 
                                          y_name = "Fail", 
                                          match_var_names = c("gender", "age_cat"), 
                                          csv_file = NULL)
head(risk_table_template)
##   t_event gender   age_cat n_at_risk
## 1       1      0 (-Inf,35]        NA
## 2       1      0   (35,45]        NA
## 3       1      0   (45,55]        NA
## 4       1      0   (55,65]        NA
## 5       1      0   (65,75]        NA
## 6       1      0 (75, Inf]        NA

This template will be written to a csv if specified by csv_file, making it easier to supply information regarding the cohort that is required for computing the KM-type weights.

Assuming that risk_table_coarse is the approximated risk table obtained from external sources, the KM-type weights for ncc_2 generated in the previous section can be computed using the same compute_km_weights() function and subsequently analyzed using a weighted Cox approach:

ncc_nodup2 <- compute_km_weights(ncc = ncc_2[, -1], 
                                 risk_table_manual = risk_table_coarse, 
                                 t_name = "t", y_name = "Fail", 
                                 t_match_name = "t_yr",
                                 id_name = "Map", 
                                 match_var_names = c("age_cat", "gender"), 
                                 n_per_case = 5)
## Make sure input ncc does not include ID of matched sets. Failing to do so results in Errors.
## Joining, by = c("age_cat", "gender")
## Joining, by = c(".t_event", "age_cat", "gender", "n_event")
## Joining, by = c("age_cat", "gender")
## There are 8003 unique subjects (identified by Map) in the input ncc with 8319
## rows, therefore the return data only has 8003 rows.
m_cox_ncc_2_v2 <- coxph(Surv(t, Fail) ~ x * z + age + gender, 
                        data = ncc_nodup2, weights = km_weight, robust = TRUE)

3.4 Compare results

Compare with results when the full cohort is available:

results_3 <- rbind(summary(m_cox_cohort_2)$coef, 
                   summary(m_cox_ncc_2)$coef[, -3], 
                   summary(m_cox_ncc_2_v2)$coef[, -3])
results_3 <- data.frame(Variable = rownames(results_3), results_3, 
                        check.names = FALSE)
rownames(results_3) <- NULL
kable(data.frame(
  Data = c("Full cohort", rep("", 4), 
           "NCC (weighted Cox)", rep("", 4),
           "NCC (weighted Cox, approximated weights)", rep("", 4)), 
  Variable = results_3$Variable, 
  `True HR` = rep(c(1.5, 4, 1.01, 1.01, 2), 3),
  `Estimated HR` = results_3[, "exp(coef)"], 
  `SE of log(HR)` = results_3[, "se(coef)"], 
  `p-value` = results_3[, "Pr(>|z|)"], check.names = FALSE
), digits = c(0, 0, 2, 2, 3, 3))

Data	Variable	True HR	Estimated HR	SE of log(HR)	p-value
Full cohort	x	1.50	1.47	0.110	0.001
	z	4.00	4.46	0.126	0.000
	age	1.01	1.01	0.002	0.000
	gender	1.01	0.92	0.038	0.024
	x:z	2.00	1.90	0.135	0.000
NCC (weighted Cox)	x	1.50	1.36	0.117	0.009
	z	4.00	4.68	0.143	0.000
	gender	1.01	0.91	0.054	0.081
	age	1.01	1.01	0.003	0.003
	x:z	2.00	1.80	0.154	0.000
NCC (weighted Cox, approximated weights)	x	1.50	1.31	0.110	0.014
	z	4.00	3.85	0.127	0.000
	age	1.01	1.00	0.002	0.698
	gender	1.01	1.00	0.038	0.979
	x:z	2.00	1.41	0.136	0.012