1  Drawing counter-matched sample

This section describes how to use draw_ncc_cm() to draw counter-matched nested case-control (NCC) samples, and subsequently how to analyze the sample using a weighted conditional likelihood approach.

The examples in this section use cohort_1 as the underlying cohort and, purely for illustration, treat the continuous variable age as the exposure of interest.

Reference:

1.1 Load packages and data

library(SamplingDesignTools)
library(survival)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
data("cohort_1")

1.2 Counter-match on binary surrogate

This analysis dichotomizes age at 50 years (age_bin is 1 if age is below 50 and 0 otherwise) and uses the result as a surrogate for the actual exposure to draw a counter-matched NCC sample with 1 control per case.

# Binary surrogate for the exposure: 1 if age is below 50 years, 0 otherwise.
cohort_1[["age_bin"]] <- as.numeric(cohort_1[["age"]] < 50)
# Distribution of the surrogate in the full cohort.
table(cohort_1[["age_bin"]])
## 
##    0    1 
## 7181 2819
# Draw the counter-matched NCC sample, counter-matching on the binary
# surrogate `age_bin`. `age` and `gender` are carried along from the cohort
# because the analysis model below needs them.
ncc_cm_bin <- draw_ncc_cm(
  cohort = cohort_1,
  y_name = "y",
  t_name = "t",
  match_var_name = "age_bin",
  include_var_name = c("age", "gender"),
  ml = 1
)
# Show the first six rows of the sampled data.
head(ncc_cm_bin, n = 6L)
##      set row_id         t n_at_risk n_sampled weight y age_bin age gender
## 11    11     11  6.952504      4866         1   4866 1       0  60      0
## 6682  11   6682  6.952504      2002         1   2002 0       1  41      1
## 29    29     29 11.157250      3802         1   3802 1       0  64      0
## 6543  29   6543 11.157250      1594         1   1594 0       1  40      0
## 58    58     58 24.578458      1860         1   1860 1       0  63      0
## 1434  58   1434 24.578458       810         1    810 0       1  43      1
# Cross-tabulate the surrogate (rows) against case status (columns) in the
# sampled data.
table(ncc_cm_bin[["age_bin"]], ncc_cm_bin[["y"]])
##    
##       0   1
##   0  32 550
##   1 550  32
# Weighted conditional logistic regression on the counter-matched sample:
# each sampled set is a stratum and log(weight) enters as an offset.
m_clogit_bin <- clogit(
  formula = y ~ age + gender + strata(set) + offset(log(weight)),
  data = ncc_cm_bin
)

1.3 Counter-match on categorical surrogate

This analysis divides age into 4 categories (cut at 40, 50 and 60 years) to create a categorical surrogate for the exposure, and then draws a counter-matched NCC sample with one control from each age category other than the case's own, i.e., 3 controls per case in total.

# Four-level categorical surrogate for the exposure, with cut points at 40,
# 50 and 60 years. The outer breaks are infinite so that every age falls into
# one of the categories, which are labelled 1 to 4.
cohort_1[["age_quart"]] <- cut(
  cohort_1[["age"]],
  breaks = c(-Inf, 40, 50, 60, Inf),
  labels = 1:4,
  include.lowest = TRUE
)
# Distribution of the surrogate in the full cohort.
table(cohort_1[["age_quart"]])
## 
##    1    2    3    4 
##  712 2453 3908 2927
# Draw the counter-matched NCC sample, counter-matching on the four-level
# surrogate `age_quart`. `age` and `gender` are carried along from the cohort
# because the analysis model below needs them.
ncc_cm_quart <- draw_ncc_cm(
  cohort = cohort_1,
  y_name = "y",
  t_name = "t",
  match_var_name = "age_quart",
  include_var_name = c("age", "gender"),
  ml = 1
)
# Show the first twenty rows of the sampled data.
head(ncc_cm_quart, n = 20L)
##      set row_id         t n_at_risk n_sampled weight y age_quart age gender
## 5123  11   5123  6.952504       512         1    512 0         1  39      1
## 1050  11   1050  6.952504      1756         1   1756 0         2  42      0
## 11    11     11  6.952504      2672         1   2672 1         3  60      0
## 9857  11   9857  6.952504      1928         1   1928 0         4  63      0
## 4445  29   4445 11.157250       413         1    413 0         1  28      1
## 3770  29   3770 11.157250      1392         1   1392 0         2  42      1
## 4913  29   4913 11.157250      2116         1   2116 0         3  57      0
## 29    29     29 11.157250      1475         1   1475 1         4  64      0
## 9234  58   9234 24.578458       211         1    211 0         1  36      1
## 7715  58   7715 24.578458       704         1    704 0         2  50      1
## 3727  58   3727 24.578458      1066         1   1066 0         3  51      0
## 58    58     58 24.578458       689         1    689 1         4  63      0
## 7369  67   7369  1.434794       665         1    665 0         1  25      1
## 9614  67   9614  1.434794      2300         1   2300 0         2  43      1
## 1404  67   1404  1.434794      3607         1   3607 0         3  60      0
## 67    67     67  1.434794      2683         1   2683 1         4  65      1
## 170   89    170 19.254871       269         1    269 0         1  39      1
## 3947  89   3947 19.254871       933         1    933 0         2  50      0
## 8190  89   8190 19.254871      1395         1   1395 0         3  53      0
## 89    89     89 19.254871       922         1    922 1         4  64      1
# Count the sampled subjects in each category of the surrogate.
table(ncc_cm_quart[["age_quart"]])
## 
##   1   2   3   4 
## 582 582 582 582
# Weighted conditional logistic regression on the counter-matched sample:
# each sampled set is a stratum and log(weight) enters as an offset.
m_clogit_quart <- clogit(
  formula = y ~ age + gender + strata(set) + offset(log(weight)),
  data = ncc_cm_quart
)

1.4 Compare results

# Reference fit: Cox proportional hazards model on the full cohort, using the
# same covariates (age, gender) as the conditional logistic models.
# No code shown in this section creates `m_cox_cohort_1`, so it is fitted here
# unless an object of that name already exists (e.g. from an earlier chunk).
if (!exists("m_cox_cohort_1")) {
  m_cox_cohort_1 <- coxph(Surv(t, y) ~ age + gender, data = cohort_1)
}

# Stack the coefficient tables of the three models, in the order full cohort,
# binary counter-matching, four-category counter-matching. `coefficients` is
# spelled out in full instead of relying on partial matching of `$coef`.
results_1 <- rbind(summary(m_cox_cohort_1)$coefficients, 
                   summary(m_clogit_bin)$coefficients, 
                   summary(m_clogit_quart)$coefficients)
# Drop the variable names as row names; the `Variable` column below replaces
# them with display labels.
rownames(results_1) <- NULL
# Render the comparison table. `check.names = FALSE` keeps the column headers
# containing spaces and parentheses as written; `digits` gives the number of
# decimals per column.
kable(data.frame(
  Data = c("Full cohort", "", 
           "NCC-CM, 1 control per case", "", 
           "NCC-CM, 3 controls per case", ""), 
  Variable = rep(c("Age", "Male"), 3), 
  `True HR` = rep(c(1.1, 2), 3),
  `Estimated HR` = results_1[, "exp(coef)"], 
  `SE of log(HR)` = results_1[, "se(coef)"], 
  `p-value` = results_1[, "Pr(>|z|)"], check.names = FALSE
), digits = c(0, 0, 1, 2, 3, 3))
Data Variable True HR Estimated HR SE of log(HR) p-value
Full cohort Age 1.1 1.11 0.004 0
Male 2.0 2.18 0.088 0
NCC-CM, 1 control per case Age 1.1 1.13 0.013 0
Male 2.0 3.21 0.303 0
NCC-CM, 3 controls per case Age 1.1 1.11 0.006 0
Male 2.0 2.56 0.133 0