library(SamplingDesignTools)
library(survival)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
data("cohort_1")
1 Drawing counter-matched sample
This section describes how to use draw_ncc_cm()
to draw counter-matched nested case-control (NCC) samples, and subsequently how to analyze the sample using a weighted conditional likelihood approach.
Examples in this section use cohort_1
as the underlying cohort and, purely for illustrative purposes, treat the continuous variable age as the exposure of interest.
Reference:
- Langholz B. Counter‐Matching. Wiley StatsRef: Statistics Reference Online. 2014 Apr 14.
1.1 Load packages and data
1.2 Counter-match on binary surrogate
This analysis uses age dichotomized at 50 years as a surrogate for the actual exposure to draw a counter-matched NCC sample with 1 control per case.
# Binary surrogate for the exposure: 1 if age is below 50 years, 0 otherwise.
cohort_1$age_bin <- as.numeric(cohort_1$age < 50)
# Distribution of the surrogate in the full cohort.
table(cohort_1$age_bin)
##
## 0 1
## 7181 2819
# Draw a counter-matched NCC sample from cohort_1, counter-matching on the
# binary surrogate age_bin (ml = 1), and carry age and gender into the sample
# for the analysis step.
ncc_cm_bin <- draw_ncc_cm(cohort = cohort_1, y_name = "y", t_name = "t",
                          match_var_name = "age_bin",
                          include_var_name = c("age", "gender"), ml = 1)
# Inspect the first few rows of the sampled data.
head(ncc_cm_bin)
## set row_id t n_at_risk n_sampled weight y age_bin age gender
## 11 11 11 6.952504 4866 1 4866 1 0 60 0
## 6682 11 6682 6.952504 2002 1 2002 0 1 41 1
## 29 29 29 11.157250 3802 1 3802 1 0 64 0
## 6543 29 6543 11.157250 1594 1 1594 0 1 40 0
## 58 58 58 24.578458 1860 1 1860 1 0 63 0
## 1434 58 1434 24.578458 810 1 810 0 1 43 1
# Cross-tabulate the surrogate (rows) against case status y (columns).
table(ncc_cm_bin$age_bin, ncc_cm_bin$y)
##
## 0 1
## 0 32 550
## 1 550 32
# Weighted conditional logistic regression: stratify on the sampled set and
# include the log of the sampling weight as an offset.
m_clogit_bin <- clogit(y ~ age + gender + strata(set) + offset(log(weight)),
                       data = ncc_cm_bin)
1.3 Counter-match on categorical surrogate
This analysis divides age into 4 categories (cut at 40, 50 and 60 years) to create a categorical surrogate for the exposure, and then draws a counter-matched NCC sample with one subject from each age category in every sampled set, i.e., 3 controls per case in total.
# Four-level categorical surrogate for the exposure, with cut points at
# 40, 50 and 60 years; categories are labelled 1 to 4.
cohort_1$age_quart <- cut(cohort_1$age, breaks = c(-Inf, 40, 50, 60, Inf),
                          labels = 1:4, include.lowest = TRUE)
# Distribution of the surrogate in the full cohort.
table(cohort_1$age_quart)
##
## 1 2 3 4
## 712 2453 3908 2927
# Draw a counter-matched NCC sample from cohort_1, counter-matching on the
# four-level surrogate age_quart (ml = 1), and carry age and gender into the
# sample for the analysis step.
ncc_cm_quart <- draw_ncc_cm(cohort = cohort_1, y_name = "y", t_name = "t",
                            match_var_name = "age_quart",
                            include_var_name = c("age", "gender"), ml = 1)
# Inspect the first 20 rows (five sampled sets of four subjects each).
head(ncc_cm_quart, 20)
## set row_id t n_at_risk n_sampled weight y age_quart age gender
## 5123 11 5123 6.952504 512 1 512 0 1 39 1
## 1050 11 1050 6.952504 1756 1 1756 0 2 42 0
## 11 11 11 6.952504 2672 1 2672 1 3 60 0
## 9857 11 9857 6.952504 1928 1 1928 0 4 63 0
## 4445 29 4445 11.157250 413 1 413 0 1 28 1
## 3770 29 3770 11.157250 1392 1 1392 0 2 42 1
## 4913 29 4913 11.157250 2116 1 2116 0 3 57 0
## 29 29 29 11.157250 1475 1 1475 1 4 64 0
## 9234 58 9234 24.578458 211 1 211 0 1 36 1
## 7715 58 7715 24.578458 704 1 704 0 2 50 1
## 3727 58 3727 24.578458 1066 1 1066 0 3 51 0
## 58 58 58 24.578458 689 1 689 1 4 63 0
## 7369 67 7369 1.434794 665 1 665 0 1 25 1
## 9614 67 9614 1.434794 2300 1 2300 0 2 43 1
## 1404 67 1404 1.434794 3607 1 3607 0 3 60 0
## 67 67 67 1.434794 2683 1 2683 1 4 65 1
## 170 89 170 19.254871 269 1 269 0 1 39 1
## 3947 89 3947 19.254871 933 1 933 0 2 50 0
## 8190 89 8190 19.254871 1395 1 1395 0 3 53 0
## 89 89 89 19.254871 922 1 922 1 4 64 1
# Number of sampled subjects in each age category.
table(ncc_cm_quart$age_quart)
##
## 1 2 3 4
## 582 582 582 582
# Weighted conditional logistic regression: stratify on the sampled set and
# include the log of the sampling weight as an offset.
m_clogit_quart <- clogit(y ~ age + gender + strata(set) + offset(log(weight)),
                         data = ncc_cm_quart)
1.4 Compare results
# Stack the coefficient tables of the three fitted models (two rows each:
# age, gender) into one matrix.
# NOTE(review): m_cox_cohort_1 is not defined in the visible part of this
# document; presumably it is the full-cohort model fitted earlier -- confirm.
results_1 <- rbind(summary(m_cox_cohort_1)$coefficients,
                   summary(m_clogit_bin)$coefficients,
                   summary(m_clogit_quart)$coefficients)
# Drop the row names so they do not appear in the printed table.
rownames(results_1) <- NULL
# Render the comparison table; check.names = FALSE keeps the non-syntactic
# column headers as written, and digits sets the rounding per column.
kable(
  data.frame(
    Data = c("Full cohort", "",
             "NCC-CM, 1 control per case", "",
             "NCC-CM, 3 controls per case", ""),
    Variable = rep(c("Age", "Male"), 3),
    `True HR` = rep(c(1.1, 2), 3),
    `Estimated HR` = results_1[, "exp(coef)"],
    `SE of log(HR)` = results_1[, "se(coef)"],
    `p-value` = results_1[, "Pr(>|z|)"],
    check.names = FALSE
  ),
  digits = c(0, 0, 1, 2, 3, 3)
)
| Data | Variable | True HR | Estimated HR | SE of log(HR) | p-value |
|---|---|---|---|---|---|
| Full cohort | Age | 1.1 | 1.11 | 0.004 | 0 |
| | Male | 2.0 | 2.18 | 0.088 | 0 |
| NCC-CM, 1 control per case | Age | 1.1 | 1.13 | 0.013 | 0 |
| | Male | 2.0 | 3.21 | 0.303 | 0 |
| NCC-CM, 3 controls per case | Age | 1.1 | 1.11 | 0.006 | 0 |
| | Male | 2.0 | 2.56 | 0.133 | 0 |