library(SamplingDesignTools)
library(survival)
library(Epi) # To draw (non-counter-matched) nested case-control sample
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
data("cohort_2")
2 Compute KM-type weights for NCC sample
This section describes how to use compute_km_weights()
to draw NCC samples and compute Kaplan-Meier type (KM-type) weights, and subsequently how to analyze the sample using a weighted Cox approach.
Examples in this section use cohort_2
as the underlying cohort.
References:
- Samuelsen SO. A psudolikelihood approach to analysis of nested case-control studies. Biometrika. 1997 Jun 1;84(2):379-94.
- Borgan Ø, Samuelsen SO. A review of cohort sampling designs for Cox’s regression model: potentials in epidemiology. Norsk Epidemiologi. 2003;13(2).
2.1 Load packages and data
2.2 1:2 NCC matched on age and gender
Consider an idealistic scenario, where the full cohort is available to calculate the KM-type weights. A NCC sample was draw (using Epi::ccwc()
) with 2 controls per case, matched on age group and gender.
<- 2
n_per_case <- ccwc(exit = t, fail = y, controls = n_per_case,
ncc_2 match = list(age_cat, gender), include = list(x, age, z),
data = cohort_2, silent = TRUE)
names(ncc_2)[-(1:4)] <- c("age_cat", "gender", "x", "age", "z")
head(ncc_2, 12)
## Set Map Time Fail age_cat gender x age z
## 1 1 36 0.2016047 1 (35,45] 1 1 -16 0
## 2 1 14705 0.2016047 0 (35,45] 1 1 -14 1
## 3 1 8990 0.2016047 0 (35,45] 1 1 -12 0
## 4 2 888 4.9218685 1 (35,45] 1 1 -20 1
## 5 2 49875 4.9218685 0 (35,45] 1 1 -15 0
## 6 2 17845 4.9218685 0 (35,45] 1 1 -14 0
## 7 3 1037 21.4651890 1 (35,45] 1 1 -15 1
## 8 3 17756 21.4651890 0 (35,45] 1 1 -11 0
## 9 3 93612 21.4651890 0 (35,45] 1 0 -11 0
## 10 4 1200 15.7674384 1 (35,45] 1 1 -15 1
## 11 4 91281 15.7674384 0 (35,45] 1 0 -11 1
## 12 4 51485 15.7674384 0 (35,45] 1 1 -15 0
# Create the sampling and status indicator
<- numeric(nrow(cohort_2))
sample_stat unique(ncc_2$Map[ncc_2$Fail == 0])] <- 1
sample_stat[$Map[ncc_2$Fail == 1]] <- 2
sample_stat[ncc_2table(sample_stat)
## sample_stat
## 0 1 2
## 91997 5230 2773
<- compute_km_weights(cohort = cohort_2, t_name = "t", y_name = "y",
ncc_2_nodup sample_stat = sample_stat,
match_var_names = c("age_cat", "gender"),
n_per_case = n_per_case)
## Start time is 0 for all subjects. Event/censoring time is given by variable t.
## Joining, by = c("age_cat", "gender")
## Joining, by = "strata"
head(ncc_2_nodup)
## id y t x age age_cat gender z km_prob km_weight
## 18 18 0 23.0020275 1 -11 (35,45] 0 1 0.07969598 12.54769
## 36 36 1 0.2016047 1 -16 (35,45] 1 0 1.00000000 1.00000
## 40 40 0 25.0000000 1 17 (65,75] 1 0 0.09368591 10.67396
## 44 44 0 20.1856606 0 -1 (45,55] 1 0 0.07193216 13.90199
## 61 61 1 14.8098399 1 7 (55,65] 0 0 1.00000000 1.00000
## 68 68 0 9.3384792 1 10 (65,75] 0 0 0.04448197 22.48102
summary(ncc_2_nodup$km_weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 1.00 10.64 12.21 13.32 696.20
<- coxph(Surv(t, y) ~ x * z + gender + age, data = ncc_2_nodup,
m_cox_ncc_2 weights = km_weight, robust = TRUE)
<- clogit(Fail ~ x * z + strata(Set), data = ncc_2) m_clogit_ncc_2
By breaking the matching in the NCC and performing a weighted analysis, users are also able to estimate the absolute risk that is not available from NCC in conventional conditional approach. This is not illustrated here but have been described in following papers:
- Salim A, Delcoigne B, Villaflores K, Koh WP, Yuan JM, van Dam RM, Reilly M. Comparisons of risk prediction methods using nested case‐control data. Statistics in medicine. 2017 Feb 10;36(3):455-65.
- Delcoigne B, Colzani E, Prochazka M, Gagliardi G, Hall P, Abrahamowicz M, Czene K, Reilly M. Breaking the matching in nested case–control data offered several advantages for risk estimation. Journal of clinical epidemiology. 2017 Feb 1;82:79-86.
2.3 Compare results
Compared to conventional conditional logistic regression analysis of matched sets, weighted Cox analysis of sampled subjects produces smaller SE, and provides estimates for matching factors.
<- rbind(summary(m_cox_cohort_2)$coef,
results_2 summary(m_clogit_ncc_2)$coef,
summary(m_cox_ncc_2)$coef[, -3])
<- data.frame(Variable = rownames(results_2), results_2,
results_2 check.names = FALSE)
rownames(results_2) <- NULL
kable(data.frame(
Data = c("Full cohort", rep("", 4),
"NCC (clogit)", rep("", 2),
"NCC (weighted Cox)", rep("", 4)),
Variable = results_2$Variable,
`True HR` = c(c(1.5, 4, 1.01, 1.01, 2),
c(1.5, 4, 2), c(1.5, 4, 1.01, 1.01, 2)),
`Estimated HR` = results_2[, "exp(coef)"],
`SE of log(HR)` = results_2[, "se(coef)"],
`p-value` = results_2[, "Pr(>|z|)"], check.names = FALSE
digits = c(0, 0, 2, 2, 3, 3)) ),
Data | Variable | True HR | Estimated HR | SE of log(HR) | p-value |
---|---|---|---|---|---|
Full cohort | x | 1.50 | 1.47 | 0.110 | 0.001 |
z | 4.00 | 4.46 | 0.126 | 0.000 | |
age | 1.01 | 1.01 | 0.002 | 0.000 | |
gender | 1.01 | 0.92 | 0.038 | 0.024 | |
x:z | 2.00 | 1.90 | 0.135 | 0.000 | |
NCC (clogit) | x | 1.50 | 1.35 | 0.122 | 0.014 |
z | 4.00 | 5.05 | 0.153 | 0.000 | |
x:z | 2.00 | 1.67 | 0.163 | 0.002 | |
NCC (weighted Cox) | x | 1.50 | 1.36 | 0.117 | 0.009 |
z | 4.00 | 4.68 | 0.143 | 0.000 | |
gender | 1.01 | 0.91 | 0.054 | 0.081 | |
age | 1.01 | 1.01 | 0.003 | 0.003 | |
x:z | 2.00 | 1.80 | 0.154 | 0.000 |