Last active
July 16, 2020 21:03
-
-
Save mcshaz/b4793c19096669a89ef83522437ec79d to your computer and use it in GitHub Desktop.
Comparing Survival of a Sample to a Standard Population (Finkelstein D, Muzikansky A, Schoenfeld D) using R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# see http://hedwig.mgh.harvard.edu/biostatistics/node/30 for the original paper and an excel spreadsheet with macros | |
# the spreadsheet does much the same in VBA, but with considerably more code | |
# unlike the vba spreadsheet, this also takes into acount the fraction of age in years at entry and exit (in creating per patient risk) | |
# if the age is greater than the maximum data (101 years old), each subsequent year is a repeat of the final annual risk available | |
library(survival) | |
oneSampleSurvival <- function(df, description) { | |
# NZ data between 0 and 100 for each year | |
# 10 years of population and death data was averaged (2009-2018) for each year of age until 89 years old | |
# data is then repeated in 5 yearly blocks as more granular data was hard to obtain | |
# mortality at 0 years excludes mortality before 28 days (not relevant) | |
maleMortality <- c(0.001843361228536, 0.000426012398328, 0.000205162390909, 0.000159933000221, 0.000131106236558, 0.000109847867736, 7.54877026981029E-05, 0.000104275266973, 8.46092661589332E-05, 0.000105655560873, 9.83646791092508E-05, 9.85469708619511E-05, 0.000107171040591, 0.000136745787619, 0.000211693246977, 0.000409680498681, 0.00047908243582, 0.00068725838029, 0.000792558666334, 0.00084616668714, 0.000794068160984, 0.000907776641227, 0.000800008665019, 0.000828096770177, 0.000806997128346, 0.000840005277716, 0.000802987629156, 0.000825605870321, 0.000765479018619, 0.000763137039117, 0.000806246482394, 0.000903749143428, 0.000924492304174, 0.000857232269108, 0.00092020098225, 0.000998230025349, 0.001085433512175, 0.001027818977332, 0.001122918474693, 0.00119988265766, 0.00137785701308, 0.001442949518615, 0.00153845943647, 0.00164270871642, 0.00188256662831, 0.002023312990214, 0.002030541981486, 0.002272865187846, 0.002607033588397, 0.002696261347275, 0.002969802371852, 0.003330445145048, 0.003450994308472, 0.003766824109133, 0.004382106432976, 0.004619214946603, 0.00507076963448, 0.005168789456058, 0.006025729167394, 0.006502154100548, 0.006825708117681, 0.00758116723691, 0.008280373380387, 0.009053044757868, 0.00965343239435, 0.010853634625976, 0.012058271659544, 0.013235643983316, 0.014237071816918, 0.016323379469387, 0.01765435043422, 0.019773974552901, 0.021931935366736, 0.024825087322466, 0.027006375178927, 0.029820680952757, 0.033902362368077, 0.03837648923878, 0.042932432116485, 0.046894126378063, 0.054108256726034, 0.060411289976837, 0.068653770714218, 0.077960340417016, 0.088759284493938, 0.099283037280737, 0.112244152400086, 0.128687561933202, 0.144916978613787, 0.157930508606968, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.4025) | |
femaleMortality <- c(0.001440725276587, 0.000378599537757, 0.00021709924675, 9.97502891438584E-05, 9.85006425048784E-05, 8.92050090982164E-05, 8.95206096251493E-05, 7.96114330270264E-05, 6.92762143799792E-05, 0.000101432919309, 5.08040463311748E-05, 9.22608680532037E-05, 0.000122942485987, 0.000153294565916, 0.000172502569892, 0.000260513869301, 0.000255523432279, 0.000368255538044, 0.000356330469863, 0.000267139191191, 0.000358230732596, 0.000369833546194, 0.000305476109726, 0.000319588939803, 0.000297956887714, 0.000363010043724, 0.000362859918462, 0.000432002032646, 0.000321026034948, 0.000361625954016, 0.00040452437153, 0.000431745162574, 0.000512770712544, 0.00054317376104, 0.000488063712367, 0.000486563128689, 0.00057339122496, 0.00072400070304, 0.00072060329081, 0.000754517692419, 0.000849463968011, 0.000831645042867, 0.001042677971853, 0.001132957559664, 0.001207180709918, 0.00131585417895, 0.001513946743796, 0.001606617997058, 0.001723479078906, 0.001933808631927, 0.002226989986555, 0.002231269823877, 0.002626438738956, 0.002696431092572, 0.002966902852488, 0.003080837093037, 0.003413401888315, 0.003427147560834, 0.004010391720794, 0.004217567593114, 0.004606013461916, 0.005031607028952, 0.005503746741787, 0.006108782143772, 0.006845816438536, 0.007141344621113, 0.007955395638506, 0.009127032877493, 0.010042400111416, 0.010803760162666, 0.012057728392066, 0.01323141371974, 0.014562716593054, 0.015873768931767, 0.018880147634345, 0.02056309402693, 0.022568564901875, 0.025811635811795, 0.029461369991122, 0.032920253916872, 0.038426882965907, 0.042458470397799, 0.050308505460449, 0.05592788582796, 0.063922777668064, 0.074695945529539, 0.084828676018797, 0.095857321745606, 0.110779421454886, 0.129589230549757, 0.185325602140946, 0.185325602140946, 0.185325602140946, 0.185325602140946, 0.185325602140946, 0.320109190172884, 0.320109190172884, 0.320109190172884, 0.320109190172884, 0.320109190172884, 0.444805194805195) | |
getYrMortality <- function(startAgeYr, endAgeYr, isMale){ | |
if (isMale) { | |
lookupVector <- maleMortality | |
} else { | |
lookupVector <- femaleMortality | |
} | |
startIndx <- as.integer(startAgeYr) + 1L # add 1 as mortality begins at 0 years of age (died <28d excluded) | |
endIndx <- as.integer(endAgeYr) + 1L | |
if (endIndx > length(lookupVector)) { | |
lastIndx <- length(lookupVector) | |
returnVar <- c(lookupVector[startIndx:lastIndx], rep(lookupVector[lastIndx], endIndx - lastIndx)) | |
} else { | |
returnVar <- lookupVector[startIndx:endIndx] | |
} | |
return(returnVar) | |
} | |
smrProportionalHazard <- function(observed, expected, ci = 95) { | |
stat <- (observed - expected) ^ 2 / expected | |
smr <- observed / expected | |
chiterm <- qchisq((100 + ci) / 200, 1) | |
ub <- smr + chiterm / (2 * expected) + ((chiterm * (4 * observed + chiterm)) ^ 0.5) / (2 * expected) | |
lb <- smr + chiterm / (2 * expected) - ((chiterm * (4 * observed + chiterm)) ^ 0.5) / (2 * expected) | |
return(c("smr" = smr, | |
"p_value" = 1 - pchisq(stat, 1), | |
"lower_ci" = lb, | |
"upper_ci" = ub)) | |
} | |
riskOfDeath <- by(df, seq_len(nrow(df)), function(rw){ | |
# extracts mortality data for each year of age between entry and exit date | |
# first and last date will be fractionally reduced according to how much of thay age year was remaining | |
startAgeYr <- as.numeric(rw$entry - rw$dob) / 365.25 | |
startProportion <- startAgeYr %% 1 | |
startAgeYr <- as.integer(startAgeYr) | |
endAgeYr <- as.numeric(rw$exit - rw$dob) / 365.25 | |
endProportion <- endAgeYr %% 1 | |
endAgeYr <- as.integer(endAgeYr) | |
returnVar <- getYrMortality(startAgeYr, endAgeYr, rw$male) | |
if (startAgeYr == endAgeYr) { | |
returnVar <- returnVar * (endProportion - startProportion) | |
} else { | |
returnVar[1] <- returnVar[1] * (1 - startProportion) | |
len <- length(returnVar) | |
returnVar[len] <- returnVar[len] * endProportion | |
} | |
return(sum(returnVar)) | |
}) | |
#main output | |
cat(description, "(using proportional hazards assumption)", "\n") | |
totalDeaths <- sum(df$dead) | |
totalRisk <- sum(riskOfDeath) | |
cat("observed / expected =", totalDeaths, "/", totalRisk, "\n") | |
print(smrProportionalHazard(totalDeaths, totalRisk)) | |
df$yrsOldAtEntry <- as.numeric(df$entry - df$dob) / 365.25 | |
df$yrsInStudy <- as.numeric(df$exit - df$entry) / 365.25 | |
# create reference for population hazard | |
maxYears <- ceiling(max(df$yrsInStudy)) | |
hazards <- by(df, seq_len(nrow(df)), function(rw){ | |
mortVector <- getYrMortality(rw$yrsOldAtEntry, rw$yrsOldAtEntry + maxYears, rw$male) | |
return(exp(-cumsum(mortVector))) | |
}) | |
hazards <- c(1, rowMeans(simplify2array(hazards))) | |
survData <- survfit(Surv(df$yrsInStudy, df$dead) ~ 1, data = df) | |
lowerY <- floor(survData$lower[length(survData$lower)] * 100) / 100 | |
par(cex.sub = 0.8) | |
plot(survData, | |
xlab = "Years from op", | |
ylab = "Survival probability", | |
ylim = c(lowerY, 1), | |
sub = description) | |
lines(0:(as.integer(maxYears) + 1L), hazards, col="grey") | |
legend("bottomleft", | |
inset = 0.05, | |
legend = c("general pop. matched for age/sex", "cardiac", "95% conf. int."), | |
col = c("grey", "black", "black"), | |
lty = c(1, 1, 2)) | |
} | |
df <- byfirstsurg[, c("Male", "Dead", "DateofBirth", "DateOfEvent", "DatelastKnown")] | |
# rename variables to required dataframe names for oneSampleSurvival function | |
names(df) <- c("male", "dead", "dob", "entry", "exit") | |
df$male <- df$male == 1 | |
df$dead <- df$dead == 1 | |
# looking at subsets of the data - trying to identify where/which subset the proportional hazard assumption might hold | |
oneSampleSurvival (df[df$exit - df$entry > 30,], "surviving >30 days from operation") | |
oneSampleSurvival (df[df$exit - df$entry > 30 & df$entry - df$dob > 365.25 * 15,], "surviving >30 days and >14 yr old at operation") | |
oneSampleSurvival (df[df$exit - df$entry > 365,], "surviving at least 1 yr from operation") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment