Skip to content

Commit

Permalink
Merge pull request #88 from opensafely/rerun_dereg
Browse files Browse the repository at this point in the history
Prepare rerun of mental health in response to reviewer comments
  • Loading branch information
venexia authored Apr 23, 2024
2 parents 016bddf + 316575e commit a5b7823
Show file tree
Hide file tree
Showing 56 changed files with 14,842 additions and 17,648 deletions.
10 changes: 4 additions & 6 deletions analysis/active_analyses.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ cohorts <- c("vax","unvax_extf","prevax_extf")

# Specify outcomes -------------------------------------------------------------

## Outcomes for which we will RUN ALL analyses
## Outcomes for which we will run ALL analyses

outcomes_runall <- c("out_date_depression",
"out_date_serious_mental_illness")

## Outcomes for which we will RUN MAIN analyses only
## Outcomes for which we will run MAIN analyses only

outcomes_runmain <- c(outcomes_runall,
"out_date_anxiety_general",
Expand Down Expand Up @@ -484,10 +484,8 @@ for (c in cohorts) {

# Add day 0 analyses -----------------------------------------------------------

tmp <- df
tmp$analysis <- paste0("day0_",tmp$analysis)
tmp$cut_points <- gsub("28","1;28",tmp$cut_points)
df <- rbind(df,tmp)
df$analysis <- paste0("day0_",df$analysis)
df$cut_points <- gsub("28","1;28",df$cut_points)

# Assign unique name -----------------------------------------------------------

Expand Down
35 changes: 2 additions & 33 deletions analysis/common_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def generate_common_variables(index_date_variable,exposure_end_date_variable,out

## COVID-19

### SGSS
### SGSS
tmp_exp_date_covid19_confirmed_sgss=patients.with_test_result_in_sgss(
pathogen="SARS-CoV-2",
test_result="positive",
Expand Down Expand Up @@ -123,17 +123,6 @@ def generate_common_variables(index_date_variable,exposure_end_date_variable,out
f"{index_date_variable}",
),

## Deregistration date
dereg_date=patients.date_deregistered_from_all_supported_practices(
between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
date_format = 'YYYY-MM-DD',
return_expectations={
"date": {"earliest": study_dates["pandemic_start"], "latest": "today"},
"rate": "uniform",
"incidence": 0.01
},
),

# Subgroup variables ----------------------------------------------------------------------

## COVID-19 severity
Expand Down Expand Up @@ -484,22 +473,7 @@ def generate_common_variables(index_date_variable,exposure_end_date_variable,out

## Suicide

### SUS
tmp_out_date_suicide_hes=patients.admitted_to_hospital(
returning="date_admitted",
with_these_diagnoses=suicide_icd10,
between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
date_format="YYYY-MM-DD",
find_first_match_in_period=True,
return_expectations={
"date": {"earliest": study_dates["pandemic_start"], "latest" : "today"},
"rate": "uniform",
"incidence": 0.1,
},
),

### Death registry
tmp_out_date_suicide_death=patients.with_these_codes_on_death_certificate(
out_date_suicide=patients.with_these_codes_on_death_certificate(
suicide_icd10,
returning="date_of_death",
between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
Expand All @@ -512,11 +486,6 @@ def generate_common_variables(index_date_variable,exposure_end_date_variable,out
},
),

### Combined
out_date_suicide=patients.minimum_of(
"tmp_out_date_suicide_hes", "tmp_out_date_suicide_death"
),

## Addiction

### Primary care
Expand Down
27 changes: 20 additions & 7 deletions analysis/cox_model.do
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
local name "`1'"
local day0 "`2'"

/*

// For local testing:
local name "cohort_prevax_extf-day0_main-depression"
local name "cohort_prevax_extf-day0_sub_sex_male-depression"
local day0 "TRUE"
*/

* Set Ado file path

Expand Down Expand Up @@ -38,6 +37,11 @@ rename cov_cat_region region
local prevax_cohort = regexm("`name'", "_pre")
display "`prevax_cohort'"

* Generate sex indicator

local sub_sex = regexm("`name'", "sub_sex")
display "`sub_sex'"

* Replace NA with missing value that Stata recognises

ds , has(type string)
Expand Down Expand Up @@ -154,10 +158,19 @@ tab time outcome_status
di "Total follow-up in days: " fup_total
bysort time: summarize(fup), detail

stcox days* i.cov_cat_sex age_spline1 age_spline2, strata(region) vce(r)
est store min, title(Age_Sex)
stcox days* age_spline1 age_spline2 i.cov_cat_* cov_num_* cov_bin_*, strata(region) vce(r)
est store max, title(Maximal)
if `sub_sex'==1 {
stcox days* age_spline1 age_spline2, strata(region) vce(r)
est store min, title(Age_Sex)
stcox days* age_spline1 age_spline2 i.cov_cat_* cov_num_* cov_bin_*, strata(region) vce(r)
est store max, title(Maximal)
}
else {
stcox days* i.cov_cat_sex age_spline1 age_spline2, strata(region) vce(r)
est store min, title(Age_Sex)
stcox days* age_spline1 age_spline2 i.cov_cat_* cov_num_* cov_bin_*, strata(region) vce(r)
est store max, title(Maximal)
}


estout * using "output/stata_model_output-`name'.txt", cells("b se t ci_l ci_u p") stats(risk N_fail N_sub N N_clust) replace

Expand Down
98 changes: 40 additions & 58 deletions analysis/create_project_actions.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ run_stata <- c("cohort_prevax_extf-day0_sub_covid_hospitalised-depression",
"cohort_unvax_extf-day0_sub_ethnicity_other-depression",
"cohort_vax-day0_sub_age_80_110-serious_mental_illness",
"cohort_prevax_extf-day0_sub_history_recent-serious_mental_illness",
"cohort_vax-sub_covid_nonhospitalised-eating_disorders",
"cohort_vax-main-eating_disorders",
"cohort_prevax_extf-day0_sub_covid_hospitalised-addiction",
"cohort_prevax_extf-day0_sub_covid_hospitalised-anxiety_general",
"cohort_prevax_extf-day0_sub_covid_hospitalised-eating_disorders",
Expand All @@ -53,7 +51,11 @@ run_stata <- c("cohort_prevax_extf-day0_sub_covid_hospitalised-depression",
"cohort_unvax_extf-day0_sub_covid_hospitalised-anxiety_ptsd",
"cohort_unvax_extf-day0_sub_covid_hospitalised-serious_mental_illness",
"cohort_vax-day0_sub_covid_hospitalised-anxiety_general",
"cohort_vax-day0_sub_covid_hospitalised-depression")
"cohort_vax-day0_sub_covid_hospitalised-depression",
"cohort_prevax_extf-day0_main-anxiety_ptsd",
"cohort_prevax_extf-day0_sub_sex_male-depression",
"cohort_prevax_extf-day0_sub_history_notrecent-depression",
"cohort_prevax_extf-day0_sub_history_notrecent-serious_mental_illness")

stata <- active_analyses[active_analyses$name %in% run_stata,]
stata$save_analysis_ready <- TRUE
Expand Down Expand Up @@ -160,18 +162,23 @@ stage1_data_cleaning <- function(cohort){
needs = list("vax_eligibility_inputs",glue("preprocess_data_{cohort}")),
moderately_sensitive = list(
consort = glue("output/consort_{cohort}.csv"),
consort_rounded = glue("output/consort_{cohort}_rounded.csv")
consort_midpoint6 = glue("output/consort_{cohort}_midpoint6.csv")
),
highly_sensitive = list(
cohort = glue("output/input_{cohort}_stage1.rds")
)
),
action(
name = glue("describe_stage1_data_cleaning_{cohort}"),
run = glue("r:latest analysis/describe_file.R input_{cohort}_stage1 rds"),
needs = list(glue("stage1_data_cleaning_{cohort}")),
name = glue("stage1_data_cleaning_v2_{cohort}"),
run = glue("r:latest analysis/stage1_data_cleaning_v2.R"),
arguments = c(cohort),
needs = list("vax_eligibility_inputs",glue("preprocess_data_{cohort}")),
moderately_sensitive = list(
describe_model_input = glue("output/describe-input_{cohort}_stage1.txt")
consort = glue("output/consort_{cohort}_v2.csv"),
consort_midpoint6 = glue("output/consort_{cohort}_midpoint6_v2.csv")
),
highly_sensitive = list(
cohort = glue("output/input_{cohort}_stage1_v2.rds")
)
)
)
Expand All @@ -189,7 +196,7 @@ table1 <- function(cohort){
needs = list(glue("stage1_data_cleaning_{cohort}")),
moderately_sensitive = list(
table1 = glue("output/table1_{cohort}.csv"),
table1_rounded = glue("output/table1_{cohort}_rounded.csv")
table1_midpoint6 = glue("output/table1_{cohort}_midpoint6.csv")
)
),
action(
Expand All @@ -199,7 +206,7 @@ table1 <- function(cohort){
needs = list(glue("stage1_data_cleaning_{cohort}")),
moderately_sensitive = list(
extendedtable1 = glue("output/extendedtable1_{cohort}.csv"),
extendedtable1_rounded = glue("output/extendedtable1_{cohort}_rounded.csv")
extendedtable1_midpoint6 = glue("output/extendedtable1_{cohort}_midpoint6.csv")
)
)
)
Expand All @@ -218,7 +225,7 @@ apply_model_function <- function(name, cohort, analysis, ipw, strata,
action(
name = glue("make_model_input-{name}"),
run = glue("r:latest analysis/make_model_input.R {name}"),
needs = list("replace_suicide"),
needs = as.list(glue("stage1_data_cleaning_{cohort}")),
highly_sensitive = list(
model_input = glue("output/model_input-{name}.rds")
)
Expand All @@ -241,11 +248,11 @@ table2 <- function(cohort, focus){
table2_names <- gsub("out_date_","",unique(active_analyses[active_analyses$cohort=={cohort},]$name))

if (focus=="severity") {
table2_names <- table2_names[grepl("-main-",table2_names) | grepl("-sub_covid_",table2_names)]
table2_names <- table2_names[grepl("-day0_main-",table2_names) | grepl("-day0_sub_covid_hospitalised",table2_names) | grepl("-day0_sub_covid_nonhospitalised",table2_names)]
}

if (focus=="history") {
table2_names <- table2_names[grepl("-sub_history_",table2_names)]
table2_names <- table2_names[grepl("-day0_sub_history_",table2_names)]
}

splice(
Expand All @@ -257,7 +264,7 @@ table2 <- function(cohort, focus){
needs = c(as.list(paste0("make_model_input-",table2_names))),
moderately_sensitive = list(
table2 = glue("output/table2_{focus}_{cohort}.csv"),
table2_rounded = glue("output/table2_{focus}_{cohort}_rounded.csv")
table2_midpoint6 = glue("output/table2_{focus}_{cohort}_midpoint6.csv")
)
)
)
Expand All @@ -276,10 +283,10 @@ venn <- function(cohort){
run = "r:latest analysis/venn.R",
arguments = c(cohort),
needs = c(as.list(glue("preprocess_data_{cohort}")),
as.list(paste0(glue("make_model_input-cohort_{cohort}-main-"),venn_outcomes))),
as.list(paste0(glue("make_model_input-cohort_{cohort}-day0_main-"),venn_outcomes))),
moderately_sensitive = list(
venn = glue("output/venn_{cohort}.csv"),
venn_rounded = glue("output/venn_{cohort}_rounded.csv")
venn_midpoint6 = glue("output/venn_{cohort}_midpoint6.csv")
)
)
)
Expand Down Expand Up @@ -398,22 +405,7 @@ actions_list <- splice(
recursive = FALSE
)
),

## Replace suicide variable with data from death registry only ---------------
comment("Replace suicide variable with data from death registry only"),

action(
name = glue("replace_suicide"),
run = "r:latest analysis/replace_suicide.R",
needs = as.list(c(paste0("preprocess_data_",c("prevax_extf","vax","unvax_extf")),
paste0("stage1_data_cleaning_",c("prevax_extf","vax","unvax_extf")))),
highly_sensitive = list(
prevax_cohort = glue("output/input_prevax_extf_stage1_v1.rds"),
vax_cohort = glue("output/input_vax_stage1_v1.rds"),
unvax_cohort = glue("output/input_unvax_extf_stage1_v1.rds")
)
),


## Run models ----------------------------------------------------------------
comment("Run models"),

Expand Down Expand Up @@ -496,32 +488,22 @@ actions_list <- splice(
action(
name = "make_model_output",
run = "r:latest analysis/make_model_output.R",
needs = as.list(paste0("cox_ipw-",
setdiff(active_analyses$name,stata$name))),
needs = as.list(c(paste0("cox_ipw-",setdiff(active_analyses$name,stata$name)),
paste0("stata_cox_ipw-",stata$name))),
moderately_sensitive = list(
model_output = glue("output/model_output.csv"),
model_output_rounded = glue("output/model_output_rounded.csv")
)
),

action(
name = "make_stata_model_output",
run = "r:latest analysis/make_stata_model_output.R",
needs = as.list(paste0("stata_cox_ipw-",stata$name)),
moderately_sensitive = list(
stata_model_output = glue("output/stata_model_output.csv"),
stata_model_output_rounded = glue("output/stata_model_output_rounded.csv")
model_output_midpoint6 = glue("output/model_output_midpoint6.csv")
)
),

action(
name = "make_consort_output",
run = "r:latest analysis/make_other_output.R consort prevax_extf;vax;unvax_extf",
needs = list("stage1_data_cleaning_prevax_extf",
"stage1_data_cleaning_vax",
"stage1_data_cleaning_unvax_extf"),
moderately_sensitive = list(
consort_output_rounded = glue("output/consort_output_rounded.csv")
consort_output_midpoint6 = glue("output/consort_output_midpoint6.csv")
)
),

Expand All @@ -532,7 +514,7 @@ actions_list <- splice(
"table1_vax",
"table1_unvax_extf"),
moderately_sensitive = list(
table1_output_rounded = glue("output/table1_output_rounded.csv")
table1_output_midpoint6 = glue("output/table1_output_midpoint6.csv")
)
),

Expand All @@ -543,29 +525,29 @@ actions_list <- splice(
"extendedtable1_vax",
"extendedtable1_unvax_extf"),
moderately_sensitive = list(
table1_output_rounded = glue("output/extendedtable1_output_rounded.csv")
table1_output_midpoint6 = glue("output/extendedtable1_output_midpoint6.csv")
)
),

action(
name = "make_table2_severity_output",
run = "r:latest analysis/make_table2_severity_output.R",
run = "r:latest analysis/make_other_output.R table2_severity prevax_extf;vax;unvax_extf",
needs = list("table2_severity_prevax_extf",
"table2_severity_vax",
"table2_severity_unvax_extf"),
moderately_sensitive = list(
table2_output_rounded = glue("output/table2_severity_output_rounded.csv")
table2_output_midpoint6 = glue("output/table2_severity_output_midpoint6.csv")
)
),

action(
name = "make_table2_history_output",
run = "r:latest analysis/make_table2_history_output.R",
run = "r:latest analysis/make_other_output.R table2_history prevax_extf;vax;unvax_extf",
needs = list("table2_history_prevax_extf",
"table2_history_vax",
"table2_history_unvax_extf"),
moderately_sensitive = list(
table2_output_rounded = glue("output/table2_history_output_rounded.csv")
table2_output_midpoint6 = glue("output/table2_history_output_midpoint6.csv")
)
),

Expand All @@ -576,19 +558,19 @@ actions_list <- splice(
"venn_vax",
"venn_unvax_extf"),
moderately_sensitive = list(
venn_output_rounded = glue("output/venn_output_rounded.csv")
venn_output_midpoint6 = glue("output/venn_output_midpoint6.csv")
)
),

comment("Make absolute excess risk (AER) input"),

action(
name = "make_aer_input",
run = "r:latest analysis/make_aer_input.R",
needs = as.list(paste0("make_model_input-",active_analyses[grepl("-main-",active_analyses$name),]$name)),
run = "r:latest analysis/make_aer_input.R day0_main",
needs = as.list(paste0("make_model_input-",active_analyses[grepl("-day0_main-",active_analyses$name),]$name)),
moderately_sensitive = list(
aer_input = glue("output/aer_input-main.csv"),
aer_input_rounded = glue("output/aer_input-main-rounded.csv")
aer_input = glue("output/aer_input-day0_main.csv"),
aer_input_midpoint6 = glue("output/aer_input-day0_main-midpoint6.csv")
)
),

Expand Down
Loading

0 comments on commit a5b7823

Please sign in to comment.