Merge pull request #88 from opensafely/rerun_dereg

Prepare rerun of mental health in response to reviwer comments
opensafely · Apr 23, 2024 · a5b7823 · a5b7823
2 parents 016bddf + 316575e
commit a5b7823
Show file tree

Hide file tree

Showing 56 changed files with 14,842 additions and 17,648 deletions.
diff --git a/analysis/active_analyses.R b/analysis/active_analyses.R
@@ -45,12 +45,12 @@ cohorts <- c("vax","unvax_extf","prevax_extf")
 
 # Specify outcomes -------------------------------------------------------------
 
-## Outcomes for which we will RUN ALL analyses
+## Outcomes for which we will run ALL analyses
 
 outcomes_runall <- c("out_date_depression", 
                      "out_date_serious_mental_illness")
 
-## Outcomes for which we will RUN MAIN analyses only
+## Outcomes for which we will run MAIN analyses only
 
 outcomes_runmain <- c(outcomes_runall, 
                       "out_date_anxiety_general", 
@@ -484,10 +484,8 @@ for (c in cohorts) {
 
 # Add day 0 analyses -----------------------------------------------------------
 
-tmp <- df
-tmp$analysis <- paste0("day0_",tmp$analysis)
-tmp$cut_points <- gsub("28","1;28",tmp$cut_points)
-df <- rbind(df,tmp)
+df$analysis <- paste0("day0_",df$analysis)
+df$cut_points <- gsub("28","1;28",df$cut_points)
 
 # Assign unique name -----------------------------------------------------------
 

diff --git a/analysis/common_variables.py b/analysis/common_variables.py
@@ -36,7 +36,7 @@ def generate_common_variables(index_date_variable,exposure_end_date_variable,out
 
             ## COVID-19 
 
-                ### SGSS
+                ### SGSS
                 tmp_exp_date_covid19_confirmed_sgss=patients.with_test_result_in_sgss(
                     pathogen="SARS-CoV-2",
                     test_result="positive",
@@ -123,17 +123,6 @@ def generate_common_variables(index_date_variable,exposure_end_date_variable,out
                     f"{index_date_variable}",
                 ),
 
-            ## Deregistraton date
-                dereg_date=patients.date_deregistered_from_all_supported_practices( 
-                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
-                    date_format = 'YYYY-MM-DD',
-                    return_expectations={
-                    "date": {"earliest": study_dates["pandemic_start"], "latest": "today"},
-                    "rate": "uniform",
-                    "incidence": 0.01
-                    },
-                ),
-
         # Subgroup variables ----------------------------------------------------------------------
 
             ## COVID-19 severity
@@ -484,22 +473,7 @@ def generate_common_variables(index_date_variable,exposure_end_date_variable,out
 
             ## Suicide
 
-                ### SUS
-                tmp_out_date_suicide_hes=patients.admitted_to_hospital(
-                    returning="date_admitted",
-                    with_these_diagnoses=suicide_icd10,
-                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
-                    date_format="YYYY-MM-DD",
-                    find_first_match_in_period=True,
-                    return_expectations={
-                        "date": {"earliest": study_dates["pandemic_start"], "latest" : "today"},
-                        "rate": "uniform",
-                        "incidence": 0.1,
-                    },
-                ),
-
-                ### Death registry
-                tmp_out_date_suicide_death=patients.with_these_codes_on_death_certificate(
+               out_date_suicide=patients.with_these_codes_on_death_certificate(
                     suicide_icd10,
                     returning="date_of_death",
                     between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
@@ -512,11 +486,6 @@ def generate_common_variables(index_date_variable,exposure_end_date_variable,out
                     },
                 ),
 
-                ### Combined
-                out_date_suicide=patients.minimum_of(
-                    "tmp_out_date_suicide_hes", "tmp_out_date_suicide_death"
-                ),          
-
             ## Addiction
 
                 ### Primary care

diff --git a/analysis/cox_model.do b/analysis/cox_model.do
@@ -3,11 +3,10 @@
 local name "`1'"
 local day0 "`2'"
 
-/*
+
 // For local testing:
-local name "cohort_prevax_extf-day0_main-depression"
+local name "cohort_prevax_extf-day0_sub_sex_male-depression"
 local day0 "TRUE"
-*/
 
 * Set Ado file path
 
@@ -38,6 +37,11 @@ rename cov_cat_region region
 local prevax_cohort = regexm("`name'", "_pre")
 display "`prevax_cohort'"
 
+* Generate sex indicator
+
+local sub_sex = regexm("`name'", "sub_sex")
+display "`sub_sex'"
+
 * Replace NA with missing value that Stata recognises
 
 ds , has(type string)
@@ -154,10 +158,19 @@ tab time outcome_status
 di "Total follow-up in days: " fup_total
 bysort time: summarize(fup), detail
 
-stcox days* i.cov_cat_sex age_spline1 age_spline2, strata(region) vce(r)
-est store min, title(Age_Sex)
-stcox days* age_spline1 age_spline2 i.cov_cat_* cov_num_* cov_bin_*, strata(region) vce(r)
-est store max, title(Maximal)
+if `sub_sex'==1 {
+	stcox days* age_spline1 age_spline2, strata(region) vce(r)
+	est store min, title(Age_Sex)
+	stcox days* age_spline1 age_spline2 i.cov_cat_* cov_num_* cov_bin_*, strata(region) vce(r)
+	est store max, title(Maximal)
+}
+else {
+	stcox days* i.cov_cat_sex age_spline1 age_spline2, strata(region) vce(r)
+	est store min, title(Age_Sex)
+	stcox days* age_spline1 age_spline2 i.cov_cat_* cov_num_* cov_bin_*, strata(region) vce(r)
+	est store max, title(Maximal)	
+}
+
 
 estout * using "output/stata_model_output-`name'.txt", cells("b se t ci_l ci_u p") stats(risk N_fail N_sub N N_clust) replace 
 

diff --git a/analysis/create_project_actions.R b/analysis/create_project_actions.R
@@ -41,8 +41,6 @@ run_stata <- c("cohort_prevax_extf-day0_sub_covid_hospitalised-depression",
                "cohort_unvax_extf-day0_sub_ethnicity_other-depression",
                "cohort_vax-day0_sub_age_80_110-serious_mental_illness",
                "cohort_prevax_extf-day0_sub_history_recent-serious_mental_illness",
-               "cohort_vax-sub_covid_nonhospitalised-eating_disorders",
-               "cohort_vax-main-eating_disorders",
                "cohort_prevax_extf-day0_sub_covid_hospitalised-addiction",
                "cohort_prevax_extf-day0_sub_covid_hospitalised-anxiety_general",
                "cohort_prevax_extf-day0_sub_covid_hospitalised-eating_disorders",
@@ -53,7 +51,11 @@ run_stata <- c("cohort_prevax_extf-day0_sub_covid_hospitalised-depression",
                "cohort_unvax_extf-day0_sub_covid_hospitalised-anxiety_ptsd",
                "cohort_unvax_extf-day0_sub_covid_hospitalised-serious_mental_illness",
                "cohort_vax-day0_sub_covid_hospitalised-anxiety_general",
-               "cohort_vax-day0_sub_covid_hospitalised-depression")
+               "cohort_vax-day0_sub_covid_hospitalised-depression",
+               "cohort_prevax_extf-day0_main-anxiety_ptsd",
+               "cohort_prevax_extf-day0_sub_sex_male-depression",
+               "cohort_prevax_extf-day0_sub_history_notrecent-depression",
+               "cohort_prevax_extf-day0_sub_history_notrecent-serious_mental_illness")
 
 stata <- active_analyses[active_analyses$name %in% run_stata,]
 stata$save_analysis_ready <- TRUE
@@ -160,18 +162,23 @@ stage1_data_cleaning <- function(cohort){
       needs = list("vax_eligibility_inputs",glue("preprocess_data_{cohort}")),
       moderately_sensitive = list(
         consort = glue("output/consort_{cohort}.csv"),
-        consort_rounded = glue("output/consort_{cohort}_rounded.csv")
+        consort_midpoint6 = glue("output/consort_{cohort}_midpoint6.csv")
       ),
       highly_sensitive = list(
         cohort = glue("output/input_{cohort}_stage1.rds")
       )
     ),
     action(
-      name = glue("describe_stage1_data_cleaning_{cohort}"),
-      run = glue("r:latest analysis/describe_file.R input_{cohort}_stage1 rds"),
-      needs = list(glue("stage1_data_cleaning_{cohort}")),
+      name = glue("stage1_data_cleaning_v2_{cohort}"),
+      run = glue("r:latest analysis/stage1_data_cleaning_v2.R"),
+      arguments = c(cohort),
+      needs = list("vax_eligibility_inputs",glue("preprocess_data_{cohort}")),
       moderately_sensitive = list(
-        describe_model_input = glue("output/describe-input_{cohort}_stage1.txt")
+        consort = glue("output/consort_{cohort}_v2.csv"),
+        consort_midpoint6 = glue("output/consort_{cohort}_midpoint6_v2.csv")
+      ),
+      highly_sensitive = list(
+        cohort = glue("output/input_{cohort}_stage1_v2.rds")
       )
     )
   )
@@ -189,7 +196,7 @@ table1 <- function(cohort){
       needs = list(glue("stage1_data_cleaning_{cohort}")),
       moderately_sensitive = list(
         table1 = glue("output/table1_{cohort}.csv"),
-        table1_rounded = glue("output/table1_{cohort}_rounded.csv")
+        table1_midpoint6 = glue("output/table1_{cohort}_midpoint6.csv")
       )
     ),
     action(
@@ -199,7 +206,7 @@ table1 <- function(cohort){
       needs = list(glue("stage1_data_cleaning_{cohort}")),
       moderately_sensitive = list(
         extendedtable1 = glue("output/extendedtable1_{cohort}.csv"),
-        extendedtable1_rounded = glue("output/extendedtable1_{cohort}_rounded.csv")
+        extendedtable1_midpoint6 = glue("output/extendedtable1_{cohort}_midpoint6.csv")
       )
     )
   )
@@ -218,7 +225,7 @@ apply_model_function <- function(name, cohort, analysis, ipw, strata,
     action(
       name = glue("make_model_input-{name}"),
       run = glue("r:latest analysis/make_model_input.R {name}"),
-      needs = list("replace_suicide"),
+      needs = as.list(glue("stage1_data_cleaning_{cohort}")),
       highly_sensitive = list(
         model_input = glue("output/model_input-{name}.rds")
       )
@@ -241,11 +248,11 @@ table2 <- function(cohort, focus){
   table2_names <- gsub("out_date_","",unique(active_analyses[active_analyses$cohort=={cohort},]$name))
 
   if (focus=="severity") {
-    table2_names <- table2_names[grepl("-main-",table2_names) | grepl("-sub_covid_",table2_names)]
+    table2_names <- table2_names[grepl("-day0_main-",table2_names) | grepl("-day0_sub_covid_hospitalised",table2_names) | grepl("-day0_sub_covid_nonhospitalised",table2_names)]
   }
 
   if (focus=="history") {
-    table2_names <- table2_names[grepl("-sub_history_",table2_names)]
+    table2_names <- table2_names[grepl("-day0_sub_history_",table2_names)]
   }
 
   splice(
@@ -257,7 +264,7 @@ table2 <- function(cohort, focus){
       needs = c(as.list(paste0("make_model_input-",table2_names))),
       moderately_sensitive = list(
         table2 = glue("output/table2_{focus}_{cohort}.csv"),
-        table2_rounded = glue("output/table2_{focus}_{cohort}_rounded.csv")
+        table2_midpoint6 = glue("output/table2_{focus}_{cohort}_midpoint6.csv")
       )
     )
   )
@@ -276,10 +283,10 @@ venn <- function(cohort){
       run = "r:latest analysis/venn.R",
       arguments = c(cohort),
       needs = c(as.list(glue("preprocess_data_{cohort}")),
-                as.list(paste0(glue("make_model_input-cohort_{cohort}-main-"),venn_outcomes))),
+                as.list(paste0(glue("make_model_input-cohort_{cohort}-day0_main-"),venn_outcomes))),
       moderately_sensitive = list(
         venn = glue("output/venn_{cohort}.csv"),
-        venn_rounded = glue("output/venn_{cohort}_rounded.csv")
+        venn_midpoint6 = glue("output/venn_{cohort}_midpoint6.csv")
       )
     )
   )
@@ -398,22 +405,7 @@ actions_list <- splice(
            recursive = FALSE
     )
   ),
-
-  ## Replace suicide variable with data from death registry only ---------------
-  comment("Replace suicide variable with data from death registry only"),
-
-  action(
-    name = glue("replace_suicide"),
-    run = "r:latest analysis/replace_suicide.R",
-    needs = as.list(c(paste0("preprocess_data_",c("prevax_extf","vax","unvax_extf")),
-                      paste0("stage1_data_cleaning_",c("prevax_extf","vax","unvax_extf")))),
-    highly_sensitive = list(
-      prevax_cohort = glue("output/input_prevax_extf_stage1_v1.rds"),
-      vax_cohort = glue("output/input_vax_stage1_v1.rds"),
-      unvax_cohort = glue("output/input_unvax_extf_stage1_v1.rds")
-    )
-  ),
-
+
   ## Run models ----------------------------------------------------------------
   comment("Run models"),
 
@@ -496,32 +488,22 @@ actions_list <- splice(
   action(
     name = "make_model_output",
     run = "r:latest analysis/make_model_output.R",
-    needs = as.list(paste0("cox_ipw-",
-                           setdiff(active_analyses$name,stata$name))),
+    needs = as.list(c(paste0("cox_ipw-",setdiff(active_analyses$name,stata$name)),
+                    paste0("stata_cox_ipw-",stata$name))),
     moderately_sensitive = list(
       model_output = glue("output/model_output.csv"),
-      model_output_rounded = glue("output/model_output_rounded.csv")
-    )
-  ),
-
-  action(
-    name = "make_stata_model_output",
-    run = "r:latest analysis/make_stata_model_output.R",
-    needs = as.list(paste0("stata_cox_ipw-",stata$name)),
-    moderately_sensitive = list(
-      stata_model_output = glue("output/stata_model_output.csv"),
-      stata_model_output_rounded = glue("output/stata_model_output_rounded.csv")
+      model_output_midpoint6 = glue("output/model_output_midpoint6.csv")
     )
   ),
-  
+
   action(
     name = "make_consort_output",
     run = "r:latest analysis/make_other_output.R consort prevax_extf;vax;unvax_extf",
     needs = list("stage1_data_cleaning_prevax_extf",
                  "stage1_data_cleaning_vax",
                  "stage1_data_cleaning_unvax_extf"),
     moderately_sensitive = list(
-      consort_output_rounded = glue("output/consort_output_rounded.csv")
+      consort_output_midpoint6 = glue("output/consort_output_midpoint6.csv")
     )
   ),
 
@@ -532,7 +514,7 @@ actions_list <- splice(
                  "table1_vax",
                  "table1_unvax_extf"),
     moderately_sensitive = list(
-      table1_output_rounded = glue("output/table1_output_rounded.csv")
+      table1_output_midpoint6 = glue("output/table1_output_midpoint6.csv")
     )
   ),
 
@@ -543,29 +525,29 @@ actions_list <- splice(
                  "extendedtable1_vax",
                  "extendedtable1_unvax_extf"),
     moderately_sensitive = list(
-      table1_output_rounded = glue("output/extendedtable1_output_rounded.csv")
+      table1_output_midpoint6 = glue("output/extendedtable1_output_midpoint6.csv")
     )
   ),
 
   action(
     name = "make_table2_severity_output",
-    run = "r:latest analysis/make_table2_severity_output.R",
+    run = "r:latest analysis/make_other_output.R table2_severity prevax_extf;vax;unvax_extf",
     needs = list("table2_severity_prevax_extf",
                  "table2_severity_vax",
                  "table2_severity_unvax_extf"),
     moderately_sensitive = list(
-      table2_output_rounded = glue("output/table2_severity_output_rounded.csv")
+      table2_output_midpoint6 = glue("output/table2_severity_output_midpoint6.csv")
     )
   ),
 
   action(
     name = "make_table2_history_output",
-    run = "r:latest analysis/make_table2_history_output.R",
+    run = "r:latest analysis/make_other_output.R table2_history prevax_extf;vax;unvax_extf",
     needs = list("table2_history_prevax_extf",
                  "table2_history_vax",
                  "table2_history_unvax_extf"),
     moderately_sensitive = list(
-      table2_output_rounded = glue("output/table2_history_output_rounded.csv")
+      table2_output_midpoint6 = glue("output/table2_history_output_midpoint6.csv")
     )
   ),
 
@@ -576,19 +558,19 @@ actions_list <- splice(
                  "venn_vax",
                  "venn_unvax_extf"),
     moderately_sensitive = list(
-      venn_output_rounded = glue("output/venn_output_rounded.csv")
+      venn_output_midpoint6 = glue("output/venn_output_midpoint6.csv")
     )
   ),
 
   comment("Make absolute excess risk (AER) input"),
 
   action(
     name = "make_aer_input",
-    run = "r:latest analysis/make_aer_input.R",
-    needs = as.list(paste0("make_model_input-",active_analyses[grepl("-main-",active_analyses$name),]$name)),
+    run = "r:latest analysis/make_aer_input.R day0_main",
+    needs = as.list(paste0("make_model_input-",active_analyses[grepl("-day0_main-",active_analyses$name),]$name)),
     moderately_sensitive = list(
-      aer_input = glue("output/aer_input-main.csv"),
-      aer_input_rounded = glue("output/aer_input-main-rounded.csv")
+      aer_input = glue("output/aer_input-day0_main.csv"),
+      aer_input_midpoint6 = glue("output/aer_input-day0_main-midpoint6.csv")
     )
   ),