--- title: "Chilean prosecutor's office Data merge (Step 4)" date: "`r withr::with_locale(new = c('LC_TIME' = 'C'), code =format(Sys.time(),'%B %d, %Y'))`" output: distill::distill_article: code_folding: true fig_height: 6 fig_width: 8 theme: flatly toc: yes toc_depth: 5 toc_float: yes output_dir: "docs" toc_float: collapsed: false smooth_scroll: true --- ```{css zoom-lib-src, echo = FALSE} script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js" ``` ```{js zoom-jquery, echo = FALSE} $(document).ready(function() { $('body').prepend('
'); // onClick function for all plots (img's) $('img:not(.zoomImg)').click(function() { $('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'}); $('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'}); }); // onClick function for zoomImg $('img.zoomImg').click(function() { $('.zoomDiv').css({opacity: '0', width: '0%'}); }); }); ``` ```{css hideOutput-lib-src, echo = FALSE} ``` ```{js hideOutput, echo = FALSE} $(document).ready(function() { $chunks = $('.fold'); $chunks.each(function () { // add button to source code chunks if ( $(this).hasClass('s') ) { $('pre.r', this).prepend("
Show Source

"); $('pre.r', this).children('code').attr('class', 'folded'); } // add button to output chunks if ( $(this).hasClass('o') ) { $('pre:not(.r)', this).has('code').prepend("
Show Output

"); $('pre:not(.r)', this).children('code:not(r)').addClass('folded'); // add button to plots $(this).find('img').wrap('
');       
			$('pre.plot', this).prepend("
Show Plot

");
      $('pre.plot', this).children('img').addClass('folded');
    }
  });

  // hide all chunks when document is loaded
  $('.folded').css('display', 'none')

  // function to toggle the visibility
  $('.showopt').click(function() {
    var label = $(this).html();
    if (label.indexOf("Show") >= 0) {
      $(this).html(label.replace("Show", "Hide"));
    } else {
      $(this).html(label.replace("Hide", "Show"));
    }
    $(this).siblings('code, img').slideToggle('fast', 'swing');
  });
});
```

```{=html}
```

```{=html}
```

```{r prev-setup, include = FALSE, cache=T, error=T}
# Start from an empty workspace before loading the image saved by the
# previous step (13.Rdata).
rm(list = ls()); gc()

# If you render multiple documents from the same script or R session, you
# should detach("Statamarkdown") in between documents.
try(detach("Statamarkdown"))

# The routine is pinned to R 4.1.2; fixed = TRUE keeps the dots literal
# instead of letting them match any character.
if (!grepl("4.1.2", R.version.string, fixed = TRUE)) {
  stop("Different version (must be 4.1.2)")
}

path <- getwd() # we define it again later in setup chunk

# Pick the working directory and the workspace image according to the
# machine the document is rendered on.
if (grepl("CISS Fondecyt", path)) {
  try(setwd("C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)"))
  load("C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)/13.Rdata")
} else if (grepl("andre", path)) {
  try(setwd('C:/Users/andre/Desktop/SUD_CL/'))
  load("E:/Mi unidad/Alvacast/SISTRAT 2022 (github)/13.Rdata")
} else if (grepl("E:", path)) {
  try(setwd("E:/Mi unidad/Alvacast/SISTRAT 2022 (github)/SUD_CL/"))
  load("E:/Mi unidad/Alvacast/SISTRAT 2022 (github)/13.Rdata")
} else {
  try(setwd(paste0(path)))
  load(paste0(gsub("SUD_CL", "", gsub("2022", "2019", path)), "/13.Rdata"))
}
```

```{r setup, include = FALSE, cache=T, error=T, echo=T}
#Libraries used in the routine. Dont change the order
local({
  r <- getOption("repos")
  r["CRAN"] <- "http://cran.r-project.org"
  options(repos = r)
})

# Copy a table to the clipboard (tab separated) so it can be pasted into a
# spreadsheet, then return `x` unchanged.
#   x          data frame / tibble to copy.
#   row.names  accepted for backward compatibility; row names are never written.
#   col.names  forwarded to write.table().
#   dec        decimal mark used while writing when the session uses ".".
#   ...        forwarded to write.table().
# The session's OutDec option is restored on exit, even if writing fails.
copiar_nombres <- function(x, row.names = FALSE, col.names = TRUE, dec = ",", ...) {
  old_dec <- getOption("OutDec")
  on.exit(options(OutDec = old_dec), add = TRUE)
  options(OutDec = if (identical(old_dec, ".")) dec else ",")

  # Tibbles (grouped or not) are converted to plain data frames before format().
  is_tibble <- inherits(try(dplyr::ungroup(x), silent = TRUE), "tbl_df")
  to_write <- if (is_tibble) data.frame(x) else x

  write.table(format(to_write), "clipboard", sep = "\t",
              row.names = FALSE, col.names = col.names, ...)
  x
}

# pacman has to be available before any pacman:: call is made.
if(!require(pacman)){install.packages("pacman")}
pacman::p_unlock(lib.loc = .libPaths()) # avoid problems when reinstalling packages
if(!require(devtools)){install.packages("devtools", type = "win.binary", dependencies=T)}

pacman::p_load(APCtools, ggpattern, withr, boot, matrixStats, knitr, tidyr, stringi,stringr, ggplot2, Hmisc, kableExtra, plotly, janitor, rbokeh, zoo, broom, sqldf, devtools, codebook, data.table, panelr, RColorBrewer, lsmeans, finalfit, ggiraph, sf, treemapify, dplyr, tidyverse, epiR, survminer, ggfortify, survMisc, foreign, reshape2, stargazer, tableone, MatchIt, cobalt, eha, igraph, Amelia, DiagrammeR, DiagrammeRsvg, rsvg, mstate, htmltools, webshot, flexsurv, muhaz, Metrics, rpivotTable, caret, polycor, ClusterR, flextable, ggstatsplot, ggside, daff, explore, sjPlot, compareGroups, job, missForest, showtext, ggpattern, distill, showtext, googleVis, tidylog, magick, dlookr, easystats, tidylog, sqldf, adjustedCurves, ggpmisc, rms, install=T)

#Error in if (options$noisey == TRUE) message(paste("\n", options$engine, : argument is of length zero
if(!require(survcomp)){BiocManager::install("survcomp")}
try(webshot::install_phantomjs())
if(!require(bpmn)){try(devtools::install_github("bergant/bpmn",upgrade ="never"))}
#if(!require(Statamarkdown)){try(devtools::install_github("Hemken/Statamarkdown",upgrade ="never"))}
#
#Error in if (options$noisey == TRUE) message(paste("\n", options$engine, :
# argument is of length zero
# Calls: ... sew.list -> lapply -> FUN -> sew.character ->
#easystats::install_suggested()

options(scipen=2) #display numbers rather scientific number

#remotes::install_github("chjackson/flexsurv-dev", upgrade = "never")
#devtools::install_github("hputter/mstate", upgrade = "never")

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Fit statistics for a flexsurvreg-like object `x`: reads x$loglik, x$AIC,
# x$coefficients and the "(weights)" column of x$data$m, and returns a one-row
# data frame with Df, n2ll (-2 log-likelihood), AIC, AICc and BIC.
fitstats.flexsurvreg <- function(x) {
  ll <- x$loglik
  aic <- x$AIC
  k <- length(x$coefficients)
  n <- sum(x$data$m["(weights)"])
  aicc <- aic + ((2 * k) * (k + 1) / (n - k - 1))
  bic <- -2 * ll + (k * log(n))
  data.frame(
    Df = k,
    "n2ll" = -2 * ll,
    AIC = aic,
    AICc = aicc,
    BIC = bic
  )
}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# memory.size()/memory.limit() only make sense on Windows, so the limit is
# raised inside the same guard that reports it.
if (.Platform$OS.type == "windows") {
  withAutoprint({
    memory.size()
    memory.size(TRUE)
    memory.limit()
  })
  memory.limit(size = 56000)
}

# Outside an interactive RStudio session the editor context is not available;
# in that case `path` keeps the value assigned in the prev-setup chunk.
if (rstudioapi::isAvailable()) {
  path <- dirname(rstudioapi::getSourceEditorContext()$path)
}

options(knitr.kable.NA = '')

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Text reporting how long a chunk took. `elapsed_secs` is a duration in
# seconds; more than one hour is reported in hours, anything else in minutes.
format_chunk_time <- function(elapsed_secs) {
  if (elapsed_secs > 60^2) {
    paste("Time for this code chunk to run:", round(elapsed_secs / 60^2, 1), "hours")
  } else {
    paste("Time for this code chunk to run:", round(elapsed_secs / 60, 1), "minutes")
  }
}

# Chunk hook: stores the start time before each chunk and returns the elapsed
# time after it. The difference is taken explicitly in seconds because
# difftime() otherwise picks its own unit (secs/mins/hours), which made the
# comparison with 60^2 and the division by 60 meaningless.
# NOTE(review): the two wrapper strings are bare newlines here; they look like
# they once carried HTML wrapper tags -- confirm against the rendered styling.
knitr::knit_hooks$set(time_it = local({
  now <- NULL
  function(before, options) {
    if (before) {
      # record the current time before each chunk
      now <<- Sys.time()
    } else {
      elapsed_secs <- as.numeric(difftime(Sys.time(), now, units = "secs"))
      # return a character string to show the time
      paste('\n', gsub('##', '\n', format_chunk_time(elapsed_secs)), '\n', sep = '\n')
    }
  }
}))
knitr::opts_chunk$set(time_it = TRUE)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Wrap selected cells of `df` in markdown markup.
#   df     data frame to format.
#   rows   row indices to format.
#   cols   columns to format (converted to character).
#   value  one of "italics", "bold", "strikethrough" (first one by default).
# Empty cells stay empty; spaces inside formatted cells are removed.
format_cells <- function(df, rows, cols, value = c("italics", "bold", "strikethrough")) {
  value <- match.arg(value)
  # one * for italics, two ** for bold
  map <- setNames(c("*", "**", "~~"), c("italics", "bold", "strikethrough"))
  markup <- map[value]

  for (row_i in rows) {
    for (col_j in cols) {
      # Make sure values are not factors
      df[[col_j]] <- as.character(df[[col_j]])
      # Update formatting
      df[row_i, col_j] <- ifelse(nchar(df[row_i, col_j]) == 0, "",
                                 paste0(markup, gsub(" ", "", df[row_i, col_j]), markup))
    }
  }
  return(df)
}

#To produce line breaks in messages and warnings
# NOTE(review): as in the time_it hook, the wrapper strings only hold newlines
# in this copy of the file -- confirm whether HTML wrappers are expected.
knitr::knit_hooks$set(
  error = function(x, options) {
    paste('\n\n\n',
          gsub('##', '\n', gsub('^##\ Error', '**Error**', x)),
          '\n', sep = '\n')
  },
  warning = function(x, options) {
    paste('\n\n\n',
          gsub('##', '\n', gsub('^##\ Warning:', '**Warning**', x)),
          '\n', sep = '\n')
  },
  message = function(x, options) {
    paste('\n',
          gsub('##', '\n', x),
          '\n', sep = '\n')
  }
)
```

::: blue
- Rename variables according to studied names
- We filtered relationships with a result of the proceeding with the value "FINAL JUDGMENT GUILTY" ("SENTENCIA DEFINITIVA CONDENATORIA")
- In this section, we tried to identify the offenders of interest.
- Also we explored the type of sentence by those that had a condemnatory sentence.
- We corrected the coded types of offenses to include PREDATORY ACTS
:::
# Group offenses

We discussed the nature of offenses with several professionals in the judiciary system and expert lawyers. As a result, we categorized offenses differently, as shown in the following document.

::: controlly
```{r grouped-offenses1, echo=T, fig.align='center', message=T, error=T, eval=T}
# Lookup table agreed with the professionals: one row per offense (`Delito`)
# and one column per offense family, filled in only for the family it belongs to.
Agrupacion_de_delitos_Fiscalia_Final_26_10_2022_ <-
  readxl::read_excel("Agrupacion de delitos Fiscalia Final (26-10-2022).xlsx")

Agrupacion_de_delitos_Fiscalia_Final_26_10_2022_ %>%
  knitr::kable("markdown")
```
:::

This new categorization was named as `familia_delito_rec_prof`.

```{r grouped-offenses2, echo=T, fig.align='center', message=T, error=T, eval=T}
# Family columns brought in from the lookup table.
offense_flag_cols <- c(
  "Delitos relacionados al consumo de sustancias (drogas y alcohol)",
  "Delitos violentos",
  "Delitos adquisitivos",
  "Otros delitos"
)

# TRUE where a family cell is filled in. Missing values are never a mark:
# nchar() alone returns 2 for a logical NA (the type of an all-empty column).
has_offense_flag <- function(x) {
  x <- as.character(x)
  !is.na(x) & nchar(x) > 0
}

# Attach the lookup (offense name plus the four family columns) and derive the
# family; the first filled-in family wins, in the order listed below.
fiscalia_grouped <- Base_fiscalia_v8 %>%
  dplyr::left_join(Agrupacion_de_delitos_Fiscalia_Final_26_10_2022_[, 1:5],
                   by = c("gls_materia" = "Delito")) %>%
  dplyr::mutate(familia_delito_rec_prof = dplyr::case_when(
    has_offense_flag(`Delitos relacionados al consumo de sustancias (drogas y alcohol)`) ~ "Substance-related",
    has_offense_flag(`Delitos violentos`) ~ "Violent",
    has_offense_flag(`Delitos adquisitivos`) ~ "Acquisitive",
    has_offense_flag(`Otros delitos`) ~ "Other",
    TRUE ~ NA_character_
  ))

# Every relationship must fall in exactly one family; stop before overwriting
# the database otherwise.
n_families <- Reduce(`+`, lapply(offense_flag_cols,
                                 function(col) has_offense_flag(fiscalia_grouped[[col]])))
if (any(n_families != 1L)) {
  stop("there are crimes with no category or more than one")
}

# Keep only the derived family, not the helper columns from the lookup.
Base_fiscalia_v8 <- fiscalia_grouped %>%
  dplyr::select(-dplyr::all_of(offense_flag_cols))
rm(fiscalia_grouped, n_families)

#dplyr::mutate(gls_materia_na=!is.na(gls_materia))
#janitor::tabyl(gls_materia_na,familia_delito_rec_prof)
```
# Filter cases

We discarded relationships in which the subject considered an offender was not the patient from SENDA. Also we restricted the analysis to the patients that had a final judgment as guilty with a condemnatory sentence ("SENTENCIA DEFINITIVA CONDENATORIA"), compensation agreements ("ACUERDO REPARATORIO"), suspended finishing of the proceedings ("SUSPENSION CONDICIONAL DEL PROCEDIMIENTO") and definitive dismissal ("SOBRESEIMIENTO DEFINITIVO") provided they were nested to the Article 240 of the Civil Procedure Code.

```{r 1-filter, echo=T, fig.align='center',layout="l-body-outset", message=T, error=T, eval=T}
# Original column name -> study name. Defined once so that this chunk and the
# exclusion counts in the next chunk cannot drift apart.
fiscalia_rename_map <- c(
  'rut_enc_saf' = 'id', 'fec_nacimiento' = 'dateofbirth', 'pais' = 'country',
  'sexo' = 'sex', 'encontrado_como_victima' = 'victim',
  'encontrado_como_imputado' = 'offender_d', 'tipo_sujeto_vic' = 'victim_type_n',
  'gls_tipo_sujeto_vic' = 'victim_type_c', 'idsujeto_victima' = 'id_victim',
  'reg' = 'reg', 'gls_region' = 'reg_c', 'idrelacion' = 'id_relac',
  'ruc' = 'caseid', 'tipo_termino' = 'end_type_group',
  'agrupa_terminos' = 'end_type', 'cod_delito' = 'crime_code',
  'gls_materia' = 'crime_code_c', 'familia_delito' = 'crime_code_group',
  'cod_mottermino' = 'end_type_2c', 'gls_mottermino' = 'end_type_2',
  'impcod_tiposujeto' = 'offender_type', 'gls_tipo_imputado' = 'offender_type2',
  'iddelito' = 'offence_type', 'fec_comision' = 'date_offending',
  'marca_pena_44' = 's_sentence_1', 'marca_multa_45' = 's__fine_1',
  'medida_alternativa_46' = 's_restorative_3',
  'clasificacion_pena_47' = 's_kindprison_1', 'marca_pena_52' = 's_sentence_2',
  'marca_multa_53' = 's__fine_2', 'medida_alternativa_54' = 's_restorative_2',
  'clasificacion_pena_55' = 's_kindprison_2',
  'imp_birth_date' = 'dateofbirth_imp', 'obs' = 'obs',
  'edad_comision_imp' = 'age_offending_imp',
  'edad_ter_rel_imp' = 'age_finish_rel_imp',
  'familia_delito_rec' = 'crime_code_group_rec',
  'familia_delito_rec_prof' = 'crime_code_group_rec_prof',
  'sex_imp' = 'sex_imp', 'nat_imp2' = 'country_b', 'nat_imp' = 'country_a'
)

# Classify how a relationship ended:
#   1  grouped termination mentions REPARATORIO or CONDICIONAL and there is no
#      detailed termination reason;
#   2  detailed termination reason (upper-cased) mentions REPARATORIO,
#      SENTENCIA DEFINITIVA CONDENATORIA, 240 or MONIT;
#   0  anything else.
# `end_type` is the grouped termination, `end_type_2` the detailed reason.
classify_termination <- function(end_type, end_type_2) {
  code <- rep(0, length(end_type))
  code[grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT",
             toupper(end_type_2), ignore.case = FALSE)] <- 2
  # Rule 1 has priority; it only applies when the detailed reason is missing.
  code[grepl("REPARATORIO|CONDICIONAL", end_type) & is.na(end_type_2)] <- 1
  code
}

Base_fiscalia_v9 <-
  plyr::rename(Base_fiscalia_v8, fiscalia_rename_map) %>%
  #KEY STEP: FILTER IF THE PATIENT IS CONSIDERED AS AN OFFENDER
  dplyr::filter(grepl("SI", offender_d)) %>%
  #KEY STEP: FILTER ON THE TERMINATION THE PATIENT RECEIVED FOR THE RELATIONSHIP
  # (end_type = agrupa_terminos; end_type_2 = gls_mottermino)
  dplyr::mutate(filter = classify_termination(end_type, end_type_2)) %>%
  dplyr::filter(filter > 0) %>%
  #FORMAT TO SQLDF
  dplyr::group_by(id) %>%
  dplyr::mutate(rn_id = dplyr::row_number()) %>%
  dplyr::mutate(obs = as.character(obs)) %>%
  dplyr::ungroup() %>%
  as.data.table()
```

```{r 1-filter-post, echo=T, fig.align='center',layout="l-body-outset", message=T, error=T, eval=F}
# to see who was excluded in the process
#2013-05-12

# Patients never recorded as offenders and absent from the filtered database.
plyr::rename(Base_fiscalia_v8, fiscalia_rename_map) %>%
  #KEY STEP: FILTER IF THE PATIENT IS CONSIDERED AS AN OFFENDER
  dplyr::filter(!grepl("SI", offender_d)) %>%
  dplyr::anti_join(distinct(Base_fiscalia_v9, id, .keep_all = T), by = "id") %>%
  distinct(id) %>%
  nrow()

# Patients whose relationships all ended in a termination outside the ones of
# interest and who are absent from the filtered database.
plyr::rename(Base_fiscalia_v8, fiscalia_rename_map) %>%
  #dplyr::filter(!grepl("SI",offender_d)) %>%
  dplyr::mutate(filter = classify_termination(end_type, end_type_2)) %>%
  dplyr::filter(filter == 0 | is.na(filter)) %>%
  dplyr::anti_join(distinct(Base_fiscalia_v9, id, .keep_all = T), by = "id") %>%
  distinct(id) %>%
  nrow()
```
We ended up with `r nrow(Base_fiscalia_v9) %>% format(big.mark=",")` relationships from `r length(unique(Base_fiscalia_v9$caseid)) %>% format(big.mark=",")` court cases (`caseid`) involving `r length(unique(Base_fiscalia_v9$id)) %>% format(big.mark=",")` patients. Then, we coded the crimes according to a classification discussed in formal consultations with professionals.
```{r 1-2-recode, echo=T, fig.align='center',layout="l-body-outset", message=T, error=T, eval=T}
# Spanish prison-sentence grades and the English labels used in the study.
# NOTE(review): 'Presidio Menor grado mínimo' is labelled differently from the
# other grades ('Minimum term of imprisonment'); kept as is because later
# steps may match on these labels -- confirm.
prison_grade_labels <- c(
  'Presidio Mayor grado medio' = 'Major Prison medium grade',
  'Presidio Mayor grado máximo' = 'Major Prison maximum grade',
  'Presidio Mayor grado mínimo' = 'Major Prison minimum degree',
  'Presidio Menor grado medio' = 'Minor Prison medium grade',
  'Presidio Menor grado máximo' = 'Minor Prison maximum degree',
  'Presidio Menor grado mínimo' = 'Minimum term of imprisonment',
  'Presidio Perpetuo calificado' = 'Life imprisonment, qualified degree',
  'Presidio Perpetuo simple' = 'Simple life imprisonment',
  'Prisión' = 'Prison'
)

# Translate prison grades; values without a translation (including NA) are
# returned unchanged.
translate_prison_grade <- function(x) {
  out <- as.character(x)
  hit <- match(out, names(prison_grade_labels))
  out[!is.na(hit)] <- unname(prison_grade_labels[hit[!is.na(hit)]])
  out
}

fiscalia_recoded <- Base_fiscalia_v9 %>%
  dplyr::mutate(s_kindprison_1 = translate_prison_grade(s_kindprison_1)) %>%
  #Added at 2022-11-11
  #STATA= replace prision_fact=1 if s_sentence_1=="SI" & s_restorative_3!="Libertad Vigilada"& s_restorative_3!="Otra"& s_restorative_3!="Reclusión Nocturna"& s_restorative_3!="Remisión Condicional".
  # Effective prison: a sentence was recorded and no alternative measure was.
  dplyr::mutate(prision_fact = dplyr::case_when(
    s_sentence_1 == "SI" & is.na(s_restorative_3) ~ 1,
    TRUE ~ 0
  ))

# The recoded database can never have more rows than the source database;
# stop before overwriting it otherwise.
if (nrow(fiscalia_recoded) - nrow(Base_fiscalia_v8) > 0) {
  stop("more than one case by row")
}
Base_fiscalia_v9 <- fiscalia_recoded
rm(fiscalia_recoded)

# dplyr::select(id, caseid, victim, offender_d, victim_type_n, victim_type_c, id_victim, id_relac, reg, reg_c, end_type_group, end_type, crime_code, crime_code_c, crime_code_group, end_type_2c, end_type_2, offender_type, offender_type2, offence_type, date_offending, s_sentence_1, s__fine_1, s_restorative_3, s_kindprison_1, s_sentence_2, s__fine_2, s_restorative_2, s_kindprison_2, dateofbirth_imp, obs, date_offending_imp, age_offending_imp, crime_code_group_rec_prof, sex_imp, country_b, country_a) %>%
# dplyr::mutate(dob_imp_num= dplyr::case_when(!is.na(dob_imp_num) & dob_imp_num!=fech_nac_num~ dob_imp_num, !is.na(dob_imp_num) & is.na(fech_nac_num)~ dob_imp_num, T~fech_nac_num)) %>%
```

# Explore values

We saw the relationships by RUN(`id`) and RUC (`caseid`).

```{r 1-exp-val, echo=T, fig.align='center',layout="l-body-outset", message=T, error=T, eval=T}
#- clasificacion_pena_47
#CHANGE GRADE FOR LENGTH

# One row per patient (id) and court case (caseid): number of relationships
# and number of distinct offenses. The result stays grouped by `id`, which is
# dplyr's default for summarise(), stated explicitly here.
concatenated_run_ruc_n_rel_n_offenses <- Base_fiscalia_v9 %>%
  dplyr::group_by(id, caseid) %>%
  dplyr::summarise(n = dplyr::n(),
                   n_distinct_offenses = dplyr::n_distinct(crime_code_c),
                   .groups = "drop_last")

#plot(cut2(concatenated_run_ruc_n_rel_n_offenses$n_distinct_offenses), main= "Number of different crimes committed within a case, by patients")

#more than 1 different offenses (subset of the summary above)
concatenated_run_ruc_n_rel_n_offenses_f1 <- concatenated_run_ruc_n_rel_n_offenses %>%
  dplyr::filter(n_distinct_offenses > 1)

warning(paste0("There are ",
               length(unique(concatenated_run_ruc_n_rel_n_offenses_f1$caseid)),
               " rucs with more than one crime within a case"))

plot(cut2(concatenated_run_ruc_n_rel_n_offenses$n),
     main = "Number of relationships within a case, by patients")
```
::: controlly
```{r 1b-exp-val, echo=T, fig.align='center',layout="l-body-outset", message=T, error=T, eval=T}
# Cross-tabulate cases by (binned) number of relationships and (binned) number
# of distinct offenses, with the share of each combination.
concatenated_run_ruc_n_rel_n_offenses %>%
  dplyr::ungroup() %>%
  # bin both counts
  dplyr::mutate(
    n = cut2(n),
    n_distinct_offenses = cut2(n_distinct_offenses)
  ) %>%
  dplyr::group_by(n, n_distinct_offenses) %>%
  # `n` is already a column, so the tally is stored in `nn`
  count() %>%
  dplyr::ungroup() %>%
  dplyr::mutate(perc = round(nn / sum(nn) * 100, 2)) %>%
  knitr::kable(
    "markdown",
    col.names = c(
      "Number of relationships by RUC(caseid)",
      "Number of distinct offenses by RUC (caseid)",
      "n of relationships",
      "%"
    )
  )
```
:::
Most cases had only one relationship, and nearly 95% had only one offense (even when the case involved more than one relationship). Details are shown below.
::: controlly
```{r 2-exp-val, echo=T, fig.align='center', message=T, error=T, eval=T}
# First 40 relationships belonging to court cases with more than one
# relationship, restricted to the columns of interest.
Base_fiscalia_v9 %>%
  dplyr::filter(
    caseid %in% (
      concatenated_run_ruc_n_rel_n_offenses %>%
        dplyr::ungroup() %>%
        dplyr::filter(n > 1) %>%
        distinct(caseid) %>%
        unlist() %>%
        as.character()
    )
  ) %>%
  dplyr::select(
    id, dateofbirth_imp, sex_imp, country_a, caseid, id_relac, id_victim,
    age_offending_imp, age_finish_rel_imp, crime_code_c, reg_c, end_type_2,
    s_sentence_1, s__fine_1, s_kindprison_1, s_sentence_2, s__fine_2,
    s_kindprison_2, prision_fact
  ) %>%
  slice(1:40) %>%
  knitr::kable("markdown", caption = "Glimpse of the database")
```
:::
# Join with SENDA's database

We checked and resolved the inconsistencies in male users receiving women-only treatments after imputation.
::: controlly ```{r pre-join, echo=T, fig.align='center', message=T, error=F, eval=T} invisible("Exported database in 22-09-2022") #2022-11-01, added the age at discharge. CONS_C1_df_dup_SEP_2020$edad_al_egres <- #difftime(lubridate::ymd(CONS_C1_df_dup_SEP_2020$fech_egres_imp), lubridate::ymd(CONS_C1_df_dup_SEP_2020 $fech_nac))/365.25 lubridate::time_length(lubridate::interval(lubridate::ymd(CONS_C1_df_dup_SEP_2020 $fech_nac),lubridate::ymd(CONS_C1_df_dup_SEP_2020$fech_egres_imp)),unit="years") CONS_C1_df_dup_SEP_2020$edad_al_ing_fmt <- #lubridate::time_length(lubridate::ymd(CONS_C1_df_dup_SEP_2020$fech_ing), lubridate::ymd(CONS_C1_df_dup_SEP_2020 $fech_nac),"years") lubridate::time_length(lubridate::interval(lubridate::ymd(CONS_C1_df_dup_SEP_2020 $fech_nac),lubridate::ymd(CONS_C1_df_dup_SEP_2020$fech_ing)),unit="years") CONS_C1_df_dup_SEP_2020_22<- CONS_C1_df_dup_SEP_2020 %>% subset(select= c("hash_key", "fech_nac", "fech_ing", "fech_egres_imp", "dup", "ano_bd_first", "duplicates_filtered", "id_centro", "tipo_centro", "tipo_de_programa_2", "tipo_de_plan_2", "senda", "macrozona", "nombre_region", "comuna_residencia_cod", "escolaridad_rec", "estado_conyugal_2", "compromiso_biopsicosocial", "sexo_2", "edad_al_ing", "edad_al_ing_fmt", "edad_al_egres", "edad_ini_cons", "edad_ini_sus_prin" ,"edad_ini_sus_prin_grupos", "freq_cons_sus_prin", "via_adm_sus_prin_act", "sus_ini_2_mod", "sus_ini_3_mod", "sus_ini_mod", "con_quien_vive", "sus_principal_mod", "origen_ingreso_mod", "numero_de_hijos_mod", "tipo_de_vivienda_mod", "tenencia_de_la_vivienda_mod", "rubro_trabaja_mod", "cat_ocupacional", "estatus_ocupacional", "sus_ini_mod_mvv","cat_ocupacional_corr", "condicion_ocupacional_corr", "otras_sus1_mod", "otras_sus2_mod", "otras_sus3_mod", "fech_ing_num", "fech_egres_num", "motivodeegreso_mod_imp","motivoegreso_derivacion", "evaluacindelprocesoteraputico", paste0("tipo_de_plan_2_",1:10),paste0("motivodeegreso_mod_imp_",1:10), paste0("dias_treat_imp_sin_na_",1:10), 
"dg_trs_cons_sus_or", "dg_total_cie_10", "dg_cie_10_rec", "dg_total_dsm_iv", "dg_dsm_iv_rec", "cnt_diagnostico_trs_fisico", "diagnostico_trs_fisico", "dg_fis_anemia", "dg_fis_card", "dg_fis_in_study", "dg_fis_enf_som", "dg_fis_ets", "dg_fis_hep_alc", "dg_fis_hep_b", "dg_fis_hep_cro", "dg_fis_inf", "dg_fis_otr_cond_fis_ries_vit", "dg_fis_otr_cond_fis", "dg_fis_pat_buc", "dg_fis_pat_ges_intrau", "dg_fis_trau_sec", "otros_pr_sm_abu_sex", "otros_pr_sm_exp_com_sex", "otros_pr_sm_otros", "otros_pr_sm_vif")) %>% purrr::when(dplyr::filter(.,abs(edad_al_ing_fmt-edad_al_ing)>0.02) %>% nrow()>0 ~ stop("Age at admission was calculated differently"), ~.) %>% #2023-02-01 attention dplyr::mutate(edad_al_ing=edad_al_ing_fmt) %>% dplyr::mutate(comorbidity_icd_10=dplyr::case_when(dg_total_cie_10>=2~ "Two or more", dg_total_cie_10==1~ "One", as.character(dg_cie_10_rec)=="Diagnosis unknown (under study)"~"Diagnosis unknown (under study)", as.character(dg_cie_10_rec)=="Without psychiatric comorbidity"~"Without psychiatric comorbidity")) %>% dplyr::mutate(comorbidity_icd_10=as.factor(comorbidity_icd_10)) %>% dplyr::mutate(estatus_ocupacional= dplyr::case_when(!is.na(cat_ocupacional)&!is.na(estatus_ocupacional)~"Empleado", TRUE~as.character(estatus_ocupacional)))%>% dplyr::mutate(estatus_ocupacional= as.factor(estatus_ocupacional))%>% dplyr::mutate(cnt_mod_cie_10_dg_cons_sus_or= dplyr::case_when(as.character(dg_trs_cons_sus_or)== "Drug dependence"~ dg_total_cie_10+1, TRUE~dg_total_cie_10))%>% dplyr::mutate(freq_cons_sus_prin= dplyr::case_when(as.character(freq_cons_sus_prin)=="Did not use"~ "Less than 1 day a week", TRUE~as.character(freq_cons_sus_prin)))%>% dplyr::mutate(freq_cons_sus_prin= as.factor(freq_cons_sus_prin)) %>% dplyr::mutate(tipo_centro_pub= factor(dplyr::if_else(as.character(tipo_centro)=="Public",TRUE,FALSE,NA))) %>% dplyr::mutate(dg_trs_fis_rec= factor(dplyr::case_when(as.character(diagnostico_trs_fisico)=="En estudio"~"Diagnosis unknown (under 
study)",as.character(diagnostico_trs_fisico)=="Sin trastorno"~'Without physical comorbidity',cnt_diagnostico_trs_fisico>0 ~'With physical comorbidity', TRUE~NA_character_)))%>% dplyr::mutate(escolaridad_rec= readr::parse_factor(as.character(escolaridad_rec), levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered=T,trim_ws=T,include_na =F, locale=readr::locale(encoding = "Latin1"))) %>% dplyr::mutate(freq_cons_sus_prin= readr::parse_factor(as.character(freq_cons_sus_prin), levels=c('Did not use', 'Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =T,trim_ws=T,include_na =F, locale=readr::locale(encoding = "UTF-8"))) %>% dplyr::mutate(evaluacindelprocesoteraputico= dplyr::case_when(grepl("1",as.character(evaluacindelprocesoteraputico))~ '1-High Achievement',grepl("2",as.character(evaluacindelprocesoteraputico))~ '2-Medium Achievement',grepl("3",as.character(evaluacindelprocesoteraputico))~ '3-Minimum Achievement', TRUE~as.character(evaluacindelprocesoteraputico))) %>% dplyr::mutate(evaluacindelprocesoteraputico= readr::parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=readr::locale(encoding = "UTF-8"))) %>% dplyr::mutate(tenencia_de_la_vivienda_mod= factor(dplyr::case_when(tenencia_de_la_vivienda_mod=="Allegado"~"Stays temporarily with a relative", tenencia_de_la_vivienda_mod=="Arrienda"~"Renting", tenencia_de_la_vivienda_mod=="Cedida"~"Owner/Transferred dwellings/Pays Dividends", tenencia_de_la_vivienda_mod=="Ocupación Irregular"~"Illegal Settlement", tenencia_de_la_vivienda_mod=="Otros"~"Others", tenencia_de_la_vivienda_mod=="Paga dividendo"~"Owner/Transferred dwellings/Pays Dividends", tenencia_de_la_vivienda_mod=="Propia"~"Owner/Transferred dwellings/Pays Dividends", T~NA_character_))) %>% 
dplyr::mutate(freq_cons_sus_prin=dplyr::case_when(freq_cons_sus_prin=="1 day a week or less"~"1 day a week or more",T~as.character(freq_cons_sus_prin))) %>% dplyr::mutate(freq_cons_sus_prin=ordered(freq_cons_sus_prin,levels=c("Did not use", "Less than 1 day a week", "1 day a week or more", "2 to 3 days a week","4 to 6 days a week", "Daily"))) %>% data.table::data.table() %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash_discard=row_number())%>% dplyr::ungroup() %>% dplyr::mutate(fech_ing_num_discard=fech_ing_num, fech_egres_num_discard= fech_egres_num, fech_ing_discard= fech_ing, fech_egres_imp_discard=fech_egres_imp)%>% #WIDE tidyr::pivot_wider( names_from = rn_hash_discard, names_sep="_", values_from = c(fech_ing_num_discard, fech_egres_num_discard, edad_al_ing, edad_al_egres, fech_ing_discard, fech_egres_imp_discard))%>% #FILL COLUMNS BY PATIENT dplyr::group_by(hash_key)%>% dplyr::mutate_at(vars(fech_ing_num_discard_1:fech_egres_num_discard_10),~suppressWarnings(max(as.character(.),na.rm=T)))%>% dplyr::mutate_at(vars(edad_al_ing_1:edad_al_ing_10),~suppressWarnings(max(as.character(.),na.rm=T)))%>% #2022-11-01, added the age at discharge and the dates dplyr::mutate_at(vars(edad_al_egres_1:edad_al_egres_10),~suppressWarnings(max(as.character(.),na.rm=T)))%>% dplyr::mutate_at(vars(fech_ing_discard_1:fech_ing_discard_10),~suppressWarnings(max(as.character(.),na.rm=T)))%>% dplyr::mutate_at(vars(fech_egres_imp_discard_1:fech_egres_imp_discard_10),~suppressWarnings(max(as.character(.),na.rm=T)))%>% dplyr::ungroup() %>% purrr::when(nrow(.)>nrow(CONS_C1_df_dup_SEP_2020) ~ stop("More cases in the new database"), ~.) 
name_vec <- setNames(c(paste0("fech_ing_num_discard_",1:10), paste0("fech_egres_num_discard_",1:10), paste0("fech_ing_discard_",1:10), paste0("fech_egres_imp_discard_",1:10)), #names: c(paste0("fech_ing_num_",1:10), paste0("fech_egres_num_",1:10), paste0("fech_ing_",1:10), paste0("fech_egres_imp_",1:10))) # #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_# #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ # #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_# #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ # #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ # Transform into numeric wide variables CONS_C1_df_dup_SEP_2020_22_b<- CONS_C1_df_dup_SEP_2020_22 %>% rename(!!!name_vec) %>% dplyr::mutate_at(vars(fech_ing_num_1:fech_ing_num_10),~suppressWarnings(as.numeric(.)))%>% dplyr::mutate_at(vars(fech_egres_num_1:fech_egres_num_10),~suppressWarnings(as.numeric(.)))%>% dplyr::mutate_at(vars(edad_al_ing_1:edad_al_ing_10),~suppressWarnings(as.numeric(.)))%>% dplyr::mutate_at(vars(edad_al_egres_1:edad_al_egres_10),~suppressWarnings(as.numeric(.)))%>% as.data.table()%>% purrr::when(nrow(.)>nrow(CONS_C1_df_dup_SEP_2020_22) ~ stop("More cases in the new database"), ~.) 
# #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_# # Previous join and resolution of inconsistencies #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ # #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ CONS_C1_df_dup_SEP_2020_22_c<- CONS_C1_df_dup_SEP_2020_22_b %>% dplyr::left_join(subset(Base_fiscalia_v9, rn_id==1,c("id","sex_imp","dateofbirth_imp")), by=c("hash_key"="id")) %>% # # #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ # If there are inconsistencies in sex dplyr::mutate(sex= dplyr::case_when(!is.na(sex_imp) & sex_imp!=as.character(sexo_2)~ sex_imp, !is.na(sex_imp) & is.na(sexo_2)~ sex_imp, T~as.character(sexo_2))) %>% dplyr::mutate(tipo_de_plan_2= dplyr::case_when(sex=="Men" & tipo_de_plan_2=="M-PAB"~ "PG-PAB", sex=="Men" & tipo_de_plan_2=="M-PAI"~ "PG-PAI", sex=="Men" & tipo_de_plan_2=="M-PR"~ "PG-PR", T~ as.character(tipo_de_plan_2))) %>% dplyr::mutate(tipo_de_programa_2= dplyr::case_when(sex=="Men" & tipo_de_programa_2=="Women specific"~ "General population", T~ as.character(tipo_de_programa_2))) %>% dplyr::mutate_at(vars(tipo_de_plan_2_1:tipo_de_plan_2_10), ~suppressWarnings(dplyr::case_when(sex=="Men"& as.character(.)=="M-PAB"~ "PG-PAB", sex=="Men"& as.character(.)=="M-PAI"~ "PG-PAI", sex=="Men"& as.character(.)=="M-PR"~ "PG-PR", T~as.character(.))))%>% dplyr::select(hash_key, sex, everything()) %>% # # #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ # If there are inconsistencies in the date of birth dplyr::mutate(fech_nac= lubridate::ymd(fech_nac)) %>% dplyr::mutate(dob_imp= lubridate::ymd(dateofbirth_imp)) %>% #to test if there are differences in the actual and past calculation of age at admission of SENDAs treatments only, greater than 0.002 dplyr::mutate(edad_al_ing_1_b= (fech_ing_num_1- as.numeric(lubridate::ymd(fech_nac)))/365.25) %>% purrr::when(dplyr::filter(.,abs(edad_al_ing_1-edad_al_ing_1_b)>0.02) %>% nrow()>0 ~ stop("Age at admission was calculated differently"), ~.) 
%>% # #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ # replace year at admission dplyr::mutate(edad_al_ing_imp= dplyr::case_when(!is.na(dob_imp) & fech_nac!= dob_imp~ difftime(fech_ing, dob_imp)/365.25, !is.na(dob_imp) & is.na(fech_nac)~ (fech_ing- dob_imp)/365.25, T~ as.numeric(edad_al_ing_1)))%>% # replace year at discharge (2022-11-01) dplyr::mutate(edad_al_egres_imp= dplyr::case_when(!is.na(dob_imp) & fech_nac!= dob_imp~ lubridate::time_length(lubridate::interval(dob_imp, fech_egres_imp),unit="years"), !is.na(dob_imp) & is.na(fech_nac)~ lubridate::time_length(lubridate::interval(dob_imp, fech_egres_imp),unit="years"), T~ as.numeric(edad_al_egres_1)))%>% dplyr::select(-sex_imp, -sexo_2, -edad_al_ing_1_b, -edad_al_ing_imp) %>% #2022-11-01 dplyr::mutate(fech_nac_rec=dplyr::case_when(!is.na(dob_imp) & fech_nac!= dob_imp~ dob_imp, !is.na(dob_imp) & is.na(fech_nac)~ dob_imp, T~ fech_nac)) # Age at admission for each treatment: if the date of birth (PO) is not empty and the date of birth of senda is different of date of birth, for each age at admission (for different admissions), we compute the difference of the date of admission (at each admission) with the date of birth (PO) and divided by years; if the date of birth (PO) is not empty but SENDA is empty, we compute the difference of the date of admission (at each admission) with the date of birth (PO) and divided by years; else will be taken from the date at admission from the date of birth of SENDA. 
#The same for age at discharge #2022-11-25 CONS_C1_df_dup_SEP_2020_22_d <- CONS_C1_df_dup_SEP_2020_22_c %>% group_by(hash_key) %>% slice_min(fech_egres_num) %>% ungroup() for (i in 1:10) { yr<- 365.25 column_name <- paste0("fech_ing_",1:10)[i] column_name2 <- paste0("fech_egres_imp_",1:10)[i] log_column_name <- paste0("edad_al_ing_",1:10)[i] log_column_name2 <- paste0("edad_al_egres_",1:10)[i] CONS_C1_df_dup_SEP_2020_22_d <- CONS_C1_df_dup_SEP_2020_22_d %>% #age at admission dplyr::mutate(!!log_column_name := lubridate::time_length(lubridate::interval(fech_nac_rec, !!rlang::sym(column_name)), unit="years")) %>% #age at discharge dplyr::mutate(!!log_column_name2 := lubridate::time_length(lubridate::interval(fech_nac_rec, !!rlang::sym(column_name2)), unit="years")) %>% as.data.table()%>% purrr::when(nrow(.)>nrow(CONS_C1_df_dup_SEP_2020_22) ~ stop("More cases in the new database"), ~.) } # #_#_#_#_#_#_#_#_#_#_#_ alternativas # #set iter # digits <- paste0("fech_ing_",1:10) # j <- 0 # digits2 <- paste0("fech_egres_imp_",1:10) # f <- 0 # CONS_C1_df_dup_SEP_2020_22_d<- # CONS_C1_df_dup_SEP_2020_22_c %>% # dplyr::mutate_at(vars(paste0("edad_al_ing_",1:10)), ~dplyr::case_when(!is.na(dob_imp) & fech_nac!= dob_imp~ difftime( !! rlang::sym(digits[j<<- j+1]), dob_imp)/yr, !is.na(dob_imp) & is.na(fech_nac_num)~ difftime( !! rlang::sym(digits[j<<- j+1]), dob_imp)/yr, T~ difftime(as.numeric(.),fech_nac)/yr))%>% # # replace year at discharge (2022-11-01) # dplyr::mutate_at(vars(paste0("edad_al_egres_",1:10)), ~dplyr::case_when(!is.na(dob_imp) & fech_nac!= dob_imp~ # difftime( !! rlang::sym(digits2[f<<- f+1]), dob_imp)/yr, # !is.na(dob_imp) & is.na(fech_nac_num)~ difftime( !! 
rlang::sym(digits2[f<<- f+1]), dob_imp)/yr, T~ difftime(as.numeric(.),fech_nac)/yr)) # invisible("La edad está bien definida en términos relativos (entre ingresos), pero no absolutos (manualmente no llego a la misma edad calculando manualmente ), tiene una diferencia de 0,5 años") ``` ::: Once the standardization was complete, we **joined** the databases `CONS_C1_df_dup_SEP_2020_22_d` and `Base_fiscalia_v9` into a single one. The master database was the SENDA treatments database, and we only used records from the PO database whose age of offending was equal to or later than the age of admission to treatment. If there were no PO records, the age of offense was replaced with the age at censorship (2019-11-13). However, we excluded Referrals, Deaths (1 case) and Censored cases because of missing data.
::: controlly
```{r join1, echo=T, fig.align='center', message=T, error=T, eval=T}
# #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# join
# #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# Purpose: attach Prosecutor's Office (PO) records to the SENDA treatment
# database, keep the earliest eligible offense per user, recode the cause of
# discharge, and drop users whose baseline treatment is not analysable.
#
# Objects created: Base_fiscalia_v10 (raw join), Base_fiscalia_v11 (cleaned,
# earliest offense per hash_key), Base_fiscalia_v12 (v11 without
# Referral/Death/Censored/Ongoing discharges).

# WHERE rn_id = 1
# 2023-05-26: snapshot of the two inputs of the join
save(CONS_C1_df_dup_SEP_2020_22_d, Base_fiscalia_v9, file = "data_acc_ser_23.RData")

# LEFT JOIN: every SENDA row (x) is kept. A PO row (y) is attached only when
# the user matches, the age at discharge is <= the age at offending, and
# x.dup = 1. Because these conditions sit in the ON clause they restrict the
# matches; they do not filter SENDA rows.
Base_fiscalia_v10 <-
  sqldf("SELECT * FROM CONS_C1_df_dup_SEP_2020_22_d AS x LEFT JOIN (SELECT * FROM Base_fiscalia_v9 ) AS y ON x.hash_key == y.id AND x. edad_al_egres_imp <= y.age_offending_imp AND x.dup = 1") # 2022-11-25 added dup
# author's note: 183307

invisible("It might be that those discharged not necessarily were, because this date corresponded to a referral")

paste0("Observations of SENDA database: ", nrow(CONS_C1_df_dup_SEP_2020_22_d)) # author's note: 109756
# NOTE(review): the label says "PO database" but the count is of the joined table
paste0("Observations of PO database: ", nrow(Base_fiscalia_v10)) # author's note: 204,115 rows

Base_fiscalia_v11 <-
  Base_fiscalia_v10 %>%
  # `SELECT *` returns columns present in both tables twice; clean_names()
  # makes the names unique (the drops/renames below rely on the `_2` suffix)
  janitor::clean_names() %>%
  # janitor::tabyl(!is.na(dob_imp_num))
  # previously recoded,
  dplyr::select(-sex_2, -dateofbirth_imp, -country, -victim, -id_victim,
                -crime_code_c, -reg_c, -end_type_2c, -cod_comunadelito,
                -cod_lugarocurrencia, -sex_imp, -region_delito, -filter, -id) %>%
  plyr::rename(c("dateofbirth_imp_2" = "dateofbirth_imp")) %>%
  dplyr::ungroup() %>%
  # _#_#_#_#_#_#_#_
  # generates errors with survival setting
  # abort if more than 7 rows lack a corrected date of birth
  purrr::when(dplyr::filter(., is.na(fech_nac_rec)) %>% nrow() > 7 ~ stop("Missing values in the age"), ~.) %>%
  # rows without an offense age get the age at the censoring date (2019-11-13)
  dplyr::mutate(age_offending_imp = dplyr::case_when(
    is.na(age_offending_imp) ~ lubridate::time_length(lubridate::interval(fech_nac_rec, as.Date("2019-11-13")), unit = "years"),
    TRUE ~ age_offending_imp
  )) %>%
  dplyr::group_by(hash_key) %>%
  # KEY STEP: select the first and with ties (more than one) --> 2023-04-14,
  # was discussed, but did not have consequences
  # (See https://docs.google.com/document/d/1UvtQFM3ToazUyA6G9C7pBYMgk98n31zQSY2-M9d1nEo/edit#)
  dplyr::slice_min(age_offending_imp, n = 1, with_ties = TRUE) %>%
  dplyr::ungroup() %>%
  # abort if any retained offense precedes the first admission
  purrr::when(nrow(dplyr::filter(., age_offending_imp - edad_al_ing_1 < 0)) > 0 ~ stop("Cases with negative time after admission to commission of crime"), ~.) %>%
  # non-completion is split at 90 days between first admission and discharge
  dplyr::mutate(motivodeegreso_mod_imp_rec = dplyr::case_when(
    grepl("Therapeutic", motivodeegreso_mod_imp) ~ "Treatment completion",
    grepl("Early|Late|Administrative", motivodeegreso_mod_imp) & (fech_egres_num_1 - fech_ing_num_1 < 90) ~ "Treatment non-completion (Early)",
    grepl("Early|Late|Administrative", motivodeegreso_mod_imp) & (fech_egres_num_1 - fech_ing_num_1 >= 90) ~ "Treatment non-completion (Late)",
    grepl("Referral|Death|Ongoing", motivodeegreso_mod_imp) ~ "Censored",
    TRUE ~ NA_character_
  ))

# Rows beyond the first one per user (ties kept by slice_min) and the number of
# users they belong to
warning(paste0(
  "There are ",
  nrow(dplyr::group_by(Base_fiscalia_v11, hash_key) %>%
         dplyr::mutate(rn_hash = row_number()) %>%
         dplyr::filter(rn_hash > 1)),
  " cases with more than one offense commited at the youngest age (p= ",
  dplyr::group_by(Base_fiscalia_v11, hash_key) %>%
    dplyr::mutate(rn_hash = row_number()) %>%
    dplyr::filter(rn_hash > 1) %>%
    dplyr::distinct(hash_key) %>%
    nrow(),
  ")"
))

warning(paste0(
  "There are ",
  nrow(dplyr::filter(Base_fiscalia_v11, is.na(fech_nac_rec))),
  " missing cases in date of birth (were ",
  nrow(dplyr::filter(janitor::clean_names(Base_fiscalia_v10), is.na(fech_nac_rec))),
  " in Base_fiscalia_v10)"
))

# Share of rows that DO have a PO record (non-missing crime code).
# The previous expression, table(is.na(x))[[2]], counted the missing codes
# (TRUE is the second level of the table), i.e. rows WITHOUT an event, and
# failed with "subscript out of bounds" when no code was missing.
warning(paste0(
  "There are ",
  scales::percent(mean(!is.na(Base_fiscalia_v11$crime_code_group_rec))),
  " observations with events of contacts with justice"
))

# 2022-11-01, filter: drop baseline treatments that ended in referral or death,
# or that were censored/ongoing
Base_fiscalia_v12 <-
  dplyr::filter(Base_fiscalia_v11, !grepl("Referral|Death|Censored|Ongoing", motivodeegreso_mod_imp))

# Author's pending questions (kept as no-op strings)
invisible("Ver por qué los valores negativos-R: por que entre la admisión y terminar el tratamiento hay casos que registraron un delito")
invisible("ver por qué no recodifica a los motivo de egresos de manera iterativa")
invisible("QUEDA POR LIMPIAR LA BASE DE FISCALIA")
invisible("Qué hacer con Death, Referral to another treatment, Ongoing treatment")

# Worked example for a single user
Base_fiscalia_v12 %>%
  dplyr::select(hash_key, fech_nac, fech_nac_rec, motivodeegreso_mod_imp_rec,
                age_offending_imp, edad_al_ing_1, edad_al_egres_1, edad_comision,
                fec_comision_simple, caseid, crime_code_group_rec_prof, end_type_2,
                gls_proctermino, s_sentence_1, s_restorative_3, s_kindprison_1) %>%
  dplyr::filter(hash_key == "1951c4d080cda8b68759d638a840d82f") %>%
  knitr::kable("markdown", caption = "Example of database")
# b24908c527faa1b7bd5a267d5dcabd45
# 23d88c2b8c6da2d8abf3f88b7ce8a4c0 anomalous case
# _#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# _#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# FROM PATIENTS (p=85,048), WE JOINED THE PO DATA (n= 174,961, p=49,970) by HASHs and where offense date is equal or lower to the age at discharge from treatment, and restricted the first treatment from SENDA in v10
# NOTE(review): the SQL above keeps offenses whose age is equal to or GREATER
# than the age at discharge; the wording of the previous note looks inverted.
# v11: erased missing cases in date of birth; thus, missing age at discharge, filtered the first offense by each HASH (going from n= 132,530 to n=87,770) (n=87,770 p= 85,041)
# length(unique(Base_fiscalia_v11$hash_key))
# v12: discarded patients where the first treatment corresponded to ongoing treatments (p & n=5,521), external referral (p & n=8,948) == 14,469 users were discarded.
```
:::

As a result of dropping Censored treatments (never ended), Referrals and Deaths, we started with `r format(length(unique(Base_fiscalia_v11$hash_key)), big.mark=",")` users but ended with `r format(length(unique(Base_fiscalia_v12$hash_key)), big.mark=",")`.
::: controlly
```{r join1b, echo=T, fig.align='center', message=T, error=T, eval=T}
# #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# join (imprisonment-only version of the `join1` chunk)
# #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# WHERE rn_id = 1 prision_fact
#
# Objects created: Base_fiscalia_v10_pris (raw join), Base_fiscalia_v11_pris
# (cleaned, earliest eligible offense per hash_key), Base_fiscalia_v12_pris
# (v11_pris without Referral/Death/Censored/Ongoing discharges).

# LEFT JOIN: every SENDA row (x) is kept; a PO row (y) is attached only when
# the user matches, age at discharge <= age at offending, y.prision_fact = '1'
# and x.dup = 1.
Base_fiscalia_v10_pris<-
  sqldf("SELECT * FROM CONS_C1_df_dup_SEP_2020_22_d AS x LEFT JOIN (SELECT * FROM Base_fiscalia_v9 ) AS y ON x.hash_key == y.id AND x. edad_al_egres_imp <= y.age_offending_imp AND y.prision_fact = '1' AND x.dup = 1") # 2022-11-25 added dup

invisible("It might be that those discharged not necessarily were, because this date corresponded to a referral")

paste0("Observations of SENDA database: ",nrow(CONS_C1_df_dup_SEP_2020_22_d)) # author's note: 109756
# NOTE(review): the label says "PO database" but the count is of the joined table
paste0("Observations of PO database: ", nrow(Base_fiscalia_v10_pris)) # author's note: 204,115 rows

Base_fiscalia_v11_pris<-
  Base_fiscalia_v10_pris %>%
  # `SELECT *` returns columns present in both tables twice; clean_names()
  # makes the names unique (the drops/renames below rely on the `_2` suffix)
  janitor::clean_names() %>%
  # janitor::tabyl(!is.na(dob_imp_num))
  # previously recoded,
  dplyr::select(-sex_2, -dateofbirth_imp, -country, -victim, -id_victim, -crime_code_c , -reg_c, -end_type_2c, -cod_comunadelito, -cod_lugarocurrencia, -sex_imp, -region_delito, -filter, -id)%>%
  plyr::rename(c("dateofbirth_imp_2"="dateofbirth_imp")) %>%
  dplyr::ungroup() %>%
  # _#_#_#_#_#_#_#_
  # generates errors with survival setting
  # abort if more than 7 rows lack a corrected date of birth
  purrr::when(dplyr::filter(., is.na(fech_nac_rec)) %>% nrow() >7 ~ stop("Missing values in the age"), ~.) %>%
  # rows with no attached imprisonment record (NA prision_fact) get the age at
  # the censoring date (2019-11-13)
  dplyr::mutate(age_offending_imp= dplyr::case_when(is.na(prision_fact)~ lubridate::time_length(lubridate::interval(fech_nac_rec, as.Date("2019-11-13")),unit="years"), T~ age_offending_imp)) %>%
  dplyr::group_by(hash_key) %>%
  # select the earliest offense per user. NOTE: with_ties = T keeps every row
  # tied at the minimum age, so a hash_key can still appear more than once
  # (an earlier comment here said "without ties").
  dplyr::slice_min(age_offending_imp, n = 1, with_ties = T) %>%
  dplyr::ungroup() %>%
  # abort if any retained offense precedes the first admission
  purrr::when(nrow(dplyr::filter(.,age_offending_imp-edad_al_ing_1<0))>0 ~ stop("Cases with negaative time after admission to commission of crime"), ~.) %>%
  # non-completion is split at 90 days between first admission and discharge
  dplyr::mutate(motivodeegreso_mod_imp_rec= dplyr::case_when(grepl("Therapeutic",motivodeegreso_mod_imp)~ "Treatment completion", grepl("Early|Late|Administrative", motivodeegreso_mod_imp) & (fech_egres_num_1-fech_ing_num_1 <90) ~ "Treatment non-completion (Early)", grepl("Early|Late|Administrative", motivodeegreso_mod_imp) & (fech_egres_num_1-fech_ing_num_1 >=90) ~ "Treatment non-completion (Late)", grepl("Referral|Death|Ongoing", motivodeegreso_mod_imp)~ "Censored", T~NA_character_))

# Missing dates of birth after vs. before the cleaning steps
warning(paste0("There are ",nrow(dplyr::filter(Base_fiscalia_v11_pris,is.na(fech_nac_rec)))," missing cases in date of birth (were ",nrow(dplyr::filter(janitor::clean_names(Base_fiscalia_v10_pris),is.na(fech_nac_rec)))," in Base_fiscalia_v10_pris)"))

# Frequency of each non-missing prision_fact value over all rows
# NOTE(review): yields one percentage per distinct value — presumably only '1'
# given the join condition; confirm.
warning(paste0("There are ", scales::percent(as.numeric(table(Base_fiscalia_v11_pris$prision_fact)/nrow(Base_fiscalia_v11_pris))), " observations with events of imprisonment"))

# 2022-11-11, filter: drop baseline treatments that ended in referral or death,
# or that were censored/ongoing
Base_fiscalia_v12_pris<-
  dplyr::filter(Base_fiscalia_v11_pris, !grepl("Referral|Death|Censored|Ongoing",motivodeegreso_mod_imp))

# Worked example for a single user
Base_fiscalia_v12_pris %>%
  dplyr::select(hash_key, fech_nac, fech_nac_rec, motivodeegreso_mod_imp_rec, age_offending_imp, edad_al_ing_1, edad_al_egres_1, edad_comision, fec_comision_simple, caseid, crime_code_group_rec_prof, end_type_2, gls_proctermino, s_sentence_1, s_restorative_3, s_kindprison_1,prision_fact) %>%
  dplyr::filter(hash_key=="1951c4d080cda8b68759d638a840d82f") %>%
  knitr::kable("markdown", caption="Example of database")
```
:::

```{r join2, echo=T, fig.align='center', message=T, error=T, eval=T}
# Among users with more than one row in Base_fiscalia_v12 (ties at the earliest
# offense age), summarise the number of rows per user.
Base_fiscalia_v12 %>%
  dplyr::select(hash_key, fech_nac, fech_nac_rec, motivodeegreso_mod_imp_rec, age_offending_imp, edad_al_ing_1, edad_al_egres_1, caseid, crime_code_group_rec_prof, end_type_2, gls_proctermino, s_sentence_1, s_restorative_3, s_kindprison_1) %>%
  dplyr::group_by(hash_key) %>%
  count() %>%            # n = rows per user
  dplyr::filter(n>1) %>%
  dplyr::ungroup() %>%
  # `total` is the number of such users; the rest describe the distribution of n
  dplyr::summarise(total= n(),max= max(n), min= min(n), mean=round(mean(n),2), p025= quantile(n, .025), p25= quantile(n, .25), median= quantile(n, .5), p75= quantile(n, .75), p975= quantile(n, .975)) %>%
  data.frame() %>%
  kable("markdown", caption= "Summary descvriptive of users with more than one closest offense committed after the first admission (in number of relationships)")
```

```{r join2b, echo=T, fig.align='center', message=T, error=T, eval=T}
# Same summary as `join2`, on the imprisonment-only database.
Base_fiscalia_v12_pris %>%
  dplyr::select(hash_key, fech_nac, fech_nac_rec, motivodeegreso_mod_imp_rec, age_offending_imp, edad_al_ing_1, caseid, crime_code_group_rec_prof, end_type_2, gls_proctermino, s_sentence_1, s_restorative_3, s_kindprison_1) %>%
  dplyr::group_by(hash_key) %>%
  count() %>%            # n = rows per user
  dplyr::filter(n>1) %>%
  dplyr::ungroup() %>%
  # `total` is the number of such users; the rest describe the distribution of n
  dplyr::summarise(total= n(),max= max(n), min= min(n), mean=round(mean(n),2), p025= quantile(n, .025), p25= quantile(n, .25), median= quantile(n, .5), p75= quantile(n, .75), p975= quantile(n, .975)) %>%
  data.frame() %>%
  kable("markdown", caption= "Summary descriptives of users with more than one closest offense committed after the first admission that ended in imprisonment (in number of relationships)")
```
We took `Base_fiscalia_v9` (only offenders that had a final judgment as guilty) and joined it with the SENDA data, keeping only records dated before the first treatment discharge date (`Base_fiscalia_v10b`). From these, we selected the first relationship of each combination of HASH KEY, judiciary case & type of crime (`hash_key`-`caseid`-`crime_code_group_rec_prof`), counted the number of offenses, and dichotomized the count into the presence or absence of a record (Pre-treatment criminality or `n_prev_off`). We did the same for acquisitive (`n_off_acq`), violent (`n_off_vio`), substance use-related (`n_off_sud`) and other (`n_off_oth`) offenses. We replicated these steps with the database of imprisonments only.
```{r join3, echo=T, fig.align='center', message=T, error=T, eval=T}
# Purpose: build pre-treatment criminality indicators per user
# (Base_fiscalia_v11b) and attach them to one row per user of
# Base_fiscalia_v12, producing the analytic table Base_fiscalia_v13.

# LEFT JOIN towards the past: PO rows are attached only when the age at
# discharge is GREATER than the age at offending.
Base_fiscalia_v10b <-
  sqldf("SELECT * FROM CONS_C1_df_dup_SEP_2020_22_d AS x LEFT JOIN (SELECT * FROM Base_fiscalia_v9 ) AS y ON x.hash_key == y.id AND x.edad_al_egres_imp > y.age_offending_imp AND x.dup = 1") # 2022-11-25 added dup
# changed the direction to past events, where age at discharge is greater than
# the age of commission

Base_fiscalia_v11b <-
  Base_fiscalia_v10b %>%
  # `SELECT *` returns columns present in both tables twice; clean_names()
  # makes the names unique (the rename below relies on the `_2` suffix)
  janitor::clean_names() %>%
  # janitor::tabyl(!is.na(dob_imp_num))
  # previously recoded,
  dplyr::select(-dateofbirth_imp, -country, -victim, -id_victim, -crime_code_c,
                -reg_c, -end_type_2c, -cod_comunadelito, -cod_lugarocurrencia,
                -sex_imp, -region_delito, -filter, -id) %>%
  plyr::rename(c("dateofbirth_imp_2" = "dateofbirth_imp")) %>%
  dplyr::ungroup() %>%
  # selected the first row with distinct information regarding patient ID,
  # case ID, crime code.
  dplyr::group_by(hash_key, caseid, crime_code_group_rec_prof) %>%
  dplyr::slice(1) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(hash_key) %>%
  # one 0/1 flag per crime group: 1 if the user has at least one prior record
  summarise(
    n_off_acq = ifelse(sum(crime_code_group_rec_prof == "Acquisitive", na.rm = TRUE) > 0, 1, 0),
    n_off_vio = ifelse(sum(crime_code_group_rec_prof == "Violent", na.rm = TRUE) > 0, 1, 0),
    n_off_sud = ifelse(sum(crime_code_group_rec_prof == "Substance-related", na.rm = TRUE) > 0, 1, 0),
    n_off_oth = ifelse(sum(crime_code_group_rec_prof == "Other", na.rm = TRUE) > 0, 1, 0)
  ) %>%
  dplyr::ungroup() %>%
  # NOTE(review): this is the SUM of the four flags (0-4), not a 0/1 indicator,
  # although the variable is later labelled "(Dich.)" — confirm the intent.
  dplyr::mutate(n_prev_off = rowSums(select(., starts_with("n_"))))

warning(paste0("Users in the database of previous crimes: ", format(length(unique(Base_fiscalia_v11b$hash_key)), big.mark = ",")))
warning(paste0("Users in the merged database (after filtering for observations coded as referrals, deaths, censored at baseline treatment or with ongoing treatments) : ", format(length(unique(Base_fiscalia_v12$hash_key)), big.mark = ",")))

# Pre-treatment Criminality
Base_fiscalia_v13 <-
  Base_fiscalia_v12 %>%
  dplyr::group_by(hash_key) %>%
  # select the first and without ties (only one)
  dplyr::slice_min(age_offending_imp, n = 1, with_ties = FALSE) %>%
  dplyr::ungroup() %>%
  dplyr::inner_join(Base_fiscalia_v11b, by = "hash_key") %>%
  # to see
  # dplyr::select(hash_key, fech_nac_rec, n_off_acq, )
  # polysubstance use: 1 when a secondary substance is recorded
  dplyr::mutate(policonsumo = ifelse(!is.na(otras_sus1_mod), 1, 0)) %>%
  # bin birth and commission dates at the breaks returned by dlookr::binning()
  dplyr::mutate(
    cut_fec_nac = cut2(fech_nac_rec, cuts = as.Date(attr(dlookr::binning(as.numeric(fech_nac_rec)), "breaks"))),
    cut_com_del = cut2(fec_comision_simple, cuts = as.Date(attr(dlookr::binning(as.numeric(fec_comision_simple)), "breaks")))
  ) %>%
  dplyr::mutate(tr_modality = dplyr::case_when(
    grepl("PR", as.character(tipo_de_plan_2_1)) ~ "Residential",
    grepl("PAI|PAB", as.character(tipo_de_plan_2_1)) ~ "Ambulatory",
    TRUE ~ NA_character_
  )) %>%
  # FIX: time from ADMISSION uses the age at first admission. It previously
  # subtracted the age at discharge, duplicating time_to_off_from_disch.
  dplyr::mutate(time_to_off_from_adm = age_offending_imp - edad_al_ing_1) %>%
  dplyr::mutate(time_to_off_from_disch = age_offending_imp - edad_al_egres_imp) %>%
  as.data.table() %>%
  # the join must not create more rows than its source table
  purrr::when(nrow(.) > nrow(Base_fiscalia_v12) ~ stop("More cases in the new database"), ~.)
# length(unique(Base_fiscalia_v13$hash_key))

warning(paste0("Number of cases that are different by at least 0,02 years between 'edad_al_egres_imp' & 'edad_al_egres_1'= ", nrow(cbind.data.frame(round(Base_fiscalia_v13$edad_al_egres_imp, 4), round(Base_fiscalia_v13$edad_al_egres_1, 4)) %>% dplyr::filter(abs(.[[1]] - .[[2]]) > 0.02)), " probably due to the discrepancies in getting the differences of dates"))
```

```{r join3b, echo=T, fig.align='center', message=T, error=T, eval=T}
# Purpose: same as `join3`, for the imprisonment-only database
# (Base_fiscalia_v12_pris -> Base_fiscalia_v13_pris).
invisible("Still I dont know how to make this, because there were different dates, this criteria is more stringent, so we may discard more observations that could fullfill the conditions of previous crime. It would be more easy if we only took crimes that ended in imprisonment")

# one row per user: hash_key and age at discharge
Base_fiscalia_v12_pris_sel <-
  Base_fiscalia_v12_pris %>%
  dplyr::group_by(hash_key) %>%
  # select the first and without ties (only one)
  dplyr::slice_min(edad_al_egres_imp, n = 1, with_ties = FALSE) %>%
  dplyr::ungroup() %>%
  dplyr::select(hash_key, edad_al_egres_imp)

# obtained crimes that were committed only before the matched crime age
# NOTE(review): unlike `join1b`, this join has no prision_fact condition, so
# all prior PO records are counted — confirm that this is intended.
Base_fiscalia_v10b_pris <-
  sqldf("SELECT * FROM Base_fiscalia_v12_pris_sel AS x LEFT JOIN (SELECT * FROM Base_fiscalia_v9 ) AS y ON x.hash_key == y.id AND x.edad_al_egres_imp > y.age_offending_imp")

Base_fiscalia_v11b_pris <-
  Base_fiscalia_v10b_pris %>%
  # discrepancies in names of variables
  janitor::clean_names() %>%
  # janitor::tabyl(!is.na(dob_imp_num))
  # previously recoded,
  dplyr::select(-dateofbirth_imp, -country, -victim, -id_victim, -crime_code_c,
                -reg_c, -end_type_2c, -cod_comunadelito, -cod_lugarocurrencia,
                -sex_imp, -region_delito, -filter, -id) %>%
  dplyr::ungroup() %>%
  # selected the first row with distinct information regarding patient ID,
  # case ID, crime code.
  dplyr::group_by(hash_key, caseid, crime_code_group_rec_prof) %>%
  dplyr::slice(1) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(hash_key) %>%
  # one 0/1 flag per crime group: 1 if the user has at least one prior record
  summarise(
    n_off_acq = ifelse(sum(crime_code_group_rec_prof == "Acquisitive", na.rm = TRUE) > 0, 1, 0),
    n_off_vio = ifelse(sum(crime_code_group_rec_prof == "Violent", na.rm = TRUE) > 0, 1, 0),
    n_off_sud = ifelse(sum(crime_code_group_rec_prof == "Substance-related", na.rm = TRUE) > 0, 1, 0),
    n_off_oth = ifelse(sum(crime_code_group_rec_prof == "Other", na.rm = TRUE) > 0, 1, 0)
  ) %>%
  dplyr::ungroup() %>%
  # NOTE(review): sum of the four flags (0-4), not a 0/1 indicator — confirm.
  dplyr::mutate(n_prev_off = rowSums(select(., starts_with("n_"))))

# Pre-treatment Criminality
Base_fiscalia_v13_pris <-
  Base_fiscalia_v12_pris %>%
  dplyr::group_by(hash_key) %>%
  # select the first and without ties (only one)
  dplyr::slice_min(age_offending_imp, n = 1, with_ties = FALSE) %>%
  dplyr::ungroup() %>%
  dplyr::inner_join(Base_fiscalia_v11b_pris, by = "hash_key") %>%
  # to see
  # dplyr::select(hash_key, fech_nac_rec, n_off_acq, )
  # polysubstance use: 1 when a secondary substance is recorded
  dplyr::mutate(policonsumo = ifelse(!is.na(otras_sus1_mod), 1, 0)) %>%
  # bin birth and commission dates at the breaks returned by dlookr::binning()
  dplyr::mutate(
    cut_fec_nac = cut2(fech_nac_rec, cuts = as.Date(attr(dlookr::binning(as.numeric(fech_nac_rec)), "breaks"))),
    cut_com_del = cut2(fec_comision_simple, cuts = as.Date(attr(dlookr::binning(as.numeric(fec_comision_simple)), "breaks")))
  ) %>%
  dplyr::mutate(tr_modality = dplyr::case_when(
    grepl("PR", as.character(tipo_de_plan_2_1)) ~ "Residential",
    grepl("PAI|PAB", as.character(tipo_de_plan_2_1)) ~ "Ambulatory",
    TRUE ~ NA_character_
  )) %>%
  # FIX: time from ADMISSION uses the age at first admission. It previously
  # subtracted the age at discharge, duplicating time_to_off_from_disch.
  dplyr::mutate(time_to_off_from_adm = age_offending_imp - edad_al_ing_1) %>%
  dplyr::mutate(time_to_off_from_disch = age_offending_imp - edad_al_egres_imp) %>%
  as.data.table() %>%
  # FIX: compare against the source of THIS pipeline (Base_fiscalia_v12_pris);
  # the guard previously used Base_fiscalia_v12, a different table.
  purrr::when(nrow(.) > nrow(Base_fiscalia_v12_pris) ~ stop("More cases in the new database"), ~.)
# length(unique(Base_fiscalia_v13_pris$hash_key))

warning(paste0("Number of cases that are different by at least 0,02 years between 'edad_al_egres_imp' & 'edad_al_egres_1'= ", nrow(cbind.data.frame(round(Base_fiscalia_v13_pris$edad_al_egres_imp, 4), round(Base_fiscalia_v13_pris$edad_al_egres_1, 4)) %>% dplyr::filter(abs(.[[1]] - .[[2]]) > 0.02)), " probably due to the discrepancies in getting the differences of dates"))
```

```{r join4_pregunta_ACC, echo=T, fig.align='center', message=T, error=T, eval=F}
# NOT EVALUATED (eval=F): draft that links TOP questionnaire items on
# self-reported offenses to the merged database. Code is kept as written.
# Record thefts, robberies, domestic violence and other acts committed in the
# last 4 weeks
# Domestic violence (physical or psychological abuse)
CONS_TOP_2022<- # author's note: 107307
  CONS_TOP%>%
  # attach the first row per user of Base_fiscalia_v13
  dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v13, hash_key), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "hash_key"))%>%
  dplyr::mutate(fech_ap_top_num= as.numeric(as.Date(str_sub(as.character(lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T)),1,10))))%>% # No parse failures
  dplyr::select(HASH_KEY,fech_ap_top_num, dateofbirth_imp, Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro) %>%
  dplyr::filter(!is.na(HASH_KEY)) %>%
  # "S" answers become 1, anything else 0
  dplyr::mutate_at(vars("Hurto", "Robo", "Venta.Drogas", "Riña", "Otro"), ~ifelse(.=="S",1,0)) %>%
  dplyr::mutate(Total.VIF= ifelse(Total.VIF>0,1,0))%>%
  dplyr::mutate(tot_off_top = base::rowSums(dplyr::select(.,c(Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro)), na.rm = T)) %>%
  # age at TOP application, in years
  dplyr::mutate(dateofbirth_imp_num= as.numeric(dateofbirth_imp),edad_a_ap_top_num= (fech_ap_top_num-dateofbirth_imp_num)/365.25) %>%
  dplyr::select(-dateofbirth_imp, -dateofbirth_imp_num) %>%
  dplyr::filter(!is.na(edad_a_ap_top_num))

Base_fiscalia_v12b<-
  sqldf("SELECT * FROM Base_fiscalia_v11b AS x LEFT JOIN (SELECT * FROM CONS_TOP_2022 ) AS y ON x.hash_key == y.HASH_KEY AND x. edad_al_egres_imp > y.age_offending_imp") # changed the direction to past events, where age at discharge is greater than the age of commission

# NOTE(review): the statements below are an unfinished fragment — the pipe into
# pivot_wider() appears to be missing and the chain ends by piping into
# `Base_fiscalia_v12b`. Complete or remove before enabling this chunk.
Base_fiscalia_v12b %>%
  dplyr::filter(!is.na(HASH_KEY))
tidyr::pivot_wider( names_from = rn_hash_discard, names_sep="_", values_from = c(fech_ing_num_discard, fech_egres_num_discard, edad_al_ing))%>%
  dplyr::group_by(hash_key)%>%
  dplyr::mutate_at(vars(fech_ing_num_discard_1:fech_egres_num_discard_10),~suppressWarnings(max(as.character(.),na.rm=T)))%>%
  Base_fiscalia_v12b%>%
  dplyr::left_join(CONS_TOP,)
```
# Characteristics

::: controlly
```{r desc, echo=T, fig.align='center', message=T, error=T, eval=T}
# Purpose: label the analysis variables of Base_fiscalia_v13 and print a
# descriptive table stratified by the recoded cause of discharge
# (motivodeegreso_mod_imp_rec).
# Treatment status (Early dropout/Late dropout/Treatment completion)

# Variables shown in the table: continuous, categorical, and the
# pre-treatment criminality indicators
cont_vars_desc<- c("edad_al_ing_1", "edad_ini_cons", "dias_treat_imp_sin_na_1")
cat_vars_desc<- c("sex", "escolaridad_rec", "sus_principal_mod", "freq_cons_sus_prin", "compromiso_biopsicosocial", "tenencia_de_la_vivienda_mod", "dg_cie_10_rec", "dg_trs_cons_sus_or", "macrozona", "policonsumo", "tr_modality", "tipo_centro", "condicion_ocupacional_cor", "origen_ingreso_mod", "numero_de_hijos_mod")
cat_vars_desc_off <- c("n_prev_off", "n_off_vio", "n_off_acq", "n_off_sud", "n_off_oth")

# Human-readable "label" attribute for each column
attr(Base_fiscalia_v13$edad_al_ing_1,"label") <- "Age (admission to treatment)"
attr(Base_fiscalia_v13$sex,"label") <- "Sex"
attr(Base_fiscalia_v13$escolaridad_rec,"label") <- "Educational Attainment"
attr(Base_fiscalia_v13$sus_principal_mod,"label") <- "Primary Substance (admission to treatment)"
attr(Base_fiscalia_v13$freq_cons_sus_prin,"label") <- "Substance use frequency (primary substance)"
attr(Base_fiscalia_v13$edad_ini_cons,"label") <- "Age of Onset of Substance Use" # I added it
attr(Base_fiscalia_v13$compromiso_biopsicosocial,"label") <- "Bio-psychosocial status"
attr(Base_fiscalia_v13$tenencia_de_la_vivienda_mod,"label") <- "Housing situation (tenure status of households)"
attr(Base_fiscalia_v13$policonsumo,"label") <- "Co-occurring SUD"
attr(Base_fiscalia_v13$dg_cie_10_rec,"label") <- "Comorbidity (ICD-10)"
attr(Base_fiscalia_v13$dg_trs_cons_sus_or,"label") <- "SUD Severity (Dependence status)"
attr(Base_fiscalia_v13$dias_treat_imp_sin_na_1,"label") <- "Days in treatment"
attr(Base_fiscalia_v13$tr_modality,"label") <- "Treatment Modality"
attr(Base_fiscalia_v13$tipo_centro,"label") <- "Type of Center"
attr(Base_fiscalia_v13$condicion_ocupacional_cor,"label") <- "Occupational Status Corrected"
attr(Base_fiscalia_v13$origen_ingreso_mod,"label") <- "Motive of Admission to Treatment"
attr(Base_fiscalia_v13$numero_de_hijos_mod,"label") <- "Number of Children "
attr(Base_fiscalia_v13$n_prev_off,"label") <- "Pre-treatment Criminality (Dich.)"
attr(Base_fiscalia_v13$n_off_acq,"label") <- "Acquisitive crime, Pre-treatment Criminality"
attr(Base_fiscalia_v13$n_off_oth,"label") <- "Other crime, Pre-treatment Criminality"
attr(Base_fiscalia_v13$n_off_sud,"label") <- "Substance use-related crime, Pre-treatment Criminality"
attr(Base_fiscalia_v13$n_off_vio,"label") <- "Violent crime, Pre-treatment Criminality"
# added
attr(Base_fiscalia_v13$cut_com_del,"label") <- "Binned comission date"
attr(Base_fiscalia_v13$cut_fec_nac,"label") <- "Binned birth date"
attr(Base_fiscalia_v13$macrozona,"label") <- "Macro administrative Chilean zone"
attr(Base_fiscalia_v13$time_to_off_from_adm,"label") <- "Time-to-offense from Admission"
attr(Base_fiscalia_v13$time_to_off_from_disch,"label") <- "Time-to-offense from Discharge"
attr(Base_fiscalia_v13$age_offending_imp,"label") <- "Age when the offense was committed"
attr(Base_fiscalia_v13$edad_al_egres_imp,"label") <- "Age at discharge (mod)"
attr(Base_fiscalia_v13$fech_nac_rec,"label") <- "Corrected date of birth"
attr(Base_fiscalia_v13$fech_egres_imp,"label") <- "Date of discharge"

# Table stratified by cause of discharge, with an overall column, SMDs, tests
# and missing values shown as a level
tbone_desc_merge4<-
  CreateTableOne(vars=c(cont_vars_desc, cat_vars_desc, cat_vars_desc_off, "cut_com_del", "cut_fec_nac"), data= subset(Base_fiscalia_v13, select= c(cont_vars_desc, cat_vars_desc, cat_vars_desc_off, "cut_com_del", "cut_fec_nac", "motivodeegreso_mod_imp_rec")), factorVars = c(cat_vars_desc, cat_vars_desc_off, "cut_com_del", "cut_fec_nac"), smd=T, strata="motivodeegreso_mod_imp_rec", addOverall = T, includeNA=T, test=T)

# Convert the table to a data frame and print it as markdown.
# NOTE(review): as.data.frame.TableOne() and format_cells() are not defined in
# this part of the file — presumably helpers from the setup chunk; verify.
as.data.frame.TableOne(tbone_desc_merge4, smd=T, nonnormal= T)%>%
  dplyr::mutate(char2=characteristic) %>%
  tidyr::fill(char2) %>%
  dplyr::select(char2,everything()) %>%
  # rows with a missing level are shown as "[Missing]"
  dplyr::mutate(level=ifelse(is.na(level),"[Missing]",level)) %>%
  dplyr::mutate(char2=dplyr::case_when(characteristic=="NA"~NA_character_,T~as.character(characteristic))) %>%
  format_cells(1, 1:length(names(.)), "bold") %>%
  # drop the first column (char2)
  dplyr::select(-1) %>%
  knitr::kable(size=10, format="markdown",caption= "Summary descriptives, Condemnatory sentence(1), Found as an imputed (YES), by Baseline Treatment Status", escape=T)
# kable(size=10, format="html",caption= "Summary descriptives, by Baseline Treatment Status") %>% kableExtra::kable_classic()
```
:::

::: controlly
```{r descb, echo=T, fig.align='center', message=T, error=T, eval=T}
# Purpose: same labelling and descriptive table as the `desc` chunk, for the
# imprisonment-only database Base_fiscalia_v13_pris.
# Treatment status (Early dropout/Late dropout/Treatment completion)

# Variables shown in the table (re-declared so the chunk can run on its own)
cont_vars_desc<- c("edad_al_ing_1", "edad_ini_cons", "dias_treat_imp_sin_na_1")
cat_vars_desc<- c("sex", "escolaridad_rec", "sus_principal_mod", "freq_cons_sus_prin", "compromiso_biopsicosocial", "tenencia_de_la_vivienda_mod", "dg_cie_10_rec", "dg_trs_cons_sus_or", "macrozona", "policonsumo", "tr_modality", "tipo_centro", "condicion_ocupacional_cor", "origen_ingreso_mod", "numero_de_hijos_mod")
cat_vars_desc_off <- c("n_prev_off", "n_off_vio", "n_off_acq", "n_off_sud", "n_off_oth")

# Human-readable "label" attribute for each column
attr(Base_fiscalia_v13_pris$edad_al_ing_1,"label") <- "Age (admission to treatment)"
attr(Base_fiscalia_v13_pris$sex,"label") <- "Sex"
attr(Base_fiscalia_v13_pris$escolaridad_rec,"label") <- "Educational Attainment"
attr(Base_fiscalia_v13_pris$sus_principal_mod,"label") <- "Primary Substance (admission to treatment)"
attr(Base_fiscalia_v13_pris$freq_cons_sus_prin,"label") <- "Substance use frequency (primary substance)"
attr(Base_fiscalia_v13_pris$edad_ini_cons,"label") <- "Age of Onset of Substance Use" # I added it
attr(Base_fiscalia_v13_pris$compromiso_biopsicosocial,"label") <- "Bio-psychosocial status"
attr(Base_fiscalia_v13_pris$tenencia_de_la_vivienda_mod,"label") <- "Housing situation (tenure status of households)"
attr(Base_fiscalia_v13_pris$policonsumo,"label") <- "Co-occurring SUD"
attr(Base_fiscalia_v13_pris$dg_cie_10_rec,"label") <- "Comorbidity (ICD-10)"
attr(Base_fiscalia_v13_pris$dg_trs_cons_sus_or,"label") <- "SUD Severity (Dependence status)"
attr(Base_fiscalia_v13_pris$dias_treat_imp_sin_na_1,"label") <- "Days in treatment"
attr(Base_fiscalia_v13_pris$tr_modality,"label") <- "Treatment Modality"
attr(Base_fiscalia_v13_pris$tipo_centro,"label") <- "Type of Center"
attr(Base_fiscalia_v13_pris$condicion_ocupacional_cor,"label") <- "Occupational Status Corrected"
attr(Base_fiscalia_v13_pris$origen_ingreso_mod,"label") <- "Motive of Admission to Treatment"
attr(Base_fiscalia_v13_pris$numero_de_hijos_mod,"label") <- "Number of Children "
attr(Base_fiscalia_v13_pris$n_prev_off,"label") <- "Pre-treatment Criminality (Dich.)"
attr(Base_fiscalia_v13_pris$n_off_acq,"label") <- "Acquisitive crime, Pre-treatment Criminality"
attr(Base_fiscalia_v13_pris$n_off_oth,"label") <- "Other crime, Pre-treatment Criminality"
attr(Base_fiscalia_v13_pris$n_off_sud,"label") <- "Substance use-related crime, Pre-treatment Criminality"
attr(Base_fiscalia_v13_pris$n_off_vio,"label") <- "Violent crime, Pre-treatment Criminality"
# added
attr(Base_fiscalia_v13_pris$cut_com_del,"label") <- "Binned comission date"
attr(Base_fiscalia_v13_pris$cut_fec_nac,"label") <- "Binned birth date"
attr(Base_fiscalia_v13_pris$macrozona,"label") <- "Macro administrative Chilean zone"
attr(Base_fiscalia_v13_pris$time_to_off_from_adm,"label") <- "Time-to-offense from Admission"
attr(Base_fiscalia_v13_pris$time_to_off_from_disch,"label") <- "Time-to-offense from Discharge"
attr(Base_fiscalia_v13_pris$age_offending_imp,"label") <- "Age when the offense was committed"
attr(Base_fiscalia_v13_pris$edad_al_egres_imp,"label") <- "Age at discharge (mod)"
attr(Base_fiscalia_v13_pris$fech_nac_rec,"label") <- "Corrected date of birth"
attr(Base_fiscalia_v13_pris$fech_egres_imp,"label") <- "Date of discharge"

# Table stratified by cause of discharge, with an overall column, SMDs, tests
# and missing values shown as a level
tbone_desc_merge4b<-
  CreateTableOne(vars=c(cont_vars_desc, cat_vars_desc, cat_vars_desc_off, "cut_com_del", "cut_fec_nac"), data= subset(Base_fiscalia_v13_pris, select= c(cont_vars_desc, cat_vars_desc, cat_vars_desc_off, "cut_com_del", "cut_fec_nac", "motivodeegreso_mod_imp_rec")), factorVars = c(cat_vars_desc, cat_vars_desc_off, "cut_com_del", "cut_fec_nac"), smd=T, strata="motivodeegreso_mod_imp_rec", addOverall = T, includeNA=T, test=T)

# Convert the table to a data frame and print it as markdown.
# NOTE(review): as.data.frame.TableOne() and format_cells() are not defined in
# this part of the file — presumably helpers from the setup chunk; verify.
as.data.frame.TableOne(tbone_desc_merge4b, smd=T, nonnormal= T)%>%
  dplyr::mutate(char2=characteristic) %>%
  tidyr::fill(char2) %>%
  dplyr::select(char2,everything()) %>%
  # rows with a missing level are shown as "[Missing]"
  dplyr::mutate(level=ifelse(is.na(level),"[Missing]",level)) %>%
  dplyr::mutate(char2=dplyr::case_when(characteristic=="NA"~NA_character_,T~as.character(characteristic))) %>%
  format_cells(1, 1:length(names(.)), "bold") %>%
  # drop the first column (char2)
  dplyr::select(-1) %>%
  kable(size=10, format="markdown",caption= "Summary descriptives, Condemnatory sentence(1), Found as an imputed (YES) and Imprisonment, by Baseline Treatment Status", escape=T)
# kable(size=10, format="html",caption= "Summary descriptives, by Baseline Treatment Status") %>% kableExtra::kable_classic()
```
:::
# Explore Survival setting

```{r exp-surv1, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE}
# Crude offending rates by discharge status.
# Follow-up time is (age at offense - age at discharge), i.e. it runs from
# discharge; rows with a negative difference are excluded.
# The captions used to say "from admission" and were identical for both tables
# although the stratifying variable differs, so they now name the time origin
# and the stratifier of each table.
# NOTE(review): the event indicator is !is.na(dateofbirth_imp) -- confirm this
# column is non-missing only for people matched to an offense.
biostat3::survRate(
  Surv((age_offending_imp - edad_al_egres_imp), !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_1,
  data = dplyr::filter(Base_fiscalia_v13, age_offending_imp - edad_al_egres_imp >= 0)
) %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ round(.x, 2))) %>%
  knitr::kable("markdown",
    caption = "Glimpse of the survival analysis, Offending from discharge by Cause of Discharge of the First Treatment")

biostat3::survRate(
  Surv((age_offending_imp - edad_al_egres_imp), !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec,
  data = dplyr::filter(Base_fiscalia_v13, age_offending_imp - edad_al_egres_imp >= 0)
) %>%
  # drop the first column; qualified so another package's select() cannot mask it
  dplyr::select(-1) %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ round(.x, 2))) %>%
  knitr::kable("markdown",
    caption = "Glimpse of the survival analysis, Offending from discharge by Baseline Treatment Status")
```

`r paste0("We discarded cases with ", nrow(dplyr::filter(Base_fiscalia_v11, age_offending_imp- edad_al_egres_imp<0)) %>% format(big.mark=","), " relationships with a contact with the system before finishing treatment")`

**Difference with age of offense from discharge**

```{r exp-surv2, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE, fig.cap= "Difference with age of offense from discharge"}
# Kaplan-Meier fit of time from discharge to offense by baseline treatment
# status, excluding negative times and the "Censored" discharge category.
mot_egreso_fit <- survfit(
  Surv(age_offending_imp - edad_al_egres_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec,
  data = dplyr::filter(
    Base_fiscalia_v13,
    age_offending_imp - edad_al_egres_imp >= 0,
    motivodeegreso_mod_imp_rec != "Censored"
  ),
  type = "kaplan-meier", error = "greenwood", conf.type = "log-log"
)

# Cumulative sum of events / number at risk within each stratum.
mot_egreso_fit_na <- mot_egreso_fit %>%
  fortify() %>%
  group_by(strata) %>%
  mutate(CumHaz = cumsum(n.event / n.risk))

ggsurvplot(mot_egreso_fit,
  fun = "cumhaz",
  conf.int = TRUE,
  legend.labs = c("Tr Comp", "Tr Non-Comp (Early)", "Tr Non-Comp (Late)"),
  risk.table = "abs_pct",
  #ncensor.plot = TRUE,
  ggtheme = theme_classic2(base_size = 15),
  risk.table.y.text.col = FALSE,
  risk.table.col = "black",
  font.tickslab = c(10),
  risk.table.height = .2,
  risk.table.fontsize = 2.5,
  #break.time.by = 365.25,
  pval = TRUE,
  #ylim=c(0,10),
  legend = c(0.88, 0.2),
  legend.title = "Cause of discharge",
  xlab = "Time (in years)",
  #cumevents=T,
  surv.connect = TRUE,
  censor = FALSE
)
#
```

`r paste0("We discarded cases with ", nrow(dplyr::filter(Base_fiscalia_v11_pris, age_offending_imp- edad_al_egres_imp<0)) %>% format(big.mark=","), " relationships with a contact with the system before finishing treatment (imprisonment only)")`

**Difference with age of offense from discharge (imprisonment only)**

```{r exp-surv2-pris, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE, fig.cap= "Difference with age of offense from discharge (imprisonment only)"}
# Same fit as above on the imprisonment data set.
mot_egreso_fit_pris <- survfit(
  Surv(age_offending_imp - edad_al_egres_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec,
  data = dplyr::filter(
    Base_fiscalia_v13_pris,
    age_offending_imp - edad_al_egres_imp >= 0,
    motivodeegreso_mod_imp_rec != "Censored"
  ),
  type = "kaplan-meier", error = "greenwood", conf.type = "log-log"
)

# NOTE(review): this overwrites the `mot_egreso_fit_na` object created in the
# previous chunk; kept as in the original -- confirm whether a separate
# `_pris` object was intended.
mot_egreso_fit_na <- mot_egreso_fit_pris %>%
  fortify() %>%
  group_by(strata) %>%
  mutate(CumHaz = cumsum(n.event / n.risk))

ggsurvplot(mot_egreso_fit_pris,
  fun = "cumhaz",
  conf.int = TRUE,
  legend.labs = c("Tr Comp", "Tr Non-Comp (Early)", "Tr Non-Comp (Late)"),
  risk.table = "abs_pct",
  #ncensor.plot = TRUE,
  ggtheme = theme_classic2(base_size = 15),
  risk.table.y.text.col = FALSE,
  risk.table.col = "black",
  font.tickslab = c(10),
  risk.table.height = .2,
  risk.table.fontsize = 2.5,
  #break.time.by = 365.25,
  pval = TRUE,
  #ylim=c(0,10),
  legend = c(0.88, 0.17),
  legend.title = "Cause of discharge",
  xlab = "Time (in years)",
  #cumevents=T,
  surv.connect = TRUE,
  censor = FALSE
)
#
```

Given that our data has a staggered entry to the analysis (depending on the discharge), the following fits let each person enter the risk set at the age at discharge and leave it at the age at offense.

**Survival of ages of offense from discharge (staggered entry)**

```{r exp-surv3, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE, fig.cap= "Difference with age of offense from discharge"}
# Numbers at risk on the age scale (entry = age at discharge, exit = age at
# offense). min()/max() use na.rm = TRUE so a single missing age does not turn
# the time grid into NA.
# NOTE(review): the at-risk table uses the unfiltered data while the fit below
# drops negative times and the "Censored" category -- confirm this is intended.
no_at_risk <- survcomp::no.at.risk(
  formula.s = Surv(time2 = age_offending_imp, time = edad_al_egres_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec,
  data.s = Base_fiscalia_v13,
  sub.s = "all",
  t.step = round(min(Base_fiscalia_v13$edad_al_egres_imp, na.rm = TRUE)),
  t.end = round(max(Base_fiscalia_v13$age_offending_imp, na.rm = TRUE))
)

mot_egreso_fit2 <- survfit(
  Surv(time2 = age_offending_imp, time = edad_al_egres_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec,
  data = dplyr::filter(
    Base_fiscalia_v13,
    age_offending_imp - edad_al_egres_imp >= 0,
    motivodeegreso_mod_imp_rec != "Censored"
  ),
  type = "fleming-harrington", conf.type = "log-log"
)

fortify(mot_egreso_fit2, fun = "cumhaz") %>%
  data.frame() %>%
  ggplot(aes(x = time, y = surv, fill = strata, color = strata, group = strata)) +
  geom_step(size = .8) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = .2) +
  theme_classic2(base_size = 15) +
  annotate(geom = "table", x = 3, y = 8, label = list(no_at_risk), table.theme = ttheme_gtminimal)
```

**Survival of ages of offense from discharge (staggered entry) (imprisonment only)**

```{r exp-surv3-pris, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE, fig.cap= "Difference with age of offense from discharge(imprisonment only)"}
# Same staggered-entry analysis on the imprisonment data set.
no_at_risk_pris <- survcomp::no.at.risk(
  formula.s = Surv(time2 = age_offending_imp, time = edad_al_egres_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec,
  data.s = Base_fiscalia_v13_pris,
  sub.s = "all",
  t.step = round(min(Base_fiscalia_v13_pris$edad_al_egres_imp, na.rm = TRUE)),
  t.end = round(max(Base_fiscalia_v13_pris$age_offending_imp, na.rm = TRUE))
)

mot_egreso_fit2_pris <- survfit(
  Surv(time2 = age_offending_imp, time = edad_al_egres_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec,
  data = dplyr::filter(
    Base_fiscalia_v13_pris,
    age_offending_imp - edad_al_egres_imp >= 0,
    motivodeegreso_mod_imp_rec != "Censored"
  ),
  type = "fleming-harrington", conf.type = "log-log"
)

fortify(mot_egreso_fit2_pris, fun = "cumhaz") %>%
  data.frame() %>%
  ggplot(aes(x = time, y = surv, fill = strata, color = strata, group = strata)) +
  geom_step(size = .8) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = .2) +
  theme_classic2(base_size = 15) +
  annotate(geom = "table", x = 3, y = 1.75, label = list(no_at_risk_pris), table.theme = ttheme_gtminimal)
```
::: superbigimage
```{r scho-res, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE, fig.cap= "Schoenfeld residuals", fig.retina= 2}
#https://rpubs.com/linpearl89/TTE-RCT
#https://search.r-project.org/CRAN/refmans/adjustedCurves/html/surv_aiptw_pseudo.html
#https://search.r-project.org/CRAN/refmans/adjustedCurves/html/surv_iptw_cox.html
#https://cran.r-project.org/web/packages/RISCA/RISCA.pdf

# Cox model on time from discharge to offense, fitted only to inspect the
# Schoenfeld residuals of each covariate.
coxfit <- coxph(
  Surv(age_offending_imp - edad_al_egres_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec + edad_al_ing_1 + edad_ini_cons +
    dias_treat_imp_sin_na_1 + escolaridad_rec + sus_principal_mod +
    freq_cons_sus_prin + compromiso_biopsicosocial + origen_ingreso_mod +
    numero_de_hijos_mod + tenencia_de_la_vivienda_mod + dg_cie_10_rec +
    dg_trs_cons_sus_or + macrozona + n_prev_off + n_off_vio + n_off_acq +
    n_off_sud + n_off_oth,
  ties = "efron",
  data = Base_fiscalia_v13
)
ggcoxdiagnostics(coxfit, type = "schoenfeld")
ggsave("_figs/prueba.png", dpi = 640, height = 15, width = 15)
```
:::

::: superbigimage
```{r scho-res2, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE, fig.cap= "Schoenfeld residuals (left censored)", fig.retina= 2}
#https://rpubs.com/linpearl89/TTE-RCT
#https://search.r-project.org/CRAN/refmans/adjustedCurves/html/surv_aiptw_pseudo.html
#https://search.r-project.org/CRAN/refmans/adjustedCurves/html/surv_iptw_cox.html
#https://cran.r-project.org/web/packages/RISCA/RISCA.pdf

# Same covariates, with entry at age at discharge and exit at age at offense.
coxfit2 <- coxph(
  Surv(edad_al_egres_imp, age_offending_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec + edad_al_ing_1 + edad_ini_cons +
    dias_treat_imp_sin_na_1 + escolaridad_rec + sus_principal_mod +
    freq_cons_sus_prin + compromiso_biopsicosocial + origen_ingreso_mod +
    numero_de_hijos_mod + tenencia_de_la_vivienda_mod + dg_cie_10_rec +
    dg_trs_cons_sus_or + macrozona + n_prev_off + n_off_vio + n_off_acq +
    n_off_sud + n_off_oth,
  ties = "efron",
  data = Base_fiscalia_v13
) #[1:500,]
ggcoxdiagnostics(coxfit2, type = "schoenfeld")
ggsave("_figs/prueba2.png", dpi = 640, height = 15, width = 15)

# NOTE(review): the text between `age_offending_imp<` and the next `%>%` was
# lost in this copy of the file (everything after "<" was stripped). The
# comparison below is reconstructed from the warning message -- confirm.
warning(paste0(
  "Number of users with age of offending lower than age at discharge: ",
  nrow(dplyr::filter(Base_fiscalia_v13, age_offending_imp < edad_al_egres_imp))
))
```
:::

```{r com1, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE}
# NOTE(review): the chunk header and the left-hand side of the first
# assignment were lost together with the text above. They are reconstructed
# from the parallel `_pris` statement and from the inline text below, which
# uses `errors_age_after_join` -- confirm the chunk label and options.

# Records with an implausible age at discharge or at offense (<= 14 or >= 80).
errors_age_after_join <- Base_fiscalia_v13 %>%
  dplyr::filter(edad_al_egres_imp <= 14 | edad_al_egres_imp >= 80 |
    age_offending_imp <= 14 | age_offending_imp >= 80)

errors_age_after_join_pris <- Base_fiscalia_v13_pris %>%
  dplyr::filter(edad_al_egres_imp <= 14 | edad_al_egres_imp >= 80 |
    age_offending_imp <= 14 | age_offending_imp >= 80)
#dplyr::filter(hash_key %in% errores_edad)
```

`r paste0(dplyr::filter(Base_fiscalia_v13, edad_al_egres_imp<=14|edad_al_egres_imp>=80) %>% nrow(), " patients being discharged at 14 years of age or less, or at 80 years of age or more ")`
`r paste0(dplyr::filter(Base_fiscalia_v13, age_offending_imp<=14|age_offending_imp>=80) %>% nrow(), " patients that offended and ended with prison at 14 years of age or less, or at 80 years of age or more ")`
In total, these cases represent the `r scales::percent(nrow(errors_age_after_join)/nrow(Base_fiscalia_v13), accuracy=.01)` of the database.

`r paste0(dplyr::filter(Base_fiscalia_v13_pris, edad_al_egres_imp<=14|edad_al_egres_imp>=80) %>% nrow(), " patients being discharged at 14 years of age or less, or at 80 years of age or more ")`
`r paste0(dplyr::filter(Base_fiscalia_v13_pris, age_offending_imp<=14|age_offending_imp>=80) %>% nrow(), " patients that offended and ended with prison at 14 years of age or less, or at 80 years of age or more ")`
In total, these cases represent the `r scales::percent(nrow(errors_age_after_join_pris)/nrow(Base_fiscalia_v13_pris), accuracy=.01)` of the database.
```{r com2, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE}
#_#_#_#_#_#_#_#_#_
invisible("2.Explore RUCs")

# Adds `dis_rut`, the number of distinct patients (hash_key) sharing each case
# ID (RUC), and drops case IDs shared by 5000 or more patients.
# NOTE(review): the 5000 cut-off presumably removes the missing/placeholder
# case ID of unmatched patients -- confirm.
ids_per_case <- function(df) {
  df %>%
    dplyr::ungroup() %>%
    dplyr::group_by(caseid) %>%
    dplyr::mutate(dis_rut = dplyr::n_distinct(hash_key)) %>%
    dplyr::ungroup() %>%
    #filter people that was not paired but keep people with more than one RUC
    dplyr::filter(dis_rut < 5000)
}

# One-row summary table of `dis_rut`, rendered with the given caption.
summarise_ids_per_case <- function(df, caption) {
  ids_per_case(df) %>%
    dplyr::summarise(
      total = dplyr::n(),
      max = max(dis_rut),
      min = min(dis_rut),
      mean = round(mean(dis_rut), 2),
      p025 = quantile(dis_rut, .025),
      p25 = quantile(dis_rut, .25),
      median = quantile(dis_rut, .5),
      p75 = quantile(dis_rut, .75),
      p975 = quantile(dis_rut, .975)
    ) %>%
    knitr::kable("markdown", caption = caption)
}

paste0("Number of unique RUCs: ", length(unique(Base_fiscalia_v13$caseid)),
  " of ", length(Base_fiscalia_v13$caseid), " rows")
paste0("Number of unique RUCs (imprisonment): ", length(unique(Base_fiscalia_v13_pris$caseid)),
  " of ", length(Base_fiscalia_v13_pris$caseid), " rows")

warning(paste0(
  "Records that share a RUC n= ",
  ids_per_case(Base_fiscalia_v13) %>% dplyr::filter(dis_rut > 1) %>% nrow()
))
warning(paste0(
  "Records that share a RUC (imprisonment) n= ",
  ids_per_case(Base_fiscalia_v13_pris) %>% dplyr::filter(dis_rut > 1) %>% nrow()
))

summarise_ids_per_case(Base_fiscalia_v13,
  "Summary descriptives, distinct IDs by Case ID")
#table(Base_fiscalia_v13$caseid) %>% data.frame() %>% arrange(-Freq) %>% slice(1:10)
#table(Base_fiscalia_v13_pris$caseid) %>% data.frame() %>% arrange(-Freq) %>% slice(1:10)
summarise_ids_per_case(Base_fiscalia_v13_pris,
  "Summary descriptives, distinct IDs by Case ID (Imprisonment)")
```

::: controlly
```{r com3, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE}
#If the value of VIF is less than 1: no correlation - If the value of VIF is between 1-5, there is moderate correlation - If the value of VIF is above 5: severe correlation
#_#_#_#_#_#_#_#_#_
invisible("3.Cox, collinearity")
#global covs_3 "i.caus_disch_mod_imp_rec edad_al_ing_1 edad_ini_cons i.sex_enc i.esc_rec i.sus_prin_mod i.fr_sus_prin i.comp_biosoc i.ten_viv i.dg_cie_10_rec i.sud_severity_icd10 i.macrozone i.policonsumo i.n_off_vio i.n_off_acq i.n_off_sud "
library(rms)

# Four Cox fits, one per column of the VIF table below:
#   f1 = time since discharge, condemnatory-sentence data (Base_fiscalia_v13)
#   f2 = time since discharge, imprisonment data (Base_fiscalia_v13_pris)
#   f3 = staggered entry,      condemnatory-sentence data
#   f4 = staggered entry,      imprisonment data
# Previously all four fits used Base_fiscalia_v13 and f3/f4 repeated f1/f2, so
# the columns labelled "prison" and "Stag_ent" did not show what their names
# say. n_off_oth is left out of every model (see the warnings).
f1 <- cph(
  Surv(age_offending_imp - edad_al_egres_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec + edad_al_ing_1 + edad_ini_cons + escolaridad_rec +
    sus_principal_mod + freq_cons_sus_prin + compromiso_biopsicosocial +
    origen_ingreso_mod + numero_de_hijos_mod + tenencia_de_la_vivienda_mod +
    dg_cie_10_rec + dg_trs_cons_sus_or + macrozona + n_prev_off + n_off_vio +
    n_off_acq + n_off_sud,
  data = Base_fiscalia_v13, x = TRUE, y = TRUE
)
warning("X matrix deemed to be singular; variable n_off_oth")
cvif <- rms::vif(f1)

f2 <- cph(
  Surv(age_offending_imp - edad_al_egres_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec + edad_al_ing_1 + edad_ini_cons + escolaridad_rec +
    sus_principal_mod + freq_cons_sus_prin + compromiso_biopsicosocial +
    origen_ingreso_mod + numero_de_hijos_mod + tenencia_de_la_vivienda_mod +
    dg_cie_10_rec + dg_trs_cons_sus_or + macrozona + n_prev_off + n_off_vio +
    n_off_acq + n_off_sud,
  data = Base_fiscalia_v13_pris, x = TRUE, y = TRUE
)
warning("X matrix deemed to be singular; variable n_off_oth")
cvif2 <- rms::vif(f2)

f3 <- cph(
  Surv(edad_al_egres_imp, age_offending_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec + edad_al_ing_1 + edad_ini_cons + escolaridad_rec +
    sus_principal_mod + freq_cons_sus_prin + compromiso_biopsicosocial +
    origen_ingreso_mod + numero_de_hijos_mod + tenencia_de_la_vivienda_mod +
    dg_cie_10_rec + dg_trs_cons_sus_or + macrozona + n_prev_off + n_off_vio +
    n_off_acq + n_off_sud,
  data = Base_fiscalia_v13, x = TRUE, y = TRUE
)
warning("X matrix deemed to be singular; variable n_off_oth")
cvif3 <- rms::vif(f3)

f4 <- cph(
  Surv(edad_al_egres_imp, age_offending_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec + edad_al_ing_1 + edad_ini_cons + escolaridad_rec +
    sus_principal_mod + freq_cons_sus_prin + compromiso_biopsicosocial +
    origen_ingreso_mod + numero_de_hijos_mod + tenencia_de_la_vivienda_mod +
    dg_cie_10_rec + dg_trs_cons_sus_or + macrozona + n_prev_off + n_off_vio +
    n_off_acq + n_off_sud,
  data = Base_fiscalia_v13_pris, x = TRUE, y = TRUE
)
warning("X matrix deemed to be singular; variable n_off_oth")
cvif4 <- rms::vif(f4)

# Align the four VIF vectors by coefficient name: the two data sets may not
# yield exactly the same coefficients, and cbind() would fail or misalign.
vif_list <- list(
  Time0_cond_sent = cvif,
  Time0_prison = cvif2,
  Stag_ent_cond_sent = cvif3,
  Stag_ent_prison = cvif4
)
vif_terms <- unique(unlist(lapply(vif_list, names)))
as.data.frame(
  lapply(vif_list, function(v) unname(v[vif_terms])),
  row.names = vif_terms
) %>%
  knitr::kable("markdown", caption = "Variance Inflation Factors in Cox Regressions")
# dplyr::mutate_if(is.numeric,~round(.,2)) %>%
# DT::datatable()
```
:::

The Total number of previous offenses showed a severe correlation (VIF >6.5), and Housing situation (tenure status of households) (VIF > 11). Must consider that the biopsychosocial compromise and Substance use frequency (primary substance) had a moderate correlation (~5 VIF).
```{r com4, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=TRUE}
#_#_#_#_#_#_#_#_#_
invisible("4.Why there are more patients in prison database than contacts with sentence data base?")
#unique(Base_fiscalia_v13_pris$hash_key)
#unique(Base_fiscalia_v13$hash_key)

# Rows of each data set whose patient (hash_key) is absent from the other one.
warning(paste0(
  "Patients that are registered in the sentence database but not in the prison data base: ",
  Base_fiscalia_v13 %>%
    dplyr::filter(!hash_key %in% unique(Base_fiscalia_v13_pris$hash_key)) %>%
    nrow()
))
warning(paste0(
  "Patients that are registered in the prison database but not in the sentence data base: ",
  Base_fiscalia_v13_pris %>%
    dplyr::filter(!hash_key %in% unique(Base_fiscalia_v13$hash_key)) %>%
    nrow()
))

# Distinct patient IDs present in the v12 prison data but not in the sentence
# data, as a plain character vector. The previous `unlist(as.character())`
# passed as.character() as unlist()'s `recursive` argument and never converted
# anything.
hash_key_non_sentence_but_pris <- Base_fiscalia_v12_pris %>%
  dplyr::filter(!hash_key %in% unique(Base_fiscalia_v13$hash_key)) %>%
  dplyr::distinct(hash_key) %>%
  dplyr::pull(hash_key) %>%
  as.character()
#23d88c2b8c6da2d8abf3f88b7ce8a4c0
#In case of the patient above, when he was matched with the requirements of prison (wich is more stringent), it allowed that 2 treatments were elligible to match: one in 2011-01-06 to 2011-02-28 and another in 2011-12-05 to 2012-03-05. The problem is that we wanted to match with baseline treatments, hence prison skips the first treatment that ended in a referral and ended matching with the second. At 2022-11-25 I ended correcting it.
```

```{r com5, echo=TRUE, fig.align='center', message=TRUE, error=TRUE, eval=FALSE}
# Not evaluated (eval=FALSE): R counterpart of the Stata model quoted below.
# stipw (logit motivodeegreso_mod_imp_rec2 edad_al_ing_1 edad_ini_cons sex_enc esc_rec sus_prin_mod fr_sus_prin comp_biosoc ten_viv dg_cie_10_rec sud_severity_icd10 macrozone policonsumo n_off_vio n_off_acq n_off_sud clas), distribution(rp) df(10) ipwtype(stabilised) vce(mestimation) eform
# estimates store df10_stipw
f4 <- cph(
  Surv(edad_al_egres_imp, age_offending_imp, !is.na(dateofbirth_imp)) ~
    motivodeegreso_mod_imp_rec + edad_al_ing_1 + edad_ini_cons + sex +
    escolaridad_rec + sus_principal_mod + freq_cons_sus_prin +
    compromiso_biopsicosocial + tenencia_de_la_vivienda_mod + dg_cie_10_rec +
    dg_trs_cons_sus_or + macrozona + policonsumo + n_off_vio + n_off_acq +
    n_off_sud,
  data = Base_fiscalia_v13, x = TRUE, y = TRUE
)
# origen_ingreso_mod + numero_de_hijos_mod were not in the Stata model
# `clas` is still missing here
```
# Flowchart

```{r export-svg, echo=TRUE, paged.print=TRUE, eval=TRUE, error=TRUE}
# Builds the text labels of the data-flow diagram, draws it with
# DiagrammeR::grViz() and exports it as PDF, PNG and HTML.

# When the chunk is run outside knitr, (re)define `path` as the working directory.
if (!isTRUE(getOption("knitr.in.progress"))) {
  #path<-ifelse(!grepl("$\\/",getwd()),paste0(getwd(),"/"),getwd())
  path <- getwd()
}

# "(p= <distinct persons>; RUCs= <distinct cases><case_note>;n= <rows>)" for
# one subset of records. `id_col` / `case_col` are column names given as
# strings; `case_note` is inserted right after the RUC count.
flow_counts <- function(df, id_col, case_col, case_note = "") {
  paste0(
    "(p= ", format(nrow(dplyr::distinct(df, .data[[id_col]])), big.mark = ","),
    "; RUCs= ", format(nrow(dplyr::distinct(df, .data[[case_col]])), big.mark = ","),
    case_note,
    ";n= ", format(nrow(df), big.mark = ","), ")"
  )
}

# TRUE for the end-of-proceedings values kept by the pipeline (the patterns are
# the ones the original filters used); FALSE otherwise. Never returns NA.
is_kept_termination <- function(gls_mottermino, agrupa_terminos) {
  dplyr::case_when(
    grepl("REPARATORIO|CONDICIONAL", gls_mottermino) & is.na(agrupa_terminos) ~ TRUE,
    grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT",
      toupper(agrupa_terminos), ignore.case = FALSE) ~ TRUE,
    TRUE ~ FALSE
  )
}

# Label of a final merged data set: persons, cases, rows with a non-missing
# `prision_fact`, and total rows.
merged_label <- function(title, df) {
  paste0(
    title, "\n(p= ", format(nrow(dplyr::distinct(df, hash_key)), big.mark = ","),
    ";\nRUCs= ", format(nrow(dplyr::distinct(df, caseid)), big.mark = ","),
    ";\nno.PO= ", format(nrow(dplyr::filter(df, !is.na(prision_fact))), big.mark = ","),
    ";\nn= ", format(nrow(df), big.mark = ","), ")"
  )
}

tab1_lab_aft_d <- paste0(
  'Original C1 Dataset \n(n = ',
  formatC(nrow(CONS_C1), format = 'f', big.mark = ',', digits = 0),
  ';\npatients: ',
  formatC(CONS_C1 %>% dplyr::distinct(HASH_KEY) %>% nrow(), format = 'f', big.mark = ',', digits = 0),
  ')'
)
tab2_lab_aft_d <- paste0('•Remove duplicated entries\\\\\\l•Overlapping treatments of patients\\\\\\l•Intermediate treatment events (continuous referrals) \\\\\\l')
tab3_lab_aft_d <- paste0(
  ' C1 Dataset \n(n = ',
  formatC(nrow(CONS_C1_df_dup_SEP_2020), format = 'f', big.mark = ',', digits = 0),
  ';\npatients: ',
  formatC(CONS_C1_df_dup_SEP_2020 %>% dplyr::distinct(hash_key) %>% nrow(), format = 'f', big.mark = ',', digits = 0),
  ')'
)
# NOTE(review): "iddelito" below is a string literal, not the column, so the
# RUC_Vic_Imp count is the number of distinct case/victim/defendant
# combinations. Kept as is because the label matches that reading -- confirm.
tab4_lab_aft_d <- paste0(
  'Original Prosecutors Office\n(n = ', format(nrow(Base_fiscalia_v2), big.mark = ","),
  ';\nCauses= ', Base_fiscalia_v2 %>% dplyr::distinct(ruc) %>% nrow() %>% format(big.mark = ','),
  ';\nRel.=', Base_fiscalia_v2 %>% dplyr::distinct(idrelacion) %>% nrow() %>% format(big.mark = ','),
  ';\nRUC_Vic_Imp=', Base_fiscalia_v2 %>%
    dplyr::mutate(rel = paste0(ruc, "_", idsujeto_victima, "_", idsujeto_imputado, "_", "iddelito")) %>%
    dplyr::distinct(rel) %>% nrow() %>% format(big.mark = ','),
  ';\nindividuals= ', Base_fiscalia_v2 %>% dplyr::distinct(rut_enc_saf) %>% nrow() %>% format(big.mark = ','),
  ')'
)

#crimes committed after study follow-up
tab5_lab_aft_d1 <- paste0(
  "(p= ", format(nrow(unique(subset(Base_fiscalia_v2, fec_comision_simple > as.Date("2019-11-13"), "rut_enc_saf"))), big.mark = ","),
  "; RUCs= ", format(nrow(unique(subset(Base_fiscalia_v2, fec_comision_simple > as.Date("2019-11-13"), "ruc"))), big.mark = ","),
  ";n= ", format(nrow(subset(Base_fiscalia_v2, fec_comision_simple > as.Date("2019-11-13"), "rut_enc_saf")), big.mark = ","),
  ")"
)

#erase entries with missing values in fec_comision_simple y termino_relacion_simple
leftovers_Base_fiscalia_v3 <- Base_fiscalia_v3 %>%
  dplyr::left_join(after_imp_Base_fiscalia_v3_db[, c("rut_enc_saf", "imp_birth_date", "flowch_age")], by = "rut_enc_saf") %>%
  dplyr::rename("obs" = "flowch_age") %>%
  # use the imputed birth date when available, otherwise the recorded one
  dplyr::mutate(imp_birth_date = dplyr::case_when(!is.na(imp_birth_date) ~ imp_birth_date, TRUE ~ fec_nacimiento_simple)) %>%
  dplyr::mutate(edad_comision_imp = as.numeric(fec_comision_simple - imp_birth_date) / 365.25) %>%
  dplyr::mutate(edad_ter_rel_imp = as.numeric(termino_relacion_simple - imp_birth_date) / 365.25) %>%
  #arrange the rut from the first date of comission of a crime, but we are not detecting if he/she is the victim or not
  dplyr::arrange(rut_enc_saf, edad_comision_imp) %>% #566884
  dplyr::filter(!is.na(edad_comision_imp)) %>% #566644
  dplyr::filter(imp_birth_date == "1900-01-01" | is.na(imp_birth_date))
tab5_lab_aft_d12 <- flow_counts(leftovers_Base_fiscalia_v3, "rut_enc_saf", "ruc")

#minor to 14 years old
tab5_lab_aft_d13 <- flow_counts(
  dplyr::filter(Base_fiscalia_v4, edad_comision_imp < 14), "rut_enc_saf", "ruc"
)

#Remove duplicated entries
tab5_lab_aft_d2 <- paste0(
  "(p= ", format(length(unique(eliminated_duplicates$rut_enc_saf)), big.mark = ","),
  "; RUCs= ", format(length(unique(eliminated_duplicates$ruc)), big.mark = ","),
  ";n= ", format(nrow(eliminated_duplicates), big.mark = ","), ")"
)

#before 2010
tab5_lab_aft_d3 <- flow_counts(
  dplyr::filter(Base_fiscalia_v7, fec_comision_simple < "2010-01-01"), "rut_enc_saf", "ruc"
)

# NOTE(review): the "¿" in the two literals below looks like a mis-encoded
# accented O; they are left untouched because they must match the stored
# values of `agrupa_terminos` -- confirm against the data.
#remove administrative annulment
tab5_lab_aft_d4 <- flow_counts(
  dplyr::filter(Base_fiscalia_v7, agrupa_terminos == "ANULACI¿N ADMINISTRATIVA"), "rut_enc_saf", "ruc"
)
#remove grouped to another case
tab5_lab_aft_d5 <- flow_counts(
  dplyr::filter(Base_fiscalia_v7, agrupa_terminos == "AGRUPACI¿N A OTRO CASO"), "rut_enc_saf", "ruc"
)

tab5_lab_aft_d <- paste0(
  '•Filter crimes committed after study follow-up period', tab5_lab_aft_d1,
  '\\\\\\l•Remove duplicated entries', tab5_lab_aft_d2,
  '\\\\\\l•Correct dates (birth, comission of crime, end of judicial proceedings), missing nationality and sex\\\\\\l•Erase entries with missing values in comission of crime, end of judicial proceedings', tab5_lab_aft_d12,
  '\\\\\\l•Erase entries with values in comission of crime when minor to 14 years old after imputation', tab5_lab_aft_d13,
  '\\\\\\l•Filter crimes committed before study follow-up', tab5_lab_aft_d3,
  '\\\\\\l•Filter records with cause of end of the proceedings= administrative annulment', tab5_lab_aft_d4,
  '\\\\\\l•Filter records with cause of end of the proceedings= grouped to another case', tab5_lab_aft_d5,
  '\\\\\\l'
)

tab6_lab_aft_d <- paste0(
  "O.P. Dataset \n(n= ", formatC(nrow(Base_fiscalia_v8), big.mark = ","),
  ";\nindividuals= ", Base_fiscalia_v8 %>% dplyr::distinct(rut_enc_saf) %>% nrow() %>% formatC(big.mark = ","),
  ")"
)

# Records where the person was found as a defendant ("SI"), computed once.
po_offenders <- dplyr::filter(Base_fiscalia_v8, grepl("SI", encontrado_como_imputado))

#not coded as an offender
tab7_lab_aft_d1 <- flow_counts(
  dplyr::filter(Base_fiscalia_v8, !grepl("SI", encontrado_como_imputado)), "rut_enc_saf", "ruc"
)
#FILTER IF THE PATIENT RECIEVES FOR THE RELATIONSHIP AMONG THOSE THAT WERE OFFENDERS # end of proceeding
tab7_lab_aft_d2 <- flow_counts(
  dplyr::filter(po_offenders, !is_kept_termination(gls_mottermino, agrupa_terminos)),
  "rut_enc_saf", "ruc"
)
#FILTER USERS WITHOUT BIRTH DATE AFTER JOIN (7)
tab7_lab_aft_d3 <- flow_counts(
  dplyr::filter(janitor::clean_names(Base_fiscalia_v10), is.na(fech_nac_rec)),
  "hash_key", "caseid", case_note = "(had no case ID)"
)
#FILTER USERS THAT HAD AN ONGOING TREATMENT
tab7_lab_aft_d4 <- flow_counts(
  dplyr::filter(Base_fiscalia_v11, grepl("Referral|Death|Censored|Ongoing", motivodeegreso_mod_imp)),
  "hash_key", "caseid"
)

tab7_lab_aft_d <- paste0(
  '•Discard observations coded as victims rather than ofenders ', tab7_lab_aft_d1,
  '\\\\\\l•Discard observations depending on the values of the end of the proceedings among offenders', tab7_lab_aft_d2,
  ' \\\\\\l•Discard observations without birth date ', tab7_lab_aft_d3,
  '\\\\\\l•Discard observations coded as referrals, deaths, censored at baseline treatment or with ongoing treatments ', tab7_lab_aft_d4,
  ' \\\\\\l•Long-to-wide relationships/crimes, end of judicial proceedings, penalty \\\\\\l•Group crimes into violent, drug-related, etc. \\\\\\l'
)

# Prison branch: the first two exclusions are the same as in the branch above.
#not coded as an offender
tab7b_lab_aft_d1 <- tab7_lab_aft_d1
#FILTER IF THE PATIENT RECIEVES FOR THE RELATIONSHIP AMONG THOSE THAT WERE OFFENDERS # end of proceeding
tab7b_lab_aft_d2 <- tab7_lab_aft_d2
#FILTER IF THE PATIENT RECIEVES & PRISON
tab7b_lab_aft_d25 <- flow_counts(
  po_offenders %>%
    dplyr::filter(is_kept_termination(gls_mottermino, agrupa_terminos)) %>%
    # case_when keeps rows where marca_pena_44 is missing (a bare negation would drop them)
    dplyr::filter(dplyr::case_when(marca_pena_44 == "SI" & is.na(medida_alternativa_46) ~ FALSE, TRUE ~ TRUE)),
  "rut_enc_saf", "ruc"
)
#FILTER USERS WITHOUT BIRTH DATE AFTER JOIN (7)
tab7b_lab_aft_d3 <- flow_counts(
  dplyr::filter(janitor::clean_names(Base_fiscalia_v10_pris), is.na(fech_nac_rec)),
  "hash_key", "caseid", case_note = "(had no case ID)"
)
#FILTER USERS THAT HAD AN ONGOING TREATMENT
tab7b_lab_aft_d4 <- flow_counts(
  dplyr::filter(Base_fiscalia_v11_pris, grepl("Referral|Death|Censored|Ongoing", motivodeegreso_mod_imp)),
  "hash_key", "caseid"
)

tab7b_lab_aft_d <- paste0(
  '•Discard observations coded as victims rather than ofenders ', tab7b_lab_aft_d1,
  '\\\\\\l•Discard observations depending on the values of the end of the proceedings among offenders', tab7b_lab_aft_d2,
  ' \\\\\\l•Discard observations not ending in prison among offenders that ended proceedings with a sentence', tab7b_lab_aft_d25,
  ' \\\\\\l•Discard observations without birth date ', tab7b_lab_aft_d3,
  '\\\\\\l•Discard observations coded as referrals, deaths, censored at baseline treatment or with ongoing treatments ', tab7b_lab_aft_d4,
  ' \\\\\\l•Long-to-wide relationships/crimes, end of judicial proceedings, penalty \\\\\\l•Group crimes into violent, drug-related, etc. \\\\\\l'
)

tab9_merged <- merged_label("Contacts", Base_fiscalia_v13)
tab10_merged <- merged_label("Prison", Base_fiscalia_v13_pris)

library(DiagrammeR)
#⋉
# NOTE(review): in the DOT text below, the lone "}" after the
# tab3 -> ... -> tab6 subgraph appears to close the digraph, leaving the last
# five `subgraph` statements outside it. Left exactly as it was so the rendered
# layout does not change -- confirm whether that is intended.
plot_merge_flowchart_after_dates <-
  grViz("digraph flowchart {
      fontname='Comic Sans MS'

      # node definitions with substituted label text
      node [shape = rectangle,fontsize = 9]
      tab1 [label = '@@1']
      blank [label = '', width = 0.0001, height = 0.0001]
      tab2 [label = '@@2',fontsize = 7]
      tab3 [label = '@@3']
      tab4 [label = '@@4',fontsize = 8]
      blank2 [label = '', width = 0.0001, height = 0.0001]
      tab5 [label = '@@5',fontsize = 7]
      tab6 [label= '@@6']
      blank3 [label = '', width = 0.0001, height = 0.0001]
      blank4 [label = '', width = 0.0001, height = 0.0001]
      blank5 [label = '', width = 0.0001, height = 0.0001]
      blank6 [label = '', width = 0.0001, height = 0.0001]
      blank6b [label = '', width = 0.0001, height = 0.0001]
      tab7 [label = '@@7',fontsize = 7]
      tab7b [label = '@@8',fontsize = 7]
      tab8 [label= '@@9']
      tab9 [label= '@@10']

      # edge definitions with the node IDs
      rankdir='TB'; rank= same; tab1 -> blank [arrowhead = none,label=' Data wrangling and normalization process',fontsize = 8];
      rankdir='TB'; rank= same; tab1; tab3;
      blank -> tab2;
      subgraph { rank = same; tab2; blank; }
      rankdir='TB'; rank= same; blank -> tab3;
      tab4 -> blank2 [arrowhead = none,label=' Data wrangling and normalization process',fontsize = 8];
      blank2 -> tab5
      blank2 -> tab6
      blank3 -> blank6 [arrowhead= none, label=' ⟖']
      blank5 -> blank6b [arrowhead= none, label=' ⟖']
      subgraph { rankdir='BT'; rank= same; tab7 -> blank6 [dir=back]; }
      blank6b -> tab7b
      subgraph { rank = same; tab7b; blank6; blank6b; tab7; }
      rank = same; blank6 -> tab8;
      rank = same; blank6b -> tab9;
      subgraph { rank = same; tab5; blank2; }
      subgraph { rank= same; tab3 -> blank3 -> blank4 -> blank5 -> tab6 [arrowhead= none] }
      }
      subgraph { rank = same; tab3; tab6; }
      subgraph { rank = same; tab1; tab4; }
      subgraph { rank = same; tab2; tab5; }
      subgraph { rank = same; tab1; tab3; rankdir=TB rank=same }
      subgraph { rank = same; tab4; tab6; rankdir=TB rank=same }

      [1]: tab1_lab_aft_d
      [2]: tab2_lab_aft_d
      [3]: tab3_lab_aft_d
      [4]: tab4_lab_aft_d
      [5]: tab5_lab_aft_d
      [6]: tab6_lab_aft_d
      [7]: tab7_lab_aft_d
      [8]: tab7b_lab_aft_d
      [9]: tab9_merged
      [10]: tab10_merged
      ", width = 1200, height = 900)

DPI <- 1200
WidthCM <- 11
HeightCM <- 8

# Output root: the "2019 (github)/SUD_CL" part of `path`, when present, is
# replaced by "2022 (github)". Computed once and reused for every file below.
figs_root <- sub("2019 \\(github\\)/SUD_CL", "2022 \\(github\\)", path)

sysfonts::font_add(
  family = "Rooney Sans",
  regular = paste0(figs_root, "/_style/RooneySansRegular.otf")
)
showtext::showtext_begin()

# Convert the widget to raw SVG once and reuse it for the PDF and PNG exports.
flowchart_svg <- plot_merge_flowchart_after_dates %>%
  export_svg() %>%
  charToRaw()

rsvg_pdf(flowchart_svg, paste0(figs_root, "/_figs/_flowchart_merge3.pdf"))

rsvg(flowchart_svg, width = WidthCM * (DPI / 2.54), height = HeightCM * (DPI / 2.54)) %>%
  png::writePNG(paste0(figs_root, "/_figs/_flowchart_merge_wo_fmt3.png"))

htmlwidgets::saveWidget(
  plot_merge_flowchart_after_dates,
  paste0(figs_root, "/_figs/_flowchart_merge_222_3.html")
)
webshot::webshot(
  paste0(figs_root, "/_figs/_flowchart_merge_222_3.html"),
  paste0(figs_root, "/_figs/_flowchart_merge_formatted_3.png"),
  vwidth = 1200, vheight = 900, zoom = 2
)
```
# Label

```{r change-labels, echo=TRUE, paged.print=TRUE, eval=TRUE}
# Extra descriptive variables exported together with the covariates.
vars_desc <- c("age_offending_imp", "edad_al_egres_imp", "fech_nac_rec", "motivodeegreso_mod_imp_rec")

# Variable name -> human-readable label, applied to both merged data sets.
# NOTE(review): `edad_al_ing_1` and `motivodeegreso_mod_imp_rec` each appear
# twice with different labels; which entry is applied depends on how
# var_label<- resolves duplicated names -- confirm and drop the unwanted ones.
list_codebook <- list(
  hash_key = "Hash (ID)",
  edad_al_ing_1 = "Age (admission to treatment)",
  edad_ini_cons = "Age of Onset of Substance Use",
  # I added it
  dias_treat_imp_sin_na_1 = "Days in treatment",
  sex = "Sex",
  escolaridad_rec = "Educational Attainment",
  sus_principal_mod = "Primary Substance (admission to treatment)",
  freq_cons_sus_prin = "Substance use frequency (primary substance)",
  compromiso_biopsicosocial = "Bio-psychosocial status",
  tenencia_de_la_vivienda_mod = "Housing situation (tenure status of households)",
  dg_cie_10_rec = "Comorbidity (ICD-10)",
  dg_trs_cons_sus_or = "SUD Severity (Dependence status)",
  macrozona = "Macro administrative Chilean zone",
  policonsumo = "Co-occurring SUD",
  tr_modality = "Treatment Modality",
  #Added in 2022-11-11
  tipo_centro = "Center ID",
  condicion_ocupacional_cor = "Occupational Status Corrected",
  origen_ingreso_mod = "Motive of Admission to Treatment",
  numero_de_hijos_mod = "Number of Children ",
  motivodeegreso_mod_imp_rec = "Baseline treatment status",
  edad_al_ing_1 = "Age at admission (mod)",
  fech_egres_imp = "Date of discharge",
  n_prev_off = "Pre-treatment Criminality (Dich.)",
  n_off_vio = "Violent crime, Pre-treatment Criminality",
  n_off_acq = "Acquisitive crime, Pre-treatment Criminality",
  n_off_sud = "Substance use-related crime, Pre-treatment Criminality",
  n_off_oth = "Other crime, Pre-treatment Criminality",
  age_offending_imp = "Age when the offense was committed",
  edad_al_egres_imp = "Age at discharge (mod)",
  fech_nac_rec = "Corrected date of birth",
  motivodeegreso_mod_imp_rec = "Treatment status",
  time_to_off_from_adm = "Time-to-offense from Admission",
  time_to_off_from_disch = "Time-to-offense from Discharge",
  id_centro = "Treatment center ID",
  comuna_residencia_cod = "Municipality or District of Residence",
  fech_ing_num_1 = "Date of admission",
  #added
  cut_com_del = "Binned comission date",
  cut_fec_nac = "Binned birth date"
)

codebook::var_label(Base_fiscalia_v13) <- list_codebook
codebook::var_label(Base_fiscalia_v13_pris) <- list_codebook
```

# Session Info

```{r session-info, echo=TRUE, error=TRUE, message=TRUE, paged.print=TRUE}
message(Sys.getenv("R_LIBS_USER"))
Sys.Date()
message(paste0("Editor context: ", path))

# Location used to decide where the workspace image is saved. The RStudio
# editor path is used when it can be obtained; otherwise fall back to `path`,
# so the image is still saved when rstudioapi is unavailable (the call used to
# fail and, with error=TRUE, the save was silently skipped).
save_context <- tryCatch(
  rstudioapi::getSourceEditorContext()$path,
  error = function(e) NULL
)
if (length(save_context) != 1L || is.na(save_context) || !nzchar(save_context)) {
  save_context <- path
}

if (grepl("CISS Fondecyt", save_context)) {
  save.image("C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)/14.RData")
} else if (grepl("andre", save_context)) {
  save.image("C:/Users/andre/Desktop/SUD_CL/14.RData")
} else if (grepl("E:", save_context)) {
  save.image("E:/Mi unidad/Alvacast/SISTRAT 2022 (github)/14.RData")
} else {
  save.image(paste0(sub("2019", "2022", sub("SUD_CL", "", path)), "14.RData"))
}

sesion_info <- devtools::session_info()

# Table of loaded packages. Column 1 is the row name added by datatable();
# the headers used to read 'Variable' and 'Percentage', copied from another
# table, and did not describe these columns.
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>%
  DT::datatable(
    filter = 'top',
    colnames = c('Row number' = 1, 'Package' = 2, 'Version' = 3, 'Source' = 4),
    caption = htmltools::tags$caption(
      style = 'caption-side: top; text-align: left;',
      '', htmltools::em('Packages')
    ),
    options = list(
      initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({ 'font-family': 'Helvetica Neue', 'font-size': '50%', 'code-inline-font-size': '15%', 'white-space': 'nowrap', 'line-height': '0.75em', 'min-height': '0.5em' });",#;
        "}"
      )
    )
  )
```

# Export

```{r export, warning=FALSE, echo=TRUE, error=TRUE, eval=TRUE}
# Columns written to the Stata files; the same vector drives the variable
# labels in the next chunk so every exported column gets its label.
export_cols <- c(
  "hash_key", cont_vars_desc, cat_vars_desc, cat_vars_desc_off, vars_desc,
  "id_centro", "comuna_residencia_cod", "fech_ing_num_1", "fech_egres_imp",
  "cut_com_del", "cut_fec_nac", "offender_d"
)

subset(Base_fiscalia_v13, select = export_cols) %>%
  dplyr::arrange(hash_key, fech_ing_num_1) %>%
  data.table::data.table() %>%
  rio::export(file = "fiscalia_mariel_oct_2022_match_SENDA.dta")

subset(Base_fiscalia_v13_pris, select = export_cols) %>%
  dplyr::arrange(hash_key, fech_ing_num_1) %>%
  data.table::data.table() %>%
  rio::export(file = "fiscalia_mariel_oct_2022_match_SENDA_pris.dta")
```

::: controlly
```{r label_to_stata, echo=TRUE, paged.print=TRUE}
# Writes a Stata do-file that opens the exported .dta, labels every exported
# variable and saves the file again.
#put name of the file
file <- "fiscalia_mariel_oct_2022_match_SENDA.dta"

# Windows-style path of the exported file, as written into the do-file.
stata_path <- paste0(gsub("/", "\\", path, fixed = TRUE), "\\", file)

var_labels <- Hmisc::label(dplyr::select(Base_fiscalia_v13, dplyr::all_of(export_cols)))

# Single column named "*clear all": written as the header line, it becomes a
# Stata comment at the top of the do-file. The `save` command is appended once
# (it used to be appended twice).
export_lab_stata_merge <- data.frame(
  "*clear all" = c(
    paste0("cap noi label variable ", names(var_labels), ' "', var_labels, '"'),
    paste0('cap noi save "', stata_path, '", replace')
  ),
  check.names = FALSE
)

stata_do <- rbind(paste0('cap noi use "', stata_path, '", clear'), export_lab_stata_merge)

stata_do %>%
  knitr::kable("markdown")

write.table(stata_do,
  file = paste0(path, "/_label_var_to_stata.do"),
  sep = "", row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8"
)
```
:::
```{stata 2, collectcode=F, include=T, error=T, cleanlog=F}
* Runs the do-file written by the label_to_stata chunk (_label_var_to_stata.do).
* It is referenced by a relative path, so it should be in the same folder as the .Rmd to work.
cap noi do _label_var_to_stata.do
```