######################################################################### # # R PROGRAMS TO EXTRACT INJURY DATA # FROM TQIP PUF (FORMERLY NTDB) AND FROM NIS # AND ANALYZE ROCMAX OPTIONS FOR ICDPIC-R # # PART A: EXTRACT DATA # # This part mostly translated from Stata used in earlier version # David Clark, 2019-2021 # # Version from 200402 modified to not distinguish open fractures # in ICD10base table # Version from 210102 modifed to create unique identifier # for NIS data # ########################################################################## #1 #CLEAR WORKSPACE, SET WORKING DIRECTORY, #LOAD REQUIRED PACKAGES, IF NOT LOADED ALREADY rm(list=ls()) setwd("Y:/Data_Event/NTDB/Analytic_Steps") require(tidyverse) require(skimr) require(janitor) require(broom) require(pROC) require(lme4) require(ggplot2) #2a #GET TQIP DATA #Import 2017 TQIP data and identify ER or inpatient mortality d0<-read_csv("Y:/Data_Event/NTDB/Original_Source/PUF AY 2017/CSV/PUF_TRAUMA.csv") d1<-rename(d0,INC_KEY=inc_key,ISSAIS=ISS_05) #Demonstrate that INC_KEY is unique identifier d1<-group_by(d1,INC_KEY) d1<-mutate(d1,seq=row_number()) d1<-ungroup(d1) tabyl(d1,seq) #Note "DEATHINED" means no Vital Signs on ED admission, but some do survive tabyl(d1,HOSPDISCHARGEDISPOSITION,DEATHINED) tabyl(d1,HOSPDISCHARGEDISPOSITION_BIU,DEATHINED) #Only counted as "died" if discharge dispositon is explicitly dead # or explicitly missing after having no vital signs on ED admission d2<-mutate(d1,died=case_when( HOSPDISCHARGEDISPOSITION==5 ~ 1, (DEATHINED==1 & HOSPDISCHARGEDISPOSITION_BIU==1) ~ 1, TRUE ~ 0 )) tabyl(d2,died,DEATHINED) d3<-select(d2,INC_KEY,died,ISSAIS) write_csv(d3,"tqipmort2017.csv") #Obtain ICD-10 codes for each TQIP patient d0<-read_csv("Y:/Data_Event/NTDB/Original_Source/PUF AY 2017/CSV/PUF_ICDDIAGNOSIS.csv") d1<-rename(d0,INC_KEY=inc_key,icdcm=ICDDIAGNOSISCODE) #Identify duplicates d2<-group_by(d1,INC_KEY,icdcm) d2<-mutate(d2,dxseq=row_number()) d2<-mutate(d2,dxrep=max(dxseq)) d2<-ungroup(d2) d2test<-filter(d2,dxrep>1) d2test<-arrange(d2test,INC_KEY,icdcm,dxseq,dxrep) #View(d2test) #Drop duplicates d3<-filter(d2,dxseq==1) d3<-select(d3,INC_KEY,icdcm) d3<-arrange(d3,INC_KEY,icdcm) write_csv(d3,"tqipdcode2017.csv") #Merge TQIP patient diagnoses and mortality, save result d1<-read_csv("tqipdcode2017.csv") d2<-read_csv("tqipmort2017.csv") d3<-full_join(d1,d2,by="INC_KEY") write_csv(d3,"tqipmerged2017.csv") #2b #GET NIS DATA #Import previously extracted 2016 NIS cases with any diagnosis starting with S or T #This pre-extraction was done using Stata, but is surely possible in R as well d0<-read_csv("y:/Data_Event/HCUP_NIS/Converted_Files/core2016ST.csv") #See if KEY_NIS is unique d0<-group_by(d0,KEY_NIS) d0<-mutate(d0,seq=row_number()) d0<-ungroup(d0) tabyl(d0,seq) #It is NOT, so create a new unique identifying number d0<-mutate(d0,INC_KEY=10000000+row_number()) d0<-group_by(d0,INC_KEY) d0<-mutate(d0,seq=row_number()) d0<-ungroup(d0) tabyl(d0,seq) #Include only admissions that were not elective and originated in Emergency Department tabyl(d0,ELECTIVE,HCUP_ED) d0<-filter(d0,ELECTIVE==0,HCUP_ED!=0) tabyl(d0,ELECTIVE,HCUP_ED) #Restrict to variables of interest #Rename mechanism of injury variables d1<-select(d0,INC_KEY,DIED,I10_DX1,I10_DX2,I10_DX3,I10_DX4,I10_DX5,I10_DX6, I10_DX7,I10_DX8,I10_DX9,I10_DX10,I10_DX10,I10_DX12,I10_DX13,I10_DX14, I10_DX15,I10_DX16,I10_DX17,I10_DX18,I10_DX19,I10_DX20,I10_DX21,I10_DX22, I10_DX23,I10_DX24,I10_DX25,I10_DX26,I10_DX27,I10_DX28,I10_DX29,I10_DX30, I10_ECAUSE1,I10_ECAUSE2,I10_ECAUSE3,I10_ECAUSE4) d1<-rename(d1,I10_DX31=I10_ECAUSE1) d1<-rename(d1,I10_DX32=I10_ECAUSE2) d1<-rename(d1,I10_DX33=I10_ECAUSE3) d1<-rename(d1,I10_DX34=I10_ECAUSE4) #Reshape to "long" format d2<-gather(d1,I10_DX1,I10_DX2,I10_DX3,I10_DX4,I10_DX5,I10_DX6, I10_DX7,I10_DX8,I10_DX9,I10_DX10,I10_DX10,I10_DX12,I10_DX13,I10_DX14, I10_DX15,I10_DX16,I10_DX17,I10_DX18,I10_DX19,I10_DX20,I10_DX21,I10_DX22, I10_DX23,I10_DX24,I10_DX25,I10_DX26,I10_DX27,I10_DX28,I10_DX29,I10_DX30, I10_DX31,I10_DX32,I10_DX33,I10_DX34,key="original",value="rawdx") #Reformat ICD10 codes to have a decimal point in the 4th position #Rename key field d3<-mutate(d2,dxpart1=str_sub(rawdx,1,3)) d3<-mutate(d3,dxpart2=str_sub(rawdx,4,7)) d3<-mutate(d3,icdcm=str_c(dxpart1,".",dxpart2)) d3<-rename(d3,died=DIED) #Identify duplicates d3a<-group_by(d3,INC_KEY,icdcm) d3a<-mutate(d3a,dxseq=row_number()) d3a<-mutate(d3a,dxrep=max(dxseq)) d3a<-ungroup(d3a) d3atest<-filter(d3a,dxrep>1) d3atest<-arrange(d3atest,INC_KEY,icdcm,dxseq,dxrep) #View(d3atest) #Drop duplicates (mostly NA) d3b<-filter(d3a,dxseq==1) d3b<-select(d3b,INC_KEY,icdcm,died) d3b<-arrange(d3b,INC_KEY,icdcm) #Save result write_csv(d3b,"nisreshaped2016.csv") #3a #PREPARE TQIP OR NIS DATA FOR ANALYSIS #d0<-read_csv("tqipmerged2017.csv") d0<-read_csv("nisreshaped2016.csv") #Restrict to cases with at least one valid ICD10CM Anatomic Injury Code # as defined by National Trauma Data Standard #This includes fracture codes ending in B or C (denoting open fractures) d4<-mutate(d0,validcode=case_when( str_sub(icdcm,1,1)=="S" & str_sub(icdcm,8,8)=="A" ~ 1, str_sub(icdcm,1,1)=="S" & str_sub(icdcm,8,8)=="B" ~ 1, str_sub(icdcm,1,1)=="S" & str_sub(icdcm,8,8)=="C" ~ 1, str_sub(icdcm,1,1)=="T" & str_sub(icdcm,2,3)=="07" ~ 1, str_sub(icdcm,1,1)=="T" & str_sub(icdcm,2,3)=="14" ~ 1, str_sub(icdcm,1,1)=="T" & str_sub(icdcm,2,2)=="2" & str_sub(icdcm,3,3)!="9" & str_sub(icdcm,8,8)=="A" ~ 1, str_sub(icdcm,1,1)=="T" & str_sub(icdcm,2,3)=="30" ~ 1, str_sub(icdcm,1,1)=="T" & str_sub(icdcm,2,3)=="31" ~ 1, str_sub(icdcm,1,1)=="T" & str_sub(icdcm,2,3)=="32" ~ 1, str_sub(icdcm,1,5)=="T79.A" & str_sub(icdcm,8,8)=="A" ~ 1, TRUE ~ 0 ) ) #Inspect whether valid codes properly identified d4test<-group_by(d4,icdcm) d4test<-mutate(d4test,dxseq=row_number()) d4test<-filter(d4test,dxseq==1) d4test<-ungroup(d4test) d4test<-arrange(d4test,icdcm) #View(d4test) #Identify valid cases (with at least one valid code, and died==0 or 1) # and discard invalid cases and codes d5<-group_by(d4,INC_KEY) d5<-mutate(d5,idseq=row_number()) d5<-mutate(d5,validcase=max(validcode)) d5<-ungroup(d5) d5test<-filter(d5,idseq==1) tabyl(d5test,validcase) d6<-filter(d5,validcase==1,validcode==1) d6<-filter(d6,died>=0) #Reassign a few codes with explicit outcome (survival or death) # Replacing "6", "7", or "8" in position 7 with "9" d7<-mutate(d6,explicit=case_when( str_sub(icdcm,1,3)=="S06" & str_sub(icdcm,7,7)=="6" ~ 1, str_sub(icdcm,1,3)=="S06" & str_sub(icdcm,7,7)=="7" ~ 1, str_sub(icdcm,1,3)=="S06" & str_sub(icdcm,7,7)=="8" ~ 1, TRUE ~ 0 ) ) d7<-mutate(d7,icdcm=ifelse(explicit, str_c(str_sub(icdcm,1,6),"9A"), icdcm)) #Identfy duplicates created by above reassignment d8<-group_by(d7,INC_KEY,icdcm) d8<-mutate(d8,dxseq=row_number()) d8<-mutate(d8,dxrep=max(dxseq)) d8<-ungroup(d8) tabyl(d8,dxseq) d8test<-filter(d8,dxrep>1) d8test<-arrange(d8test,INC_KEY,icdcm,dxseq,dxrep) #View(d8test) #Drop these duplicates d9<-filter(d8,dxseq==1) #Add body region for each valid diagnosis d10<-mutate(d9,issbr=case_when( str_sub(icdcm,1,3)=="S00" & str_sub(icdcm,5,5)=="1" ~ "Face", str_sub(icdcm,1,3)=="S00" & str_sub(icdcm,5,5)=="2" ~ "Face", str_sub(icdcm,1,3)=="S00" & str_sub(icdcm,5,5)=="3" ~ "Face", str_sub(icdcm,1,3)=="S00" & str_sub(icdcm,5,5)=="4" ~ "Face", str_sub(icdcm,1,3)=="S00" & str_sub(icdcm,5,5)=="5" ~ "Face", str_sub(icdcm,1,3)=="S01" & str_sub(icdcm,5,5)=="1" ~ "Face", str_sub(icdcm,1,3)=="S01" & str_sub(icdcm,5,5)=="2" ~ "Face", str_sub(icdcm,1,3)=="S01" & str_sub(icdcm,5,5)=="3" ~ "Face", str_sub(icdcm,1,3)=="S01" & str_sub(icdcm,5,5)=="4" ~ "Face", str_sub(icdcm,1,3)=="S01" & str_sub(icdcm,5,5)=="5" ~ "Face", str_sub(icdcm,1,3)=="S02" & str_sub(icdcm,5,5)=="1" ~ "Face", str_sub(icdcm,1,3)=="S02" & str_sub(icdcm,5,5)=="2" ~ "Face", str_sub(icdcm,1,3)=="S02" & str_sub(icdcm,5,5)=="3" ~ "Face", str_sub(icdcm,1,3)=="S02" & str_sub(icdcm,5,5)=="4" ~ "Face", str_sub(icdcm,1,3)=="S02" & str_sub(icdcm,5,5)=="5" ~ "Face", str_sub(icdcm,1,3)=="S02" & str_sub(icdcm,5,5)=="6" ~ "Face", str_sub(icdcm,1,3)=="S03" & str_sub(icdcm,5,5)=="0" ~ "Face", str_sub(icdcm,1,3)=="S03" & str_sub(icdcm,5,5)=="1" ~ "Face", str_sub(icdcm,1,3)=="S03" & str_sub(icdcm,5,5)=="2" ~ "Face", str_sub(icdcm,1,3)=="S03" & str_sub(icdcm,5,5)=="3" ~ "Face", str_sub(icdcm,1,3)=="S03" & str_sub(icdcm,5,5)=="4" ~ "Face", str_sub(icdcm,1,3)=="S03" & str_sub(icdcm,5,5)=="5" ~ "Face", str_sub(icdcm,1,3)=="S05" ~ "Face", str_sub(icdcm,1,5)=="S07.0" ~ "Face", str_sub(icdcm,1,5)=="S08.1" ~ "Face", str_sub(icdcm,1,5)=="S09.2" ~ "Face", str_sub(icdcm,1,2)=="S0" ~ "Head/Neck", str_sub(icdcm,1,2)=="S1" ~ "Head/Neck", str_sub(icdcm,1,2)=="S2" ~ "Chest", str_sub(icdcm,1,2)=="S3" ~ "Abdomen", str_sub(icdcm,1,2)=="S4" ~ "Extremities", str_sub(icdcm,1,2)=="S5" ~ "Extremities", str_sub(icdcm,1,2)=="S6" ~ "Extremities", str_sub(icdcm,1,2)=="S7" ~ "Extremities", str_sub(icdcm,1,2)=="S8" ~ "Extremities", str_sub(icdcm,1,2)=="S9" ~ "Extremities", str_sub(icdcm,1,5)=="T24.2" ~ "Extremities", str_sub(icdcm,1,5)=="T79.A" ~ "Extremities", str_sub(icdcm,1,1)=="T" ~ "General", TRUE ~ "None" ) ) tabyl(d10,issbr) #Discard temporary analytic variables, and save results d11<-select(d10,-validcode,-idseq,-validcase,-explicit,-dxseq,-dxrep) #write_csv(d11,"tqip2017cm.csv") write_csv(d11,"nis2016cm.csv") #3b #PREPARE A SIMILAR FILE WITH BASIC ICD-10 CODES #(Used for death certificates in USA and for clinical data in other countries) rm(list=ls()) #d1<-read_csv("tqip2017cm.csv") d1<-read_csv("nis2016cm.csv") d2<-mutate(d1,icdbase=str_sub(icdcm,1,5)) #(Earlier version added extra digit to denote open fractures. Not done here.) #Identfy duplicates created by collapsing diagnoses above d3<-group_by(d2,INC_KEY,icdbase) d3<-mutate(d3,dxseq=row_number()) d3<-mutate(d3,dxrep=max(dxseq)) d3<-ungroup(d3) tabyl(d3,dxseq) d3test<-filter(d3,dxrep>1) d3test<-arrange(d3test,INC_KEY,icdcm,icdbase,dxseq,dxrep) #View(d3test) #Drop these duplicates d4<-filter(d3,dxseq==1) d4test<-filter(d4,str_length(icdcm)==3) #View(d4test) #Drop temporary analytic variables, and save result d4<-select(d4,-icdcm,-dxseq,-dxrep) # write_csv(d4,"tqip2017base.csv") write_csv(d4,"nis2016base.csv")