####################################################################### # # R PROGRAMS TO EXTRACT INJURY DATA # FROM TQIP PUF (FORMERLY NTDB) AND FROM NIS # AND ANALYZE ROCMAX OPTIONS FOR ICDPIC-R # # PART C: FROM EFFECTS OF INJURY SEVERITY, ASSIGN OPTIMAL AIS CUTOFFS # CREATE TABLE TO USE IN ICDPIC-R # David Clark, January 2021 # ######################################################################## #1 #CLEAR WORKSPACE, SET WORKING DIRECTORY, #LOAD REQUIRED PACKAGES, IF NOT LOADED ALREADY rm(list=ls()) setwd("Y:/Data_Event/NTDB/Analytic_Steps") require(tidyverse) require(skimr) require(janitor) require(broom) require(pROC) require(lme4) require(ggplot2) require(glmnet) #2 #IMPORT DATA WITH MAXIMUM EFFECT IN EACH BODY REGION FOR EACH PERSON #IMPLEMENT "GREEDY" ALGORITHM TO SEEK OPTIMAL CUTPOINTS FOR SUM OF SQUARES # d1<-read_csv("Y:/Data_Event/NTDB/Analytic_Steps/tqip2017cm_effects.csv") d1<-read_csv("Y:/Data_Event/NTDB/Analytic_Steps/nis2016cm_effects.csv") # d1<-read_csv("Y:/Data_Event/NTDB/Analytic_Steps/tqip2017base_effects.csv") # d1<-read_csv("Y:/Data_Event/NTDB/Analytic_Steps/nis2016base_effects.csv") d3<-filter(d1,idseq==1) #Define a function to return a triangular random variate # with range a to b and mode m. trirandom<-function(a,b,m) { k=(m-a)/(b-a) ru01=runif(1) rtri01k=if_else(ru01<=k,sqrt(k*ru01),(1-sqrt((1-k)*(1-ru01)))) rtriabm=a+(b-a)*rtri01k return(rtriabm) } #Define a function to return a beta random variate # with range a to b, mode m, and "convexity" c (>1 is more peaked). betarandom<-function(a,b,m,c) { k=(m-a)/(b-a) a2=c a1=a2*(k/(1-k)) rbeta01k=rbeta(1,a1,a2) rbetaabm=a+(b-a)*rbeta01k return(rbetaabm) } d4<-d3 sink("x200225.txt",split=T) starttime=Sys.time() cut12=-0.0795 cut23=.157 cut34=.501 cut45=.75 bestroc=.5 i=0 while(i<1000) { i=i+1 convexity=floor(i/100)+1 old12=cut12 old23=cut23 old34=cut34 old45=cut45 if(i%%4==1) { cut12=trirandom(-4,cut23,cut12) } if(i%%4==2) { cut23=trirandom(cut12,cut34,cut23) } if(i%%4==3) { cut34=trirandom(cut23,cut45,cut34) } if(i%%4==0) { cut45=trirandom(cut34,4,cut45) } d4<-mutate(d4,a=case_when( amax>=-9 & amax=cut12 & amax=cut23 & amax=cut34 & amax=cut45 ~ 5, TRUE ~ 0 ) ) d4<-mutate(d4,c=case_when( cmax>=-9 & cmax=cut12 & cmax=cut23 & cmax=cut34 & cmax=cut45 ~ 5, TRUE ~ 0 ) ) d4<-mutate(d4,e=case_when( emax>=-9 & emax=cut12 & emax=cut23 & emax=cut34 & emax=cut45 ~ 5, TRUE ~ 0 ) ) d4<-mutate(d4,f=case_when( fmax>=-9 & fmax=cut12 & fmax=cut23 & fmax=cut34 & fmax=cut45 ~ 5, TRUE ~ 0 ) ) d4<-mutate(d4,h=case_when( hmax>=-9 & hmax=cut12 & hmax=cut23 & hmax=cut34 & hmax=cut45 ~ 5, TRUE ~ 0 ) ) d4<-mutate(d4,s=case_when( smax>=-9 & smax=cut12 & smax=cut23 & smax=cut34 & smax=cut45 ~ 5, TRUE ~ 0 ) ) d4<-mutate(d4,a2=a^2) d4<-mutate(d4,c2=c^2) d4<-mutate(d4,e2=e^2) d4<-mutate(d4,f2=f^2) d4<-mutate(d4,h2=h^2) d4<-mutate(d4,s2=s^2) d4<-mutate(d4,sumsquares=a2+c2+e2+f2+h2+s2) testroc<-roc(d4$died,d4$sumsquares,quiet=T) newroc=as.numeric(auc(testroc)) if(newroc<=bestroc) { cut12=old12 cut23=old23 cut34=old34 cut45=old45 } if(newroc>bestroc){ print(c("i:",i,"Cuts:",format(cut12,digits=3),format(cut23,digits=3),format(cut34,digits=3), format(cut45,digits=3),"AUC:",format(newroc,digits=4))) bestroc=newroc } if(i%%100==0){ print(c("i:",i)) } } #while endtime=Sys.time() endtime-starttime sink(NULL) #Best for TQIPcm: .165, .335, .750, 1.26: R-squared Sum Squares = .8560, ISS = .8567 #Best for NIScm: -.0694, .119, .418, .877; R-squared sum squares = .7505, ISS = .7510 #Best for TQIPbase: .241, .423, .766, 1.25; R-squared sum squares = .8445, ISS = .8403 #Best for NISbase: -.0817, .162, .502, .75; R-squared sum squares = .7320, ISS = .7325 #3 #FOR EACH DATABASE, VERIFY SUM OF SQUARES AND CALCULATE ISS cut12=-.0694 cut23=.119 cut34=.418 cut45=.877 d5<-mutate(d3,a=case_when( amax>-9 & amax=cut12 & amax=cut23 & amax=cut34 & amax=cut45 ~ 5, TRUE ~ 0 ) ) d5<-mutate(d5,c=case_when( cmax>-9 & cmax=cut12 & cmax=cut23 & cmax=cut34 & cmax=cut45 ~ 5, TRUE ~ 0 ) ) d5<-mutate(d5,e=case_when( emax>-9 & emax=cut12 & emax=cut23 & emax=cut34 & emax=cut45 ~ 5, TRUE ~ 0 ) ) d5<-mutate(d5,f=case_when( fmax>-9 & fmax=cut12 & fmax=cut23 & fmax=cut34 & fmax=cut45 ~ 5, TRUE ~ 0 ) ) d5<-mutate(d5,h=case_when( hmax>-9 & hmax=cut12 & hmax=cut23 & hmax=cut34 & hmax=cut45 ~ 5, TRUE ~ 0 ) ) d5<-mutate(d5,s=case_when( smax>-9 & smax=cut12 & smax=cut23 & smax=cut34 & smax=cut45 ~ 5, TRUE ~ 0 ) ) d5<-mutate(d5,a2=a^2) d5<-mutate(d5,c2=c^2) d5<-mutate(d5,e2=e^2) d5<-mutate(d5,f2=f^2) d5<-mutate(d5,h2=h^2) d5<-mutate(d5,s2=s^2) d5<-mutate(d5,sumsquares=a2+c2+e2+f2+h2+s2) skim(d5,a,c,e,f,h,s,sumsquares) roc(d5$died,d5$sumsquares) tabyl(d5,sumsquares) %>% adorn_pct_formatting(digits=2) #Calculate actual ISS d6<-gather(d5,a2,c2,e2,f2,h2,s2,key="reg",value="regmax") d6<-group_by(d6,INC_KEY) d6<-arrange(d6,desc(regmax)) d6<-mutate(d6,regorder=row_number()) d6<-mutate(d6,xiss=ifelse(regorder<=3,cumsum(regmax),0)) d6<-mutate(d6,riss=max(xiss)) d6<-ungroup(d6) d6<-filter(d6,regorder==1) d6<-select(d6,-reg,-regmax,-regorder,-xiss) #Compare results to ISS in registry data d7<-mutate(d6,risscat=case_when( riss>0 & riss<=8 ~ "01-08", riss>8 & riss<=15 ~ "09-15", riss>15 & riss<=24 ~ "16-24", riss>24 & riss<=40 ~ "25-40", riss>40 & riss<=49 ~ "41-49", riss>49 & riss<=75 ~ "50-75", TRUE ~ "Unk" ) ) d7<-mutate(d7,zisscat=case_when( ISSAIS>0 & ISSAIS<=8 ~ "01-08", ISSAIS>8 & ISSAIS<=15 ~ "09-15", ISSAIS>15 & ISSAIS<=24 ~ "16-24", ISSAIS>24 & ISSAIS<=40 ~ "25-40", ISSAIS>40 & ISSAIS<=49 ~ "41-49", ISSAIS>49 & ISSAIS<=75 ~ "50-75", TRUE ~ "Unk" ) ) d7<-mutate(d7,ISSAIS=ifelse(ISSAIS==-2,NaN,ISSAIS)) roc(d7$died,d7$ISSAIS) roc(d7$died,d7$riss) t<-tabyl(d7,zisscat,died) t2<-adorn_percentages(t,"row") t3<-adorn_pct_formatting(t2, digits=2) t4<-adorn_ns(t3,"front") t4 t<-tabyl(d7,risscat,died) t2<-adorn_percentages(t,"row") t3<-adorn_pct_formatting(t2, digits=2) t4<-adorn_ns(t3,"front") t4 t5<-tabyl(d7,zisscat,risscat) t5 t6<-adorn_percentages(t5,"all") t7<-adorn_pct_formatting(t6, digits=2) t8<-adorn_ns(t7,"front") t8 ################################################################################## #4 #CREATE TABLE OF DIAGNOSIS CODES WITH EFFECTS AND AIS SCORES # Use optimal AIS cutpoints for each source, as determined above rm(list=ls()) # d1<-read_csv("tqip2017cm_effects.csv") d1<-read_csv("nis2016cm_effects.csv") # d1<-read_csv("tqip2017base_effects.csv") # d1<-read_csv("nis2016base_effects.csv") #Identify and remove duplicates, drop unnecessary variables d2<-group_by(d1,dx) d2<-mutate(d2,dxseq=row_number()) d2<-mutate(d2,dxrep=max(dxseq)) d2<-ungroup(d2) d3<-filter(d2,dxseq==1) d3<-arrange(d3,dx) d3<-select(d3,dx,issbr,effect,ridge_int,dxrep) #################################################### # ENTER APPROPRIATE CUTPOINTS AND ASSIGN AIS VALUES #################################################### #Best for TQIPcm: .165, .335, .750, 1.26: R-squared Sum Squares = .8560, ISS = .8567 #Best for NIScm: -.0694, .119, .418, .877; R-squared sum squares = .7505, ISS = .7510 #Best for TQIPbase: .241, .423, .766, 1.25; R-squared sum squares = .8445, ISS = .8403 #Best for NISbase: -.0817, .162, .502, .75; R-squared sum squares = .7320, ISS = .7325 cut12= -.0694 cut23=0.119 cut34=0.418 cut45=0.877 #Assign AIS values d4<-mutate(d3,ais=case_when( effect>-99 & effect=cut12 & effect=cut23 & effect=cut34 & effect=cut45 ~ 5, TRUE ~ 0 ) ) ######################################### # ONLY RUN THE APPROPRIATE SECTION BELOW ######################################### #Rename diagnosis code type as appropriate and save results #####FOR TQIP CM d5<-rename(d4,icdcm=dx) d5<-rename(d5,TQIPeffect=effect) d5<-rename(d5,TQIPint=ridge_int) d5<-rename(d5,TQIPais=ais) d5<-rename(d5,TQIPbr=issbr) #Add back codes that prematurely specify mortality outcome #Use results from codes modified in Section A3a above d5temp<-filter(d5,str_sub(icdcm,1,3)=="S06" & str_sub(icdcm,7,7)=="9") d5temp6<-mutate(d5temp,icdcm=str_c(str_sub(icdcm,1,6),"6A")) d5temp7<-mutate(d5temp,icdcm=str_c(str_sub(icdcm,1,6),"7A")) d5temp8<-mutate(d5temp,icdcm=str_c(str_sub(icdcm,1,6),"8A")) d5temp678<-bind_rows(list(d5temp6,d5temp7,d5temp8)) d6<-bind_rows(list(d5,d5temp678)) write_csv(d6,"tqip2017cm_ais.csv") #####FOR NIS CM d5<-rename(d4,icdcm=dx) d5<-rename(d5,NISeffect=effect) d5<-rename(d5,NISint=ridge_int) d5<-rename(d5,NISais=ais) d5<-rename(d5,NISn=dxrep) d5<-rename(d5,NISbr=issbr) #Add back codes that prematurely specify mortality outcome #Use results from codes modified in Section A3a above d5temp<-filter(d5,str_sub(icdcm,1,3)=="S06" & str_sub(icdcm,7,7)=="9") d5temp6<-mutate(d5temp,icdcm=str_c(str_sub(icdcm,1,6),"6A")) d5temp7<-mutate(d5temp,icdcm=str_c(str_sub(icdcm,1,6),"7A")) d5temp8<-mutate(d5temp,icdcm=str_c(str_sub(icdcm,1,6),"8A")) d5temp678<-bind_rows(list(d5temp6,d5temp7,d5temp8)) d6<-bind_rows(list(d5,d5temp678)) write_csv(d6,"nis2016cm_ais.csv") #####FOR TQIP BASE d5<-rename(d4,icdbase=dx) d5<-rename(d5,TQIPeffect=effect) d5<-rename(d5,TQIPint=ridge_int) d5<-rename(d5,TQIPais=ais) d5<-rename(d5,TQIPn=dxrep) d5<-rename(d5,TQIPbr=issbr) write_csv(d5,"tqip2017base_ais.csv") #####FOR NIS BASE d5<-rename(d4,icdbase=dx) d5<-rename(d5,NISeffect=effect) d5<-rename(d5,NISint=ridge_int) d5<-rename(d5,NISais=ais) d5<-rename(d5,NISn=dxrep) d5<-rename(d5,NISbr=issbr) write_csv(d5,"nis2016base_ais.csv") #5 #MERGE AIS TABLES #Create two working tables for ICDPIC-R rm(list=ls()) d1<-read_csv("tqip2017cm_ais.csv") d2<-read_csv("nis2016cm_ais.csv") d3<-full_join(d1,d2,by="icdcm") rm(list=ls()) d1<-read_csv("tqip2017base_ais.csv") d2<-read_csv("nis2016base_ais.csv") d3<-full_join(d1,d2,by="icdbase") #Compare TQIP and NIS results - quite different! tabyl(d3,TQIPais,NISais) tabyl(d3,TQIPbr,NISbr) #If AIS or Body Region missing in one database, borrow from the other d4<-mutate(d3,TQIPais_mod=if_else(is.na(TQIPais),NISais,TQIPais)) d4<-mutate(d4,NISais_mod=if_else(is.na(NISais),TQIPais,NISais)) d4<-mutate(d4,TQIPbr_mod=if_else(is.na(TQIPbr),NISbr,TQIPbr)) d4<-mutate(d4,NISbr_mod=if_else(is.na(NISbr),TQIPbr,NISbr)) tabyl(d4,TQIPais,NISais) tabyl(d4,TQIPais_mod,NISais_mod) tabyl(d4,TQIPbr_mod,NISbr_mod) #Sort and save results for application to ICDPIC-R d4<-arrange(d4,icdcm) write_csv(d4,"TQIP_NIS_ais_cm.csv") d4<-arrange(d4,icdbase) write_csv(d4,"TQIP_NIS_ais_base.csv") #6 #TEST WHAT WORKS BEST FOR OUT-OF-SAMPLE PREDICTION # d1<-read_csv("Y:/Data_Event/NTDB/Analytic_Steps/tqip2017cm_effects.csv") # d1<-read_csv("Y:/Data_Event/NTDB/Analytic_Steps/nis2016cm_effects.csv") # d1<-read_csv("Y:/Data_Event/NTDB/Analytic_Steps/tqip2017base_effects.csv") d1<-read_csv("Y:/Data_Event/NTDB/Analytic_Steps/nis2016base_effects.csv") d2<-read_csv("TQIP_NIS_ais_cm.csv") d2<-read_csv("TQIP_NIS_ais_base.csv") #Specify whether the data are ICD-10-CM or basic ICD-10 #d2<-rename(d2,dx=icdcm) d2<-rename(d2,dx=icdbase) d3<-full_join(d1,d2,by="dx") #Specify which reference database is to be used for calculations d3<-mutate(d3,ais=NISais) d3<-mutate(d3,Pint=NISint) d3<-mutate(d3,Peff=NISeffect) d3<-mutate(d3,issbr=NISbr_mod) #Obtain maximum AIS for each person and body region d4<-group_by(d3,INC_KEY,issbr) d4<-mutate(d4,a0=ifelse(issbr=="Abdomen",max(ais),0)) d4<-mutate(d4,c0=ifelse(issbr=="Chest",max(ais),0)) d4<-mutate(d4,e0=ifelse(issbr=="Extremities",max(ais),0)) d4<-mutate(d4,f0=ifelse(issbr=="Face",max(ais),0)) d4<-mutate(d4,h0=ifelse(issbr=="Head/Neck",max(ais),0)) d4<-mutate(d4,s0=ifelse(issbr=="General",max(ais),0)) d4<-ungroup(d4) d4<-ungroup(d4) d4<-group_by(d4,INC_KEY) d4<-mutate(d4,idseq=row_number()) d4<-mutate(d4,amax=max(a0)) d4<-mutate(d4,cmax=max(c0)) d4<-mutate(d4,emax=max(e0)) d4<-mutate(d4,fmax=max(f0)) d4<-mutate(d4,hmax=max(h0)) d4<-mutate(d4,smax=max(s0)) d4<-ungroup(d4) d4<-arrange(d4,INC_KEY,dx) #calculate ISS and ROC d5<-mutate(d4,a=amax) d5<-mutate(d5,c=cmax) d5<-mutate(d5,e=emax) d5<-mutate(d5,f=fmax) d5<-mutate(d5,h=hmax) d5<-mutate(d5,s=smax) d5<-select(d5,INC_KEY,dx,died,ais,a,c,e,f,h,s) d5<-mutate(d5,a2=a^2) d5<-mutate(d5,c2=c^2) d5<-mutate(d5,e2=e^2) d5<-mutate(d5,f2=f^2) d5<-mutate(d5,h2=h^2) d5<-mutate(d5,s2=s^2) d5<-select(d5,-a,-c,-e,-f,-h,-s) #Get one observation per person d5<-group_by(d5,INC_KEY) d5<-mutate(d5,idseq=row_number()) d5<-ungroup(d5) d5<-filter(d5,idseq==1) #Calculate ISS d6<-gather(d5,a2,c2,e2,f2,h2,s2,key="reg",value="regmax") d6<-group_by(d6,INC_KEY) d6<-arrange(d6,desc(regmax)) d6<-mutate(d6,regorder=row_number()) d6<-mutate(d6,xiss=ifelse(regorder<=3,cumsum(regmax),0)) d6<-mutate(d6,riss=max(xiss)) d6<-ungroup(d6) d6<-filter(d6,regorder==1) d6<-select(d6,-ais,-idseq,-reg,-regmax,-regorder,-xiss) d6<-arrange(d6,INC_KEY,dx) roc(d6$died,d6$riss) #Calculate Pmort d7<-group_by(d3,INC_KEY) d7<-mutate(d7,idseq=row_number()) d7<-mutate(d7,sumeff=sum(Peff)) d7<-ungroup(d7) d7<-filter(d7,idseq==1) d7<-mutate(d7,xb=sumeff+Pint) d7<-mutate(d7,Pmort=1/(1+exp(-xb))) roc(d7$died,d7$Pmort)