# # Script to run the CSR Model on data from the CAS Loss Reserve Database # Uses Stan for the MCMC run # by Glenn Meyers ####### Most of the script is in functions - They are called at the end. # rm(list = ls()) # clear workspace t0=Sys.time() # # some input # losstype="cpdloss" #"incloss" if incurred loss or "cpdloss" if paid loss # # get packages # library(rstan) rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) library(loo) library(parallel) library(doParallel) scodeU = " data{ int len_data; real logprem[len_data]; real logloss[len_data]; int w[len_data]; int d[len_data]; } parameters{ real r_alpha[9]; real r_beta[9]; real logelr; real a_ig[10]; real gamma; } transformed parameters{ real alpha[10]; real beta[10]; real speedup[10]; real sig2[10]; real sig[10]; real mu[len_data]; alpha[1] = 0; for (i in 2:10) alpha[i] = r_alpha[i-1]; for (i in 1:9) beta[i] = r_beta[i]; beta[10] = 0; speedup[1] = 1; for (i in 2:10) speedup[i] = speedup[i-1]*(1-gamma); sig2[10] = gamma_cdf(1/a_ig[10],1,1); for (i in 1:9) sig2[10-i] = sig2[11-i]+gamma_cdf(1/a_ig[i],1,1); for (i in 1:10) sig[i] = sqrt(sig2[i]); for (i in 1:len_data){ mu[i] = logprem[i]+logelr+alpha[w[i]]+beta[d[i]]*speedup[w[i]]; } } model { r_alpha ~ normal(0,3.162); r_beta ~ normal(0,3.162); for (i in 1:10) a_ig[i] ~ inv_gamma(1,1); logelr ~ normal(-.4,3.162); gamma ~ normal(0,0.05); //gamma ~ normal(0,0.0005); for (i in 1:len_data) logloss[i] ~ normal(mu[i],sig[d[i]]); } generated quantities{ vector[len_data] log_lik; for (i in 1:len_data) log_lik[i] = normal_lpdf(logloss[i]|mu[i],sig[d[i]]); } " # # function to get Schedule P triangle data given ins group and line of business # ins.line.data=function(g.code){ b=subset(a,a$GRCODE==g.code) name=b$GRNAME grpcode=b$GRCODE w=b$AccidentYear d=b$DevelopmentLag cum_incloss=b[,6] cum_pdloss=b[,7] bulk_loss=b[,8] dir_premium=b[,9] ced_premium=b[,10] net_premium=b[,11] single=b[,12] posted_reserve97=b[,13] # get incremental paid losses - assume data is sorted by ay and lag inc_pdloss=numeric(0) for (i in unique(w)){ s=(w==i) pl=c(0,cum_pdloss[s]) ndev=length(pl)-1 il=rep(0,ndev) for (j in 1:ndev){ il[j]=pl[j+1]-pl[j] } inc_pdloss=c(inc_pdloss,il) } data.out=data.frame(grpcode,w,d,net_premium,dir_premium,ced_premium, cum_pdloss,cum_incloss,bulk_loss,inc_pdloss,single,posted_reserve97) return(data.out) } # # initialization function for scodeU # initU=function(chain_id){ set.seed(123+chain_id) list(r_alpha=rnorm(9,0,0.2),r_beta=runif(9),a_ig=runif(10), logelr=runif(1,-0.75,-0.5),gamma=rnorm(1,0,0.1)) } pars.list=c("alpha","beta","gamma","logelr","sig","log_lik") # # dummy data for compiling # data.dummy=list(len_data = 55, logprem = rep(8,55), logloss = rep(8,55), w = c(1:10,1:9,1:8,1:7,1:6,1:5,1:4,1:3,1,2,1), d = c(rep(1,10),rep(2,9),rep(3,8),rep(4,7),rep(5,6), rep(6,5),rep(7,4),rep(8,3),rep(9,2),10)) # # compile the univariate model # fitU = stan(model_code=scodeU,data=data.dummy,seed=123,init=initU,chains=0) # # set up function to run stan model and create output # model_function=function(grpcode){ # # read and aggregate the insurer data and # set up training and test data frames # cdata=ins.line.data(grpcode) w=cdata$w-1987 d=cdata$d # # sort the data in order of d, then w within d # o1=100*d+w o=order(o1) w=w[o] d=d[o] premium=cdata$net_premium[o] cpdloss=cdata$cum_pdloss[o] cpdloss=pmax(cpdloss,1) incloss=cdata$cum_incloss[o]-cdata$bulk_loss[o] incloss=pmax(incloss,1) adata=data.frame(grpcode,w,d,premium,cpdloss,incloss) rdata=subset(adata,(adata$w+adata$d)<12) if(losstype=="incloss") rloss=rdata$incloss else rloss=rdata$cpdloss if(losstype=="incloss") aloss=adata$incloss else aloss=adata$cpdloss # # data for the model # data.u=list(len_data = length(rdata$w), logprem = log(rdata$premium), logloss = log(rloss), w = rdata$w, d = rdata$d) # # run the model # stan_thin=1 stan_iter=5000 Rhat_target=1.05 max_Rhat=2 while ((max_Rhat > Rhat_target)&(stan_thin<65)){ fitU1=stan(fit = fitU, data = data.u,init=initU, seed = 123,iter=stan_iter,thin=stan_thin, chains = 4,pars=pars.list, control=list(adapt_delta=.9999), refresh=0) fitU1_summary=as.matrix(summary(fitU1)$summary)[1:32,c(1,3,10)] mrh=subset(fitU1_summary,is.na(fitU1_summary[,3])==F) max_Rhat=round(max(mrh[,3]),4) print(paste("Maximum Rhat =",max_Rhat,"Thin =",stan_thin)) stan_thin=2*stan_thin stan_iter=2*stan_iter } stan_thin=stan_thin/2 # # goodness of fit statistics for comparing models # loglik1=extract_log_lik(fitU1) loo1 <- loo(loglik1) # # extract information from stan output to process in R # b=extract(fitU1) alpha=b$alpha beta=b$beta gamma=b$gamma logelr=b$logelr sig=b$sig # # simulate outcomes for d=10 using parallel processing # set.seed(123) at.wd10=matrix(rdata$cpdloss[55],length(logelr),10) for (w in 2:10){ at.wd10[,w]=rlnorm(length(logelr),log(premium[w])+logelr+alpha[,w], sig[,10]) } # # calculate loss statistics and output to data frame # Premium=subset(rdata,rdata$d==1)$premium ss.wd10=rep(0,10) ms.wd10=rep(0,10) # ms.wd10[1]=mean(at.wd10[,1]) for (w in 2:10){ ms.wd10[w]=mean(at.wd10[,w]) ss.wd10[w]=sd(at.wd10[,w]) } Pred.CSR=rowSums(at.wd10) ms.td10=mean(Pred.CSR) ss.td10=sd(Pred.CSR) CSR.Estimate=round(ms.wd10) CSR.SE=round(ss.wd10) CSR.CV=round(CSR.SE/CSR.Estimate,4) act=sum(subset(aloss,adata$d==10)[1:10]) pct.CSR=sum(Pred.CSR<=act)/length(Pred.CSR)*100 # # put CSR accident year statistics into a data frame # CSR.Estimate=c(CSR.Estimate,round(ms.td10)) CSR.SE=c(CSR.SE,round(ss.td10)) CSR.CV=c(CSR.CV,round(ss.td10/ms.td10,4)) Premium=c(Premium,sum(Premium)) Outcome=subset(aloss,adata$d==10) Outcome=c(Outcome,sum(Outcome)) Group=rep(grpcode,11) CSR.Pct=c(rep(NA,10),pct.CSR) W=c(1:10,"Total") risk=data.frame(Group,W,Premium,CSR.Estimate,CSR.SE,CSR.CV,Outcome,CSR.Pct) Group=grpcode Premium=Premium[11] CSR.Estimate=CSR.Estimate[11] CSR.SE=CSR.SE[11] Outcome=Outcome[11] CSR.Pct=CSR.Pct[11] mean_gamma=round(mean(gamma),3) elpd_loo=round(loo1$elpd_loo,3) se_elpd_loo=round(loo1$se_elpd_loo,3) p_loo=round(loo1$p_loo,3) se_p_loo=round(loo1$se_p_loo,3) looic=round(loo1$looic,3) se_looic=round(loo1$se_looic,3) SumStats=data.frame(Group,Premium,CSR.Estimate,CSR.SE,Outcome,CSR.Pct, mean_gamma,elpd_loo,se_elpd_loo,p_loo,se_p_loo, looic,se_looic,stan_thin) output=list(risk=risk,Predictive_Outcome=Pred.CSR, ParmSummary=fitU1_summary, logelr=logelr,alpha=alpha,beta=beta, gamma=gamma,sig=sig,SumStats=SumStats) return(output) } # # Single triangle # # insurer.data="The directory and file name for the CAS Loss Reserve Database" insurer.data="http://www.casact.org/research/reserve_data/comauto_pos.csv" g=353 #outfilename=paste("CSR CA",g,".csv") a=read.csv(insurer.data) co_model=model_function(g) print(co_model$ParmSummary) print(" ") print(co_model$risk) print("") print(co_model$SumStats) par(mfrow=c(1,1)) hist(co_model$Predictive_Outcome,xlab="Simulated Outcomes", main="Predictive Distribution of Outcomes", sub=paste("Mean =",co_model$SumStats$CSR.Estimate, " SE =",co_model$SumStats$CSR.SE)) #write.csv(co_model$risk,file=outfilename) t1=Sys.time() print(t1-t0) #