# R snippets for DALEX
# read more about this tool at
# Explanatory Model Analysis
# https://pbiecek.github.io/ema/
# for pipes
library("magrittr")
# Prepare data ----
# `titanic_imputed` is provided by the DALEX package; look at its first rows
# and its dimensions before modelling.
library("DALEX")
head(titanic_imputed)
dim(titanic_imputed)

# Train a model ----
# Random forest fitted with ranger; `probability = TRUE` requests a
# probability forest. The seed is fixed so the fit can be reproduced.
library("ranger")
set.seed(1313)
titanic_rf <- ranger(
  survived ~ class + gender + age + sibsp + parch + fare + embarked,
  data = titanic_imputed,
  probability = TRUE,
  classification = TRUE
)
# print a summary of the fitted forest
titanic_rf
# Prepare an explainer
library("DALEX")
# Basic use: wrap the fitted model, the data and the observed outcome in an
# explainer; no prediction function is supplied here.
titanic_ex <- explain(
  model = titanic_rf,
  data  = titanic_imputed,
  y     = titanic_imputed$survived,
  label = "Regression Forest"
)
# Advanced use: the same explainer, but with an explicit prediction function
# that returns the second column of the forest's prediction matrix.
titanic_ex <- explain(
  model = titanic_rf,
  data  = titanic_imputed,
  y     = titanic_imputed$survived,
  label = "Regression Forest",
  predict_function = function(model, data) {
    forest_output <- predict(model, data, probability = TRUE)
    # force a two-column layout before picking the second column
    class_probs <- matrix(forest_output$predictions, ncol = 2)
    class_probs[, 2]
  }
)
# Internals: components stored inside the explainer object
titanic_ex$model             # the wrapped model
titanic_ex$model_info        # information about where the model comes from
head(titanic_ex$data)        # first rows of the stored data
titanic_ex$predict_function  # function used to obtain predictions
head(titanic_ex$y_hat)       # first stored predictions
head(titanic_ex$residuals)   # first stored residuals
titanic_ex$label             # label of the model
# Explanations from the DALEX package ----
# Instance level: everything below concerns one passenger (row 5 of the data)
single_passanger <- titanic_imputed[5, ]
single_passanger

# model prediction for that passenger
predict(titanic_ex, single_passanger)

# prediction parts (see more in episode 3 and 4)
plot(predict_parts(titanic_ex, new_observation = single_passanger))

# prediction profile (see more in episode 5), shown for three variables
plot(
  predict_profile(titanic_ex, new_observation = single_passanger),
  variables = c("age", "fare", "parch")
)
# Dataset level: explanations computed for the model as a whole

# model performance (see more in episode 6), drawn as a ROC curve
plot(model_performance(titanic_ex), geom = "roc")

# model parts (see more in episode 7), boxplots switched off
plot(model_parts(titanic_ex), show_boxplots = FALSE)

# model profile (see more in episode 8), shown for three variables
plot(model_profile(titanic_ex), variables = c("age", "fare", "parch"))

# model diagnostics (see more in episode 9): absolute residuals against age
plot(
  model_diagnostics(titanic_ex),
  variable = "age",
  yvariable = "abs_residuals"
)
# Champion-challenger ----
# Second model: logistic regression from rms, with a restricted cubic spline
# for age. It is fitted on `titanic_imputed`, the same data that the random
# forest and both explainers use, so the two models are compared on equal
# footing and no rows are dropped because of missing values.
# `survived` is a 0/1 outcome in `titanic_imputed`, so it is used directly
# as the response.
library("rms")
set.seed(1313)
titanic_lmr <- lrm(
  survived ~ gender + rcs(age) + class + sibsp + parch + fare + embarked,
  data = titanic_imputed
)
# Explainer for the logistic regression, built on the same data and outcome
# as the explainer for the forest.
titanic_ex2 <- explain(
  model = titanic_lmr,
  data  = titanic_imputed,
  y     = titanic_imputed$survived,
  label = "Logistic regression"
)
# Side-by-side comparison of both explainers

# ROC curves
plot(
  model_performance(titanic_ex),
  model_performance(titanic_ex2),
  geom = "roc"
)

# LIFT curves
plot(
  model_performance(titanic_ex),
  model_performance(titanic_ex2),
  geom = "lift"
)

# Partial dependence profiles: aggregated profiles of both models
# for three variables
plot(
  model_profile(titanic_ex)$agr_profiles,
  model_profile(titanic_ex2)$agr_profiles,
  variables = c("age", "fare", "parch")
)
# The DrWhy.AI universe
# DALEXtra -> wrappers for other models, like scikit-learn
#          -> show the page with showcases
# ingredients, iBreakdown, auditor, drifter -> DrWhy.AI
# modelDown
# modelStudio
# Example for modelDown:
# builds a static HTML site with explanations for the explainers passed in
# (here: both Titanic explainers).
# NOTE(review): no output location is given, so presumably the package
# default is used - confirm in the modelDown documentation.
library("modelDown")
modelDown(titanic_ex, titanic_ex2)
# Example for modelStudio:
# interactive HTML site with explanations for one model, built around
# the single passenger selected earlier in this script.
library("modelStudio")
modelStudio(
  explainer = titanic_ex,
  new_observation = single_passanger
)