# +++++++++++++++++++++++++ # +++ R BOOTCAMP MISC +++ # +++ MontyHall +++ # +++++++++++++++++++++++++ # + AUTHOR: David Dobolyi + # +++++++++++++++++++++++++ # ----- # Notes # ----- # This script provides one possible simulation of the Monty Hall problem (that is intentionally relatively verbose). For other R implementations that helped inspire this example, see: # http://www.bodowinter.com/tutorial/bw_doodling_monty_hall.pdf # https://www.lilianccheung.com/data-blog/the-monty-hall-problem-with-r # http://www.samsi.info/sites/default/files/Sun_R_lab_february2012.pdf # --------- # Libraries # --------- library(dplyr) library(ggplot2) library(scales) # -------------- # Misc Functions # -------------- # Establish required outersect function (source: https://www.r-bloggers.com/outersect-the-opposite-of-rs-intersect-function/): outersect <- function(x, y) { sort(c(setdiff(x, y), setdiff(y, x))) } # ------------------------------ # Monty Hall Simulation Function # ------------------------------ montyHall <- function(decision = "stay", iterations = 10000, seed = 123) { # note: alternatively, R 4.1 introduces a new shorthand notation for defining functions, i.e., \(decision = "stay", iterations = 10000, seed = 123) # set random seed (for replicability) set.seed(seed) # establish the doors doors <- c("Prize", "No Prize", "No Prize") # create an empty vector for simulation outcomes theResult <- rep(NA, iterations) # run the simulation repeatedly for multiple iterations via for loop for (i in 1:iterations) { # randomize the doors theDoors <- sample(doors) # randomly choose a door for the contestant theChoice <- sample(1:3, 1) # determine host choice based on contest choice if (theDoors[theChoice] == "No Prize") theHostChoice <- outersect(which(theDoors == "No Prize"), theChoice) else if (theDoors[theChoice] == "Prize") theHostChoice <- sample(which(theDoors == "No Prize"), 1) # determine the contestant's final choice based on switch or stay decision preference if (decision == "switch") theFinalChoice <- outersect(c(theChoice, theHostChoice), 1:3) else if (decision == "stay") theFinalChoice <- theChoice # determine the result (i.e., win or lose) theResult[i] <- ifelse(theDoors[theFinalChoice] == "Prize", "Win", "Lose") } # calculate the proportion of wins winProp <- sum(theResult == "Win") / length(theResult) # print the win percentage to the console message("Proportion of Wins: ", winProp * 100, "% (", decision, ")\n", sep = "") # return results return( list( WinProp = winProp, SimulationResults = theResult ) ) } # -------------------------------------- # Run the Simulation for Stay vs. Switch # -------------------------------------- stayResults <- montyHall(decision = "stay", iterations = 10000) switchResults <- montyHall(decision = "switch", iterations = 10000) # ---------------- # Organize Results # ---------------- # Create a data.frame of the simulation results based on decision type: overallResults <- data.frame( Decision = c(rep("Stay", length(stayResults$SimulationResults)), rep("Switch", length(switchResults$SimulationResults))), Outcome = c(stayResults$SimulationResults, switchResults$SimulationResults) ) head(overallResults) # --------------------------- # Manipulate and Plot Results # --------------------------- # Calculate win/lose percentages by decision: plotDat <- overallResults %>% group_by(Decision) %>% count(Outcome) %>% mutate(Percent = n/sum(n)) head(plotDat) ggplot(plotDat, aes(x = Decision, y = Percent, fill = Outcome)) + geom_bar(stat = "identity", color = "black", size = 2) + geom_text(aes(label = paste0(sprintf("%1.1f", Percent * 100), "%")), position = position_stack(vjust = 0.5), color = "white", fontface = "bold", size = 8) + scale_y_continuous(labels = percent) + ylab("Percentage of Trials") + theme_bw(base_size = 20) + labs(title = "Monty Hall Simulation Results", caption = paste0("Total Trials: ", nrow(overallResults) %>% formatC(format = "f", big.mark = ",", digits = 0), " (", nrow(filter(overallResults, Decision == "Stay")) %>% formatC(format = "f", big.mark = ",", digits = 0), " Stay and ", nrow(filter(overallResults, Decision == "Switch")) %>% formatC(format = "f", big.mark = ",", digits = 0), " Switch)")) + theme(plot.caption = element_text(hjust = 0.5)) # To understand this code (e.g., percentage labels using geom_text), see: https://stackoverflow.com/questions/37817809/r-ggplot-stacked-bar-chart-with-counts-on-y-axis-but-percentage-as-label