######################################################################
### Title: "Week 4 - Multiple Regression: Polynomials"
### Course: STA 235H
### Semester: Fall 2023
### Professor: Magdalena Bennett
######################################################################

# NOTE: This is an in-class exercise template. Lines marked
# "COMPLETE THIS LINE" are intentionally unfinished and must be filled in
# before the corresponding plot or model can be produced.

# Clears memory
rm(list = ls())
# Clears console
cat("\014")
# scipen=999 removes scientific notation; scipen=0 turns it on.
options(scipen = 0)

### Load libraries
# If you don't have one of these packages installed already, you will need
# to run install.packages() first.
library(tidyverse)
library(vtable)

################################################################################
### In-class exercises
################################################################################

## Ames Housing dataset: Data for the housing market in Ames, Iowa.
## You can check the codebook here: https://sta235.com/Classes/Week3/2_OLS_Issues/data/ames_codebook.csv

housing = read.csv("https://raw.githubusercontent.com/maibennett/sta235/main/exampleSite/content/Classes/Week3/2_OLS_Issues/data/AmesHousing.csv")

# Only keep single family housing (Bldg.Type):
housing = housing %>% filter(Bldg.Type == "1Fam")

# Concentrate only on lot units that are under 20,000 sqft:
housing = housing %>% filter(Lot.Area < 20000)

# Q1: Create a scatter plot between SalePrice (Y) and Lot.Area (X) and
# fit a linear model. Complete the following code:

ggplot() + #COMPLETE THIS LINE
  geom_point(color = "pink3") +
  geom_smooth(method = "lm", se = FALSE, color = "purple4") +
  theme_minimal()

# Q2: Create a scatter plot between SalePrice (Y) and Lot.Area (X) and
# fit a quadratic model of SalePrice on Lot.Area. Complete the following code:
# In this case, formula = y ~ x + I(x^2) adds the quadratic term to the fit!

ggplot() + #COMPLETE THIS LINE
  geom_point(color = "pink3") +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2),
              se = FALSE, color = "purple4") +
  theme_minimal()

# Q3: Fit a regression of SalePrice on Lot.Area and Lot.Area^2.
# Interpret the coefficient for Lot.Area:
# Note: To include a quadratic term in a formula, you
# wrap the variable in I(): e.g. I(Lot.Area^2).
# What is the association between Price and Area
# for an increase of 10,000 to 10,001 sqft?

lm_quad = lm() #COMPLETE THIS LINE

summary(lm_quad)