# -----------------------------
# Basic dataframe operations
# -----------------------------
# Create a dataframe of boat sale data called bsale.
# One row per boat; columns are the boat's name, color, age, sale price
# and cost.
bsale <- data.frame(
  name = c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j"),
  color = c("black", "green", "pink", "blue", "blue",
            "green", "green", "yellow", "black", "black"),
  age = c(143, 53, 356, 23, 647, 24, 532, 43, 66, 86),
  price = c(53, 87, 54, 66, 264, 32, 532, 58, 99, 132),
  cost = c(52, 80, 20, 100, 189, 12, 520, 68, 80, 100),
  # Keep name/color as character vectors. This is the default since
  # R 4.0.0; it is spelled out so older R versions behave the same way.
  stringsAsFactors = FALSE
)

# Explore the bsale dataset:
head(bsale)   # Show me the first few rows
str(bsale)    # Show me the structure of the data
# View() opens a spreadsheet-style viewer window, which only makes sense in
# an interactive session; guard it so the script can also be run in batch
# mode (e.g. via Rscript) without stopping here.
if (interactive()) {
  View(bsale) # Open the data in a new window
}
names(bsale)  # What are the names of the columns?
nrow(bsale)   # How many rows are there in the data?

# Calculating statistics from column vectors
mean(bsale$age)    # What was the mean age?
table(bsale$color) # How many boats were there of each color?
max(bsale$price)   # What was the maximum price?

# Adding new columns
# seq_len() is used instead of 1:nrow(bsale) because 1:0 would count down
# (1, 0) if the dataframe ever had zero rows.
bsale$id <- seq_len(nrow(bsale))           # Row identifier 1, 2, ..., n
bsale$age.decades <- bsale$age / 10        # Age divided by 10
bsale$profit <- bsale$price - bsale$cost   # Price minus cost

# What was the mean price of green boats?
with(bsale, mean(price[color == "green"]))

# What were the names of boats older than 100 years?
with(bsale, name[age > 100])

# What proportion of black boats had a positive profit?
# The mean of a logical vector is the fraction of TRUE values, so the result
# is between 0 and 1 (multiply by 100 to express it as a percent).
with(subset(bsale, color == "black"), mean(profit > 0))

# Save only the price and cost columns in a new dataframe
# (single-bracket indexing with column names returns a dataframe)
bsale.2 <- bsale[c("price", "cost")]

# Change the names of the columns to "p" and "c"
names(bsale.2) <- c("p", "c")

# Create a dataframe called old.black.bsale containing only data from black
# boats older than 50 years
old.black.bsale <- subset(bsale, color == "black" & age > 50)