In [None]:
from datascience import *
import numpy as np

## Maps

In [None]:
# Load the station table and expose the station name under 'labels', the
# column the map cells below select for marker text.
stations = Table.read_table('station.csv').relabeled("name", "labels")
stations

In [None]:
# Draw one marker per station from its latitude, longitude, and label columns.
station_points = stations.select('lat', 'long', 'labels')
Marker.map_table(station_points)

In [None]:
# Same stations drawn as circles; tweak the "radius=" and "color=" keyword
# arguments to change the size or color of the circles.
Circle.map_table(
    stations.select('lat', 'long', 'labels'),
    color='green',
    radius=150,
)

In [None]:
# Keep only rides shorter than 30 minutes (Duration is measured in seconds),
# then reduce the table to the start station, end station, and duration.
# Columns are picked by label rather than by position so the cell keeps
# working if the CSV's column order ever changes.
# NOTE(review): assumes positions 3 and 6 of trip.csv are 'Start Station' and
# 'End Station', the labels a later cell reads from this same file - confirm.
trip = (
    Table.read_table('trip.csv')
    .where('Duration', are.below(30 * 60))
    .select('Start Station', 'End Station', 'Duration')
    .relabeled('Start Station', 'Start')
    .relabeled('End Station', 'End')
)

In [None]:
# Count rides per starting station and list the busiest stations first.
rides_per_start = trip.group('Start')
starts = rides_per_start.sort('count', descending=True)
starts

In [None]:
# Attach each station's ride count to its row of station details by matching
# the station name ('labels') against the 'Start' column of the ride counts.
station_starts = stations.join(
    'labels',
    starts,
    other_label='Start',
)
station_starts

In [None]:
# Count the stations in each landmark: one row per distinct 'landmark' value,
# with the number of stations in a 'count' column.
landmarks = stations.group('landmark')
landmarks

In [None]:
# Give every landmark its own marker color. One color is listed per landmark
# row, so this assumes the landmarks table has exactly five rows.
landmark_palette = make_array('blue', 'red', 'yellow', 'orange', 'purple')
landmarks = landmarks.with_columns('colors', landmark_palette)
landmarks

In [None]:
# Copy each landmark's color onto its stations. The per-landmark station count
# is dropped first so it does not clash with the ride 'count' column that
# station_starts already carries.
landmark_colors = landmarks.drop('count')
station_starts = station_starts.join('landmark', landmark_colors)
station_starts

In [None]:
# Size each circle by ridership: a station's area is one tenth of its ride count.
circle_areas = station_starts.column('count') / 10
station_starts = station_starts.with_columns('areas', circle_areas)
station_starts

In [None]:
# Final map: one circle per station, using the color and area columns built above.
circle_spec = station_starts.select('lat', 'long', 'labels', 'colors', 'areas')
Circle.map_table(circle_spec)

## Table examples

In [None]:
# One row per menu item: the drink, the cafe selling it, and its price.
menu_rows = [
    ['Milk Tea', 'Tea One', 4],
    ['Espresso', 'Nefeli', 2],
    ['Coffee', 'Nefeli', 3],
    ['Espresso', "Abe's", 2],
]
drinks = Table(['Drink', 'Cafe', 'Price']).with_rows(menu_rows)
drinks

In [None]:
# Coupons on offer: the percent taken off and the cafe where each one applies.
# Tea One has two coupons; Abe's (which appears in drinks) has none.
discounts = (
    Table()
    .with_column('Coupon % off', make_array(5, 50, 25))
    .with_column('Location', make_array('Tea One', 'Nefeli', 'Tea One'))
)
discounts

In [None]:
# Discussion question: build a table with one row per cafe that shows the name
# and discounted price of that cafe's cheapest discounted drink.

# Step 1: link each drink to the coupons offered at its cafe.
combined = drinks.join('Cafe', discounts, 'Location')

# Step 2: turn the percent-off value into a fraction and apply it to the price.
coupon_fraction = combined.column('Coupon % off') / 100
discounted_prices = combined.column('Price') * (1 - coupon_fraction)

discounted_drinks = combined.with_columns(
    'Discounted price', discounted_prices
)
discounted_drinks
# Step 3 (following cells): sort to pick out each cafe's cheapest drink.

In [None]:
# Correct: Espresso is cheaper. Ordering by discounted price before the
# distinct-by-cafe sort is what makes the cheapest row the one shown per cafe.
by_discounted_price = discounted_drinks.sort('Discounted price')
by_discounted_price.sort('Cafe', distinct=True)

In [None]:
# Incorrect: the table must be sorted by "Discounted price" first. Without
# that, the single row kept per cafe is not necessarily its cheapest drink.
discounted_drinks.sort('Cafe', distinct=True) 

In [None]:
# Incorrect: min is applied to the Drink column as well, so the name shown is
# the alphabetically first one (Coffee) rather than the cheapest drink's name.
discounted_drinks.group('Cafe', min) 

## Spring 2016 Midterm, Question 2(b)

Challenge yourself and try to solve these on your own before looking at the solutions!

In [None]:
# Reload the raw trips (no duration filter this time) and keep just the three
# columns the midterm questions use, under shorter names.
trip0 = Table.read_table("trip.csv")
trip = (
    trip0.select("Start Station", "End Station", "Duration")
    .relabeled("Start Station", "Start")
    .relabeled("End Station", "End")
)
trip.show(3)

In [None]:
# The name of the station where the most rentals ended
# (assume no ties).

In [None]:
# The number of stations for which the average duration ending 
# at that station was more than 10 minutes.

In [None]:
# The number of stations that have more than 500 starts 
# AND more than 500 ends

In [None]:
# The name of the station where the most rentals ended (assume no ties).
# Tally rentals by ending station, put the largest tally on top, and read the
# station name off the first row.
end_counts = trip.group('End')
most_ends_first = end_counts.sort('count', descending=True)
most_ends_first.column('End').item(0)





In [None]:
# The number of stations for which the average duration ending
# at that station was more than 10 minutes.

# Keep only the columns that matter so the average is taken over Duration
# alone (averaging the text 'Start' column is meaningless), then filter on the
# averaged column by its label instead of a positional index.
# Finally, count the stations that remain.
(
    trip.select('End', 'Duration')
    .group('End', np.average)
    .where('Duration average', are.above(10 * 60))  # Duration is in seconds
    .num_rows
)





In [None]:
# The number of stations that have more than 500 starts
# AND more than 500 ends

# Rentals that began at each station.
starting = (
    trip.group('Start')
    .relabeled('Start', 'Station')
    .relabeled('count', 'Start count')
)
# Rentals that finished at each station.
ending = (
    trip.group('End')
    .relabeled('End', 'Station')
    .relabeled('count', 'End count')
)
# Line the two tallies up by station, keep stations above 500 on both, and count.
(
    starting.join('Station', ending)
    .where('End count', are.above(500))
    .where('Start count', are.above(500))
    .num_rows
)




## Comparison ##

In [None]:
# 3 is greater than 1, so this comparison evaluates to True
3 > 1

In [None]:
# The result of a comparison is a bool value
type(3 > 1)

In [None]:
# 3 is not less than 1, so this evaluates to False
3 < 1

In [None]:
# True is itself a bool literal
True

In [None]:
# == tests equality (a single = means assignment)
3 == 3

In [None]:
# `=` is assignment, not comparison, so `3 = 3` is a SyntaxError. The snippet
# is compiled from a string so the error can be caught and displayed here
# instead of stopping a Restart & Run All.
try:
    compile('3 = 3', '<comparison demo>', 'exec')
except SyntaxError as error:
    assignment_error = 'SyntaxError: ' + error.msg
assignment_error

In [None]:
# Two names used by the comparisons in the following cells
x = 14
y = 3

In [None]:
# 14 > 10 is True
x > 10

In [None]:
# Chained comparison: True only when both 12 < x and x < 18 hold
12 < x < 18

In [None]:
# First half of the chained comparison on its own
12 < x

In [None]:
# Second half of the chained comparison on its own
x < 18

In [None]:
# x - y is 11, which is not above 12, so the chain is False
12 < x-y < 18

In [None]:
# `and` needs both sides to be True; y is 3, so y > 5 fails and the result is False
x > 10 and y > 5

## Comparisons with arrays

In [None]:
# An array of six pet names to compare against
pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')
pets

In [None]:
# Compares every element with 'dog', giving one True/False per pet
pets == 'dog'

In [None]:
# The same pattern written by hand with 1 for each dog and 0 otherwise
0 + 1 + 0 + 0 + 1 + 0

In [None]:
# Summing booleans counts the True values (True adds 1, False adds 0)
sum(make_array(False, True, False, False, True, False))

In [None]:
# So summing the comparison counts the dogs
sum(pets == 'dog')

In [None]:
# numpy's count_nonzero gives the same count of True values
np.count_nonzero(pets == 'dog')

In [None]:
# Strings compare alphabetically: True for names that come after 'cat'
pets > 'cat'

In [None]:
# Number of pets whose name comes after 'cat' alphabetically
sum(pets > 'cat')

In [None]:
# A string sorts before any longer string that begins with it
"cat" < "catastrophe"

## Predicates and advanced `where`

In [None]:
# A one-column table holding semesters 1 through 8 (np.arange excludes the stop value 9)
terms = Table().with_column('Semester', np.arange(1, 9))
terms

In [None]:
# Keep only the rows whose Semester is above 6
terms.where('Semester', are.above(6))

In [None]:
# The predicate is an ordinary value, so it can be saved under a name
is_senior = are.above(6)

In [None]:
# ...and called directly on a single value; 4 is not above 6
is_senior(4)

In [None]:
def also_is_senior(x):
    """Return whether semester number `x` is beyond the sixth semester."""
    past_sixth_semester = x > 6
    return past_sixth_semester

In [None]:
# 5 is not greater than 6, so this returns False
also_is_senior(5)

In [None]:
# Call the function on every value in the Semester column
terms.apply(also_is_senior, 'Semester')

In [None]:
# Three ways to express the same filter: a built-in predicate written inline...
terms.where('Semester', are.above(6))

In [None]:
# ...the same predicate saved under a name...
terms.where('Semester', is_senior)

In [None]:
# ...or any function of one value that returns True/False
terms.where('Semester', also_is_senior)

In [None]:
# where also accepts the per-row results directly: one True/False per row
terms.where(terms.apply(also_is_senior, 'Semester'))