{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from datascience import *\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Maps" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "stations = Table.read_table('station.csv').relabel(\"name\",\"labels\")\n", "stations" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "Marker.map_table(stations.select('lat', 'long', 'labels'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Change size or color of circles with \"radius=\" or \"color=\"\n", "Circle.map_table(stations.select('lat', 'long', 'labels'), radius=150, color='green')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "trip = Table.read_table('trip.csv').where('Duration', are.below(1800)).select(3, 6, 1).relabeled(0, 'Start').relabeled(1, 'End')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "starts = trip.group('Start').sort('count', descending=True)\n", "starts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "station_starts = stations.join('labels', starts, 'Start')\n", "station_starts" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "landmarks = stations.group('landmark')\n", "landmarks" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "landmarks = landmarks.with_column(\n", " 'colors',\n", " make_array('blue', 'red', 'yellow', 'orange', 'purple')\n", ")\n", "landmarks" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "station_starts = station_starts.join('landmark', landmarks.drop('count'))\n", "station_starts" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "station_starts = station_starts.with_column(\n", " 'areas',\n", " station_starts.column('count')/10\n", ")\n", "station_starts" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Circle.map_table(station_starts.select('lat', 'long', 'labels', 'colors', 'areas'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Table examples" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "drinks = Table(['Drink', 'Cafe', 'Price']).with_rows([\n", " ['Milk Tea', 'Tea One', 4],\n", " ['Espresso', 'Nefeli', 2],\n", " ['Coffee', 'Nefeli', 3],\n", " ['Espresso', \"Abe's\", 2]\n", "])\n", "drinks" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "discounts = Table().with_columns(\n", " 'Coupon % off', make_array(5, 50, 25),\n", " 'Location', make_array('Tea One', 'Nefeli', 'Tea One')\n", ")\n", "discounts" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Discussion question: Generate a table with one row per cafe that \n", "#has the name and discounted price of its cheapest discounted drink\n", "# Link (join) drinks with discounts\n", "combined = drinks.join('Cafe', discounts, 'Location')\n", "\n", "# Compute discounted prices\n", "discounted_prices = combined.column('Price') * (1 - combined.column('Coupon % off')/100)\n", "\n", "discounted_drinks = combined.with_column(\n", " 'Discounted price', discounted_prices\n", ")\n", "discounted_drinks\n", "# Sort" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Correct, Espresso is cheaper\n", "discounted_drinks.sort('Discounted price').sort('Cafe', distinct=True) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Incorrect - need to sort by \"Discounted price\" first\n", "discounted_drinks.sort('Cafe', distinct=True) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Incorrect, Coffee is first alphabetically\n", "discounted_drinks.group('Cafe', min) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Spring 2016 Midterm, Question 2(b)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Challenge yourself and try to solve these on your own before looking at the solutions!" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "trip0 = Table.read_table(\"trip.csv\")\n", "trip = Table().with_columns(\n", "\"Start\", trip0.column(\"Start Station\"),\n", "\"End\", trip0.column(\"End Station\"),\n", "\"Duration\", trip0.column(\"Duration\"))\n", "trip.show(3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The name of the station where the most rentals ended \n", "#(assume no ties)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The number of stations for which the average duration ending \n", "# at that station was more than 10 minutes." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The number of stations that have more than 500 starts \n", "# AND more than 500 ends" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The name of the station where the most rentals ended (assume no ties).\n", "# First, find end counts\n", "# Then, find the station with the highest end count\n", "trip.group('End').sort('count', descending=True).column(0).item(0)\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The number of stations for which the average duration ending \n", "# at that station was more than 10 minutes.\n", "\n", "# First, find the average end time for each station\n", "# Then, keep the ones above 10 minutes\n", "# Then, count them\n", "trip.group('End', np.average).where(2, are.above(10*60)).num_rows\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The number of stations that have more than 500 starts \n", "# AND more than 500 ends\n", "# First, find the start counts\n", "starting = trip.group('Start').relabeled('count', 'Start count').relabeled('Start', 'Station')\n", "# Then, find the end counts\n", "ending = trip.group('End').relabeled('count', 'End count').relabeled('End', 'Station')\n", "# Combine them with join\n", "starting.join('Station', ending).where('Start count', are.above(500)).where('End count', are.above(500)).num_rows\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Comparison ##" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "3 > 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "type(3 > 1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "3 < 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "True" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "3 == 3" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "3 = 3" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x = 14\n", "y = 3" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x > 10" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "12 < x < 18" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "12 < x" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x < 18" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "12 < x-y < 18" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x > 10 and y > 5" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Comparisons with arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')\n", "pets" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets == 'dog'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "0 + 1 + 0 + 0 + 1 + 0" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sum(make_array(False, True, False, False, True, False))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sum(pets == 'dog')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.count_nonzero(pets == 'dog')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets > 'cat'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sum(pets > 'cat')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\"cat\" < \"catastrophe\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Predicates and advanced `where`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "terms = Table().with_column('Semester', np.arange(1, 9))\n", "terms" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "terms.where('Semester', are.above(6))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "is_senior = are.above(6)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "is_senior(4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def also_is_senior(x):\n", " return x > 6" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "also_is_senior(5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "terms.apply(also_is_senior, 'Semester')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "terms.where('Semester', are.above(6))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "terms.where('Semester', is_senior)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "terms.where('Semester', also_is_senior)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "terms.where(terms.apply(also_is_senior, 'Semester'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 2 }