{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from datascience import *\n", "import numpy as np\n", "\n", "%matplotlib inline\n", "import matplotlib.pyplot as plots\n", "plots.style.use('fivethirtyeight')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Lecture 7" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Census ##" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "full = Table.read_table('nc-est2014-agesex-res.csv')\n", "full" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "partial = full.select('SEX', 'AGE', 'POPESTIMATE2010', 'POPESTIMATE2014')\n", "partial" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "simple = partial.relabeled('POPESTIMATE2010', '2010').relabeled('POPESTIMATE2014', '2014')\n", "simple" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "simple.sort('AGE', descending=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "no_999 = simple.where('AGE', are.below(999))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "everyone = no_999.where('SEX', 0).drop('SEX')\n", "males = no_999.where('SEX', 1).drop('SEX')\n", "females = no_999.where('SEX', 2).drop('SEX')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "females" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "females.sort('2014', descending=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "males.sort('2014', descending=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pop_2014 = Table().with_columns(\n", " 'Age', males.column('AGE'),\n", " 'Males', males.column('2014'),\n", " 'Females', females.column('2014')\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pop_2014" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "percent_females = 100 *pop_2014.column('Females')/(pop_2014.column('Males') + pop_2014.column('Females'))\n", "counts_and_percents = pop_2014.with_column('Percent Female', percent_females)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "counts_and_percents" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Line Graphs ##" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "counts_and_percents.plot('Age', 'Percent Female')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pop_2014" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pop_2014.plot('Age')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pop_2014.where('Age', are.between(65, 75))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "2014 - np.arange(67, 73)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "everyone" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "everyone = everyone.with_columns(\n", " 'Change', everyone.column('2014') - everyone.column('2010')\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "everyone.sort('Change', descending=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "everyone.with_columns(\n", " 'Growth Rate', (everyone.column('2014')/everyone.column('2010')) ** (1/4) - 1\n", ").sort('Growth Rate', descending=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Scatter Plots ##" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "actors = Table.read_table('actors.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "actors" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "actors.scatter('Number of Movies', 'Total Gross')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "actors.scatter('Number of Movies', 'Average per Movie')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "actors.where('Average per Movie', are.above(400))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" } }, "nbformat": 4, "nbformat_minor": 1 }