{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Introductory examples" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1.usa.gov data from bit.ly" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%pwd" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "path = 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "open(path).readline()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import json\n", "path = 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'\n", "records = [json.loads(line) for line in open(path)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "records[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "records[0]['tz']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(records[0]['tz'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Counting time zones in pure Python" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "time_zones = [rec['tz'] for rec in records]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "time_zones = [rec['tz'] for rec in records if 'tz' in rec]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "time_zones[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def get_counts(sequence):\n", " counts = {}\n", " for x in sequence:\n", " if x in counts:\n", " counts[x] += 1\n", " else:\n", " counts[x] = 1\n", " return counts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from collections import defaultdict\n", "\n", "def get_counts2(sequence):\n", " counts = defaultdict(int) # values will initialize to 0\n", " for x in sequence:\n", " counts[x] += 1\n", " return counts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "counts = get_counts(time_zones)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "counts['America/New_York']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "len(time_zones)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def top_counts(count_dict, n=10):\n", " value_key_pairs = [(count, tz) for tz, count in count_dict.items()]\n", " value_key_pairs.sort()\n", " return value_key_pairs[-n:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "top_counts(counts)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from collections import Counter" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "counts = Counter(time_zones)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "counts.most_common(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Counting time zones with pandas" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import division\n", "from numpy.random import randn\n", "import numpy as np\n", "import os\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "plt.rc('figure', figsize=(10, 6))\n", "np.set_printoptions(precision=4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import json\n", "path = 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'\n", "lines = open(path).readlines()\n", "records = [json.loads(line) for line in lines]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from pandas import DataFrame, Series\n", "import pandas as pd\n", "\n", "frame = DataFrame(records)\n", "frame" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "frame['tz'][:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tz_counts = frame['tz'].value_counts()\n", "tz_counts[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "clean_tz = frame['tz'].fillna('Missing')\n", "clean_tz[clean_tz == ''] = 'Unknown'\n", "tz_counts = clean_tz.value_counts()\n", "tz_counts[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.figure(figsize=(10, 4))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tz_counts[:10].plot(kind='barh', rot=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "frame['a'][1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "frame['a'][50]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "frame['a'][51]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "results = Series([x.split()[0] for x in frame.a.dropna()])\n", "results[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "results.value_counts()[:8]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "cframe = frame[frame.a.notnull()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "operating_system = np.where(cframe['a'].str.contains('Windows'),\n", " 'Windows', 'Not Windows')\n", "operating_system[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "by_tz_os = cframe.groupby(['tz', operating_system])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "agg_counts = by_tz_os.size().unstack().fillna(0)\n", "agg_counts[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Use to sort in ascending order\n", "indexer = agg_counts.sum(1).argsort()\n", "indexer[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "count_subset = agg_counts.take(indexer)[-10:]\n", "count_subset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "count_subset.plot(kind='barh', stacked=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "normed_subset = count_subset.div(count_subset.sum(1), axis=0)\n", "normed_subset.plot(kind='barh', stacked=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## MovieLens 1M data set" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "encoding = 'latin1'\n", "\n", "upath = os.path.expanduser('ch02/movielens/users.dat')\n", "rpath = os.path.expanduser('ch02/movielens/ratings.dat')\n", "mpath = os.path.expanduser('ch02/movielens/movies.dat')\n", "\n", "unames = ['user_id', 'gender', 'age', 'occupation', 'zip']\n", "rnames = ['user_id', 'movie_id', 'rating', 'timestamp']\n", "mnames = ['movie_id', 'title', 'genres']\n", "\n", "users = pd.read_csv(upath, sep='::', header=None, names=unames, encoding=encoding)\n", "ratings = pd.read_csv(rpath, sep='::', header=None, names=rnames, encoding=encoding)\n", "movies = pd.read_csv(mpath, sep='::', header=None, names=mnames, encoding=encoding)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "users[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ratings[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "movies[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ratings" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = pd.merge(pd.merge(ratings, users), movies)\n", "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data.ix[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "mean_ratings = data.pivot_table('rating', index='title',\n", " columns='gender', aggfunc='mean')\n", "mean_ratings[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ratings_by_title = data.groupby('title').size()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ratings_by_title[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "active_titles = ratings_by_title.index[ratings_by_title >= 250]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "active_titles[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "mean_ratings = mean_ratings.ix[active_titles]\n", "mean_ratings" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "mean_ratings = mean_ratings.rename(index={'Seven Samurai (The Magnificent Seven) (Shichinin no samurai) (1954)':\n", " 'Seven Samurai (Shichinin no samurai) (1954)'})" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "top_female_ratings = mean_ratings.sort_index(by='F', ascending=False)\n", "top_female_ratings[:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Measuring rating disagreement" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "mean_ratings['diff'] = mean_ratings['M'] - mean_ratings['F']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "sorted_by_diff = mean_ratings.sort_index(by='diff')\n", "sorted_by_diff[:15]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Reverse order of rows, take first 15 rows\n", "sorted_by_diff[::-1][:15]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Standard deviation of rating grouped by title\n", "rating_std_by_title = data.groupby('title')['rating'].std()\n", "# Filter down to active_titles\n", "rating_std_by_title = rating_std_by_title.ix[active_titles]\n", "# Order Series by value in descending order\n", "rating_std_by_title.order(ascending=False)[:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### US Baby Names 1880-2010" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import division\n", "from numpy.random import randn\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "plt.rc('figure', figsize=(12, 5))\n", "np.set_printoptions(precision=4)\n", "%pwd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "http://www.ssa.gov/oact/babynames/limits.html" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "!head -n 10 ch02/names/yob1880.txt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd\n", "names1880 = pd.read_csv('ch02/names/yob1880.txt', names=['name', 'sex', 'births'])\n", "names1880" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "names1880.groupby('sex').births.sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# 2010 is the last available year right now\n", "years = range(1880, 2011)\n", "\n", "pieces = []\n", "columns = ['name', 'sex', 'births']\n", "\n", "for year in years:\n", " path = 'ch02/names/yob%d.txt' % year\n", " frame = pd.read_csv(path, names=columns)\n", "\n", " frame['year'] = year\n", " pieces.append(frame)\n", "\n", "# Concatenate everything into a single DataFrame\n", "names = pd.concat(pieces, ignore_index=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "total_births = names.pivot_table('births', index='year',\n", " columns='sex', aggfunc=sum)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "total_births.tail()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "total_births.plot(title='Total births by sex and year')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def add_prop(group):\n", " # Integer division floors\n", " births = group.births.astype(float)\n", "\n", " group['prop'] = births / births.sum()\n", " return group\n", "names = names.groupby(['year', 'sex']).apply(add_prop)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "names" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "np.allclose(names.groupby(['year', 'sex']).prop.sum(), 1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def get_top1000(group):\n", " return group.sort_index(by='births', ascending=False)[:1000]\n", "grouped = names.groupby(['year', 'sex'])\n", "top1000 = grouped.apply(get_top1000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pieces = []\n", "for year, group in names.groupby(['year', 'sex']):\n", " pieces.append(group.sort_index(by='births', ascending=False)[:1000])\n", "top1000 = pd.concat(pieces, ignore_index=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "top1000.index = np.arange(len(top1000))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "top1000" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Analyzing naming trends" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "boys = top1000[top1000.sex == 'M']\n", "girls = top1000[top1000.sex == 'F']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "total_births = top1000.pivot_table('births', index='year', columns='name',\n", " aggfunc=sum)\n", "total_births" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "subset = total_births[['John', 'Harry', 'Mary', 'Marilyn']]\n", "subset.plot(subplots=True, figsize=(12, 10), grid=False,\n", " title=\"Number of births per year\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Measuring the increase in naming diversity" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "table = top1000.pivot_table('prop', index='year',\n", " columns='sex', aggfunc=sum)\n", "table.plot(title='Sum of table1000.prop by year and sex',\n", " yticks=np.linspace(0, 1.2, 13), xticks=range(1880, 2020, 10))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = boys[boys.year == 2010]\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "prop_cumsum = df.sort_index(by='prop', ascending=False).prop.cumsum()\n", "prop_cumsum[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "prop_cumsum.values.searchsorted(0.5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = boys[boys.year == 1900]\n", "in1900 = df.sort_index(by='prop', ascending=False).prop.cumsum()\n", "in1900.values.searchsorted(0.5) + 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def get_quantile_count(group, q=0.5):\n", " group = group.sort_index(by='prop', ascending=False)\n", " return group.prop.cumsum().values.searchsorted(q) + 1\n", "\n", "diversity = top1000.groupby(['year', 'sex']).apply(get_quantile_count)\n", "diversity = diversity.unstack('sex')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def get_quantile_count(group, q=0.5):\n", " group = group.sort_index(by='prop', ascending=False)\n", " return group.prop.cumsum().values.searchsorted(q) + 1\n", "diversity = top1000.groupby(['year', 'sex']).apply(get_quantile_count)\n", "diversity = diversity.unstack('sex')\n", "diversity.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "diversity.plot(title=\"Number of popular names in top 50%\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### The \"Last letter\" Revolution" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# extract last letter from name column\n", "get_last_letter = lambda x: x[-1]\n", "last_letters = names.name.map(get_last_letter)\n", "last_letters.name = 'last_letter'\n", "\n", "table = names.pivot_table('births', index=last_letters,\n", " columns=['sex', 'year'], aggfunc=sum)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "subtable = table.reindex(columns=[1910, 1960, 2010], level='year')\n", "subtable.head()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "subtable.sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "letter_prop = subtable / subtable.sum().astype(float)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "\n", "fig, axes = plt.subplots(2, 1, figsize=(10, 8))\n", "letter_prop['M'].plot(kind='bar', rot=0, ax=axes[0], title='Male')\n", "letter_prop['F'].plot(kind='bar', rot=0, ax=axes[1], title='Female',\n", " legend=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.subplots_adjust(hspace=0.25)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "letter_prop = table / table.sum().astype(float)\n", "\n", "dny_ts = letter_prop.ix[['d', 'n', 'y'], 'M'].T\n", "dny_ts.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.close('all')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dny_ts.plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Boy names that became girl names (and vice versa)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "all_names = top1000.name.unique()\n", "mask = np.array(['lesl' in x.lower() for x in all_names])\n", "lesley_like = all_names[mask]\n", "lesley_like" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "filtered = top1000[top1000.name.isin(lesley_like)]\n", "filtered.groupby('name').births.sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "table = filtered.pivot_table('births', index='year',\n", " columns='sex', aggfunc='sum')\n", "table = table.div(table.sum(1), axis=0)\n", "table.tail()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.close('all')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "table.plot(style={'M': 'k-', 'F': 'k--'})" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }