{
"metadata": {
"name": "",
"signature": "sha256:eba89a5303a3bfeb8cd3c5e2ffb4d81a96cbed610317fe5c18d05898ff4e3a35"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import folium as fm\n",
"import geopy\n",
"\n",
"from IPython.display import HTML\n",
"\n",
"from sklearn import linear_model, naive_bayes, feature_selection, metrics, tree\n",
"from sklearn.cross_validation import train_test_split\n",
"\n",
"%matplotlib inline"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tips_adj_df = pd.read_pickle('./dumps/tips_complete_features.pkl')\n",
"zipcode_dummies = pd.get_dummies(tips_adj_df['ZIPCODE'].astype(int))\n",
"tips_adj_df = tips_adj_df.join(zipcode_dummies)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 34
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Predicting Grade \"A\" Restaurants by zipcode"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"zip_score_list_A = []\n",
"\n",
"for zipcode in zipcode_dummies.columns.values:\n",
" tip_zip_subset = tips_adj_df[tips_adj_df[zipcode] == 1]\n",
" \n",
" X_adjs = tip_zip_subset.ix[:, 34:289]\n",
" y = tip_zip_subset['grade_A']\n",
" \n",
" clf = naive_bayes.MultinomialNB()\n",
" clf = clf.fit(X_adjs, y)\n",
" score = clf.score(X_adjs, y)\n",
" \n",
" zip_score_list_A.append({'zipcode': zipcode, 'score': score, 'tip_count': len(tip_zip_subset)})"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tip_by_zip_df_A = pd.DataFrame(zip_score_list_A)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 36
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"map = fm.Map(location=[40.714623, -74.006605], zoom_start=11, tiles='Stamen Toner')\n",
"map.geo_json(geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',\n",
" data=tip_by_zip_df_A, columns=['zipcode', 'score'],\n",
" key_on='feature.properties.postalCode',\n",
" threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],\n",
" fill_color='RdPu', fill_opacity=0.65, line_opacity=0.5)\n",
"map.create_map(path='foursquare_zips_all_A.html')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 37
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"HTML('')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 38,
"text": [
""
]
}
],
"prompt_number": 38
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tip_threshold_by_zip_A_df = tip_by_zip_df_A[tip_by_zip_df_A['tip_count'] >= 50]\n",
"\n",
"map = fm.Map(location=[40.714623, -74.006605], zoom_start=11, tiles='Stamen Toner')\n",
"map.geo_json(geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',\n",
" data=tip_threshold_by_zip_A_df, columns=['zipcode', 'score'],\n",
" key_on='feature.properties.postalCode',\n",
" threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],\n",
" fill_color='RdPu', fill_opacity=0.65, line_opacity=0.5)\n",
"map.create_map(path='foursquare_zips_threshold_A.html')\n",
"\n",
"HTML('')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 39,
"text": [
""
]
}
],
"prompt_number": 39
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Predicting Grade \"C\" Restaurants by zipcode"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"zip_score_list_C = []\n",
"\n",
"for zipcode in zipcode_dummies.columns.values:\n",
" tip_zip_subset = tips_adj_df[tips_adj_df[zipcode] == 1]\n",
" \n",
" X_adjs = tip_zip_subset.ix[:, 34:289]\n",
" y = tip_zip_subset['grade_C']\n",
" \n",
" clf = naive_bayes.MultinomialNB()\n",
" clf = clf.fit(X_adjs, y)\n",
" score = clf.score(X_adjs, y)\n",
" \n",
" zip_score_list_C.append({'zipcode': zipcode, 'score': score, 'tip_count': len(tip_zip_subset)})"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 40
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tip_by_zip_C_df = pd.DataFrame(zip_score_list_C)\n",
"\n",
"map = fm.Map(location=[40.714623, -74.006605], zoom_start=11, tiles='Stamen Toner')\n",
"map.geo_json(geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',\n",
" data=tip_by_zip_C_df, columns=['zipcode', 'score'],\n",
" key_on='feature.properties.postalCode',\n",
" threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],\n",
" fill_color='BuPu', fill_opacity=0.65, line_opacity=0.5)\n",
"map.create_map(path='foursquare_zips_all_C.html')\n",
"\n",
"HTML('')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 41,
"text": [
""
]
}
],
"prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tip_threshold_by_zip_C_df = tip_by_zip_C_df[tip_by_zip_C_df['tip_count'] >= 50]\n",
"\n",
"map = fm.Map(location=[40.714623, -74.006605], zoom_start=11, tiles='Stamen Toner')\n",
"map.geo_json(geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',\n",
" data=tip_threshold_by_zip_C_df, columns=['zipcode', 'score'],\n",
" key_on='feature.properties.postalCode',\n",
" threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],\n",
" fill_color='BuPu', fill_opacity=0.65, line_opacity=0.5)\n",
"map.create_map(path='foursquare_zips_threshold_C.html')\n",
"\n",
"HTML('')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 42,
"text": [
""
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}