{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "http://shanghai.sodachallenges.com/data.html" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:12:10.971924", "start_time": "2017-07-19T16:12:10.221101" }, "collapsed": true }, "outputs": [], "source": [ "import os\n", "\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:14:57.643991", "start_time": "2017-07-19T16:14:57.058263" } }, "outputs": [], "source": [ "# Path to the Mobike SODA sample CSV. Defaults to the original local path;\n", "# set the SODA_MOBIKE_CSV environment variable to read the file from elsewhere.\n", "DATA_PATH = os.environ.get(\n", "    'SODA_MOBIKE_CSV',\n", "    '/Users/chengjun/bigdata/soda-2017-sample/2017/Mobike_SODA_Sample/shanghai_sample.csv',\n", ")\n", "df = pd.read_csv(DATA_PATH)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:15:01.772053", "start_time": "2017-07-19T16:15:01.728675" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orderidbikeiduseridstart_timestart_location_xstart_location_yend_timeend_location_xend_location_ytrack
057099471747172016-08-02 17:23:43121.41031.3062016-08-02 17:31:08121.40231.311121.402,31.310#121.402,31.311#121.403,31.309#1...
11652624134132016-08-04 20:17:09121.40731.2882016-08-04 20:19:58121.40831.291121.407,31.288#121.408,31.289#121.408,31.290#1...
28023399339932016-08-02 08:16:52121.46231.3122016-08-02 08:25:10121.44931.305121.449,31.305#121.450,31.305#121.451,31.305#1...
3148871134013402016-08-05 18:37:20121.53131.3132016-08-05 18:45:19121.51931.308121.519,31.308#121.519,31.309#121.520,31.309#1...
410222417441742016-08-06 11:04:50121.40131.2322016-08-06 11:21:04121.37631.237121.374,31.234#121.375,31.233#121.375,31.235#1...
\n", "
" ], "text/plain": [ " orderid bikeid userid start_time start_location_x \\\n", "0 57099 4717 4717 2016-08-02 17:23:43 121.410 \n", "1 165262 413 413 2016-08-04 20:17:09 121.407 \n", "2 8023 3993 3993 2016-08-02 08:16:52 121.462 \n", "3 148871 1340 1340 2016-08-05 18:37:20 121.531 \n", "4 10222 4174 4174 2016-08-06 11:04:50 121.401 \n", "\n", " start_location_y end_time end_location_x end_location_y \\\n", "0 31.306 2016-08-02 17:31:08 121.402 31.311 \n", "1 31.288 2016-08-04 20:19:58 121.408 31.291 \n", "2 31.312 2016-08-02 08:25:10 121.449 31.305 \n", "3 31.313 2016-08-05 18:45:19 121.519 31.308 \n", "4 31.232 2016-08-06 11:21:04 121.376 31.237 \n", "\n", " track \n", "0 121.402,31.310#121.402,31.311#121.403,31.309#1... \n", "1 121.407,31.288#121.408,31.289#121.408,31.290#1... \n", "2 121.449,31.305#121.450,31.305#121.451,31.305#1... \n", "3 121.519,31.308#121.519,31.309#121.520,31.309#1... \n", "4 121.374,31.234#121.375,31.233#121.375,31.235#1... " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:16:28.677109", "start_time": "2017-07-19T16:16:28.672381" } }, "outputs": [ { "data": { "text/plain": [ "101259" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T17:16:13.982688", "start_time": "2017-07-19T17:16:13.966004" } }, "outputs": [ { "data": { "text/plain": [ "5054 37\n", "6913 36\n", "101 35\n", "1718 35\n", "726 35\n", "5290 35\n", "6590 33\n", "3627 33\n", "6354 33\n", "1011 32\n", "5235 32\n", "2439 32\n", "3651 32\n", "197 31\n", "2899 31\n", "5447 31\n", "5928 31\n", "4591 31\n", "5858 31\n", "5663 31\n", "6131 31\n", "4518 31\n", "20 30\n", "4550 30\n", "1491 30\n", "2483 30\n", "6337 30\n", "4714 30\n", "2869 30\n", "6239 30\n", " ..\n", 
"7627 1\n", "9137 1\n", "800 1\n", "9073 1\n", "8868 1\n", "7947 1\n", "3913 1\n", "8075 1\n", "2676 1\n", "8831 1\n", "8612 1\n", "8867 1\n", "8548 1\n", "8973 1\n", "9008 1\n", "2366 1\n", "8305 1\n", "5259 1\n", "1289 1\n", "7836 1\n", "3046 1\n", "1439 1\n", "8462 1\n", "2379 1\n", "7268 1\n", "8590 1\n", "714 1\n", "9038 1\n", "9166 1\n", "9056 1\n", "Name: userid, dtype: int64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[\"userid\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:30:32.456324", "start_time": "2017-07-19T16:30:17.198730" }, "collapsed": true }, "outputs": [], "source": [ "# Pair the coordinate columns directly instead of label-indexing the frame once per row.\n", "# Points are (y, x) tuples; .values iterates the underlying NumPy arrays in row order.\n", "starts = list(zip(df.start_location_y.values, df.start_location_x.values))\n", "ends = list(zip(df.end_location_y.values, df.end_location_x.values))\n", "\n", "# One [start, end] pair per trip, in the same row order as df.\n", "edges = [[start, end] for start, end in zip(starts, ends)]" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T17:27:52.476325", "start_time": "2017-07-19T17:27:52.467410" }, "collapsed": true }, "outputs": [], "source": [ "# NOTE(review): the *5054 names suggest userid 5054 (the most frequent userid in the\n", "# value_counts output), but the filter selects userid 20. Confirm which user is\n", "# intended before changing either the names or the id.\n", "df5054 = df[df['userid'] == 20]\n", "\n", "# One [start, end] pair of (y, x) points per selected trip.\n", "edges5054 = [\n", "    [(start_y, start_x), (end_y, end_x)]\n", "    for start_y, start_x, end_y, end_x in zip(\n", "        df5054.start_location_y.values, df5054.start_location_x.values,\n", "        df5054.end_location_y.values, df5054.end_location_x.values)\n", "]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:28:38.687108", "start_time": "2017-07-19T16:28:38.681646" } }, "outputs": [ { "data": { "text/plain": [ "[(31.305999999999997, 121.41),\n", " (31.288, 121.40700000000001),\n", " (31.311999999999998, 121.462),\n", " (31.313000000000002, 121.53100000000001),\n", " (31.231999999999999, 121.40100000000001)]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "starts[:5]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:23:52.614184", "start_time": "2017-07-19T16:23:52.378251" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/chengjun/anaconda/lib/python2.7/site-packages/folium/__init__.pyc\n", "0.2.0\n" ] } ], "source": [ "import folium, jinja2, vincent\n", "from IPython.display import IFrame\n", "from IPython.core.display import HTML\n", "\n", "# Record which folium installation and version this notebook ran against.\n", "print(folium.__file__)\n", "print(folium.__version__)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:24:30.392651", "start_time": "2017-07-19T16:24:30.270568" }, "collapsed": true }, "outputs": [], "source": [ "# Base map: centre point and initial zoom level passed to folium.Map.\n", "map_center = (31.306, 121.410)\n", "map_zoom = 10\n", "shanghai = folium.Map(location=map_center, zoom_start=map_zoom)\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:24:38.169497", "start_time": "2017-07-19T16:24:38.160118" } }, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "shanghai" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T17:49:24.475791", "start_time": "2017-07-19T17:49:24.340200" } }, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from folium import plugins\n", "\n", "# Heat map layer built from the first 1000 entries of starts.\n", "start_heat = plugins.HeatMap(starts[:1000])\n", "\n", "shanghai = folium.Map(location=(31.306, 121.410), zoom_start=10)\n", "shanghai.add_children(start_heat)\n", "\n", "shanghai" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:40:50.965285", "start_time": "2017-07-19T16:40:41.737465" } }, "outputs": [], "source": [ "shanghai = folium.Map(location=(31.306, 121.410), zoom_start=10)\n", "\n", "# Add one PolyLine per [start, end] pair for the first 1000 entries of edges.\n", "for trip_edge in edges[:1000]:\n", "    shanghai.add_children(folium.PolyLine(locations=trip_edge))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T16:40:53.566239", "start_time": "2017-07-19T16:40:52.568404" } }, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "shanghai" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T17:27:57.804443", "start_time": "2017-07-19T17:27:57.400991" } }, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "shanghai5054 = folium.Map(location=(31.306, 121.410), zoom_start=10)\n", "\n", "# Add one PolyLine per [start, end] pair collected in edges5054.\n", "for trip_edge in edges5054:\n", "    shanghai5054.add_children(folium.PolyLine(locations=trip_edge))\n", "\n", "shanghai5054" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:anaconda]", "language": "python", "name": "conda-env-anaconda-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" }, "latex_envs": { "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 0 }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 1 }