{
 "metadata": {
  "gist_id": "b5342eea9ff20b79c5c7",
  "name": "",
  "signature": "sha256:e798cf5777d300cbeafbf9161efc637458788f88540772f52f7bd645306acc59"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Parallel Processing Example"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Email me: email.ryan.kelly@gmail.com"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy.random as npr\n",
      "import numpy as np\n",
      "import pandas as pd\n",
      "from pandas import DataFrame, date_range\n",
      "\n",
      "from datetime import datetime, timedelta"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "np.random.seed(111)\n",
      "\n",
      "# Function to generate test data\n",
      "def CreateDataSet(Number=1):\n",
      "    \"\"\"Build a DataFrame of randomly generated test records.\n",
      "\n",
      "    Every batch contributes one row per timestamp of an hourly date range\n",
      "    from 10/1/2012 to 10/31/2012. 'long' and 'lat' are drawn from normal\n",
      "    distributions (means 687993 and 4815862, scale 5000), 'status' is 0 or 1\n",
      "    and 'user' is picked from a fixed pool of six names.\n",
      "\n",
      "    The draws use NumPy's global random state (npr.normal / npr.randint),\n",
      "    so seed it before calling when reproducible data is needed.\n",
      "\n",
      "    Parameters\n",
      "    ----------\n",
      "    Number : int\n",
      "        Number of batches to generate. With 0 the loop never runs and an\n",
      "        empty DataFrame with the five columns is returned.\n",
      "\n",
      "    Returns\n",
      "    -------\n",
      "    pandas.DataFrame\n",
      "        Columns 'user', 'status', 'date', 'long', 'lat'.\n",
      "    \"\"\"\n",
      "    # These inputs are identical for every batch, so build them once\n",
      "    # instead of on each pass through the loop.\n",
      "\n",
      "    # Create a date range with hour frequency\n",
      "    date = date_range(start='10/1/2012', end='10/31/2012', freq='H')\n",
      "    rows = len(date)\n",
      "\n",
      "    # status of interest\n",
      "    status = [0, 1]\n",
      "\n",
      "    # user pool\n",
      "    user = ['sally', 'derik', 'james', 'bob', 'ryan', 'chris']\n",
      "\n",
      "    Output = []\n",
      "\n",
      "    for _ in range(Number):\n",
      "\n",
      "        # Create long lat data\n",
      "        laty = npr.normal(4815862, 5000, size=rows)\n",
      "        longx = npr.normal(687993, 5000, size=rows)\n",
      "\n",
      "        # Make a random list of statuses (one randint draw per row)\n",
      "        random_status = [status[npr.randint(low=0, high=len(status))] for _ in range(rows)]\n",
      "\n",
      "        # Make a random list of users (one randint draw per row)\n",
      "        random_user = [user[npr.randint(low=0, high=len(user))] for _ in range(rows)]\n",
      "\n",
      "        # Accumulate this batch as (user, status, date, long, lat) tuples\n",
      "        Output.extend(zip(random_user, random_status, date, longx, laty))\n",
      "\n",
      "    return pd.DataFrame(Output, columns=['user', 'status', 'date', 'long', 'lat'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data = CreateDataSet(20)\n",
      "data.head()"
     ],
     "language": "python",
"metadata": {}, "outputs": [ { "html": [ "
\n", " | user | \n", "status | \n", "date | \n", "long | \n", "lat | \n", "
---|---|---|---|---|---|
0 | \n", "ryan | \n", "0 | \n", "2012-10-01 00:00:00 | \n", "692823.716714 | \n", "4810192.808328 | \n", "
1 | \n", "ryan | \n", "1 | \n", "2012-10-01 01:00:00 | \n", "679549.965772 | \n", "4817783.595967 | \n", "
2 | \n", "bob | \n", "0 | \n", "2012-10-01 02:00:00 | \n", "686339.324152 | \n", "4823344.768882 | \n", "
3 | \n", "ryan | \n", "0 | \n", "2012-10-01 03:00:00 | \n", "677609.798732 | \n", "4814085.088514 | \n", "
4 | \n", "sally | \n", "1 | \n", "2012-10-01 04:00:00 | \n", "689556.379975 | \n", "4811924.332295 | \n", "
5 rows \u00d7 5 columns
\n", "