{ "metadata": { "name": "", "signature": "sha256:b977da61c069914a8d98d54655498b9c0fd2461c76a90c2e94140efbd1fff69c" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import numpy as np\n", "from ggplot import *" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "df = pd.read_excel(\"1033-program-foia-may-2014.xlsx\")" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "df.columns" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "Index([u'State', u'County', u'NSN', u'Item Name', u'Quantity', u'UI', u'Acquisition Cost', u'Ship Date'], dtype='object')" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "df.info()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "<class 'pandas.core.frame.DataFrame'>\n", "Int64Index: 73028 entries, 0 to 73027\n", "Data columns (total 8 columns):\n", "State 73028 non-null object\n", "County 73028 non-null object\n", "NSN 72983 non-null object\n", "Item Name 71732 non-null object\n", "Quantity 73028 non-null int64\n", "UI 73028 non-null object\n", "Acquisition Cost 73028 non-null float64\n", "Ship Date 73028 non-null object\n", "dtypes: float64(1), int64(1), object(6)" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "df.describe()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Quantity</th>\n", " <th>Acquisition Cost</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td> 73028.000000</td>\n", " <td> 73028.000000</td>\n", " </tr>\n", " <tr>\n", " <th>mean</th>\n", " <td> 15.006792</td>\n", " <td> 7967.575490</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td> 384.623930</td>\n", " <td> 197293.243356</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td> 1.000000</td>\n", " <td> 0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td> 1.000000</td>\n", " <td> 58.710000</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td> 1.000000</td>\n", " <td> 200.000000</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td> 5.000000</td>\n", " <td> 499.000000</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td> 91000.000000</td>\n", " <td> 18000000.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ " Quantity Acquisition Cost\n", "count 73028.000000 73028.000000\n", "mean 15.006792 7967.575490\n", "std 384.623930 197293.243356\n", "min 1.000000 0.000000\n", "25% 1.000000 58.710000\n", "50% 1.000000 200.000000\n", "75% 5.000000 499.000000\n", "max 91000.000000 18000000.000000" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "# 91000? Is that right?\n", "df.sort(\"Quantity\", ascending=False)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>State</th>\n", " <th>County</th>\n", " <th>NSN</th>\n", " <th>Item Name</th>\n", " <th>Quantity</th>\n", " <th>UI</th>\n", " <th>Acquisition Cost</th>\n", " <th>Ship Date</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>52530</th>\n", " <td> CA</td>\n", " <td> VENTURA </td>\n", " <td> 6145-01-254-4040</td>\n", " <td> WIRE,ELECTRICAL</td>\n", " <td> 91000</td>\n", " <td> Foot</td>\n", " <td> 0.58</td>\n", " <td> 2013-10-02 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>36221</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 5305-01-205-0041</td>\n", " <td> SCREW,CAP,SOCKET HEAD</td>\n", " <td> 43822</td>\n", " <td> Each</td>\n", " <td> 0.23</td>\n", " <td> 2014-03-11 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>52399</th>\n", " <td> CA</td>\n", " <td> VENTURA </td>\n", " <td> 5975-01-331-5006</td>\n", " <td> STRAP,TIEDOWN,ELECTRICAL COMPONENTS</td>\n", " <td> 6000</td>\n", " <td> Each</td>\n", " <td> 0.27</td>\n", " <td> 2013-06-05 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>52536</th>\n", " <td> CA</td>\n", " <td> VENTURA </td>\n", " <td> 6145-DS-00C-ABLE</td>\n", " <td> CABLE COAX</td>\n", " <td> 6000</td>\n", " <td> FT</td>\n", " <td> 0.11</td>\n", " <td> 20120302</td>\n", " </tr>\n", " <tr>\n", " <th>39189</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 9320-00-641-3051</td>\n", " <td> RUBBER SHEET,SOLID</td>\n", " <td> 6000</td>\n", " <td> Each</td>\n", " <td> 10.82</td>\n", " <td> 2013-03-14 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>36226</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 5310-01-283-6216</td>\n", " <td> WASHER,FLAT</td>\n", " <td> 5181</td>\n", " <td> Each</td>\n", " <td> 0.23</td>\n", " <td> 2014-02-22 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>53473</th>\n", " <td> CA</td>\n", " <td> YOLO </td>\n", " <td> 6510-01-515-7528</td>\n", " <td> BANDAGE KIT</td>\n", " <td> 5000</td>\n", " <td> KT</td>\n", " <td> 7.76</td>\n", " <td> 20120217</td>\n", " </tr>\n", " <tr>\n", " <th>39190</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 9320-00-641-3051</td>\n", " <td> RUBBER SHEET,SOLID</td>\n", " <td> 5000</td>\n", " <td> Each</td>\n", " <td> 10.82</td>\n", " <td> 2013-03-14 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>65245</th>\n", " <td> FL</td>\n", " <td> BRADFORD </td>\n", " <td> 4240-00-165-5026</td>\n", " <td> FILTER ELEMENT SET,</td>\n", " <td> 4245</td>\n", " <td> SE</td>\n", " <td> 20.42</td>\n", " <td> 20090625</td>\n", " </tr>\n", " <tr>\n", " <th>37344</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 7340-00-243-5390</td>\n", " <td> SPOON,FIELD MESS</td>\n", " <td> 3824</td>\n", " <td> Each</td>\n", " <td> 3.21</td>\n", " <td> 2014-03-19 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>3520 </th>\n", " <td> AL</td>\n", " <td> CHEROKEE </td>\n", " <td> 5660-00-262-9914</td>\n", " <td> POST,FENCE,METAL</td>\n", " <td> 3306</td>\n", " <td> Each</td>\n", " <td> 8.49</td>\n", " <td> 2014-02-28 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>3517 </th>\n", " <td> AL</td>\n", " <td> CHEROKEE </td>\n", " <td> 5660-00-262-9914</td>\n", " <td> POST,FENCE,METAL</td>\n", " <td> 3306</td>\n", " <td> Each</td>\n", " <td> 8.49</td>\n", " <td> 2014-02-27 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>3516 </th>\n", " <td> AL</td>\n", " <td> CHEROKEE </td>\n", " <td> 5660-00-262-9914</td>\n", " <td> POST,FENCE,METAL</td>\n", " <td> 3306</td>\n", " <td> Each</td>\n", " <td> 8.49</td>\n", " <td> 2014-02-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>3518 </th>\n", " <td> AL</td>\n", " <td> CHEROKEE </td>\n", " <td> 5660-00-262-9914</td>\n", " <td> POST,FENCE,METAL</td>\n", " <td> 3306</td>\n", " <td> Each</td>\n", " <td> 8.49</td>\n", " <td> 2014-02-28 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>5861 </th>\n", " <td> AL</td>\n", " <td> CULLMAN </td>\n", " <td> 9540-01-014-0400</td>\n", " <td> BEAM,STRUCTURAL</td>\n", " <td> 3000</td>\n", " <td> Foot</td>\n", " <td> 27.79</td>\n", " <td> 2012-01-16 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>39621</th>\n", " <td> CA</td>\n", " <td> MENDOCINO </td>\n", " <td> 6145-01-254-4040</td>\n", " <td> WIRE,ELECTRICAL</td>\n", " <td> 3000</td>\n", " <td> Foot</td>\n", " <td> 0.57</td>\n", " <td> 2013-07-29 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>52535</th>\n", " <td> CA</td>\n", " <td> VENTURA </td>\n", " <td> 6145-DS-00C-ABLE</td>\n", " <td> CABLE VIDEO</td>\n", " <td> 3000</td>\n", " <td> FT</td>\n", " <td> 0.10</td>\n", " <td> 20120302</td>\n", " </tr>\n", " <tr>\n", " <th>36220</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 5305-01-174-7044</td>\n", " <td> SCREW,MACHINE</td>\n", " <td> 2905</td>\n", " <td> Each</td>\n", " <td> 9.74</td>\n", " <td> 2014-02-25 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>39188</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 8970-00-149-1094</td>\n", " <td> MEAL,INDIVIDUAL</td>\n", " <td> 2873</td>\n", " <td> Box</td>\n", " <td> 99.95</td>\n", " <td> 2014-04-02 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>36763</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 6515-00-967-6983</td>\n", " <td> SUTURE,NONABSORBABLE,SURGICAL</td>\n", " <td> 2755</td>\n", " <td> Package</td>\n", " <td> 42.98</td>\n", " <td> 2013-01-28 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>62059</th>\n", " <td> DE</td>\n", " <td> KENT </td>\n", " <td> 5305-01-253-2993</td>\n", " <td> SCREW,TAPPING</td>\n", " <td> 2580</td>\n", " <td> Each</td>\n", " <td> 0.35</td>\n", " <td> 2013-03-13 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>37482</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 7920-00-240-6350</td>\n", " <td> BROOM,WHISK</td>\n", " <td> 2240</td>\n", " <td> Each</td>\n", " <td> 5.37</td>\n", " <td> 2014-03-19 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>11002</th>\n", " <td> AL</td>\n", " <td> JEFFERSON </td>\n", " <td> 7340-00-240-7436</td>\n", " <td> KNIFE,FIELD MESS </td>\n", " <td> 2217</td>\n", " <td> EA</td>\n", " <td> 3.58</td>\n", " <td> 20120801</td>\n", " </tr>\n", " <tr>\n", " <th>11000</th>\n", " <td> AL</td>\n", " <td> JEFFERSON </td>\n", " <td> 7340-00-240-7436</td>\n", " <td> KNIFE,FIELD MESS</td>\n", " <td> 2217</td>\n", " <td> Each</td>\n", " <td> 3.58</td>\n", " <td> 2012-07-06 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>11006</th>\n", " <td> AL</td>\n", " <td> JEFFERSON </td>\n", " <td> 7340-00-243-5390</td>\n", " <td> SPOON,FIELD MESS </td>\n", " <td> 2205</td>\n", " <td> EA</td>\n", " <td> 3.06</td>\n", " <td> 20120801</td>\n", " </tr>\n", " <tr>\n", " <th>11003</th>\n", " <td> AL</td>\n", " <td> JEFFERSON </td>\n", " <td> 7340-00-243-5390</td>\n", " <td> SPOON,FIELD MESS</td>\n", " <td> 2205</td>\n", " <td> Each</td>\n", " <td> 3.06</td>\n", " <td> 2012-07-05 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>36049</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 5120-01-054-7141</td>\n", " <td> WRENCH,BOX AND OPEN END,COMBINATION</td>\n", " <td> 2190</td>\n", " <td> Each</td>\n", " <td> 15.01</td>\n", " <td> 2013-03-25 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>44883</th>\n", " <td> CA</td>\n", " <td> SACRAMENTO </td>\n", " <td> 9520-00-277-4901</td>\n", " <td> ANGLE,STRUCTURAL</td>\n", " <td> 2092</td>\n", " <td> Foot</td>\n", " <td> 2.12</td>\n", " <td> 2013-01-16 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>38709</th>\n", " <td> CA</td>\n", " <td> LOS ANGELES </td>\n", " <td> 8465-01-416-4634</td>\n", " <td> SPECTACLES,BALLISTIC AND LASER PROTECTIVE</td>\n", " <td> 2043</td>\n", " <td> Each</td>\n", " <td> 20.30</td>\n", " <td> 2012-03-25 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>21405</th>\n", " <td> AZ</td>\n", " <td> MARICOPA </td>\n", " <td> 1005-00-921-5004</td>\n", " <td> MAGAZINE,CARTRIDGE</td>\n", " <td> 2000</td>\n", " <td> Each</td>\n", " <td> 9.31</td>\n", " <td> 2013-06-21 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>29738</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29739</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29740</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29718</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29716</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29692</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-18 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29703</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29693</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-18 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29694</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-18 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29695</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29696</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29697</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29698</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29699</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29700</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29701</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29702</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29704</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29715</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29705</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29706</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29707</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29708</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29709</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29710</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29711</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29712</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29713</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>29714</th>\n", " <td> CA</td>\n", " <td> KERN </td>\n", " <td> 1005-00-073-9421</td>\n", " <td> RIFLE,5.56 MILLIMETER</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 499.00</td>\n", " <td> 2006-12-26 00:00:00</td>\n", " </tr>\n", " <tr>\n", " <th>73027</th>\n", " <td> FL</td>\n", " <td> WASHINGTON </td>\n", " <td> 7830-DS-TRE-ADM1</td>\n", " <td> TREADMILL</td>\n", " <td> 1</td>\n", " <td> Each</td>\n", " <td> 1500.00</td>\n", " <td> 2012-11-08 00:00:00</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>73028 rows \u00d7 8 columns</p>\n", "</div>" ], "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ " State County NSN \\\n", "52530 CA VENTURA 6145-01-254-4040 \n", "36221 CA LOS ANGELES 5305-01-205-0041 \n", "52399 CA VENTURA 5975-01-331-5006 \n", "52536 CA VENTURA 6145-DS-00C-ABLE \n", "39189 CA LOS ANGELES 9320-00-641-3051 \n", "36226 CA LOS ANGELES 5310-01-283-6216 \n", "53473 CA YOLO 6510-01-515-7528 \n", "39190 CA LOS ANGELES 9320-00-641-3051 \n", "65245 FL BRADFORD 4240-00-165-5026 \n", "37344 CA LOS ANGELES 7340-00-243-5390 \n", "3520 AL CHEROKEE 5660-00-262-9914 \n", "3517 AL CHEROKEE 5660-00-262-9914 \n", "3516 AL CHEROKEE 5660-00-262-9914 \n", "3518 AL CHEROKEE 5660-00-262-9914 \n", "5861 AL CULLMAN 9540-01-014-0400 \n", "39621 CA MENDOCINO 6145-01-254-4040 \n", "52535 CA VENTURA 6145-DS-00C-ABLE \n", "36220 CA LOS ANGELES 5305-01-174-7044 \n", "39188 CA LOS ANGELES 8970-00-149-1094 \n", "36763 CA LOS ANGELES 6515-00-967-6983 \n", "62059 DE KENT 5305-01-253-2993 \n", "37482 CA LOS ANGELES 7920-00-240-6350 \n", "11002 AL JEFFERSON 7340-00-240-7436 \n", "11000 AL JEFFERSON 7340-00-240-7436 \n", "11006 AL JEFFERSON 7340-00-243-5390 \n", "11003 AL JEFFERSON 7340-00-243-5390 \n", "36049 CA LOS ANGELES 5120-01-054-7141 \n", "44883 CA SACRAMENTO 9520-00-277-4901 \n", "38709 CA LOS ANGELES 8465-01-416-4634 \n", "21405 AZ MARICOPA 1005-00-921-5004 \n", "... ... ... ... \n", "29738 CA KERN 1005-00-073-9421 \n", "29739 CA KERN 1005-00-073-9421 \n", "29740 CA KERN 1005-00-073-9421 \n", "29718 CA KERN 1005-00-073-9421 \n", "29716 CA KERN 1005-00-073-9421 \n", "29692 CA KERN 1005-00-073-9421 \n", "29703 CA KERN 1005-00-073-9421 \n", "29693 CA KERN 1005-00-073-9421 \n", "29694 CA KERN 1005-00-073-9421 \n", "29695 CA KERN 1005-00-073-9421 \n", "29696 CA KERN 1005-00-073-9421 \n", "29697 CA KERN 1005-00-073-9421 \n", "29698 CA KERN 1005-00-073-9421 \n", "29699 CA KERN 1005-00-073-9421 \n", "29700 CA KERN 1005-00-073-9421 \n", "29701 CA KERN 1005-00-073-9421 \n", "29702 CA KERN 1005-00-073-9421 \n", "29704 CA KERN 1005-00-073-9421 \n", "29715 CA KERN 1005-00-073-9421 \n", "29705 CA KERN 1005-00-073-9421 \n", "29706 CA KERN 1005-00-073-9421 \n", "29707 CA KERN 1005-00-073-9421 \n", "29708 CA KERN 1005-00-073-9421 \n", "29709 CA KERN 1005-00-073-9421 \n", "29710 CA KERN 1005-00-073-9421 \n", "29711 CA KERN 1005-00-073-9421 \n", "29712 CA KERN 1005-00-073-9421 \n", "29713 CA KERN 1005-00-073-9421 \n", "29714 CA KERN 1005-00-073-9421 \n", "73027 FL WASHINGTON 7830-DS-TRE-ADM1 \n", "\n", " Item Name Quantity UI \\\n", "52530 WIRE,ELECTRICAL 91000 Foot \n", "36221 SCREW,CAP,SOCKET HEAD 43822 Each \n", "52399 STRAP,TIEDOWN,ELECTRICAL COMPONENTS 6000 Each \n", "52536 CABLE COAX 6000 FT \n", "39189 RUBBER SHEET,SOLID 6000 Each \n", "36226 WASHER,FLAT 5181 Each \n", "53473 BANDAGE KIT 5000 KT \n", "39190 RUBBER SHEET,SOLID 5000 Each \n", "65245 FILTER ELEMENT SET, 4245 SE \n", "37344 SPOON,FIELD MESS 3824 Each \n", "3520 POST,FENCE,METAL 3306 Each \n", "3517 POST,FENCE,METAL 3306 Each \n", "3516 POST,FENCE,METAL 3306 Each \n", "3518 POST,FENCE,METAL 3306 Each \n", "5861 BEAM,STRUCTURAL 3000 Foot \n", "39621 WIRE,ELECTRICAL 3000 Foot \n", "52535 CABLE VIDEO 3000 FT \n", "36220 SCREW,MACHINE 2905 Each \n", "39188 MEAL,INDIVIDUAL 2873 Box \n", "36763 SUTURE,NONABSORBABLE,SURGICAL 2755 Package \n", "62059 SCREW,TAPPING 2580 Each \n", "37482 BROOM,WHISK 2240 Each \n", "11002 KNIFE,FIELD MESS 2217 EA \n", "11000 KNIFE,FIELD MESS 2217 Each \n", "11006 SPOON,FIELD MESS 2205 EA \n", "11003 SPOON,FIELD MESS 2205 Each \n", "36049 WRENCH,BOX AND OPEN END,COMBINATION 2190 Each \n", "44883 ANGLE,STRUCTURAL 2092 Foot \n", "38709 SPECTACLES,BALLISTIC AND LASER PROTECTIVE 2043 Each \n", "21405 MAGAZINE,CARTRIDGE 2000 Each \n", "... ... ... ... \n", "29738 RIFLE,5.56 MILLIMETER 1 Each \n", "29739 RIFLE,5.56 MILLIMETER 1 Each \n", "29740 RIFLE,5.56 MILLIMETER 1 Each \n", "29718 RIFLE,5.56 MILLIMETER 1 Each \n", "29716 RIFLE,5.56 MILLIMETER 1 Each \n", "29692 RIFLE,5.56 MILLIMETER 1 Each \n", "29703 RIFLE,5.56 MILLIMETER 1 Each \n", "29693 RIFLE,5.56 MILLIMETER 1 Each \n", "29694 RIFLE,5.56 MILLIMETER 1 Each \n", "29695 RIFLE,5.56 MILLIMETER 1 Each \n", "29696 RIFLE,5.56 MILLIMETER 1 Each \n", "29697 RIFLE,5.56 MILLIMETER 1 Each \n", "29698 RIFLE,5.56 MILLIMETER 1 Each \n", "29699 RIFLE,5.56 MILLIMETER 1 Each \n", "29700 RIFLE,5.56 MILLIMETER 1 Each \n", "29701 RIFLE,5.56 MILLIMETER 1 Each \n", "29702 RIFLE,5.56 MILLIMETER 1 Each \n", "29704 RIFLE,5.56 MILLIMETER 1 Each \n", "29715 RIFLE,5.56 MILLIMETER 1 Each \n", "29705 RIFLE,5.56 MILLIMETER 1 Each \n", "29706 RIFLE,5.56 MILLIMETER 1 Each \n", "29707 RIFLE,5.56 MILLIMETER 1 Each \n", "29708 RIFLE,5.56 MILLIMETER 1 Each \n", "29709 RIFLE,5.56 MILLIMETER 1 Each \n", "29710 RIFLE,5.56 MILLIMETER 1 Each \n", "29711 RIFLE,5.56 MILLIMETER 1 Each \n", "29712 RIFLE,5.56 MILLIMETER 1 Each \n", "29713 RIFLE,5.56 MILLIMETER 1 Each \n", "29714 RIFLE,5.56 MILLIMETER 1 Each \n", "73027 TREADMILL 1 Each \n", "\n", " Acquisition Cost Ship Date \n", "52530 0.58 2013-10-02 00:00:00 \n", "36221 0.23 2014-03-11 00:00:00 \n", "52399 0.27 2013-06-05 00:00:00 \n", "52536 0.11 20120302 \n", "39189 10.82 2013-03-14 00:00:00 \n", "36226 0.23 2014-02-22 00:00:00 \n", "53473 7.76 20120217 \n", "39190 10.82 2013-03-14 00:00:00 \n", "65245 20.42 20090625 \n", "37344 3.21 2014-03-19 00:00:00 \n", "3520 8.49 2014-02-28 00:00:00 \n", "3517 8.49 2014-02-27 00:00:00 \n", "3516 8.49 2014-02-26 00:00:00 \n", "3518 8.49 2014-02-28 00:00:00 \n", "5861 27.79 2012-01-16 00:00:00 \n", "39621 0.57 2013-07-29 00:00:00 \n", "52535 0.10 20120302 \n", "36220 9.74 2014-02-25 00:00:00 \n", "39188 99.95 2014-04-02 00:00:00 \n", "36763 42.98 2013-01-28 00:00:00 \n", "62059 0.35 2013-03-13 00:00:00 \n", "37482 5.37 2014-03-19 00:00:00 \n", "11002 3.58 20120801 \n", "11000 3.58 2012-07-06 00:00:00 \n", "11006 3.06 20120801 \n", "11003 3.06 2012-07-05 00:00:00 \n", "36049 15.01 2013-03-25 00:00:00 \n", "44883 2.12 2013-01-16 00:00:00 \n", "38709 20.30 2012-03-25 00:00:00 \n", "21405 9.31 2013-06-21 00:00:00 \n", "... ... ... \n", "29738 499.00 2006-12-26 00:00:00 \n", "29739 499.00 2006-12-26 00:00:00 \n", "29740 499.00 2006-12-26 00:00:00 \n", "29718 499.00 2006-12-26 00:00:00 \n", "29716 499.00 2006-12-26 00:00:00 \n", "29692 499.00 2006-12-18 00:00:00 \n", "29703 499.00 2006-12-26 00:00:00 \n", "29693 499.00 2006-12-18 00:00:00 \n", "29694 499.00 2006-12-18 00:00:00 \n", "29695 499.00 2006-12-26 00:00:00 \n", "29696 499.00 2006-12-26 00:00:00 \n", "29697 499.00 2006-12-26 00:00:00 \n", "29698 499.00 2006-12-26 00:00:00 \n", "29699 499.00 2006-12-26 00:00:00 \n", "29700 499.00 2006-12-26 00:00:00 \n", "29701 499.00 2006-12-26 00:00:00 \n", "29702 499.00 2006-12-26 00:00:00 \n", "29704 499.00 2006-12-26 00:00:00 \n", "29715 499.00 2006-12-26 00:00:00 \n", "29705 499.00 2006-12-26 00:00:00 \n", "29706 499.00 2006-12-26 00:00:00 \n", "29707 499.00 2006-12-26 00:00:00 \n", "29708 499.00 2006-12-26 00:00:00 \n", "29709 499.00 2006-12-26 00:00:00 \n", "29710 499.00 2006-12-26 00:00:00 \n", "29711 499.00 2006-12-26 00:00:00 \n", "29712 499.00 2006-12-26 00:00:00 \n", "29713 499.00 2006-12-26 00:00:00 \n", "29714 499.00 2006-12-26 00:00:00 \n", "73027 1500.00 2012-11-08 00:00:00 \n", "\n", "[73028 rows x 8 columns]" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "# What's wrong with the above table? (Foot, Ship Date, No county codes)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "df.UI.value_counts()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "Each 51581\n", "EA 13370\n", "Pair 1381\n", "PR 1285\n", "Unknown 982\n", "Kit 732\n", "Set 463\n", "KT 441\n", "SE 360\n", "Package 355\n", "PG 288\n", "Box 246\n", "BX 190\n", "Assembly 140\n", "Roll 114\n", "...\n", "ST 2\n", "LG 1\n", "Skein 1\n", "CY 1\n", "Bolt 1\n", "MR 1\n", "MX 1\n", "Board Foot 1\n", "JR 1\n", "E4 1\n", "Cartridge 1\n", "SK 1\n", "UU 1\n", "SP 1\n", "Hank 1\n", "Length: 94, dtype: int64" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "df = df.replace({\"UI\":[\"EA\", \"EACH\", \"PR\", \"PG\", \"KT\", \"SE\", \"BX\", \"RO\", \"FT\", \"LT\", \"SL\", \"DZ\", \"HD\", \"AY\", \"CL\", \"LB\", \"RL\", \"SH\", \"CN\"]},\n", " {\"UI\":[\"Each\", \"Each\", \"Pair\", \"Package\", \"Kit\", \"Set\", \"Box\", \"Roll\", \"Foot\", \"Lot\", \"Spool\", \"Dozen\", \"Hundred\", \"Assembly\", \"Coil\", \"Pound\", \"Reel\", \"Sheet\", \"Container\"]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "df.UI.value_counts()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "Each 64956\n", "Pair 2666\n", "Kit 1173\n", "Unknown 982\n", "Set 823\n", "Package 643\n", "Box 436\n", "Assembly 206\n", "Roll 204\n", "Foot 182\n", "Lot 112\n", "Dozen 80\n", "Hundred 77\n", "Spool 63\n", "Pound 53\n", "...\n", "Square Foot 2\n", "UU 1\n", "SK 1\n", "LG 1\n", "Skein 1\n", "E4 1\n", "CY 1\n", "JR 1\n", "MX 1\n", "Board Foot 1\n", "Cartridge 1\n", "Hank 1\n", "Bolt 1\n", "MR 1\n", "SP 1\n", "Length: 75, dtype: int64" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "df = df.replace({\"UI\":\"Unknown\"}, {\"UI\":np.NaN})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "df.UI.value_counts()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ "Each 64956\n", "Pair 2666\n", "Kit 1173\n", "Set 823\n", "Package 643\n", "Box 436\n", "Assembly 206\n", "Roll 204\n", "Foot 182\n", "Lot 112\n", "Dozen 80\n", "Hundred 77\n", "Spool 63\n", "Reel 53\n", "Pound 53\n", "...\n", "Gross 2\n", "LG 1\n", "CY 1\n", "SK 1\n", "Hank 1\n", "Bolt 1\n", "MR 1\n", "MX 1\n", "Cartridge 1\n", "Board Foot 1\n", "JR 1\n", "E4 1\n", "Skein 1\n", "UU 1\n", "SP 1\n", "Length: 74, dtype: int64" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "df.UI.value_counts()[20:]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 12, "text": [ "Assortment 10\n", "GL 9\n", "Can 8\n", "OT 7\n", "BD 7\n", "Drum 7\n", "Plate 7\n", "AT 6\n", "PT 6\n", "DR 5\n", "Tube 5\n", "Bundle 5\n", "Strip 4\n", "CO 4\n", "Bale 4\n", "Outfit 4\n", "BP 4\n", "BG 4\n", "BE 4\n", "YD 4\n", "Thousand 3\n", "SF 3\n", "BA 3\n", "Bag 3\n", "CA 3\n", "TU 3\n", "Length 2\n", "PM 2\n", "Packet 2\n", "ST 2\n", "Pad 2\n", "Gallon 2\n", "Meter 2\n", "Square yard 2\n", "CT 2\n", "QT 2\n", "EE 2\n", "Square Foot 2\n", "SY 2\n", "Gross 2\n", "LG 1\n", "CY 1\n", "SK 1\n", "Hank 1\n", "Bolt 1\n", "MR 1\n", "MX 1\n", "Cartridge 1\n", "Board Foot 1\n", "JR 1\n", "E4 1\n", "Skein 1\n", "UU 1\n", "SP 1\n", "Length: 54, dtype: int64" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "dontcare = df.UI.value_counts()[20:]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "df = df[~df.UI.isin(dontcare.index)]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "date_col = df[\"Ship Date\"].astype(str)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "dates_a = pd.to_datetime(date_col[date_col.str.contains(\"(\\d{4}-\\d{2}-\\d{2})\")])\n", "dates_b = pd.to_datetime(date_col[date_col.str.contains(\"(\\d{4}\\d{2}\\d{2})\")])" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "/Users/makmana/scicompute/env/lib/python2.7/site-packages/pandas/core/strings.py:188: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.\n", " \" groups, use str.extract.\", UserWarning)\n" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "dates_new = pd.concat([dates_a.dropna(), dates_b.dropna()]).reindex_like(df)\n", "dates_new" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 22, "text": [ "0 2012-08-30\n", "1 2012-08-30\n", "2 2012-08-30\n", "3 2012-08-30\n", "4 2012-08-30\n", "5 2012-08-30\n", "6 2012-08-30\n", "7 2012-08-30\n", "8 2012-08-30\n", "9 2012-08-30\n", "10 2012-08-30\n", "11 2012-08-30\n", "12 2012-08-30\n", "13 2012-08-30\n", "14 2012-08-30\n", "...\n", "73013 2013-06-07\n", "73014 2013-06-07\n", "73015 2013-08-19\n", "73016 2008-04-15\n", "73017 2007-01-03\n", "73018 2007-01-03\n", "73019 2008-04-14\n", "73020 2008-04-14\n", "73021 2008-04-14\n", "73022 2008-04-14\n", "73023 2011-03-30\n", "73024 2011-03-30\n", "73025 2012-11-08\n", "73026 2012-11-08\n", "73027 2012-11-08\n", "Name: Ship Date, Length: 72854, dtype: datetime64[ns]" ] } ], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "df[\"Ship Date\"] = dates_new" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "# See what the costs look like" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "df[\"Acquisition Cost\"].hist(bins=100)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "df.sort(\"Acquisition Cost\")" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }