{ "metadata": { "name": "Pandas" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": "print \"Before Pandas\", len(dir())\nimport pandas\nfrom pandas import *\nprint \"After Pandas\", len(dir())\nfrom numpy import *\nprint \"After NumPy\", len(dir())", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "Before Pandas 21\nAfter Pandas" }, { "output_type": "stream", "stream": "stdout", "text": " 165\nAfter NumPy 706\n" } ], "prompt_number": 1 }, { "cell_type": "markdown", "metadata": {}, "source": "**Reading Data**" }, { "cell_type": "code", "collapsed": false, "input": "import urlparse\nimport httplib\nimport pandas\n\ncsv_data = \"http://priede.bf.lu.lv/ftp/grozs/Datorlietas/Geog5028/TIS_PRG/HomePlanet/ASTEROID.CSV\"\nfwf_data = \"http://ssd.jpl.nasa.gov/dat/ELEMENTS.NUMBR\"\ntax_data = \"http://sbn.psi.edu/ferret/reformatTable.action?productId=TAXONOMY10_TAB&dataSetId=EAR-A-5-DDR-TAXONOMY-V6.0\"\n\ndef save_file(location):\n    \"\"\"Download `location` over HTTP and save the response body under /tmp.\n\n    The file is named after the last component of the URL path, and the\n    chosen path is printed.\n\n    Returns the path of the saved file.\n    Raises IOError if the server does not answer with status 200.\n    \"\"\"\n    url = urlparse.urlparse(location)\n    # url.path alone drops the query string, so re-attach it for URLs\n    # such as tax_data that select their content with '?productId=...'.\n    target = url.path or '/'\n    if url.query:\n        target += '?' + url.query\n    connection = httplib.HTTPConnection(url.netloc)\n    try:\n        connection.connect()\n        connection.request('GET', target)\n        response = connection.getresponse()\n        # Do not save an error page as if it were data.\n        if response.status != httplib.OK:\n            raise IOError('GET %s failed: %d %s' % (location, response.status, response.reason))\n        data = response.read()\n    finally:\n        # Release the socket even when the request fails.\n        connection.close()\n    filename = '/tmp/' + url.path.split('/')[-1]\n    print filename\n    # 'wb' creates or truncates the file and writes the bytes unmodified.\n    with open(filename, 'wb') as tmp:\n        tmp.write(data)\n    return filename\n\n\n# Read NASA asteroid data\n# Uncomment to download a fresh copy instead of using the local file:\n#filename = save_file(fwf_data)\nfilename = \"data/ELEMENTS.NUMBR.txt\"\n# Line 2 of the file is used as a column ruler: each space-separated run\n# gives one column's width. Only the first two lines are read.\nwith open(filename, 'r') as f:\n    f.readline()\n    ruler = f.readline().rstrip('\\r\\n')\n# +1 extends each column over the single space that follows its run.\nwidths = [len(run) + 1 for run in ruler.split(' ')]\nasteroids_nasa = pandas.read_fwf(filename, widths=widths, skiprows=[1])\n\n# Fixed column widths for the taxonomy table, read directly from the URL.\nwidths = [11, 18, 11, 13, 13, 14, 14, 14, 14, 13, 13, 12, 12, 10, 10, 15, 15, 16, 15, 22]\nasteroids_taxonomy = pandas.read_fwf(tax_data, widths=widths, skiprows=[1])\n\n", "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": 
"code", "collapsed": false, "input": "print asteroids_nasa.xs(0)\nprint '\\n'\nprint asteroids_taxonomy.xs(0)", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "Num 1\nAST_NAME Ceres\nEpoch 56600\nSemimajor Axis 2.766807\nEccentricity 0.07579726\ni 10.59398\nw 72.29215\nNode 80.32764\nM 10.55758\nH 3.34\nG 0.12\nRef\\n JPL 32\\n\nName: 0, dtype: object\n\n\nAST_NUMBER 1\nAST_NAME Ceres\nPROV_ID -\nTHOLEN_CLASS G\nTHOLEN_PARAM 7G\nBARUCCI_CLASS G0\nBARUCCI_PARAM 7I\nTEDESCO_CLASS G?\nTEDESCO_PARAM 2I\nHOWELL_CLASS CvB\nHOWELL_PARAM 65\nSMASS_CLASS -\nSMASS_PARAM -\nBUS_CLASS C\nBUS_PARAM s\nS3OS2_CLASS_TH C\nS3OS2_CLASS_BB C\nBUS_DEMEO_CLASS C\nDEMEO_REF_CODE a\nCOMMENT \\n - \\n\nName: 0, dtype: object\n" } ], "prompt_number": 71 }, { "cell_type": "markdown", "metadata": {}, "source": "**Descriptions**" }, { "cell_type": "code", "collapsed": false, "input": "asteroids_nasa.ix[:, 0:2].head(10)", "language": "python", "metadata": {}, "outputs": [ { "html": "
\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NumName
0 1 Ceres
1 2 Pallas
2 3 Juno
3 4 Vesta
4 5 Astraea
5 6 Hebe
6 7 Iris
7 8 Flora
8 9 Metis
9 10 Hygiea
\n
", "output_type": "pyout", "prompt_number": 26, "text": " Num Name\n0 1 Ceres\n1 2 Pallas\n2 3 Juno\n3 4 Vesta\n4 5 Astraea\n5 6 Hebe\n6 7 Iris\n7 8 Flora\n8 9 Metis\n9 10 Hygiea" } ], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": "asteroids_nasa.describe()", "language": "python", "metadata": {}, "outputs": [ { "html": "
\n<class 'pandas.core.frame.DataFrame'>\nIndex: 8 entries, count to max\nData columns (total 10 columns):\nNum      8  non-null values\nEpoch    8  non-null values\na        8  non-null values\ne        8  non-null values\ni        8  non-null values\nw        8  non-null values\nNode     8  non-null values\nM        8  non-null values\nH        8  non-null values\nG        8  non-null values\ndtypes: float64(10)\n
", "output_type": "pyout", "prompt_number": 15, "text": " Num Epoch a e \\\ncount 369956.000000 369956.000000 369956.000000 369956.000000 \nmean 184978.500000 56599.954484 2.714804 0.141123 \nstd 106797.242431 12.140078 2.343523 0.071773 \nmin 1.000000 52045.000000 0.617616 0.000093 \n25% 92489.750000 56600.000000 2.381873 0.088329 \n50% 184978.500000 56600.000000 2.625409 0.135797 \n75% 277467.250000 56600.000000 2.949065 0.185448 \nmax 369956.000000 56600.000000 780.919318 0.969913 \n\n i w Node M \\\ncount 369956.000000 369956.000000 369956.000000 369956.000000 \nmean 7.861723 180.868751 168.365091 178.125203 \nstd 5.744538 103.414791 102.345112 103.393876 \nmin 0.004510 0.000010 0.001770 0.000071 \n25% 3.576205 91.075698 81.163162 89.246209 \n50% 6.322225 181.707315 160.348955 175.939274 \n75% 10.870740 270.507750 250.499795 267.171522 \nmax 170.352670 359.999930 359.999390 359.999558 \n\n H G \ncount 369956.000000 369956.000000 \nmean 15.733793 0.150010 \nstd 1.290917 0.002427 \nmin -1.190000 -0.250000 \n25% 15.020000 0.150000 \n50% 15.880000 0.150000 \n75% 16.610000 0.150000 \nmax 24.210000 0.600000 " } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": "asteroids_nasa.ix[:, 3].describe()", "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 18, "text": "count 369956.000000\nmean 2.714804\nstd 2.343523\nmin 0.617616\n25% 2.381873\n50% 2.625409\n75% 2.949065\nmax 780.919318\ndtype: float64" } ], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": "asteroids_nasa.ix[:, 'a'].describe()", "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 19, "text": "count 369956.000000\nmean 2.714804\nstd 2.343523\nmin 0.617616\n25% 2.381873\n50% 2.625409\n75% 2.949065\nmax 780.919318\ndtype: float64" } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": "asteroids_nasa.ix[:, 3:7].corr()", "language": "python", 
"metadata": {}, "outputs": [ { "html": "
\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
aeiw
a 1.000000 0.028508 0.057180 0.001323
e 0.028508 1.000000 0.102916 0.014825
i 0.057180 0.102916 1.000000-0.000701
w 0.001323 0.014825-0.000701 1.000000
\n
", "output_type": "pyout", "prompt_number": 30, "text": " a e i w\na 1.000000 0.028508 0.057180 0.001323\ne 0.028508 1.000000 0.102916 0.014825\ni 0.057180 0.102916 1.000000 -0.000701\nw 0.001323 0.014825 -0.000701 1.000000" } ], "prompt_number": 30 }, { "cell_type": "code", "collapsed": false, "input": "asteroids_nasa.rename(columns={'Name': 'AST_NAME', 'a': 'Semimajor Axis', 'e': 'Eccentricity'})", "language": "python", "metadata": {}, "outputs": [ { "html": "
\n<class 'pandas.core.frame.DataFrame'>\nInt64Index: 369956 entries, 0 to 369955\nData columns (total 12 columns):\nNum               369956  non-null values\nAST_NAME          369956  non-null values\nEpoch             369956  non-null values\nSemimajor Axis    369956  non-null values\nEccentricity      369956  non-null values\ni                 369956  non-null values\nw                 369956  non-null values\nNode              369956  non-null values\nM                 369956  non-null values\nH                 369956  non-null values\nG                 369956  non-null values\nRef\n              369956  non-null values\ndtypes: float64(8), int64(2), object(2)\n
", "output_type": "pyout", "prompt_number": 31, "text": "\nInt64Index: 369956 entries, 0 to 369955\nData columns (total 12 columns):\nNum 369956 non-null values\nAST_NAME 369956 non-null values\nEpoch 369956 non-null values\nSemimajor Axis 369956 non-null values\nEccentricity 369956 non-null values\ni 369956 non-null values\nw 369956 non-null values\nNode 369956 non-null values\nM 369956 non-null values\nH 369956 non-null values\nG 369956 non-null values\nRef\n 369956 non-null values\ndtypes: float64(8), int64(2), object(2)" } ], "prompt_number": 31 }, { "cell_type": "code", "collapsed": false, "input": "asteroids_nasa", "language": "python", "metadata": {}, "outputs": [ { "html": "
\n<class 'pandas.core.frame.DataFrame'>\nInt64Index: 369956 entries, 0 to 369955\nData columns (total 12 columns):\nNum      369956  non-null values\nName     369956  non-null values\nEpoch    369956  non-null values\na        369956  non-null values\ne        369956  non-null values\ni        369956  non-null values\nw        369956  non-null values\nNode     369956  non-null values\nM        369956  non-null values\nH        369956  non-null values\nG        369956  non-null values\nRef\n     369956  non-null values\ndtypes: float64(8), int64(2), object(2)\n
", "output_type": "pyout", "prompt_number": 32, "text": "\nInt64Index: 369956 entries, 0 to 369955\nData columns (total 12 columns):\nNum 369956 non-null values\nName 369956 non-null values\nEpoch 369956 non-null values\na 369956 non-null values\ne 369956 non-null values\ni 369956 non-null values\nw 369956 non-null values\nNode 369956 non-null values\nM 369956 non-null values\nH 369956 non-null values\nG 369956 non-null values\nRef\n 369956 non-null values\ndtypes: float64(8), int64(2), object(2)" } ], "prompt_number": 32 }, { "cell_type": "code", "collapsed": false, "input": "asteroids_nasa.rename(columns={'Name': 'AST_NAME', 'a': 'Semimajor Axis', 'e': 'Eccentricity'}, inplace=True)", "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": "asteroids_nasa", "language": "python", "metadata": {}, "outputs": [ { "html": "
\n<class 'pandas.core.frame.DataFrame'>\nInt64Index: 369956 entries, 0 to 369955\nData columns (total 12 columns):\nNum               369956  non-null values\nAST_NAME          369956  non-null values\nEpoch             369956  non-null values\nSemimajor Axis    369956  non-null values\nEccentricity      369956  non-null values\ni                 369956  non-null values\nw                 369956  non-null values\nNode              369956  non-null values\nM                 369956  non-null values\nH                 369956  non-null values\nG                 369956  non-null values\nRef\n              369956  non-null values\ndtypes: float64(8), int64(2), object(2)\n
", "output_type": "pyout", "prompt_number": 34, "text": "\nInt64Index: 369956 entries, 0 to 369955\nData columns (total 12 columns):\nNum 369956 non-null values\nAST_NAME 369956 non-null values\nEpoch 369956 non-null values\nSemimajor Axis 369956 non-null values\nEccentricity 369956 non-null values\ni 369956 non-null values\nw 369956 non-null values\nNode 369956 non-null values\nM 369956 non-null values\nH 369956 non-null values\nG 369956 non-null values\nRef\n 369956 non-null values\ndtypes: float64(8), int64(2), object(2)" } ], "prompt_number": 34 }, { "cell_type": "code", "collapsed": false, "input": "asteroids_taxonomy", "language": "python", "metadata": {}, "outputs": [ { "html": "
\n<class 'pandas.core.frame.DataFrame'>\nInt64Index: 2615 entries, 0 to 2614\nData columns (total 20 columns):\nAST_NUMBER                2615  non-null values\nAST_NAME                  2615  non-null values\nPROV_ID                   2615  non-null values\nTHOLEN_CLASS              2615  non-null values\nTHOLEN_PARAM              2615  non-null values\nBARUCCI_CLASS             2615  non-null values\nBARUCCI_PARAM             2615  non-null values\nTEDESCO_CLASS             2615  non-null values\nTEDESCO_PARAM             2615  non-null values\nHOWELL_CLASS              2615  non-null values\nHOWELL_PARAM              2615  non-null values\nSMASS_CLASS               2615  non-null values\nSMASS_PARAM               2615  non-null values\nBUS_CLASS                 2615  non-null values\nBUS_PARAM                 2615  non-null values\nS3OS2_CLASS_TH            2615  non-null values\nS3OS2_CLASS_BB            2615  non-null values\nBUS_DEMEO_CLASS           2615  non-null values\nDEMEO_REF_CODE            2615  non-null values\nCOMMENT              \n    2615  non-null values\ndtypes: int64(1), object(19)\n
", "output_type": "pyout", "prompt_number": 55, "text": "\nInt64Index: 2615 entries, 0 to 2614\nData columns (total 20 columns):\nAST_NUMBER 2615 non-null values\nAST_NAME 2615 non-null values\nPROV_ID 2615 non-null values\nTHOLEN_CLASS 2615 non-null values\nTHOLEN_PARAM 2615 non-null values\nBARUCCI_CLASS 2615 non-null values\nBARUCCI_PARAM 2615 non-null values\nTEDESCO_CLASS 2615 non-null values\nTEDESCO_PARAM 2615 non-null values\nHOWELL_CLASS 2615 non-null values\nHOWELL_PARAM 2615 non-null values\nSMASS_CLASS 2615 non-null values\nSMASS_PARAM 2615 non-null values\nBUS_CLASS 2615 non-null values\nBUS_PARAM 2615 non-null values\nS3OS2_CLASS_TH 2615 non-null values\nS3OS2_CLASS_BB 2615 non-null values\nBUS_DEMEO_CLASS 2615 non-null values\nDEMEO_REF_CODE 2615 non-null values\nCOMMENT \n 2615 non-null values\ndtypes: int64(1), object(19)" } ], "prompt_number": 55 }, { "cell_type": "code", "collapsed": false, "input": "merged = asteroids_nasa.merge(asteroids_taxonomy, on='AST_NAME')\nprint merged", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "\nInt64Index: 2415 entries, 0 to 2414\nData columns (total 31 columns):\nNum 2415 non-null values\nAST_NAME 2415 non-null values\nEpoch 2415 non-null values\nSemimajor Axis 2415 non-null values\nEccentricity 2415 non-null values\ni 2415 non-null values\nw 2415 non-null values\nNode 2415 non-null values\nM 2415 non-null values\nH 2415 non-null values\nG 2415 non-null values\nRef\n 2415 non-null values\nAST_NUMBER 2415 non-null values\nPROV_ID 2415 non-null values\nTHOLEN_CLASS 2415 non-null values\nTHOLEN_PARAM 2415 non-null values\nBARUCCI_CLASS 2415 non-null values\nBARUCCI_PARAM 2415 non-null values\nTEDESCO_CLASS 2415 non-null values\nTEDESCO_PARAM 2415 non-null values\nHOWELL_CLASS 2415 non-null values\nHOWELL_PARAM 2415 non-null values\nSMASS_CLASS 2415 non-null values\nSMASS_PARAM 2415 non-null values\nBUS_CLASS 2415 non-null values\nBUS_PARAM 2415 non-null 
values\nS3OS2_CLASS_TH 2415 non-null values\nS3OS2_CLASS_BB 2415 non-null values\nBUS_DEMEO_CLASS 2415 non-null values\nDEMEO_REF_CODE 2415 non-null values\nCOMMENT \n 2415 non-null values\ndtypes: float64(8), int64(3), object(20)\n" } ], "prompt_number": 63 }, { "cell_type": "code", "collapsed": false, "input": "merged.pop('Ref\\n')", "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 64, "text": "0 JPL 32\\n\n1 JPL 26\\n\n2 JPL 102\\n\n3 JPL 33\\n\n4 JPL 83\\n\n5 JPL 82\\n\n6 JPL 105\\n\n7 JPL 90\\n\n8 JPL 90\\n\n9 JPL 85\\n\n10 JPL 70\\n\n11 JPL 87\\n\n12 JPL 60\\n\n13 JPL 57\\n\n14 JPL 70\\n\n...\n2400 JPL 21\\n\n2401 JPL 2\\n\n2402 JPL 3\\n\n2403 JPL 50\\n\n2404 JPL 120\\n\n2405 JPL 1\\n\n2406 JPL 31\\n\n2407 JPL 1\\n\n2408 JPL 3\\n\n2409 JPL 1\\n\n2410 JPL 3\\n\n2411 JPL 1\\n\n2412 JPL 1\\n\n2413 JPL 1\\n\n2414 JPL 127\\n\nName: Ref\n, Length: 2415, dtype: object" } ], "prompt_number": 64 }, { "cell_type": "code", "collapsed": false, "input": "merged.pop('COMMENT \\n')", "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 65, "text": "0 - \\n\n1 - \\n\n2 - \\n\n3 - \\n\n4 - \\n\n5 - \\n\n6 - \\n\n7 - \\n\n8 - \\n\n9 - \\n\n10 - \\n\n11 - \\n\n12 - \\n\n13 - \\n\n14 - \\n\n...\n2400 - \\n\n2401 - \\n\n2402 - \\n\n2403 - \\n\n2404 - \\n\n2405 - \\n\n2406 - \\n\n2407 - \\n\n2408 - \\n\n2409 - \\n\n2410 - \\n\n2411 - \\n\n2412 - \\n\n2413 - \\n\n2414 - \\n\nName: COMMENT \n, Length: 2415, dtype: object" } ], "prompt_number": 65 }, { "cell_type": "code", "collapsed": false, "input": "merged.columns", "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 66, "text": "Index([Num, AST_NAME, Epoch, Semimajor Axis, Eccentricity, i, w, Node, M, H, G, AST_NUMBER, PROV_ID, THOLEN_CLASS, THOLEN_PARAM, BARUCCI_CLASS, BARUCCI_PARAM, TEDESCO_CLASS, TEDESCO_PARAM, HOWELL_CLASS, HOWELL_PARAM, SMASS_CLASS, SMASS_PARAM, BUS_CLASS, BUS_PARAM, S3OS2_CLASS_TH, 
S3OS2_CLASS_BB, BUS_DEMEO_CLASS, DEMEO_REF_CODE], dtype=object)" } ], "prompt_number": 66 }, { "cell_type": "code", "collapsed": false, "input": "ref_codes = merged.pop('DEMEO_REF_CODE')\nmerged", "language": "python", "metadata": {}, "outputs": [ { "html": "
\n<class 'pandas.core.frame.DataFrame'>\nInt64Index: 2415 entries, 0 to 2414\nData columns (total 28 columns):\nNum                2415  non-null values\nAST_NAME           2415  non-null values\nEpoch              2415  non-null values\nSemimajor Axis     2415  non-null values\nEccentricity       2415  non-null values\ni                  2415  non-null values\nw                  2415  non-null values\nNode               2415  non-null values\nM                  2415  non-null values\nH                  2415  non-null values\nG                  2415  non-null values\nAST_NUMBER         2415  non-null values\nPROV_ID            2415  non-null values\nTHOLEN_CLASS       2415  non-null values\nTHOLEN_PARAM       2415  non-null values\nBARUCCI_CLASS      2415  non-null values\nBARUCCI_PARAM      2415  non-null values\nTEDESCO_CLASS      2415  non-null values\nTEDESCO_PARAM      2415  non-null values\nHOWELL_CLASS       2415  non-null values\nHOWELL_PARAM       2415  non-null values\nSMASS_CLASS        2415  non-null values\nSMASS_PARAM        2415  non-null values\nBUS_CLASS          2415  non-null values\nBUS_PARAM          2415  non-null values\nS3OS2_CLASS_TH     2415  non-null values\nS3OS2_CLASS_BB     2415  non-null values\nBUS_DEMEO_CLASS    2415  non-null values\ndtypes: float64(8), int64(3), object(17)\n
", "output_type": "pyout", "prompt_number": 67, "text": "\nInt64Index: 2415 entries, 0 to 2414\nData columns (total 28 columns):\nNum 2415 non-null values\nAST_NAME 2415 non-null values\nEpoch 2415 non-null values\nSemimajor Axis 2415 non-null values\nEccentricity 2415 non-null values\ni 2415 non-null values\nw 2415 non-null values\nNode 2415 non-null values\nM 2415 non-null values\nH 2415 non-null values\nG 2415 non-null values\nAST_NUMBER 2415 non-null values\nPROV_ID 2415 non-null values\nTHOLEN_CLASS 2415 non-null values\nTHOLEN_PARAM 2415 non-null values\nBARUCCI_CLASS 2415 non-null values\nBARUCCI_PARAM 2415 non-null values\nTEDESCO_CLASS 2415 non-null values\nTEDESCO_PARAM 2415 non-null values\nHOWELL_CLASS 2415 non-null values\nHOWELL_PARAM 2415 non-null values\nSMASS_CLASS 2415 non-null values\nSMASS_PARAM 2415 non-null values\nBUS_CLASS 2415 non-null values\nBUS_PARAM 2415 non-null values\nS3OS2_CLASS_TH 2415 non-null values\nS3OS2_CLASS_BB 2415 non-null values\nBUS_DEMEO_CLASS 2415 non-null values\ndtypes: float64(8), int64(3), object(17)" } ], "prompt_number": 67 }, { "cell_type": "code", "collapsed": false, "input": "merged.insert(11, 'DEMEO_REF_CODE', ref_codes)", "language": "python", "metadata": {}, "outputs": [], "prompt_number": 68 }, { "cell_type": "code", "collapsed": false, "input": "merged", "language": "python", "metadata": {}, "outputs": [ { "html": "
\n<class 'pandas.core.frame.DataFrame'>\nInt64Index: 2415 entries, 0 to 2414\nData columns (total 29 columns):\nNum                2415  non-null values\nAST_NAME           2415  non-null values\nEpoch              2415  non-null values\nSemimajor Axis     2415  non-null values\nEccentricity       2415  non-null values\ni                  2415  non-null values\nw                  2415  non-null values\nNode               2415  non-null values\nM                  2415  non-null values\nH                  2415  non-null values\nG                  2415  non-null values\nDEMEO_REF_CODE     2415  non-null values\nAST_NUMBER         2415  non-null values\nPROV_ID            2415  non-null values\nTHOLEN_CLASS       2415  non-null values\nTHOLEN_PARAM       2415  non-null values\nBARUCCI_CLASS      2415  non-null values\nBARUCCI_PARAM      2415  non-null values\nTEDESCO_CLASS      2415  non-null values\nTEDESCO_PARAM      2415  non-null values\nHOWELL_CLASS       2415  non-null values\nHOWELL_PARAM       2415  non-null values\nSMASS_CLASS        2415  non-null values\nSMASS_PARAM        2415  non-null values\nBUS_CLASS          2415  non-null values\nBUS_PARAM          2415  non-null values\nS3OS2_CLASS_TH     2415  non-null values\nS3OS2_CLASS_BB     2415  non-null values\nBUS_DEMEO_CLASS    2415  non-null values\ndtypes: float64(8), int64(3), object(18)\n
", "output_type": "pyout", "prompt_number": 69, "text": "\nInt64Index: 2415 entries, 0 to 2414\nData columns (total 29 columns):\nNum 2415 non-null values\nAST_NAME 2415 non-null values\nEpoch 2415 non-null values\nSemimajor Axis 2415 non-null values\nEccentricity 2415 non-null values\ni 2415 non-null values\nw 2415 non-null values\nNode 2415 non-null values\nM 2415 non-null values\nH 2415 non-null values\nG 2415 non-null values\nDEMEO_REF_CODE 2415 non-null values\nAST_NUMBER 2415 non-null values\nPROV_ID 2415 non-null values\nTHOLEN_CLASS 2415 non-null values\nTHOLEN_PARAM 2415 non-null values\nBARUCCI_CLASS 2415 non-null values\nBARUCCI_PARAM 2415 non-null values\nTEDESCO_CLASS 2415 non-null values\nTEDESCO_PARAM 2415 non-null values\nHOWELL_CLASS 2415 non-null values\nHOWELL_PARAM 2415 non-null values\nSMASS_CLASS 2415 non-null values\nSMASS_PARAM 2415 non-null values\nBUS_CLASS 2415 non-null values\nBUS_PARAM 2415 non-null values\nS3OS2_CLASS_TH 2415 non-null values\nS3OS2_CLASS_BB 2415 non-null values\nBUS_DEMEO_CLASS 2415 non-null values\ndtypes: float64(8), int64(3), object(18)" } ], "prompt_number": 69 }, { "cell_type": "code", "collapsed": false, "input": "from pandas.stats.api import ols\nmodel = ols(y=asteroids_nasa.xs('Semimajor Axis', axis=1), x=asteroids_nasa.xs('i', axis=1))\nprint model", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "\n-------------------------Summary of Regression Analysis-------------------------\n\nFormula: Y ~ + \n\nNumber of Observations: 369956\nNumber of Degrees of Freedom: 2\n\nR-squared: 0.0033\nAdj R-squared: 0.0033\n\nRmse: 2.3397\n\nF-stat (1, 369954): 1213.5610, p-value: 0.0000\n\nDegrees of Freedom: model 1, resid 369954\n\n-----------------------Summary of Estimated Coefficients------------------------\n Variable Coef Std Err t-stat p-value CI 2.5% CI 97.5%\n--------------------------------------------------------------------------------\n x 0.0233 0.0007 
34.84 0.0000 0.0220 0.0246\n intercept 2.5314 0.0065 388.25 0.0000 2.5186 2.5442\n---------------------------------End of Summary---------------------------------\n\n" } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": "import matplotlib.pyplot as plt\n", "language": "python", "metadata": {}, "outputs": [ { "ename": "ImportError", "evalue": "No module named matplotlib.pyplot", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mImportError\u001b[0m: No module named matplotlib.pyplot" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": "", "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }