{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# 6. \ub370\uc774\ud130 \ub85c\ub529, \uc800\uc7a5, \ud30c\uc77c \ud615\uc2dd\n",
      "\n",
      "### \uc785\u2219\ucd9c\ub825 \ubc29\ubc95\n",
      "\n",
      "- \ud14d\uc2a4\ud2b8 \ud30c\uc77c \uc774\uc6a9\ud558\ub294 \ubc29\ubc95\n",
      "- \ub370\uc774\ud130\ubca0\uc774\uc2a4 \uc774\uc6a9\ud558\ub294 \ubc29\ubc95\n",
      "- \uc6f9 API \uc774\uc6a9\ud574\uc11c \ub124\ud2b8\uc6cc\ud06c\ub97c \ud1b5\ud574 \ubd88\ub7ec\uc624\ub294 \ubc29\ubc95"
     ]
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "6.1 \ud14d\uc2a4\ud2b8 \ud30c\uc77c \uc774\uc6a9\ud558\ub294 \ubc29\ubc95"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \ud30c\uc774\uc36c\uc774 \uc88b\uc740 \uc774\uc720\n",
      "\n",
      "- \ub2e8\uc21c\ud55c \ubb38\ubc95\n",
      "- \uc9c1\uad00\uc801\uc778 \uc790\ub8cc \uad6c\uc870\n",
      "- \ud29c\ud50c\uc5d0 \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\uace0 \uc77d\uc5b4\ub0b4\ub294 \ud3b8\ub9ac\ud55c \uae30\ub2a5\n",
      "\n",
      "#### pandas \ud30c\uc77c \ud30c\uc2f1 \ud568\uc218\n",
      "\n",
      "\ud568\uc218 | \uc124\uba85\n",
      "--- | ---\n",
      "read_csv | \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \uc27c\ud45c(,)\ub97c \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n",
      "read_table | \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \ud0ed('\\t')\uc744 \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n",
      "read_fwf | \uace0\uc815\ud3ed \uce7c\ub7fc \ud615\uc2dd\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4(\uad6c\ubd84\uc790\uac00 \uc5c6\ub294 \ub370\uc774\ud130)\n",
      "read_clipboard | \ud074\ub9bd\ubcf4\ub4dc\uc5d0 \uc788\ub294 \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc624\ub294 read_table \ud568\uc218. \uc6f9\ud398\uc774\uc9c0\uc5d0\uc11c \ud45c\ub97c \uae01\uc5b4\uc62c \ub54c \uc720\uc6a9\ud558\ub2e4.\n",
      "\n",
      "#### pandas \ud30c\uc77c \ud30c\uc2f1 \ud568\uc218 \uc635\uc158\n",
      "\n",
      "- **\uc0c9\uc778**: \ubc18\ud658\ud558\ub294 DataFrame\uc5d0\uc11c \ud558\ub098 \uc774\uc0c1\uc758 \uce7c\ub7fc\uc744 \uc0c9\uc778\uc73c\ub85c \uc9c0\uc815\ud560 \uc218 \uc788\ub2e4. \ud30c\uc77c\uc774\ub098 \uc0ac\uc6a9\uc790\ub85c\ubd80\ud130 \uce7c\ub7fc\uc758 \uc774\ub984\uc744 \ubc1b\uac70\ub098 \uc544\ubb34\uac83\ub3c4 \ubc1b\uc9c0 \uc54a\uc744 \uc218 \uc788\ub2e4.\n",
      "- **\uc790\ub8cc\ud615 \ucd94\ub860\uacfc \ub370\uc774\ud130 \ubcc0\ud658**: \uc0ac\uc6a9\uc790 \uc815\uc758 \uac12 \ubcc0\ud658\uacfc \ube44\uc5b4\uc788\ub294 \uac12\uc744 \uc704\ud55c \uc0ac\uc6a9\uc790 \ub9ac\uc2a4\ud2b8\ub97c \ud3ec\ud568\ud55c\ub2e4.\n",
      "- **\ub0a0\uc9dc \ubd84\uc11d**: \uc5ec\ub7ec \uce7c\ub7fc\uc5d0 \uac78\uccd0 \uc788\ub294 \ub0a0\uc9dc\uc640 \uc2dc\uac04 \uc815\ubcf4\ub97c \ud558\ub098\uc758 \uce7c\ub7fc\uc5d0 \uc870\ud569\ud574\uc11c \uacb0\uacfc\uc5d0 \ubc18\uc601\ud55c\ub2e4.\n",
      "- **\ubc18\ubcf5**: \uc5ec\ub7ec \ud30c\uc77c\uc5d0 \uac78\uccd0 \uc788\ub294 \uc790\ub8cc\ub97c \ubc18\ubcf5\uc801\uc73c\ub85c \uc77d\uc5b4\uc62c \uc218 \uc788\ub2e4.\n",
      "- **\uc815\uc81c\ub418\uc9c0 \uc54a\uc740 \ub370\uc774\ud130 \ucc98\ub9ac**: \ub85c\uc6b0\ub098 \uaf2c\ub9ac\ub9d0, \uc8fc\uc11d \uac74\ub108\ub6f0\uae30 \ub610\ub294 \ucc9c \ub2e8\uc704\ub9c8\ub2e4 \uc27c\ud45c\ub85c \uad6c\ubd84\ub41c \uc22b\uc790 \uac19\uc740 \uc0ac\uc18c\ud55c \uc77c\uc744 \ucc98\ub9ac\ud574\uc900\ub2e4.\n",
      "\n",
      "#### \uc790\ub8cc\ud615 \ucd94\ub860\uc740 \ub9e4\uc6b0 \uc911\uc694\n",
      "\n",
      "- \uc5b4\ub5a4 \uce7c\ub7fc\uc774 \uc22b\uc790\uc778\uc9c0 \ubd88\ub9ac\uc5b8\uc778\uc9c0 \uc9c0\uc815\ud574\uc904 \ud544\uc694\uac00 \uc5c6\ub2e4"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from pandas import DataFrame, Series\n",
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 179
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!cat ch06/ex1.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "a,b,c,d,message\r\n",
        "1,2,3,4,hello\r\n",
        "5,6,7,8,world\r\n",
        "9,10,11,12,foo"
       ]
      }
     ],
     "prompt_number": 180
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df = pd.read_csv('ch06/ex1.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 181
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv('ch06/ex1.csv', header=None)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>0</th>\n",
        "      <th>1</th>\n",
        "      <th>2</th>\n",
        "      <th>3</th>\n",
        "      <th>4</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> a</td>\n",
        "      <td>  b</td>\n",
        "      <td>  c</td>\n",
        "      <td>  d</td>\n",
        "      <td> message</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td>   hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td>   world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>     foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 182,
       "text": [
        "   0   1   2   3        4\n",
        "0  a   b   c   d  message\n",
        "1  1   2   3   4    hello\n",
        "2  5   6   7   8    world\n",
        "3  9  10  11  12      foo"
       ]
      }
     ],
     "prompt_number": 182
    },
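    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# names\ub9cc \uc9c0\uc815\ud558\uba74 \uc6d0\ub798 \uc788\ub358 Column\uba85\uc740 \ubb34\uc2dc\ub418\uc9c0 \uc54a\uace0 \uccab\ubc88\uc9f8 \ub370\uc774\ud130 \ub85c\uc6b0\ub85c \ub0a8\ub294\ub2e4.\n",
      "# \uc6d0\ub798 Column\uba85\uc744 \ubb34\uc2dc\ud558\uace0 \ub0b4\uac00 \uc6d0\ud558\ub294 Column\uba85\uc73c\ub85c \uc124\uc815\ud558\ub824\uba74 header=0\ub3c4 \uc9c0\uc815\ud55c\ub2e4.\n",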
      "pd.read_csv('ch06/ex1.csv', names=[5,6,7,8,9])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>5</th>\n",
        "      <th>6</th>\n",
        "      <th>7</th>\n",
        "      <th>8</th>\n",
        "      <th>9</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> a</td>\n",
        "      <td>  b</td>\n",
        "      <td>  c</td>\n",
        "      <td>  d</td>\n",
        "      <td> message</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td>   hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td>   world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>     foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 183,
       "text": [
        "   5   6   7   8        9\n",
        "0  a   b   c   d  message\n",
        "1  1   2   3   4    hello\n",
        "2  5   6   7   8    world\n",
        "3  9  10  11  12      foo"
       ]
      }
     ],
     "prompt_number": 183
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv('ch06/ex1.csv', names=['a1', 'b1', 'c1', 'd1', 'message1'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>a1</th>\n",
        "      <th>b1</th>\n",
        "      <th>c1</th>\n",
        "      <th>d1</th>\n",
        "      <th>message1</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> a</td>\n",
        "      <td>  b</td>\n",
        "      <td>  c</td>\n",
        "      <td>  d</td>\n",
        "      <td> message</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td>   hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td>   world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>     foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 184,
       "text": [
        "  a1  b1  c1  d1 message1\n",
        "0  a   b   c   d  message\n",
        "1  1   2   3   4    hello\n",
        "2  5   6   7   8    world\n",
        "3  9  10  11  12      foo"
       ]
      }
     ],
     "prompt_number": 184
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td> hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 185,
       "text": [
        "   a   b   c   d message\n",
        "0  1   2   3   4   hello\n",
        "1  5   6   7   8   world\n",
        "2  9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 185
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# csv\ub294 DataFrame\uc73c\ub85c \uc77d\uc5b4\uc628\ub2e4.\n",
      "type(df)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 186,
       "text": [
        "pandas.core.frame.DataFrame"
       ]
      }
     ],
     "prompt_number": 186
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_table('ch06/ex1.csv', sep=',')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td> hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 187,
       "text": [
        "   a   b   c   d message\n",
        "0  1   2   3   4   hello\n",
        "1  5   6   7   8   world\n",
        "2  9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 187
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_table('ch06/ex1.csv', sep=',', header=None)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>0</th>\n",
        "      <th>1</th>\n",
        "      <th>2</th>\n",
        "      <th>3</th>\n",
        "      <th>4</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> a</td>\n",
        "      <td>  b</td>\n",
        "      <td>  c</td>\n",
        "      <td>  d</td>\n",
        "      <td> message</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td>   hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td>   world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>     foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 188,
       "text": [
        "   0   1   2   3        4\n",
        "0  a   b   c   d  message\n",
        "1  1   2   3   4    hello\n",
        "2  5   6   7   8    world\n",
        "3  9  10  11  12      foo"
       ]
      }
     ],
     "prompt_number": 188
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!cat ch06/ex2.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "1,2,3,4,hello\r\n",
        "5,6,7,8,world\r\n",
        "9,10,11,12,foo"
       ]
      }
     ],
     "prompt_number": 189
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# header \uc790\ub3d9 \uc0dd\uc131\n",
      "pd.read_csv('ch06/ex2.csv', header=None)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>0</th>\n",
        "      <th>1</th>\n",
        "      <th>2</th>\n",
        "      <th>3</th>\n",
        "      <th>4</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td> hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 190,
       "text": [
        "   0   1   2   3      4\n",
        "0  1   2   3   4  hello\n",
        "1  5   6   7   8  world\n",
        "2  9  10  11  12    foo"
       ]
      }
     ],
     "prompt_number": 190
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# header \uc635\uc158\uc774 \uc5c6\uc744\uc2dc \uccab\ubc88\uc9f8 \uc904\uc744 header\ub85c \uc774\uc6a9\n",
      "pd.read_csv('ch06/ex2.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>1</th>\n",
        "      <th>2</th>\n",
        "      <th>3</th>\n",
        "      <th>4</th>\n",
        "      <th>hello</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 191,
       "text": [
        "   1   2   3   4  hello\n",
        "0  5   6   7   8  world\n",
        "1  9  10  11  12    foo"
       ]
      }
     ],
     "prompt_number": 191
    },
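    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Column\uba85 \ucd94\uac00 (4\uac1c\ub9cc \uc9c0\uc815\ud558\uba74 \uccab\ubc88\uc9f8 Column\uc774 \uc0c9\uc778\uc73c\ub85c \uc0ac\uc6a9\ub41c\ub2e4)\n",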
      "pd.read_csv('ch06/ex2.csv', names=['a', 'b', 'c', 'message'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td> hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 192,
       "text": [
        "    a   b   c message\n",
        "1   2   3   4   hello\n",
        "5   6   7   8   world\n",
        "9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 192
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "names = ['a', 'b', 'c', 'd', 'message']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 193
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv('ch06/ex2.csv', names=names)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td> hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 194,
       "text": [
        "   a   b   c   d message\n",
        "0  1   2   3   4   hello\n",
        "1  5   6   7   8   world\n",
        "2  9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 194
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# message -> index\n",
      "pd.read_csv('ch06/ex2.csv', names=names, index_col='message')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>message</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>hello</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>world</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>foo</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 195,
       "text": [
        "         a   b   c   d\n",
        "message               \n",
        "hello    1   2   3   4\n",
        "world    5   6   7   8\n",
        "foo      9  10  11  12"
       ]
      }
     ],
     "prompt_number": 195
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv('ch06/ex2.csv', names=names, index_col='a')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>a</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td> hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 196,
       "text": [
        "    b   c   d message\n",
        "a                    \n",
        "1   2   3   4   hello\n",
        "5   6   7   8   world\n",
        "9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 196
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!cat ch06/csv_mindex.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "key1,key2,value1,value2\r\n",
        "one,a,1,2\r\n",
        "one,b,3,4\r\n",
        "one,c,5,6\r\n",
        "one,d,7,8\r\n",
        "two,a,9,10\r\n",
        "two,b,11,12\r\n",
        "two,c,13,14\r\n",
        "two,d,15,16\r\n"
       ]
      }
     ],
     "prompt_number": 197
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uc9c0\uc815\ud558\uace0 \uc2f6\ub2e4\uba74 \uce7c\ub7fc \ubc88\ud638\ub098 \uc774\ub984\uc758 \ub9ac\uc2a4\ud2b8\ub97c \ub118\uae34\ub2e4\n",
      "\n",
      "- 2\ubc88\uc9f8 \uacf5\ubd80\ud558\uba74\uc11c \uc815\ub9ac\ud558\ub2c8 \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uc5b4\ub5bb\uac8c \uc0ac\uc6a9\ud558\ub294\uc9c0 \uc870\uae08\uc740 \uc774\ud574\uac00 \uac04\ub2e4."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "parsed = pd.read_csv('ch06/csv_mindex.csv', index_col=['key1', 'key2'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 198
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "parsed"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th>value1</th>\n",
        "      <th>value2</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>key1</th>\n",
        "      <th>key2</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th rowspan=\"4\" valign=\"top\">one</th>\n",
        "      <th>a</th>\n",
        "      <td>  1</td>\n",
        "      <td>  2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>b</th>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>c</th>\n",
        "      <td>  5</td>\n",
        "      <td>  6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>d</th>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th rowspan=\"4\" valign=\"top\">two</th>\n",
        "      <th>a</th>\n",
        "      <td>  9</td>\n",
        "      <td> 10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>b</th>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>c</th>\n",
        "      <td> 13</td>\n",
        "      <td> 14</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>d</th>\n",
        "      <td> 15</td>\n",
        "      <td> 16</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 199,
       "text": [
        "           value1  value2\n",
        "key1 key2                \n",
        "one  a          1       2\n",
        "     b          3       4\n",
        "     c          5       6\n",
        "     d          7       8\n",
        "two  a          9      10\n",
        "     b         11      12\n",
        "     c         13      14\n",
        "     d         15      16"
       ]
      }
     ],
     "prompt_number": 199
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \uace0\uc815\ub41c \uad6c\ubd84\uc790\uac00 \uc5c6\ub2e4\uba74 read_table\uc758 \uad6c\ubd84\uc790\ub85c \uc815\uaddc\ud45c\ud604\uc2dd\uc744 \uc0ac\uc6a9\ud558\uba74 \ub41c\ub2e4.\n",
      "\n",
      "- [\ud30c\uc774\uc36c \u2013 \uc815\uaddc\uc2dd\ud45c\ud604\uc2dd(Regular Expression) \ubaa8\ub4c8](http://devanix.tistory.com/296)\n",
      "- [\ubc88\uc5ed \ud30c\uc774\uc36c \uc815\uaddc\ud45c\ud604\uc2dd](http://codeflow.co.kr/question/1061/%ED%8C%8C%EC%9D%B4%EC%8D%AC-%EC%A0%95%EA%B7%9C-%ED%91%9C%ED%98%84%EC%8B%9D/)\n",
      "- [tutorial point](http://www.tutorialspoint.com/python/python_reg_expressions.htm)\n",
      "- [\ud30c\uc774\uc36c - \uc815\uaddc\ud45c\ud604\uc2dd \ubaa8\ub4c8](http://devanix.tistory.com/296)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "list(open('ch06/ex3.txt'))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 200,
       "text": [
        "['            A         B         C\\n',\n",
        " 'aaa -0.264438 -1.026059 -0.619500\\n',\n",
        " 'bbb  0.927272  0.302904 -0.032399\\n',\n",
        " 'ccc -0.264273 -0.386314 -0.217601\\n',\n",
        " 'ddd -0.871858 -0.348382  1.100491\\n']"
       ]
      }
     ],
     "prompt_number": 200
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \uc9c1\uc811 \ud30c\uc77c\uc744 \uace0\uccd0\ub3c4 \ub418\uc9c0\ub9cc \uc774 \ud30c\uc77c\uc740 \uc5ec\ub7ec \uac1c\uc758 \uacf5\ubc31\ubb38\uc790\ub85c \ud544\ub4dc\uac00 \uad6c\ubd84\ub418\uc5b4 \uc788\uc73c\ubbc0\ub85c \uc774\ub97c \ud45c\ud604\ud560 \uc218 \uc788\ub294 \uc815\uaddc\ud45c\ud604\uc2dd \\s+\ub97c \uc0ac\uc6a9\ud574\uc11c \ucc98\ub9ac"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "result = pd.read_table('ch06/ex3.txt', sep='\\s+')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 201
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "result"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>A</th>\n",
        "      <th>B</th>\n",
        "      <th>C</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>aaa</th>\n",
        "      <td>-0.264438</td>\n",
        "      <td>-1.026059</td>\n",
        "      <td>-0.619500</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>bbb</th>\n",
        "      <td> 0.927272</td>\n",
        "      <td> 0.302904</td>\n",
        "      <td>-0.032399</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>ccc</th>\n",
        "      <td>-0.264273</td>\n",
        "      <td>-0.386314</td>\n",
        "      <td>-0.217601</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>ddd</th>\n",
        "      <td>-0.871858</td>\n",
        "      <td>-0.348382</td>\n",
        "      <td> 1.100491</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 202,
       "text": [
        "            A         B         C\n",
        "aaa -0.264438 -1.026059 -0.619500\n",
        "bbb  0.927272  0.302904 -0.032399\n",
        "ccc -0.264273 -0.386314 -0.217601\n",
        "ddd -0.871858 -0.348382  1.100491"
       ]
      }
     ],
     "prompt_number": 202
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \uc774 \uacbd\uc6b0, \uccab\ubc88\uc9f8 \ub85c\uc6b0\ub294 \ub2e4\ub978 \ub85c\uc6b0\ubcf4\ub2e4 \uce7c\ub7fc\uc774 \ud558\ub098 \uc801\uae30 \ub54c\ubb38\uc5d0 read_table\uc740 \uccab \ubc88\uc9f8 \uce7c\ub7fc\uc774 DataFrame\uc758 \uc0c9\uc778\uc774 \ub418\uc5b4\uc57c \ud55c\ub2e4\uace0 \ucd94\ub860"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "-------\n",
      "\n",
      "### read_table\uacfc read_csv\uc758 \ucc28\uc774\uc810\uc740??\n",
      "\n",
      "- read_csv: \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \uc27c\ud45c(,)\ub97c \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n",
      "- read_table: \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \ud0ed('\\t')\ub97c \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n",
      "\n",
      "#### \uadf8\ub7ec\ub2c8 \ub458 \ub2e4 \uc0ac\uc6a9\uc744 \ud574\ub3c4 \ub418\uc9c0\ub9cc \uc660\ub9cc\ud558\uba74 read_csv \uac19\uc740 \uacbd\uc6b0\ub294 csv \ud30c\uc77c\ub9cc \uc0ac\uc6a9\uc744 \ud558\uace0 \ub098\uba38\uc9c0 \ud2b9\ubcc4\ud55c \uacbd\uc6b0\ub97c read_table\ub85c \ud65c\uc6a9\n",
      "\n",
      "--------"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv('ch06/ex3.txt', delimiter='\\s+')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>A</th>\n",
        "      <th>B</th>\n",
        "      <th>C</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>aaa</th>\n",
        "      <td>-0.264438</td>\n",
        "      <td>-1.026059</td>\n",
        "      <td>-0.619500</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>bbb</th>\n",
        "      <td> 0.927272</td>\n",
        "      <td> 0.302904</td>\n",
        "      <td>-0.032399</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>ccc</th>\n",
        "      <td>-0.264273</td>\n",
        "      <td>-0.386314</td>\n",
        "      <td>-0.217601</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>ddd</th>\n",
        "      <td>-0.871858</td>\n",
        "      <td>-0.348382</td>\n",
        "      <td> 1.100491</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 203,
       "text": [
        "            A         B         C\n",
        "aaa -0.264438 -1.026059 -0.619500\n",
        "bbb  0.927272  0.302904 -0.032399\n",
        "ccc -0.264273 -0.386314 -0.217601\n",
        "ddd -0.871858 -0.348382  1.100491"
       ]
      }
     ],
     "prompt_number": 203
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### [IO Tools(Text, CSV, HDF5, \u22ef) example](http://pandas.pydata.org/pandas-docs/stable/io.html)\n",
      "\n",
      "- \ud30c\uc11c \ud568\uc218\ub294 \ud30c\uc77c \ud615\uc2dd\uc5d0\uc11c \ubc1c\uc0dd\ud560 \uc218 \uc788\ub294 \ub9e4\uc6b0 \ub2e4\uc591\ud55c \uc608\uc678\ub97c \uc798 \ucc98\ub9ac\ud560 \uc218 \uc788\ub3c4\ub85d \ub9ce\uc740 \ucd94\uac00 \uc778\uc790\ub97c \uac00\uc9c0\uace0 \uc788\ub2e4.\n",
      "- skiprows\ub97c \uc774\uc6a9\ud574\uc11c \uccab\ubc88\uc9f8, \uc138\ubc88\uc9f8, \ub124\ubc88\uc9f8 \ub85c\uc6b0\ub97c \uac74\ub108\ub6f8 \uc218 \uc788\uc74c"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Read CSV(comma-separated) file into DataFrame\n",
      "pd.read_csv?"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 204
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!cat ch06/ex4.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "# hey!\r\n",
        "a,b,c,d,message\r\n",
        "# just wanted to make things more difficult for you\r\n",
        "# who reads CSV files with computers, anyway?\r\n",
        "1,2,3,4,hello\r\n",
        "5,6,7,8,world\r\n",
        "9,10,11,12,foo"
       ]
      }
     ],
     "prompt_number": 205
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv('ch06/ex4.csv', skiprows=[0, 2, 3])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td> hello</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>  7</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 206,
       "text": [
        "   a   b   c   d message\n",
        "0  1   2   3   4   hello\n",
        "1  5   6   7   8   world\n",
        "2  9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 206
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "- **\ub204\ub77d\ub41c \uac12\uc744 \uc798 \ucc98\ub9ac\ud558\ub294 \uc77c**\uc740 \ud30c\uc77c\uc744 \uc77d\ub294 \uacfc\uc815\uc5d0\uc11c **\uc790\uc8fc \ubc1c\uc0dd**\ud558\ub294 \uc77c\uc774\uace0 **\uc911\uc694\ud55c \ubb38\uc81c**\n",
      "- \ub204\ub77d\ub41c \uac12\uc740 \ud45c\uae30\ud558\uc9c0 \uc54a\uac70\ub098(\ube44\uc5b4\uc788\ub294 \ubb38\uc790\uc5f4) \uad6c\ubd84\ud558\uae30 \uc26c\uc6b4 \ud2b9\uc218\ud55c \ubb38\uc790\ub85c \ud45c\uae30\n",
      "- NA, -1, #IND, NULL\ucc98\ub7fc \ube44\uc5b4\uc788\ub294 \uac12\uc73c\ub85c \uc778\uc2dd"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!cat ch06/ex5.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "something,a,b,c,d,message\r\n",
        "one,1,2,3,4,NA\r\n",
        "two,5,6,,8,world\r\n",
        "three,9,10,11,12,foo"
       ]
      }
     ],
     "prompt_number": 207
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "result = pd.read_csv('ch06/ex5.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 208
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "result"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>something</th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>   one</td>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td>   NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>   two</td>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>NaN</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> three</td>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 209,
       "text": [
        "  something  a   b   c   d message\n",
        "0       one  1   2   3   4     NaN\n",
        "1       two  5   6 NaN   8   world\n",
        "2     three  9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 209
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.isnull(result)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>something</th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td>  True</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td>  True</td>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "      <td> False</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 210,
       "text": [
        "  something      a      b      c      d message\n",
        "0     False  False  False  False  False    True\n",
        "1     False  False  False   True  False   False\n",
        "2     False  False  False  False  False   False"
       ]
      }
     ],
     "prompt_number": 210
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### na_values \uc635\uc158\uc740 \ub9ac\uc2a4\ud2b8\ub098 \ubb38\uc790\uc5f4 \uc9d1\ud569\uc744 \ubc1b\uc544\uc11c \ub204\ub77d\ub41c \uac12\uc744 \ucc98\ub9ac\n",
      "\n",
      "----------\n",
      "\n",
      "### Why? na_values\ub97c \uc0ac\uc6a9\ud558\uc9c0?\n",
      "\n",
      "- \ud2b9\uc815\ud55c \uac12\ub4e4\uc740 \uacc4\uc0b0\uc744 \ud558\uc9c0 \uc54a\uc73c\ub824\uace0??? \uc74c..\n",
      "\n",
      "-----------"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "result = pd.read_csv('ch06/ex5.csv', na_values=['NULL'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 211
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "result"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>something</th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>   one</td>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td>   NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>   two</td>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>NaN</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> three</td>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 212,
       "text": [
        "  something  a   b   c   d message\n",
        "0       one  1   2   3   4     NaN\n",
        "1       two  5   6 NaN   8   world\n",
        "2     three  9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 212
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# world\ub97c NA\uac12\uc73c\ub85c \ucc98\ub9ac\ud558\ub2c8 NaN\uc73c\ub85c \ub098\uc628\ub2e4.\n",
      "# \ud2b9\uc815\ud55c \uac12\uc744 NA \ucc98\ub9ac\ud560 \uc218 \uc788\uc744\uac83 \uac19\ub2e4.\n",
      "pd.read_csv('ch06/ex5.csv', na_values=['world'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>something</th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>   one</td>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>   two</td>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>NaN</td>\n",
        "      <td>  8</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> three</td>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td> foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 213,
       "text": [
        "  something  a   b   c   d message\n",
        "0       one  1   2   3   4     NaN\n",
        "1       two  5   6 NaN   8     NaN\n",
        "2     three  9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 213
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \uc5f4\ub9c8\ub2e4 \ub2e4\ub978 NA \ubb38\uc790\ub97c \uc0ac\uc804 \uac12\uc73c\ub85c \ub118\uaca8 \ucc98\ub9ac \uac00\ub2a5"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "sentinels = {'message': ['foo', 'NA'], 'something': ['two']}"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 214
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv('ch06/ex5.csv', na_values=sentinels)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>something</th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>   one</td>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td>   NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>   NaN</td>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>NaN</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> three</td>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 215,
       "text": [
        "  something  a   b   c   d message\n",
        "0       one  1   2   3   4     NaN\n",
        "1       NaN  5   6 NaN   8   world\n",
        "2     three  9  10  11  12     NaN"
       ]
      }
     ],
     "prompt_number": 215
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### read_csv / read_table \ud568\uc218 \uc778\uc790\n",
      "\n",
      "\uc778\uc790 | \uc124\uba85\n",
      "--- | ---\n",
      "path | \ud30c\uc77c \uc2dc\uc2a4\ud15c\uc5d0\uc11c\uc758 \uc704\uce58, URL, \ud30c\uc77c \uac1d\uccb4\ub97c \ub098\ud0c0\ub0b4\ub294 \ubb38\uc790\uc5f4\n",
      "sep or delimiter | \ud544\ub4dc\ub97c \uad6c\ubd84\ud558\uae30 \uc704\ud574 \uc0ac\uc6a9\ud560 \uc5f0\uc18d\ub41c \ubb38\uc790\ub098 \uc815\uaddc\ud45c\ud604\uc2dd\n",
      "header | \uce7c\ub7fc\uc758 \uc774\ub984\uc73c\ub85c \uc0ac\uc6a9\ud560 \ub85c\uc6b0\uc758 \ubc88\ud638, \uae30\ubcf8 \uac12\uc740 0(\uccab \ub85c\uc6b0)\uc774\uba70 \ud5e4\ub354\uac00 \uc5c6\uc73c\uba74 None\uc73c\ub85c \uc9c0\uc815\ud560 \uc218 \uc788\ub2e4.\n",
      "index_col | \uc0c9\uc778\uc73c\ub85c \uc0ac\uc6a9\ud560 \uce7c\ub7fc \ubc88\ud638\ub098 \uc774\ub984, \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uc9c0\uc815\ud560 \uacbd\uc6b0 \ub9ac\uc2a4\ud2b8\ub97c \ub118\uae38 \uc218 \uc788\ub2e4.\n",
      "names | \uceec\ub7fc \uc774\ub984\uc73c\ub85c \uc0ac\uc6a9\ud560 \ub9ac\uc2a4\ud2b8. header = None\uacfc \ud568\uaed8 \uc0ac\uc6a9\ud55c\ub2e4.\n",
      "skiprows | \ud30c\uc77c\uc758 \uc2dc\uc791\ubd80\ud130 \ubb34\uc2dc\ud560 \ub85c\uc6b0\uc758 \uac1c\uc218 \ub610\ub294 \ubb34\uc2dc\ud560 \ub85c\uc6b0 \ubc88\ud638\uac00 \ub2f4\uae34 \ub9ac\uc2a4\ud2b8\n",
      "na_values | NA \uac12\uc73c\ub85c \ucc98\ub9ac\ud560 \uac12\ub4e4\uc758 \ub098\uc5f4\n",
      "comment | \uc8fc\uc11d\uc73c\ub85c \ubd84\ub958\ub418\uc5b4 \ud30c\uc2f1\ud558\uc9c0 \uc54a\uc744 \ubb38\uc790 \ud639\uc740 \ubb38\uc790\uc5f4\n",
      "parse_dates | \ub0a0\uc9dc\ub97c datetime\uc73c\ub85c \ubcc0\ud658\ud560\uc9c0\uc758 \uc5ec\ubd80. \uae30\ubcf8\uac12\uc740 False\uc774\uba70, True\uc77c \uacbd\uc6b0 \ubaa8\ub4e0 \uce7c\ub7fc\uc5d0 \ub2e4 \uc801\uc6a9\ub41c\ub2e4. \ub9ac\uc2a4\ud2b8\ub97c \ub118\uae30\uba74 \ubcc0\ud658\ud560 \uce7c\ub7fc\uc744 \uc9c0\uc815\ud560 \uc218 \uc788\ub294\ub370, [1, 2, 3]\uc744 \ub118\uae30\uba74 \uac01\uac01\uc758 \uce7c\ub7fc\uc744 datetime\uc73c\ub85c \ubcc0\ud658\ud558\uace0, [[1, 3]]\uc744 \ub118\uae30\uba74 1, 3\ubc88 \uce7c\ub7fc\uc744 \uc870\ud569\ud574\uc11c \ud558\ub098\uc758 datetime\uc73c\ub85c \ubcc0\ud658\ud55c\ub2e4.\n",
      "keep_date_col | \uc5ec\ub7ec \uce7c\ub7fc\uc744 datetime\uc73c\ub85c \ubcc0\ud658\ud588\uc744 \uacbd\uc6b0 \uc6d0\ub798 \uce7c\ub7fc\uc744 \ub0a8\uaca8\ub458\uc9c0\uc758 \uc5ec\ubd80. \uae30\ubcf8\uac12\uc740 False\n",
      "converters | \ubcc0\ud658 \uc2dc \uce7c\ub7fc\uc5d0 \uc801\uc6a9\ud560 \ud568\uc218\ub97c \uc9c0\uc815\ud55c\ub2e4. \uc608\ub97c \ub4e4\uc5b4 {'foo': f}\ub294 'foo'\uce7c\ub7fc\uc5d0 f \ud568\uc218\ub97c \uc801\uc6a9\ud55c\ub2e4. \uc804\ub2ec\ud558\ub294 \uc0ac\uc804\uc758 \ud0a4 \uac12\uc740 \uce7c\ub7fc \uc774\ub984\uc774\ub098 \ubc88\ud638\uac00 \ub420 \uc218 \uc788\ub2e4.\n",
      "dayfirst | \ubaa8\ud638\ud55c \ub0a0\uc9dc \ud615\uc2dd\uc77c \uacbd\uc6b0 \uad6d\uc81c \ud615\uc2dd\uc73c\ub85c \uac04\uc8fc\ud55c\ub2e4(7/6/2012\ub294 2012\ub144 6\uc6d4 7\uc77c\ub85c \uac04\uc8fc\ud55c\ub2e4). \uae30\ubcf8\uac12\uc740 False\n",
      "date_parser | \ub0a0\uc9dc \ubcc0\ud658 \uc2dc \uc0ac\uc6a9\ud560 \ud568\uc218\n",
      "nrows | \ud30c\uc77c\uc758 \uccab \uc77c\ubd80\ub9cc \uc77d\uc5b4\uc62c \ub54c \ucc98\uc74c \uba87 \uc904\uc744 \uc77d\uc744 \uac83\uc778\uc9c0 \uc9c0\uc815\ud55c\ub2e4.\n",
      "iterator | \ud30c\uc77c\uc744 \uc870\uae08\uc529 \uc77d\uc744 \ub54c \uc0ac\uc6a9\ud558\ub3c4\ub85d TextParser \uac1d\uccb4\ub97c \ubc18\ud658\ud558\ub3c4\ub85d \ud55c\ub2e4. \uae30\ubcf8\uac12\uc740 False\n",
      "chunksize | TextParser \uac1d\uccb4\uc5d0\uc11c \uc0ac\uc6a9\ud560, \ud55c \ubc88\uc5d0 \uc77d\uc744 \ud30c\uc77c\uc758 \ud06c\uae30\n",
      "skip_footer | \ubb34\uc2dc\ud560 \ud30c\uc77c\uc758 \ub9c8\uc9c0\ub9c9 \uc904 \uc218\n",
      "verbose | \ud30c\uc2f1 \uacb0\uacfc\uc5d0 \ub300\ud55c \uc815\ubcf4\ub97c \ucd9c\ub825\ud55c\ub2e4. \uc22b\uc790\uac00 \uc544\ub2cc \uac12\ub4e4\uc774 \ub4e4\uc5b4\uc788\ub294 \uce7c\ub7fc\uc774\uba74\uc11c \ub204\ub77d\ub41c \uac12\uc774 \uc788\ub2e4\uba74 \uc904 \ubc88\ud638\ub97c \ucd9c\ub825\ud55c\ub2e4. \uae30\ubcf8\uac12\uc740 False\n",
      "encoding | \uc720\ub2c8\ucf54\ub4dc \uc778\ucf54\ub529 \uc885\ub958\ub97c \uc9c0\uc815\ud55c\ub2e4. UTF-8\ub85c \uc778\ucf54\ub529\ub41c \ud14d\uc2a4\ud2b8\uc77c \uacbd\uc6b0 'utf-8'\ub85c \uc9c0\uc815\ud55c\ub2e4.\n",
      "squeeze | \ub85c\uc6b0\uac00 \ud558\ub098\ubfd0\uc774\ub77c\uba74 Series \uac1d\uccb4\ub97c \ubc18\ud658\ud55c\ub2e4. \uae30\ubcf8\uac12\uc740 False\n",
      "thousands | \uc22b\uc790\ub97c \ucc9c \ub2e8\uc704\ub85c \ub04a\uc744 \ub54c \uc0ac\uc6a9\ud560 ', '\ub098 '.' \uac19\uc740 \uad6c\ubd84\uc790"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# \uc774 \uba85\ub839\uc5b4\ub85c \uc5b4\ub5a4 \ud568\uc218\uc778\uc9c0, \uc5b4\ub5a4 \ud30c\ub77c\ubbf8\ud130\ub97c \ub118\uaca8\uc57c \ud558\ub294\uc9c0 \uc815\ud655\ud788 \uc54c \uc218 \uc788\ub2e4.\n",
      "# \uad73\uc774 \uba85\ub839\uc5b4\ub4e4\uc744 \ub530\ub77c\uce60 \ud544\uc694\ub294 \uc5c6\ub294\ub370 \uc5b4\ub5a4 \ud30c\ub77c\ubbf8\ud130\ub4e4\uc744 \ub118\uae30\ub294\uc9c0 \ud55c \ubc88 \uacf5\ubd80\ud558\ub294 \uacb8\uacb8\ud574\uc11c \uccd0\ubd24\ub2e4.\n",
      "pd.read_csv?"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 216
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Type:        function\n",
      "String form: <function read_csv at 0x1087a0938>\n",
      "File:        /Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/io/parsers.py\n",
      "Definition:  pd.read_csv(filepath_or_buffer, sep=',', dialect=None, compression=None, doublequote=True, escapechar=None, quotechar='\"', quoting=0, skipinitialspace=False, lineterminator=None, header='infer', index_col=None, names=None, prefix=None, skiprows=None, skipfooter=None, skip_footer=0, na_values=None, na_fvalues=None, true_values=None, false_values=None, delimiter=None, converters=None, dtype=None, usecols=None, engine='c', delim_whitespace=False, as_recarray=False, na_filter=True, compact_ints=False, use_unsigned=False, low_memory=True, buffer_lines=None, warn_bad_lines=True, error_bad_lines=True, keep_default_na=True, thousands=None, comment=None, decimal='.', parse_dates=False, keep_date_col=False, dayfirst=False, date_parser=None, memory_map=False, nrows=None, iterator=False, chunksize=None, verbose=False, encoding=None, squeeze=False, mangle_dupe_cols=True, tupleize_cols=True)\n",
      "Docstring:\n",
      "Read CSV (comma-separated) file into DataFrame\n",
      "\n",
      "Also supports optionally iterating or breaking of the file\n",
      "into chunks.\n",
      "\n",
      "Parameters\n",
      "----------\n",
      "filepath_or_buffer : string or file handle / StringIO. The string could be\n",
      "    a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host\n",
      "    is expected. For instance, a local file could be\n",
      "    file ://localhost/path/to/table.csv\n",
      "sep : string, default ','\n",
      "    Delimiter to use. If sep is None, will try to automatically determine\n",
      "    this. Regular expressions are accepted.\n",
      "\n",
      "lineterminator : string (length 1), default None\n",
      "    Character to break file into lines. Only valid with C parser\n",
      "quotechar : string\n",
      "    The character to used to denote the start and end of a quoted item. Quoted items can include the delimiter and it will be ignored.\n",
      "quoting : int\n",
      "    Controls whether quotes should be recognized. Values are taken from\n",
      "    `csv.QUOTE_*` values. Acceptable values are 0, 1, 2, and 3 for\n",
      "    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONE, and QUOTE_NONNUMERIC, respectively.\n",
      "skipinitialspace : boolean, default False\n",
      "    Skip spaces after delimiter\n",
      "escapechar : string\n",
      "dtype : Type name or dict of column -> type\n",
      "    Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}\n",
      "compression : {'gzip', 'bz2', None}, default None\n",
      "    For on-the-fly decompression of on-disk data\n",
      "dialect : string or csv.Dialect instance, default None\n",
      "    If None defaults to Excel dialect. Ignored if sep longer than 1 char\n",
      "    See csv.Dialect documentation for more details\n",
      "header : int, default 0 if names parameter not specified,\n",
      "    Row to use for the column labels of the parsed DataFrame. Specify None if\n",
      "    there is no header row. Can be a list of integers that specify row\n",
      "    locations for a multi-index on the columns E.g. [0,1,3]. Interveaning\n",
      "    rows that are not specified (E.g. 2 in this example are skipped)\n",
      "skiprows : list-like or integer\n",
      "    Row numbers to skip (0-indexed) or number of rows to skip (int)\n",
      "    at the start of the file\n",
      "index_col : int or sequence or False, default None\n",
      "    Column to use as the row labels of the DataFrame. If a sequence is given, a\n",
      "    MultiIndex is used. If you have a malformed file with delimiters at the end\n",
      "    of each line, you might consider index_col=False to force pandas to _not_\n",
      "    use the first column as the index (row names)\n",
      "names : array-like\n",
      "    List of column names to use. If file contains no header row, then you\n",
      "    should explicitly pass header=None\n",
      "prefix : string or None (default)\n",
      "    Prefix to add to column numbers when no header, e.g 'X' for X0, X1, ...\n",
      "na_values : list-like or dict, default None\n",
      "    Additional strings to recognize as NA/NaN. If dict passed, specific\n",
      "    per-column NA values\n",
      "true_values : list\n",
      "    Values to consider as True\n",
      "false_values : list\n",
      "    Values to consider as False\n",
      "keep_default_na : bool, default True\n",
      "    If na_values are specified and keep_default_na is False the default NaN\n",
      "    values are overridden, otherwise they're appended to\n",
      "parse_dates : boolean, list of ints or names, list of lists, or dict\n",
      "    If True -> try parsing the index.\n",
      "    If [1, 2, 3] -> try parsing columns 1, 2, 3 each as a separate date column.\n",
      "    If [[1, 3]] -> combine columns 1 and 3 and parse as a single date column.\n",
      "    {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result 'foo'\n",
      "keep_date_col : boolean, default False\n",
      "    If True and parse_dates specifies combining multiple columns then\n",
      "    keep the original columns.\n",
      "date_parser : function\n",
      "    Function to use for converting a sequence of string columns to an\n",
      "    array of datetime instances. The default uses dateutil.parser.parser\n",
      "    to do the conversion.\n",
      "dayfirst : boolean, default False\n",
      "    DD/MM format dates, international and European format\n",
      "thousands : str, default None\n",
      "    Thousands separator\n",
      "comment : str, default None\n",
      "    Indicates remainder of line should not be parsed\n",
      "    Does not support line commenting (will return empty line)\n",
      "decimal : str, default '.'\n",
      "    Character to recognize as decimal point. E.g. use ',' for European data\n",
      "nrows : int, default None\n",
      "    Number of rows of file to read. Useful for reading pieces of large files\n",
      "iterator : boolean, default False\n",
      "    Return TextFileReader object\n",
      "chunksize : int, default None\n",
      "    Return TextFileReader object for iteration\n",
      "skipfooter : int, default 0\n",
      "    Number of line at bottom of file to skip\n",
      "converters : dict. optional\n",
      "    Dict of functions for converting values in certain columns. Keys can either\n",
      "    be integers or column labels\n",
      "verbose : boolean, default False\n",
      "    Indicate number of NA values placed in non-numeric columns\n",
      "delimiter : string, default None\n",
      "    Alternative argument name for sep. Regular expressions are accepted.\n",
      "encoding : string, default None\n",
      "    Encoding to use for UTF when reading/writing (ex. 'utf-8')\n",
      "squeeze : boolean, default False\n",
      "    If the parsed data only contains one column then return a Series\n",
      "na_filter: boolean, default True\n",
      "    Detect missing value markers (empty strings and the value of na_values). In\n",
      "    data without any NAs, passing na_filter=False can improve the performance\n",
      "    of reading a large file\n",
      "usecols : array-like\n",
      "    Return a subset of the columns.\n",
      "    Results in much faster parsing time and lower memory usage.\n",
      "mangle_dupe_cols: boolean, default True\n",
      "    Duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X'\n",
      "tupleize_cols: boolean, default False\n",
      "    Leave a list of tuples on columns as is (default is to convert to\n",
      "    a Multi Index on the columns)\n",
      "\n",
      "Returns\n",
      "-------\n",
      "result : DataFrame or TextParser"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### 6.1.1 \ud14d\uc2a4\ud2b8 \ud30c\uc77c \uc870\uae08\uc529 \uc77d\uc5b4\uc624\uae30"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "result = pd.read_csv('ch06/ex6.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 217
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "result"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<pre>\n",
        "&lt;class 'pandas.core.frame.DataFrame'&gt;\n",
        "Int64Index: 10000 entries, 0 to 9999\n",
        "Data columns (total 5 columns):\n",
        "one      10000  non-null values\n",
        "two      10000  non-null values\n",
        "three    10000  non-null values\n",
        "four     10000  non-null values\n",
        "key      10000  non-null values\n",
        "dtypes: float64(4), object(1)\n",
        "</pre>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 218,
       "text": [
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 10000 entries, 0 to 9999\n",
        "Data columns (total 5 columns):\n",
        "one      10000  non-null values\n",
        "two      10000  non-null values\n",
        "three    10000  non-null values\n",
        "four     10000  non-null values\n",
        "key      10000  non-null values\n",
        "dtypes: float64(4), object(1)"
       ]
      }
     ],
     "prompt_number": 218
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### nrows\ub85c \ucc98\uc74c \uba87 \uc904\ub9cc \uc77d\uc5b4\ubcfc \uc218 \uc788\ub2e4.\n",
      "\n",
      "- \ub9ac\ub205\uc2a4\uc758 head \uc758 \uae30\ub2a5\uacfc \uac19\ub2e4\uace0 \uc0dd\uac01\ud558\uba74 \ub41c\ub2e4."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv('ch06/ex6.csv', nrows=5)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>one</th>\n",
        "      <th>two</th>\n",
        "      <th>three</th>\n",
        "      <th>four</th>\n",
        "      <th>key</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 0.467976</td>\n",
        "      <td>-0.038649</td>\n",
        "      <td>-0.295344</td>\n",
        "      <td>-1.824726</td>\n",
        "      <td> L</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>-0.358893</td>\n",
        "      <td> 1.404453</td>\n",
        "      <td> 0.704965</td>\n",
        "      <td>-0.200638</td>\n",
        "      <td> B</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>-0.501840</td>\n",
        "      <td> 0.659254</td>\n",
        "      <td>-0.421691</td>\n",
        "      <td>-0.057688</td>\n",
        "      <td> G</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 0.204886</td>\n",
        "      <td> 1.074134</td>\n",
        "      <td> 1.388361</td>\n",
        "      <td>-0.982404</td>\n",
        "      <td> R</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 0.354628</td>\n",
        "      <td>-0.133116</td>\n",
        "      <td> 0.283763</td>\n",
        "      <td>-0.837063</td>\n",
        "      <td> Q</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 219,
       "text": [
        "        one       two     three      four key\n",
        "0  0.467976 -0.038649 -0.295344 -1.824726   L\n",
        "1 -0.358893  1.404453  0.704965 -0.200638   B\n",
        "2 -0.501840  0.659254 -0.421691 -0.057688   G\n",
        "3  0.204886  1.074134  1.388361 -0.982404   R\n",
        "4  0.354628 -0.133116  0.283763 -0.837063   Q"
       ]
      }
     ],
     "prompt_number": 219
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### TextFileReader \uac1d\uccb4\ub97c \uc774\uc6a9\ud574\uc11c chunksize\uc5d0 \ub530\ub77c \ubd84\ub9ac\ub41c \ud30c\uc77c\uc744 \uc21c\ud68c \uac00\ub2a5"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "chunker = pd.read_csv('ch06/ex6.csv', chunksize=1000)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 220
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "chunker"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 221,
       "text": [
        "<pandas.io.parsers.TextFileReader at 0x10c3d2490>"
       ]
      }
     ],
     "prompt_number": 221
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "chunker = pd.read_csv('ch06/ex6.csv', chunksize=1000)\n",
      "\n",
      "tot = Series([])\n",
      "for piece in chunker:\n",
      "    # Count how often each key (E, X, L, ...) occurs in this chunk and add it to the running total; fill_value=0 treats a key missing on either side as 0.\n",
      "    tot = tot.add( piece['key'].value_counts(), fill_value=0)\n",
      "\n",
      "# Sort by the counts (the values, not the key labels) in descending order\n",
      "tot = tot.order(ascending=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 222
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "tot[:10]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 223,
       "text": [
        "E    368\n",
        "X    364\n",
        "L    346\n",
        "O    343\n",
        "Q    340\n",
        "M    338\n",
        "J    337\n",
        "F    335\n",
        "K    334\n",
        "H    330\n",
        "dtype: float64"
       ]
      }
     ],
     "prompt_number": 223
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "6.1.2 \ub370\uc774\ud130\ub97c \ud14d\uc2a4\ud2b8 \ud615\uc2dd\uc73c\ub85c \uae30\ub85d\ud558\uae30"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data = pd.read_csv('ch06/ex5.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 224
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>something</th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "      <th>message</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>   one</td>\n",
        "      <td> 1</td>\n",
        "      <td>  2</td>\n",
        "      <td>  3</td>\n",
        "      <td>  4</td>\n",
        "      <td>   NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>   two</td>\n",
        "      <td> 5</td>\n",
        "      <td>  6</td>\n",
        "      <td>NaN</td>\n",
        "      <td>  8</td>\n",
        "      <td> world</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> three</td>\n",
        "      <td> 9</td>\n",
        "      <td> 10</td>\n",
        "      <td> 11</td>\n",
        "      <td> 12</td>\n",
        "      <td>   foo</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 225,
       "text": [
        "  something  a   b   c   d message\n",
        "0       one  1   2   3   4     NaN\n",
        "1       two  5   6 NaN   8   world\n",
        "2     three  9  10  11  12     foo"
       ]
      }
     ],
     "prompt_number": 225
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data.to_csv('ch06/out.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 226
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!cat ch06/out.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        ",something,a,b,c,d,message\r\n",
        "0,one,1,2,3.0,4,\r\n",
        "1,two,5,6,,8,world\r\n",
        "2,three,9,10,11.0,12,foo\r\n"
       ]
      }
     ],
     "prompt_number": 227
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# csv\ub85c \uc9c0\uc815\ud558\ub294\ub370 output\uc740 \ud45c\uc900\uc544\uc6c3\ud48b(\ubaa8\ub2c8\ud130), separator\ub294 '|'\n",
      "data.to_csv(sys.stdout, sep='|')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "|something|a|b|c|d|message\n",
        "0|one|1|2|3.0|4|\n",
        "1|two|5|6||8|world\n",
        "2|three|9|10|11.0|12|foo\n"
       ]
      }
     ],
     "prompt_number": 228
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Write DataFrame to a comma-separated value (csv) file\n",
      "# na_rep -> Missing data representation. NA REPresentation\n",
      "data.to_csv?"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 229
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### na_rep\ub85c \ub204\ub77d\ub41c\uac12\uc744 \uc6d0\ud558\ub294 \uac12\uc73c\ub85c \ubcc0\uacbd \uac00\ub2a5"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data.to_csv(sys.stdout, na_rep='NULL')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        ",something,a,b,c,d,message\n",
        "0,one,1,2,3.0,4,NULL\n",
        "1,two,5,6,NULL,8,world\n",
        "2,three,9,10,11.0,12,foo\n"
       ]
      }
     ],
     "prompt_number": 230
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data.to_csv(sys.stdout, na_rep='NaN')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        ",something,a,b,c,d,message\n",
        "0,one,1,2,3.0,4,NaN\n",
        "1,two,5,6,NaN,8,world\n",
        "2,three,9,10,11.0,12,foo\n"
       ]
      }
     ],
     "prompt_number": 231
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### row, column \uac12\uc744 \uc800\uc7a5\ud560 \uac83\uc778\uc9c0 \uc120\ud0dd \uac00\ub2a5"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data.to_csv(sys.stdout, index=False, header=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "one,1,2,3.0,4,\n",
        "two,5,6,,8,world\n",
        "three,9,10,11.0,12,foo\n"
       ]
      }
     ],
     "prompt_number": 232
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \uceec\ub7fc\uc758 \uc77c\ubd80\ubd84\ub9cc \uae30\ub85d \uac00\ub2a5, \uc21c\uc11c\ub97c \uc9c1\uc811 \uc9c0\uc815 \uac00\ub2a5"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data.to_csv(sys.stdout, index=False, cols=['a', 'b', 'c'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "a,b,c\n",
        "1,2,3.0\n",
        "5,6,\n",
        "9,10,11.0\n"
       ]
      }
     ],
     "prompt_number": 233
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### Series\uc5d0\ub3c4 to_csv method \uc874\uc7ac"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dates = pd.date_range('1/1/2000', periods=7)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 234
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts = Series(np.arange(7), index=dates)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 235
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.to_csv('ch06/tseries.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 236
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!cat ch06/tseries.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "2000-01-01,0\r\n",
        "2000-01-02,1\r\n",
        "2000-01-03,2\r\n",
        "2000-01-04,3\r\n",
        "2000-01-05,4\r\n",
        "2000-01-06,5\r\n",
        "2000-01-07,6\r\n"
       ]
      }
     ],
     "prompt_number": 237
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \uc57d\uac04 \ubcf5\uc7a1\ud558\uac8c \ud5e4\ub354\ub97c \uc5c6\uc560\uace0 \uccab \ubc88\uc9f8 \uce7c\ub7fc\uc744 \uc0c9\uc778\uc73c\ub85c \ud558\uba74 read_csv \uba54\uc11c\ub4dc\ub85c Series \uac1d\uccb4\ub97c \uc5bb\uc744 \uc218 \uc788\uc9c0\ub9cc from_csv \uba54\uc11c\ub4dc\uac00 \uc880 \ub354 \ud3b8\ub9ac\ud558\uace0 \uac04\ub2e8\ud558\uac8c \ubb38\uc81c \ud574\uacb0"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.DataFrame.to_csv?"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 238
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Series.from_csv('ch06/tseries.csv', parse_dates=True)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 239,
       "text": [
        "2000-01-01    0\n",
        "2000-01-02    1\n",
        "2000-01-03    2\n",
        "2000-01-04    3\n",
        "2000-01-05    4\n",
        "2000-01-06    5\n",
        "2000-01-07    6\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 239
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "type( Series.from_csv('ch06/tseries.csv', parse_dates=True) )"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 240,
       "text": [
        "pandas.core.series.Series"
       ]
      }
     ],
     "prompt_number": 240
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# parse dates: boolean, default True.\n",
      "# Parse dates. Different default from read_table\n",
      "Series.from_csv?"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 241
    },
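    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### read_csv\ub97c Series\ub85c \uc77d\uc744 \uc218 \uc788\ub2e4\uace0 \uc2e4\ud5d8\ud558\ub294 \uc911\uc778\ub370 \uc798 \uc548\ub418\ub124.. \n",
      "\n",
      "- DataFrame\uc73c\ub85c \uc77d\uc5b4\uc9d0\n",
      "- To get a Series from `read_csv`, also pass `index_col=0` and `squeeze=True`, e.g. `pd.read_csv('ch06/tseries.csv', header=None, index_col=0, parse_dates=True, squeeze=True)`: once the first column is used as the index only one data column remains, and `squeeze=True` returns a single-column result as a Series."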
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv('ch06/tseries.csv', header=None)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>0</th>\n",
        "      <th>1</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 2000-01-01</td>\n",
        "      <td> 0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 2000-01-02</td>\n",
        "      <td> 1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 2000-01-03</td>\n",
        "      <td> 2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 2000-01-04</td>\n",
        "      <td> 3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 2000-01-05</td>\n",
        "      <td> 4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td> 2000-01-06</td>\n",
        "      <td> 5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td> 2000-01-07</td>\n",
        "      <td> 6</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 242,
       "text": [
        "            0  1\n",
        "0  2000-01-01  0\n",
        "1  2000-01-02  1\n",
        "2  2000-01-03  2\n",
        "3  2000-01-04  3\n",
        "4  2000-01-05  4\n",
        "5  2000-01-06  5\n",
        "6  2000-01-07  6"
       ]
      }
     ],
     "prompt_number": 242
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "type(pd.read_csv('ch06/tseries.csv', header=None))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 243,
       "text": [
        "pandas.core.frame.DataFrame"
       ]
      }
     ],
     "prompt_number": 243
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.read_csv?"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 244
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### 6.1.3 \uc218\ub3d9\uc73c\ub85c \uad6c\ubd84 \ud615\uc2dd \ucc98\ub9ac\ud558\uae30"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### read_table\uc5d0\uc11c \uc77d\uc744 \uc218 \uc5c6\ub294 \uc798\ubabb\ub41c \ud615\uc2dd\uc758 \uc904\uc774 \ud3ec\ud568\ub41c \ub370\uc774\ud130\uac00 \ub4dc\ubb3c\uac8c \ubc1c\uacac \ub428 -> \uc218\ub3d9 \ucc98\ub9ac"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!cat ch06/ex7.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\"a\",\"b\",\"c\"\r\n",
        "\"1\",\"2\",\"3\"\r\n",
        "\"1\",\"2\",\"3\",\"4\"\r\n"
       ]
      }
     ],
     "prompt_number": 245
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import csv\n",
      "f = open('ch06/ex7.csv')\n",
      "\n",
      "reader = csv.reader(f)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 246
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for line in reader:\n",
      "    print line"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "['a', 'b', 'c']\n",
        "['1', '2', '3']\n",
        "['1', '2', '3', '4']\n"
       ]
      }
     ],
     "prompt_number": 247
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "lines = list(csv.reader(open('ch06/ex7.csv')))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 248
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "header, values = lines[0], lines[1:]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 249
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "header"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 250,
       "text": [
        "['a', 'b', 'c']"
       ]
      }
     ],
     "prompt_number": 250
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "values"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 251,
       "text": [
        "[['1', '2', '3'], ['1', '2', '3', '4']]"
       ]
      }
     ],
     "prompt_number": 251
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# header = a,b,c\n",
      "# values\ub97c 1,1\uc744 \uac19\uc774 \ubb36\ub294\ub2e4. 2,2 \ubb36\uace0. 3,3 \ubb36\uace0. 4\ub294 header\uac00 a,b,c 3\uac1c \ubc16\uc5d0 \uc5c6\uae30 \ub54c\ubb38\uc5d0 \ud3ec\ud568\ub418\uc9c0 \uc54a\ub294\ub2e4.\n",
      "data_dict = {h: v for h, v in zip(header, zip(*values))}"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 252
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data_dict"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 253,
       "text": [
        "{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}"
       ]
      }
     ],
     "prompt_number": 253
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### CSV \ud30c\uc77c\uc740 \ub2e4\uc591\ud55c \ud30c\uc77c \uc874\uc7ac\ud558\uae30 \ub54c\ubb38\uc5d0 \ub2e4\uc591\ud55c \uc635\uc158\ub4e4\uc740 csv.Dialect \uc0c1\uc18d\ubc1b\uc544 \ud574\uacb0\n",
      "\n",
      "- \ub2e4\uc591\ud55c \uad6c\ubd84\uc790\n",
      "- \ubb38\uc790\uc5f4\uc744 \ub458\ub7ec\uc2f8\ub294 \ubc29\ubc95\n",
      "- \uac1c\ud589\ubb38\uc790"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "class my_dialect(csv.Dialect):\n",
      "    lineterminator = '\\n'\n",
      "    delimiter = ';'\n",
      "    quotechar = '\"'\n",
      "\n",
      "reader = csv.reader"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 254
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "reader = csv.reader?"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 255
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "reader = csv.reader"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "reader = csv.reader"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 256
    },
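    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### TypeError: \"quoting\" must be an integer\n",
      "\n",
      "- [_csv.Error: field larger than field limit (131072) \ucc38\uace0](http://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072)\n",
      "- Cause: `csv.Dialect` leaves `quoting` unset, so a subclass such as `my_dialect` must define it (e.g. `quoting = csv.QUOTE_MINIMAL`), or the reader must be given an explicit `quoting=` argument.\n",
      "- Caution: `quoting=csv.QUOTE_NONE` makes the reader skip quote processing entirely, so `quotechar` has no effect; use `csv.QUOTE_MINIMAL` when fields may be quoted."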
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# quoting\uc774 \uaf2d integer\uc5ec\uc57c \ud55c\ub2e4\ub294 \uc624\ub958\uac00 \ubc1c\uc0dd\ud574\uc11c \uc0bd\uc9c8\ud558\ub2e4\uac00 \ub4a4\uc5d0 quoting keyword\ub97c \ubd99\uc5ec\uc90c..\n",
      "reader = csv.reader(f, dialect=my_dialect)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "TypeError",
       "evalue": "\"quoting\" must be an integer",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
        "\u001b[0;32m<ipython-input-257-92557f61d368>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# quoting\uc774 \uaf2d integer\uc5ec\uc57c \ud55c\ub2e4\ub294 \uc624\ub958\uac00 \ubc1c\uc0dd\ud574\uc11c \uc0bd\uc9c8\ud558\ub2e4\uac00 \ub4a4\uc5d0 quoting keyword\ub97c \ubd99\uc5ec\uc90c..\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mreader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcsv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdialect\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmy_dialect\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[0;31mTypeError\u001b[0m: \"quoting\" must be an integer"
       ]
      }
     ],
     "prompt_number": 257
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "reader = csv.reader(f, dialect=my_dialect, quoting=csv.QUOTE_NONE)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 258
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "csv.QUOTE_NONE"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 259,
       "text": [
        "3"
       ]
      }
     ],
     "prompt_number": 259
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "reader = csv.reader(f, delimiter='|')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 260
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### [13.1. csv \u2014 CSV File Reading and Writing](https://docs.python.org/2/library/csv.html#dialects-and-formatting-parameters)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# \uc5b4\ub5a4 \uc635\uc158\ub4e4 \uc788\ub294\uc9c0 \ubcf4\ub824\uace0 \ud588\ub354\ub2c8 \uc548 \ubcf4\uc5ec\uc8fc\ub124...\n",
      "csv.reader??"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 261
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### CSV Note\n",
      "\n",
      "- \uc880 \ub354 \ubcf5\uc7a1\ud558\uac70\ub098 \uad6c\ubd84\uc790\uac00 \ud55c \uae00\uc790\ub97c \ucd08\uacfc\ud558\ub294 \uace0\uc815 \uae38\uc774\ub97c \uac00\uc9c4\ub2e4\uba74 csv \ubaa8\ub4c8\uc744 \uc0ac\uc6a9\ud560 \uc218 \uc5c6\ub2e4.\n",
      "- \uc774\ub7f0 \uacbd\uc6b0\uc5d0\ub294 \uc904\uc744 \ub098\ub204\uace0 \ubb38\uc790\uc5f4\uc758 split \uba54\uc11c\ub4dc\ub098 \uc815\uaddc\ud45c\ud604\uc2dd \uba54\uc11c\ub4dc\uc778 re.split \ub4f1\uc744 \uc774\uc6a9\ud574\uc11c \uac00\uacf5\ud558\ub294 \uc791\uc5c5\uc744 \ud574\uc57c \ud55c\ub2e4."
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### CSV \ud30c\uc77c \uae30\ub85d"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Python 2's csv module expects a binary-mode file object: in text mode Windows\n",
      "# would expand the writer's own CRLF line terminator a second time.\n",
      "with open('ch06/mydata.csv', 'wb') as f:\n",
      "    # quoting is passed explicitly because my_dialect alone raised\n",
      "    # TypeError: quoting must be an integer in the reader cells above.\n",
      "    writer = csv.writer(f, dialect=my_dialect, quoting=csv.QUOTE_NONE)\n",
      "    writer.writerow(('one', 'two', 'three'))\n",
      "    writer.writerow(('1', '2', '3'))\n",
      "    writer.writerow(('4', '5', '6'))\n",
      "    writer.writerow(('7', '8', '9'))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 262
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!cat ch06/mydata.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "one;two;three\r\n",
        "1;2;3\r\n",
        "4;5;6\r\n",
        "7;8;9\r\n"
       ]
      }
     ],
     "prompt_number": 263
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### JSON \ub370\uc774\ud130\n",
      "\n",
      "- JSON(JavaScript Object Notation)\uc740 \uc6f9\ube0c\ub77c\uc6b0\uc800\uc640 \ub2e4\ub978 \uc560\ud50c\ub9ac\ucf00\uc774\uc158\uc774 HTTP \uc694\uccad\uc73c\ub85c \ub370\uc774\ud130\ub97c \ubcf4\ub0bc \ub54c \ub110\ub9ac \uc0ac\uc6a9\ud558\ub294 \ud45c\uc900 \ud30c\uc77c \ud615\uc2dd \uc911 \ud558\ub098\ub2e4.\n",
      "- JSON\uc740 CSV \uac19\uc740 \ud45c \ud615\uc2dd\uc758 \ud14d\uc2a4\ud2b8\ubcf4\ub2e4 \uc880 \ub354 \uc720\uc5f0\ud55c \ub370\uc774\ud130 \ud615\uc2dd\uc774\uba70, JSON \ub370\uc774\ud130\uc758 \uc608\ub294 \ub2e4\uc74c\uacfc \uac19\ub2e4."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# json\uc740 python\uc5d0\uc11c\ucc98\ub7fc '\uc73c\ub85c \ud558\uba74 \uc548\ub41c\ub2e4. \ud604\uc7ac \"\"\"\ub85c \uac10\uc2f8 \ubb38\uc790\uc5f4\ub85c \uc800\uc7a5\ub418\uc5b4 \uc788\uae30 \ub54c\ubb38\uc5d0 \n",
      "# JSON\uc5d0\uc11c\ub294 '\ub97c string \uac12\uc73c\ub85c \uc778\uc2dd\ud558\uc9c0 \uc54a\uc544\uc11c \uc5d0\ub7ec \ubc1c\uc0dd\n",
      "obj = \"\"\"\n",
      "{\n",
      "    'name': 'Wes',\n",
      "    'places_lived': ['United States', 'Spain', 'Germany'],\n",
      "    'pet': null, 'siblings': [{'name': 'Scott', 'age':25, 'pet':'Zuko'},\n",
      "                                {'name': 'Katie', 'age':33, 'pet': 'Cisco'}]\n",
      "}\n",
      "\"\"\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 264
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import json"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 265
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# ValueError: Expecting property name: line 3 column 5 (char 7)\n",
      "result = json.loads(obj)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "ValueError",
       "evalue": "Expecting property name: line 3 column 5 (char 7)",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
        "\u001b[0;32m<ipython-input-266-f05e1f9794f2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# ValueError: Expecting property name: line 3 column 5 (char 7)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.pyc\u001b[0m in \u001b[0;36mloads\u001b[0;34m(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m    336\u001b[0m             \u001b[0mparse_int\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mparse_float\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    337\u001b[0m             parse_constant is None and object_pairs_hook is None and not kw):\n\u001b[0;32m--> 338\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_default_decoder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    339\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    340\u001b[0m         \u001b[0mcls\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mJSONDecoder\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
        "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.pyc\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m    363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    364\u001b[0m         \"\"\"\n\u001b[0;32m--> 365\u001b[0;31m         \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    366\u001b[0m         \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    367\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
        "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.pyc\u001b[0m in \u001b[0;36mraw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m    379\u001b[0m         \"\"\"\n\u001b[1;32m    380\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 381\u001b[0;31m             \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscan_once\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    382\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    383\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"No JSON object could be decoded\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
        "\u001b[0;31mValueError\u001b[0m: Expecting property name: line 3 column 5 (char 7)"
       ]
      }
     ],
     "prompt_number": 266
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "obj = \"\"\"\n",
      "{\n",
      "    \"name\": \"Wes\",\n",
      "    \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\n",
      "    \"pet\": null, \"siblings\": [{\"name\": \"Scott\", \"age\":25, \"pet\":\"Zuko\"},\n",
      "                                {\"name\": \"Katie\", \"age\":33, \"pet\": \"Cisco\"}]\n",
      "}\n",
      "\"\"\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 267
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "obj"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 268,
       "text": [
        "'\\n{\\n    \"name\": \"Wes\",\\n    \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\\n    \"pet\": null, \"siblings\": [{\"name\": \"Scott\", \"age\":25, \"pet\":\"Zuko\"},\\n                                {\"name\": \"Katie\", \"age\":33, \"pet\": \"Cisco\"}]\\n}\\n'"
       ]
      }
     ],
     "prompt_number": 268
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### JSON\uc740 \ub110 \uac12\uc778 null\uacfc \ub2e4\ub978 \uba87 \uac00\uc9c0 \uc0ac\uc18c\ud55c \uc8fc\uc758\uc0ac\ud56d(\ub9ac\uc2a4\ud2b8\uc758 \ub9c8\uc9c0\ub9c9\uc5d0 \uc27c\ud45c\uac00 \uc788\uc73c\uba74 \uc548\ub41c\ub2e4\ub358\uac00 \ud558\ub294)\uc744 \uc81c\uc678\ud558\uba74 \ud30c\uc774\uc36c \ucf54\ub4dc\uc640 \uac70\uc758 \uc720\uc0ac\n",
      "\n",
      "- \uae30\ubcf8 \uc790\ub8cc\ud615\uc740 \uac1d\uccb4(\uc0ac\uc804), \ubc30\uc5f4(\ub9ac\uc2a4\ud2b8), \ubb38\uc790\uc5f4, \uc22b\uc790, \ubd88\ub9ac\uc5b8 \uadf8\ub9ac\uace0 \ub110\n",
      "- \uac1d\uccb4\uc758 \ud0a4\ub294 \ubc18\ub4dc\uc2dc \ubb38\uc790\uc5f4\n",
      "- JSON \uc77d\uace0 \uc4f8 \uc218 \uc788\ub294 \ub77c\uc774\ube0c\ub7ec\ub9ac\uac00 \uba87 \uac1c \uc788\uc9c0\ub9cc \ud45c\uc900 \ub77c\uc774\ube0c\ub7ec\ub9ac\uc778 json \uc0ac\uc6a9"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Now that obj uses double quotes, json.loads parses it without the earlier ValueError\n",
      "result = json.loads(obj)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 269
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "result"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 270,
       "text": [
        "{u'name': u'Wes',\n",
        " u'pet': None,\n",
        " u'places_lived': [u'United States', u'Spain', u'Germany'],\n",
        " u'siblings': [{u'age': 25, u'name': u'Scott', u'pet': u'Zuko'},\n",
        "  {u'age': 33, u'name': u'Katie', u'pet': u'Cisco'}]}"
       ]
      }
     ],
     "prompt_number": 270
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### json.dumps\ub294 \ud30c\uc774\uc36c \uac1d\uccb4\ub97c JSON \ud615\ud0dc\ub85c \ubcc0\ud658"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "asjson = json.dumps(result)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 271
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# '\uac00 \uc544\ub2c8\ub77c \"\uc778 \uac83\uc744 \ud655\uc778\ud558\uc790\n",
      "asjson"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 272,
       "text": [
        "'{\"pet\": null, \"siblings\": [{\"pet\": \"Zuko\", \"age\": 25, \"name\": \"Scott\"}, {\"pet\": \"Cisco\", \"age\": 33, \"name\": \"Katie\"}], \"name\": \"Wes\", \"places_lived\": [\"United States\", \"Spain\", \"Germany\"]}'"
       ]
      }
     ],
     "prompt_number": 272
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### JSON \uac1d\uccb4\ub098 \uac1d\uccb4\uc758 \ub9ac\uc2a4\ud2b8\ub97c DataFrame\uc774\ub098 \ub2e4\ub978 \uc790\ub8cc \uad6c\uc870\ub85c \uc5b4\ub5bb\uac8c \ubcc0\ud658\ud574\uc11c \ubd84\uc11d\uc744 \ud560 \uac83\uc778\uc9c0\ub294 \ub3c5\uc790\uc758 \ubaab\n",
      "\n",
      "- JSON \uac1d\uccb4\uc758 \ub9ac\uc2a4\ud2b8\ub97c DataFrame \uc0dd\uc131\uc790\ub85c \ub118\uae30\uace0 \ub370\uc774\ud130 \ud544\ub4dc \uc120\ud0dd \uac00\ub2a5"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "siblings = DataFrame(result['siblings'], columns=['name', 'age'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 273
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "siblings"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>name</th>\n",
        "      <th>age</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> Scott</td>\n",
        "      <td> 25</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Katie</td>\n",
        "      <td> 33</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 274,
       "text": [
        "    name  age\n",
        "0  Scott   25\n",
        "1  Katie   33"
       ]
      }
     ],
     "prompt_number": 274
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# \ucc45\uc5d0 \ub098\uc640\uc788\uc9c0 \uc54a\uc740 \ub0b4\uc6a9\uc744 \ud55c \ubc88 \ub354 \ud574\ubd10\uc57c \uc27d\uac8c \uc774\ud574\uac00 \ub418\ub294\ub4ef\n",
      "siblings2 = DataFrame(result['siblings'], columns=['name', 'age', 'pet'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 275
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "siblings2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>name</th>\n",
        "      <th>age</th>\n",
        "      <th>pet</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> Scott</td>\n",
        "      <td> 25</td>\n",
        "      <td>  Zuko</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Katie</td>\n",
        "      <td> 33</td>\n",
        "      <td> Cisco</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 276,
       "text": [
        "    name  age    pet\n",
        "0  Scott   25   Zuko\n",
        "1  Katie   33  Cisco"
       ]
      }
     ],
     "prompt_number": 276
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### pandas 0.12\ubd80\ud130 JSON\uc744 \ube60\ub974\uac8c \uc77d\uace0(read_json) \uc4f0\ub294(to_json) \ub124\uc774\ud2f0\ube0c \uad6c\ud604 \uc81c\uacf5"
     ]
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "6.1.5 XML\uacfc HTML: \uc6f9 \ub0b4\uc6a9 \uae01\uc5b4\uc624\uae30"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### [lxml](http://lxml.de)\n",
      "\n",
      "- \uc544\uc8fc \ud070 \ud30c\uc77c\uc744 \ube60\ub974\uac8c \ucc98\ub9ac \uac00\ub2a5\n",
      "- \uc5ec\ub7ec \uc885\ub958\uc758 \uc778\ud130\ud398\uc774\uc2a4 \uc81c\uacf5\n",
      "- lxml.html: HTML \ucc98\ub9ac\n",
      "- lxml.objectify: XML \ucc98\ub9ac\n",
      "\n",
      "#### \ub300\ubd80\ubd84\uc758 \uc6f9\uc0ac\uc774\ud2b8\ub294 \ub531 \ud544\uc694\ud55c \ub0b4\uc6a9\ub9cc \ub4e4\uc5b4\uc788\ub294 JSON\uc774\ub098 XML\uc744 \ub9ce\uc774 \uc0ac\uc6a9\ud558\uc9c0 \uc54a\uace0 HTML\uc744 \uc0ac\uc6a9"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from lxml.html import parse\n",
      "from urllib2 import urlopen\n",
      "\n",
      "# \ub370\uc774\ud130\ub97c \uac00\uc838 \uc62c url\uc744 \ub118\uae34 \ud6c4\n",
      "# \ub370\uc774\ud130\ub97c \ubc1b\uc544 \uc628 \ud6c4 parse\n",
      "parsed = parse(urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options'))\n",
      "\n",
      "doc = parsed.getroot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 277
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### doc \uac1d\uccb4\uc5d0\ub294 \ubaa8\ub4e0 HTML \ud0dc\uadf8 \ucd94\ucd9c\n",
      "\n",
      "- \uc6b0\ub9ac\uac00 \uad00\uc2ec \uac00\uc838\uc57c \ud560 table \ud0dc\uadf8\ub3c4 \ud3ec\ud568\n",
      "- \uc5b4\ub5bb\uac8c \ub3d9\uc791\ud558\ub294\uc9c0 \ud655\uc778\ud558\uae30 \uc704\ud574 \uae01\uc5b4\uc628 HTML \ubb38\uc11c\uc5d0\uc11c \uc678\ubd80 \uc5f0\uacb0 URL\uc744 \ubaa8\ub450 \ucc3e\uc544\ubcf4\uc790.\n",
      "- \uc678\ubd80 \uc5f0\uacb0\uc740 a \ud0dc\uadf8\ub85c \uc9c0\uc815\n",
      "- findall \uba54\uc11c\ub4dc\uc5d0 XPath(\ubb38\uc11c \uc9c8\uc758 \uc5b8\uc5b4)\ub97c \uc0ac\uc6a9\ud574\uc11c \ud574\ub2f9 \uc5d8\ub9ac\uba3c\ud2b8\ub97c \uac00\uc838\uc62c \uc218 \uc788\ub2e4.\n",
      "\n",
      "#### XPath tutorial site\n",
      "\n",
      "- [W3schools](http://www.w3schools.com/XPath/)\n",
      "- [XPath and XSLT with lxml](http://lxml.de/xpathxslt.html)\n",
      "- [Using Chrome Developer Tools](http://stackoverflow.com/questions/3030487/is-there-a-way-to-get-the-xpath-in-google-chrome)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "links = doc.findall('.//a')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 278
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# \uc774 \uac1d\uccb4\ub294 HTML \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ud45c\ud604\ud558\ub294 \uac1d\uccb4\uc77c \ubfd0\n",
      "# URL\uacfc \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc624\ub824\uba74 \uac01 \uc5d8\ub9ac\uba3c\ud2b8\uc5d0 \ub300\ud574 get \uba54\uc11c\ub4dc\ub97c \ud638\ucd9c\ud558\uc5ec URL\uc744 \uc5bb\uace0\n",
      "# text_content \uba54\uc11c\ub4dc\ub97c \uc0ac\uc6a9\ud574\uc11c \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc640\uc57c \ud55c\ub2e4.\n",
      "links[15:20]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 279,
       "text": [
        "[<Element a at 0x10c3e2ad0>,\n",
        " <Element a at 0x10c3e2b30>,\n",
        " <Element a at 0x10c3e2b90>,\n",
        " <Element a at 0x10c3e2bf0>,\n",
        " <Element a at 0x10c3e2c50>]"
       ]
      }
     ],
     "prompt_number": 279
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \uc774 \uac1d\uccb4\ub294 HTML \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ud45c\ud604\ud558\ub294 \uac1d\uccb4\uc77c \ubfd0\n",
      "\n",
      "- \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ud45c\ud604\ud558\ub294 \uac1d\uccb4\ub77c\uace0 \uc0dd\uac01\ud558\uc790. \uc548 \uadf8\ub7ec\uba74 \uc0bd\uc9c8\ud558\uac8c \ub41c\ub2e4!\n",
      "- URL\uacfc \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc624\ub824\uba74 \uac01 \uc5d8\ub9ac\uba3c\ud2b8\uc5d0 \ub300\ud574 get \uba54\uc11c\ub4dc\ub97c \ud638\ucd9c\ud558\uc5ec URL\uc744 \uc5bb\uace0, text_content \uba54\uc11c\ub4dc\ub97c \uc774\uc6a9\ud574\uc11c \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc640\uc57c \ud55c\ub2e4."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "lnk = links[28]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 280
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "lnk"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 281,
       "text": [
        "<Element a at 0x10c3e2fb0>"
       ]
      }
     ],
     "prompt_number": 281
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "lnk.get('href')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 282,
       "text": [
        "'https://edit.yahoo.com/mc2.0/eval_profile?.intl=us&.lang=en-US&.done=http://finance.yahoo.com/q/op%3fs=AAPL%2bOptions&.src=quote&.intl=us&.lang=en-US'"
       ]
      }
     ],
     "prompt_number": 282
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "lnk.text_content()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 283,
       "text": [
        "'Account Info'"
       ]
      }
     ],
     "prompt_number": 283
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### [list comprehensions in Python](http://www.pythonforbeginners.com/lists/list-comprehensions-in-python/)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Collect the href attribute of every <a> element in the parsed document.\n",
      "# A dedicated loop name is used because on Python 2 a list comprehension rebinds\n",
      "# its loop variable in the enclosing scope, which would overwrite the `lnk`\n",
      "# selected from links[28] in the cells above.\n",
      "urls = [anchor.get('href') for anchor in doc.findall('.//a')]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 284
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "len(urls)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 285,
       "text": [
        "1239"
       ]
      }
     ],
     "prompt_number": 285
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "urls[-3:-1]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 286,
       "text": [
        "['http://www.capitaliq.com', 'http://www.csidata.com']"
       ]
      }
     ],
     "prompt_number": 286
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "urls[-10:]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 287,
       "text": [
        "['/q?s=AAPL140517P00780000',\n",
        " '/q/op?s=AAPL&k=800.000000',\n",
        " '/q?s=AAPL140517P00800000',\n",
        " '/q/op?s=AAPL&k=805.000000',\n",
        " '/q?s=AAPL140517P00805000',\n",
        " '/q/os?s=AAPL&m=2014-05-30',\n",
        " 'http://help.yahoo.com/l/us/yahoo/finance/quotes/fitadelay.html',\n",
        " 'http://www.capitaliq.com',\n",
        " 'http://www.csidata.com',\n",
        " 'http://www.morningstar.com/']"
       ]
      }
     ],
     "prompt_number": 287
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \ucc3e\uace0\uc790 \ud558\ub294 table \uc77c\uc77c\uc774 \ud655\uc778\n",
      "\n",
      "- \uba87\uba87 \uc6f9\uc0ac\uc774\ud2b8\ub294 table\ub9c8\ub2e4 id \uc18d\uc131\uc744 \uc918\uc11c \uc27d\uac8c \ud560 \uc218 \uc788\uc9c0\ub9cc \uc5b4\ub514 \uc138\uc0c1 \uc77c\uc774 \uc27d\uac8c \ub418\ub294\uac8c \uc788\ub098? \ub178\uac00\ub2e4 \ud574\uc57c\uc9c0.."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "tables = doc.findall('.//table')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 288
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "tables"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 289,
       "text": [
        "[<Element table at 0x100769470>,\n",
        " <Element table at 0x1007694d0>,\n",
        " <Element table at 0x100769530>,\n",
        " <Element table at 0x100769590>,\n",
        " <Element table at 0x1007695f0>,\n",
        " <Element table at 0x100769650>,\n",
        " <Element table at 0x1007696b0>,\n",
        " <Element table at 0x100769710>,\n",
        " <Element table at 0x100769770>,\n",
        " <Element table at 0x1007697d0>,\n",
        " <Element table at 0x100769830>,\n",
        " <Element table at 0x100769890>,\n",
        " <Element table at 0x100769ad0>,\n",
        " <Element table at 0x100769b30>,\n",
        " <Element table at 0x100769b90>,\n",
        " <Element table at 0x100769bf0>,\n",
        " <Element table at 0x100769c50>,\n",
        " <Element table at 0x100769cb0>]"
       ]
      }
     ],
     "prompt_number": 289
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "calls = tables[9]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 290
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "calls"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 291,
       "text": [
        "<Element table at 0x1007697d0>"
       ]
      }
     ],
     "prompt_number": 291
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "puts = tables[13]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 292
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rows = calls.findall('.//tr')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 293
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rows"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 294,
       "text": [
        "[<Element tr at 0x100769d10>,\n",
        " <Element tr at 0x100769d70>,\n",
        " <Element tr at 0x100769e30>,\n",
        " <Element tr at 0x100769e90>,\n",
        " <Element tr at 0x100769ef0>,\n",
        " <Element tr at 0x100769f50>,\n",
        " <Element tr at 0x100769fb0>,\n",
        " <Element tr at 0x100790b90>,\n",
        " <Element tr at 0x100790ad0>,\n",
        " <Element tr at 0x100790050>,\n",
        " <Element tr at 0x1007900b0>,\n",
        " <Element tr at 0x100790110>,\n",
        " <Element tr at 0x100790170>,\n",
        " <Element tr at 0x1007901d0>,\n",
        " <Element tr at 0x100790230>,\n",
        " <Element tr at 0x100790290>,\n",
        " <Element tr at 0x1007902f0>,\n",
        " <Element tr at 0x100790350>,\n",
        " <Element tr at 0x1007903b0>,\n",
        " <Element tr at 0x100790410>,\n",
        " <Element tr at 0x100790470>,\n",
        " <Element tr at 0x1007904d0>,\n",
        " <Element tr at 0x100790530>,\n",
        " <Element tr at 0x100790590>,\n",
        " <Element tr at 0x1007905f0>,\n",
        " <Element tr at 0x100790650>,\n",
        " <Element tr at 0x1007906b0>,\n",
        " <Element tr at 0x100790710>,\n",
        " <Element tr at 0x100790770>,\n",
        " <Element tr at 0x1007907d0>,\n",
        " <Element tr at 0x100790830>,\n",
        " <Element tr at 0x100790890>,\n",
        " <Element tr at 0x1007908f0>,\n",
        " <Element tr at 0x100790950>,\n",
        " <Element tr at 0x1007909b0>,\n",
        " <Element tr at 0x100790a10>,\n",
        " <Element tr at 0x100790a70>,\n",
        " <Element tr at 0x10c3f3d10>,\n",
        " <Element tr at 0x10c3f3d70>,\n",
        " <Element tr at 0x10c3f3dd0>,\n",
        " <Element tr at 0x10c3f3e30>,\n",
        " <Element tr at 0x10c3f3e90>,\n",
        " <Element tr at 0x10c3f3ef0>,\n",
        " <Element tr at 0x10c3f3f50>,\n",
        " <Element tr at 0x10c3f3fb0>,\n",
        " <Element tr at 0x10078d050>,\n",
        " <Element tr at 0x10078d0b0>,\n",
        " <Element tr at 0x10078d110>,\n",
        " <Element tr at 0x10078d170>,\n",
        " <Element tr at 0x10078d1d0>,\n",
        " <Element tr at 0x10078d230>,\n",
        " <Element tr at 0x10078d290>,\n",
        " <Element tr at 0x10078d2f0>,\n",
        " <Element tr at 0x10078d350>,\n",
        " <Element tr at 0x10078d3b0>,\n",
        " <Element tr at 0x10078d410>,\n",
        " <Element tr at 0x10078d470>,\n",
        " <Element tr at 0x10078d4d0>,\n",
        " <Element tr at 0x10078d530>,\n",
        " <Element tr at 0x10078d590>,\n",
        " <Element tr at 0x10078d5f0>,\n",
        " <Element tr at 0x10078d650>,\n",
        " <Element tr at 0x10078d6b0>,\n",
        " <Element tr at 0x10078d710>,\n",
        " <Element tr at 0x10078d770>,\n",
        " <Element tr at 0x10078d7d0>,\n",
        " <Element tr at 0x10078d830>,\n",
        " <Element tr at 0x10078d890>,\n",
        " <Element tr at 0x10078d8f0>,\n",
        " <Element tr at 0x10078d950>,\n",
        " <Element tr at 0x10078d9b0>,\n",
        " <Element tr at 0x10078da10>,\n",
        " <Element tr at 0x10078da70>,\n",
        " <Element tr at 0x10078dad0>,\n",
        " <Element tr at 0x10078db30>,\n",
        " <Element tr at 0x10078db90>,\n",
        " <Element tr at 0x10078dbf0>,\n",
        " <Element tr at 0x10078dc50>,\n",
        " <Element tr at 0x10078dcb0>,\n",
        " <Element tr at 0x10078dd10>,\n",
        " <Element tr at 0x10078dd70>,\n",
        " <Element tr at 0x10078ddd0>,\n",
        " <Element tr at 0x10078de30>,\n",
        " <Element tr at 0x10078de90>,\n",
        " <Element tr at 0x10078def0>,\n",
        " <Element tr at 0x10078df50>,\n",
        " <Element tr at 0x10078dfb0>,\n",
        " <Element tr at 0x100796050>,\n",
        " <Element tr at 0x1007960b0>,\n",
        " <Element tr at 0x100796110>,\n",
        " <Element tr at 0x100796170>,\n",
        " <Element tr at 0x1007961d0>,\n",
        " <Element tr at 0x100796230>,\n",
        " <Element tr at 0x100796290>,\n",
        " <Element tr at 0x1007962f0>,\n",
        " <Element tr at 0x100796350>,\n",
        " <Element tr at 0x1007963b0>,\n",
        " <Element tr at 0x100796410>,\n",
        " <Element tr at 0x100796470>,\n",
        " <Element tr at 0x1007964d0>,\n",
        " <Element tr at 0x100796530>,\n",
        " <Element tr at 0x100796590>,\n",
        " <Element tr at 0x1007965f0>,\n",
        " <Element tr at 0x100796650>,\n",
        " <Element tr at 0x1007966b0>,\n",
        " <Element tr at 0x100796710>,\n",
        " <Element tr at 0x100796770>,\n",
        " <Element tr at 0x1007967d0>,\n",
        " <Element tr at 0x100796830>,\n",
        " <Element tr at 0x100796890>,\n",
        " <Element tr at 0x1007968f0>,\n",
        " <Element tr at 0x100796950>,\n",
        " <Element tr at 0x1007969b0>,\n",
        " <Element tr at 0x100796a10>,\n",
        " <Element tr at 0x100796a70>,\n",
        " <Element tr at 0x100796ad0>,\n",
        " <Element tr at 0x100796b30>,\n",
        " <Element tr at 0x100796b90>,\n",
        " <Element tr at 0x100796bf0>,\n",
        " <Element tr at 0x100796c50>,\n",
        " <Element tr at 0x100796cb0>,\n",
        " <Element tr at 0x100796d10>,\n",
        " <Element tr at 0x100796d70>,\n",
        " <Element tr at 0x100796dd0>,\n",
        " <Element tr at 0x100796e30>,\n",
        " <Element tr at 0x100796e90>,\n",
        " <Element tr at 0x100796ef0>,\n",
        " <Element tr at 0x100796f50>,\n",
        " <Element tr at 0x100796fb0>,\n",
        " <Element tr at 0x100791050>,\n",
        " <Element tr at 0x1007910b0>,\n",
        " <Element tr at 0x100791110>,\n",
        " <Element tr at 0x100791170>,\n",
        " <Element tr at 0x1007911d0>,\n",
        " <Element tr at 0x100791230>,\n",
        " <Element tr at 0x100791290>,\n",
        " <Element tr at 0x1007912f0>,\n",
        " <Element tr at 0x100791350>,\n",
        " <Element tr at 0x1007913b0>,\n",
        " <Element tr at 0x100791410>,\n",
        " <Element tr at 0x100791470>,\n",
        " <Element tr at 0x1007914d0>,\n",
        " <Element tr at 0x100791530>,\n",
        " <Element tr at 0x100791590>,\n",
        " <Element tr at 0x1007915f0>,\n",
        " <Element tr at 0x100791650>,\n",
        " <Element tr at 0x1007916b0>,\n",
        " <Element tr at 0x100791710>,\n",
        " <Element tr at 0x100791770>,\n",
        " <Element tr at 0x1007917d0>,\n",
        " <Element tr at 0x100791830>,\n",
        " <Element tr at 0x100791890>,\n",
        " <Element tr at 0x1007918f0>,\n",
        " <Element tr at 0x100791950>,\n",
        " <Element tr at 0x1007919b0>,\n",
        " <Element tr at 0x100791a10>,\n",
        " <Element tr at 0x100791a70>,\n",
        " <Element tr at 0x100791ad0>,\n",
        " <Element tr at 0x100791b30>,\n",
        " <Element tr at 0x100791b90>,\n",
        " <Element tr at 0x100791bf0>,\n",
        " <Element tr at 0x100791c50>,\n",
        " <Element tr at 0x100791cb0>,\n",
        " <Element tr at 0x100791d10>,\n",
        " <Element tr at 0x100791d70>,\n",
        " <Element tr at 0x100791dd0>,\n",
        " <Element tr at 0x100791e30>,\n",
        " <Element tr at 0x100791e90>,\n",
        " <Element tr at 0x100791ef0>,\n",
        " <Element tr at 0x100791f50>,\n",
        " <Element tr at 0x100791fb0>,\n",
        " <Element tr at 0x100794050>,\n",
        " <Element tr at 0x1007940b0>,\n",
        " <Element tr at 0x100794110>,\n",
        " <Element tr at 0x100794170>,\n",
        " <Element tr at 0x1007941d0>,\n",
        " <Element tr at 0x100794230>,\n",
        " <Element tr at 0x100794290>,\n",
        " <Element tr at 0x1007942f0>,\n",
        " <Element tr at 0x100794350>,\n",
        " <Element tr at 0x1007943b0>,\n",
        " <Element tr at 0x100794410>,\n",
        " <Element tr at 0x100794470>,\n",
        " <Element tr at 0x1007944d0>,\n",
        " <Element tr at 0x100794530>,\n",
        " <Element tr at 0x100794590>,\n",
        " <Element tr at 0x1007945f0>,\n",
        " <Element tr at 0x100794650>,\n",
        " <Element tr at 0x1007946b0>,\n",
        " <Element tr at 0x100794710>,\n",
        " <Element tr at 0x100794770>,\n",
        " <Element tr at 0x1007947d0>,\n",
        " <Element tr at 0x100794830>,\n",
        " <Element tr at 0x100794890>,\n",
        " <Element tr at 0x1007948f0>,\n",
        " <Element tr at 0x100794950>,\n",
        " <Element tr at 0x1007949b0>,\n",
        " <Element tr at 0x100794a10>,\n",
        " <Element tr at 0x100794a70>,\n",
        " <Element tr at 0x100794ad0>,\n",
        " <Element tr at 0x100794b30>,\n",
        " <Element tr at 0x100794b90>,\n",
        " <Element tr at 0x100794bf0>,\n",
        " <Element tr at 0x100794c50>,\n",
        " <Element tr at 0x100794cb0>,\n",
        " <Element tr at 0x100794d10>,\n",
        " <Element tr at 0x100794d70>,\n",
        " <Element tr at 0x100794dd0>,\n",
        " <Element tr at 0x100794e30>,\n",
        " <Element tr at 0x100794e90>,\n",
        " <Element tr at 0x100794ef0>,\n",
        " <Element tr at 0x100794f50>,\n",
        " <Element tr at 0x100794fb0>,\n",
        " <Element tr at 0x100797050>,\n",
        " <Element tr at 0x1007970b0>,\n",
        " <Element tr at 0x100797110>,\n",
        " <Element tr at 0x100797170>,\n",
        " <Element tr at 0x1007971d0>,\n",
        " <Element tr at 0x100797230>,\n",
        " <Element tr at 0x100797290>,\n",
        " <Element tr at 0x1007972f0>,\n",
        " <Element tr at 0x100797350>,\n",
        " <Element tr at 0x1007973b0>,\n",
        " <Element tr at 0x100797410>,\n",
        " <Element tr at 0x100797470>,\n",
        " <Element tr at 0x1007974d0>,\n",
        " <Element tr at 0x100797530>,\n",
        " <Element tr at 0x100797590>,\n",
        " <Element tr at 0x1007975f0>,\n",
        " <Element tr at 0x100797650>,\n",
        " <Element tr at 0x1007976b0>,\n",
        " <Element tr at 0x100797710>,\n",
        " <Element tr at 0x100797770>,\n",
        " <Element tr at 0x1007977d0>,\n",
        " <Element tr at 0x100797830>,\n",
        " <Element tr at 0x100797890>,\n",
        " <Element tr at 0x1007978f0>,\n",
        " <Element tr at 0x100797950>,\n",
        " <Element tr at 0x1007979b0>,\n",
        " <Element tr at 0x100797a10>,\n",
        " <Element tr at 0x100797a70>,\n",
        " <Element tr at 0x100797ad0>]"
       ]
      }
     ],
     "prompt_number": 294
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### \uc6f9\ud398\uc774\uc9c0 \uad6c\uc870\uac00 \uc548 \ubc14\ub00c\uc5c8\ub124.\n",
      "\n",
      "- \ucc45\uc744 \uc4f4\uac8c 2012\ub144 10\uc6d4 29\uc77c\uc778\ub370 \uc544\uc9c1\uae4c\uc9c0 \uc548 \ubc14\ub00c\ub2e4\ub2c8..."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def _unpack(row, kind='td'):\n",
      "    elts = row.findall('.//%s' % kind)\n",
      "    return [val.text_content() for val in elts]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 295
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "_unpack(rows[0], kind='th')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 296,
       "text": [
        "['Strike', 'Symbol', 'Last', 'Chg', 'Bid', 'Ask', 'Vol', 'Open Int']"
       ]
      }
     ],
     "prompt_number": 296
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "_unpack(rows[1], kind='td')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 297,
       "text": [
        "['330.00',\n",
        " 'AAPL140517C00330000',\n",
        " '263.00',\n",
        " ' 0.00',\n",
        " '255.70',\n",
        " '258.25',\n",
        " '6',\n",
        " '2']"
       ]
      }
     ],
     "prompt_number": 297
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### Yahoo finance HTML structure in Chrome Developer Tools\n",
      "\n",
      "- \ucc98\uc74c \ubcf4\ub294 \uc0ac\ub78c\uc740 \uc798 \uc774\ud574\uac00 \uc548 \ub420 \uac83\uc774\ub2e4.\n",
      "- Yahoo \uc0ac\uc774\ud2b8\uc758 HTML \uad6c\uc870 \uba3c\uc800 \ud30c\uc545\uc744 \ud558\uace0 \uc2dc\uc791\ud558\uc790.\n",
      "- \uc774 \uad6c\uc870\uac00 \uc774\ud574 \uc548\ub418\uba74 2\ubc88 \ubd10\ub77c.\n",
      "\n",
      "<img src=\"files/images/unpack_aapl.png\" />"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \ub2e8\uacc4\ub4e4\uc744 \ud1b5\ud569\ud558\uc5ec \uc6f9\uc5d0\uc11c \uae01\uc5b4\uc628 \ub370\uc774\ud130\ub97c DataFrame\uc73c\ub85c \ubcc0\ud658\n",
      "\n",
      "- \uc22b\uc790 \ub370\uc774\ud130\uc9c0\ub9cc \uc5ec\uc804\ud788 \ubb38\uc790\uc5f4 \ud615\uc2dd\uc73c\ub85c \uc800\uc7a5\ub418\uc5b4 \uc788\uc73c\ubbc0\ub85c \uc801\uc808\ud558\uac8c \ubcc0\ud658\uc744 \ud574\uc918\uc57c \ud558\ub294\ub370 \ubaa8\ub4e0 \ub370\uc774\ud130\uac00 \uc2e4\uc218\ud615\uc740 \uc544\ub2d0 \uac83\uc774\ubbc0\ub85c \uc774 \uc791\uc5c5\uc740 \uc218\ub3d9\uc73c\ub85c \ucc98\ub9ac\n",
      "- \ud558\uc9c0\ub9cc \uc6b4 \uc88b\uaca0\ub3c4 pandas\uc5d0\ub294 TextParser \ud074\ub798\uc2a4\uac00 \uc788\uc5b4 \uc790\ub3d9 \ud615 \ubcc0\ud658\uc744 \uc801\uc808\ud558\uac8c \uc218\ud589\ud574\uc900\ub2e4.\n",
      "- TextParser \ud074\ub798\uc2a4\ub294 read_csv \ud568\uc218\uc640 \ub2e4\ub978 \ud30c\uc2f1 \ud568\uc218\uc5d0\uc11c\ub3c4 \uc0ac\uc6a9"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from pandas.io.parsers import TextParser\n",
      "\n",
      "def parse_options_data(table):\n",
      "    rows = table.findall('.//tr')\n",
      "    # rows[0]\uc740 header\n",
      "    header = _unpack(rows[0], kind='th')\n",
      "    # rows[1:] \ubd80\ud130 \uc2e4\uc81c\uc801\uc778 data\n",
      "    data = [_unpack(r) for r in rows[1:]]\n",
      "    # TextParser\uc5d0 data\ub97c \ub118\uae30\uace0 column\uba85\uc73c\ub85c header\ub97c \uc0ac\uc6a9\n",
      "    return TextParser(data, names=header).get_chunk()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 298
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \ub9c8\uc9c0\ub9c9\uc73c\ub85c lxml \ud14c\uc774\ube14 \uac1d\uccb4\ub97c \uc704\uc5d0\uc11c \uc791\uc131\ud55c \ud30c\uc2f1 \ud568\uc218\ub97c \uc774\uc6a9\ud574\uc11c \ucc98\ub9ac\ud558\uba74 DataFrame \uacb0\uacfc\uac12 \uc5bb\uc744 \uc218 \uc788\ub2e4"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### [\uc635\uc158(\uae08\uc735) - wiki kr](http://ko.wikipedia.org/wiki/%EC%98%B5%EC%85%98_(%EA%B8%88%EC%9C%B5)\n",
      "\n",
      "- \uae08\uc735 \ub370\uc774\ud130\ub97c \ubd84\uc11d\ud558\ub294 \uac83\uc774\uae30 \ub54c\ubb38\uc5d0 \uae08\uc735 \ud30c\ud2b8\uc5d0 \ub300\ud55c \ub3c4\uba54\uc778 \uc9c0\uc2dd\uc774 \uc788\uc5b4\uc57c \ud55c\ub2e4. \ub0b4\uac00 \ubd84\uc11d\ud558\ub824\ub294 \ub370\uc774\ud130\uac00 \uc5b4\ub5a0\ud55c \uc5ed\ud560\uc744 \ud558\ub294\uc9c0 \ubaa8\ub974\uba74 \ub9d0\uc9f1 \ud669!\n",
      "- \uc635\uc158(option)\uc740 \ud30c\uc0dd \uc0c1\ud488\uc758 \uc77c\uc885\uc774\uba70, \ubbf8\ub9ac \uacb0\uc815\ub41c \uae30\uac04 \uc548\uc5d0 \ud2b9\uc815 \uc0c1\ud488\uc744 \uc815\ud574\uc9c4 \uac00\uaca9\uc73c\ub85c \uc0ac\uace0 \ud314 \uc218 \uc788\ub294 \uad8c\ub9ac\ub97c \ub9d0\ud55c\ub2e4. \n",
      "- call option: \ud2b9\uc815 \uae08\uc735 \uc0c1\ud488\uc744 \uc815\ud574\uc9c4 \uac00\uaca9\uc5d0 \ub9e4\uc785\ud560 \uc218 \uc788\ub294 \uad8c\ub9ac\ub97c \uac00\uc9c4 \ub9e4\uc785 \uc635\uc158(call option). \uc2dc\uc7a5\uc5d0\uc11c \ub0b4\uac00 \uc5bc\ub9c8 \uc904\ud14c\ub2c8 \ud314\uc544\ub77c \ud558\ub294 \ud615\uc2dd\uc774\ub77c call \uc774\ub77c \ubd80\ub974\ub294\ub4ef\n",
      "- put option: \ub9e4\ub3c4\ud560 \uc218 \uc788\ub294 \uad8c\ub9ac\ub97c \uac00\uc9c4 \ub9e4\ub3c4 \uc635\uc158(put option)\uc73c\ub85c \ub098\ub25c\ub2e4. put. \ubc00\ub2e4. \uc2dc\uc7a5\uc5d0 \ub0b4\uac00 \uc5bc\ub9c8\uc5d0 \ud314\uaca0\ub2e4\uace0 \ubbfc\ub2e4. \ub77c\uace0 \uc0dd\uac01\ud558\uba74 \ud3b8\ud560\ub4ef"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# call option data\n",
      "call_data = parse_options_data(calls)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 299
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# put option data\n",
      "put_data = parse_options_data(puts)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 300
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "call_data[:10]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>Strike</th>\n",
        "      <th>Symbol</th>\n",
        "      <th>Last</th>\n",
        "      <th>Chg</th>\n",
        "      <th>Bid</th>\n",
        "      <th>Ask</th>\n",
        "      <th>Vol</th>\n",
        "      <th>Open Int</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 330</td>\n",
        "      <td>  AAPL140517C00330000</td>\n",
        "      <td> 263.00</td>\n",
        "      <td> 0.00</td>\n",
        "      <td> 255.70</td>\n",
        "      <td> 258.25</td>\n",
        "      <td>   6</td>\n",
        "      <td>  2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 400</td>\n",
        "      <td>  AAPL140517C00400000</td>\n",
        "      <td> 193.00</td>\n",
        "      <td> 0.00</td>\n",
        "      <td> 186.40</td>\n",
        "      <td> 187.95</td>\n",
        "      <td>   1</td>\n",
        "      <td>  9</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 410</td>\n",
        "      <td>  AAPL140517C00410000</td>\n",
        "      <td> 181.30</td>\n",
        "      <td> 0.00</td>\n",
        "      <td> 175.70</td>\n",
        "      <td> 178.10</td>\n",
        "      <td>  68</td>\n",
        "      <td> 10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 420</td>\n",
        "      <td>  AAPL140517C00420000</td>\n",
        "      <td> 170.80</td>\n",
        "      <td> 0.00</td>\n",
        "      <td> 165.85</td>\n",
        "      <td> 168.65</td>\n",
        "      <td> 124</td>\n",
        "      <td>  1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 430</td>\n",
        "      <td>  AAPL140517C00430000</td>\n",
        "      <td> 160.75</td>\n",
        "      <td> 0.00</td>\n",
        "      <td> 155.85</td>\n",
        "      <td> 158.05</td>\n",
        "      <td> 376</td>\n",
        "      <td>  2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td> 440</td>\n",
        "      <td>  AAPL140517C00440000</td>\n",
        "      <td> 152.87</td>\n",
        "      <td> 0.00</td>\n",
        "      <td> 146.35</td>\n",
        "      <td> 147.90</td>\n",
        "      <td>   1</td>\n",
        "      <td>  1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td> 445</td>\n",
        "      <td>  AAPL140517C00445000</td>\n",
        "      <td> 145.55</td>\n",
        "      <td> 0.00</td>\n",
        "      <td> 140.95</td>\n",
        "      <td> 143.25</td>\n",
        "      <td> 106</td>\n",
        "      <td>  1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td> 450</td>\n",
        "      <td>  AAPL140517C00450000</td>\n",
        "      <td> 137.00</td>\n",
        "      <td> 5.00</td>\n",
        "      <td> 136.35</td>\n",
        "      <td> 137.90</td>\n",
        "      <td>   1</td>\n",
        "      <td> 48</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td> 450</td>\n",
        "      <td> AAPL7140517C00450000</td>\n",
        "      <td> 137.86</td>\n",
        "      <td> 2.39</td>\n",
        "      <td> 135.05</td>\n",
        "      <td> 138.80</td>\n",
        "      <td>   1</td>\n",
        "      <td>  1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td> 455</td>\n",
        "      <td>  AAPL140517C00455000</td>\n",
        "      <td> 138.00</td>\n",
        "      <td> 0.00</td>\n",
        "      <td> 131.35</td>\n",
        "      <td> 132.90</td>\n",
        "      <td>   2</td>\n",
        "      <td>  2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 301,
       "text": [
        "   Strike                Symbol    Last   Chg     Bid     Ask  Vol Open Int\n",
        "0     330   AAPL140517C00330000  263.00  0.00  255.70  258.25    6        2\n",
        "1     400   AAPL140517C00400000  193.00  0.00  186.40  187.95    1        9\n",
        "2     410   AAPL140517C00410000  181.30  0.00  175.70  178.10   68       10\n",
        "3     420   AAPL140517C00420000  170.80  0.00  165.85  168.65  124        1\n",
        "4     430   AAPL140517C00430000  160.75  0.00  155.85  158.05  376        2\n",
        "5     440   AAPL140517C00440000  152.87  0.00  146.35  147.90    1        1\n",
        "6     445   AAPL140517C00445000  145.55  0.00  140.95  143.25  106        1\n",
        "7     450   AAPL140517C00450000  137.00  5.00  136.35  137.90    1       48\n",
        "8     450  AAPL7140517C00450000  137.86  2.39  135.05  138.80    1        1\n",
        "9     455   AAPL140517C00455000  138.00  0.00  131.35  132.90    2        2"
       ]
      }
     ],
     "prompt_number": 301
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "put_data[:10]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>Strike</th>\n",
        "      <th>Symbol</th>\n",
        "      <th>Last</th>\n",
        "      <th>Chg</th>\n",
        "      <th>Bid</th>\n",
        "      <th>Ask</th>\n",
        "      <th>Vol</th>\n",
        "      <th>Open Int</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 280</td>\n",
        "      <td> AAPL140517P00280000</td>\n",
        "      <td> 0.05</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.04</td>\n",
        "      <td>   2</td>\n",
        "      <td>   6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 290</td>\n",
        "      <td> AAPL140517P00290000</td>\n",
        "      <td> 0.02</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.01</td>\n",
        "      <td>  11</td>\n",
        "      <td>  11</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 295</td>\n",
        "      <td> AAPL140517P00295000</td>\n",
        "      <td> 0.01</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.04</td>\n",
        "      <td>   3</td>\n",
        "      <td>   8</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 300</td>\n",
        "      <td> AAPL140517P00300000</td>\n",
        "      <td> 0.05</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.01</td>\n",
        "      <td>   1</td>\n",
        "      <td>  23</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 305</td>\n",
        "      <td> AAPL140517P00305000</td>\n",
        "      <td> 0.05</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.01</td>\n",
        "      <td>  10</td>\n",
        "      <td>  20</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td> 310</td>\n",
        "      <td> AAPL140517P00310000</td>\n",
        "      <td> 0.10</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.04</td>\n",
        "      <td>   0</td>\n",
        "      <td>   1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td> 315</td>\n",
        "      <td> AAPL140517P00315000</td>\n",
        "      <td> 0.12</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.04</td>\n",
        "      <td>   0</td>\n",
        "      <td>   1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td> 320</td>\n",
        "      <td> AAPL140517P00320000</td>\n",
        "      <td> 0.02</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.04</td>\n",
        "      <td>   1</td>\n",
        "      <td>   7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td> 325</td>\n",
        "      <td> AAPL140517P00325000</td>\n",
        "      <td> 0.05</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.01</td>\n",
        "      <td> 185</td>\n",
        "      <td> 342</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td> 330</td>\n",
        "      <td> AAPL140517P00330000</td>\n",
        "      <td> 0.02</td>\n",
        "      <td> 0</td>\n",
        "      <td> N/A</td>\n",
        "      <td> 0.04</td>\n",
        "      <td>   5</td>\n",
        "      <td>   5</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 302,
       "text": [
        "   Strike               Symbol  Last  Chg  Bid   Ask  Vol Open Int\n",
        "0     280  AAPL140517P00280000  0.05    0  N/A  0.04    2        6\n",
        "1     290  AAPL140517P00290000  0.02    0  N/A  0.01   11       11\n",
        "2     295  AAPL140517P00295000  0.01    0  N/A  0.04    3        8\n",
        "3     300  AAPL140517P00300000  0.05    0  N/A  0.01    1       23\n",
        "4     305  AAPL140517P00305000  0.05    0  N/A  0.01   10       20\n",
        "5     310  AAPL140517P00310000  0.10    0  N/A  0.04    0        1\n",
        "6     315  AAPL140517P00315000  0.12    0  N/A  0.04    0        1\n",
        "7     320  AAPL140517P00320000  0.02    0  N/A  0.04    1        7\n",
        "8     325  AAPL140517P00325000  0.05    0  N/A  0.01  185      342\n",
        "9     330  AAPL140517P00330000  0.02    0  N/A  0.04    5        5"
       ]
      }
     ],
     "prompt_number": 302
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### lxml.objectify \uc774\uc6a9\ud574 XML \ud30c\uc2f1\ud558\uae30\n",
      "\n",
      "- [XML(eXtensible Markup Language)](http://en.wikipedia.org/wiki/Xml)\uc740 \uacc4\uce35\uc801 \uad6c\uc870\uc640 \uba54\ud0c0\ub370\uc774\ud130\ub97c \ud3ec\ud568\ud558\ub294 \uc911\ucca9\ub41c \ub370\uc774\ud130 \uad6c\uc870\ub97c \uc9c0\uc6d0\ud558\ub294 \ub610 \ub2e4\ub978 \uc720\uba85\ud55c \ub370\uc774\ud130 \ud615\uc2dd\uc774\ub2e4. \uc9c0\uae08 \uc774 \ucc45\ub3c4 \uc2e4\uc81c\ub85c\ub294 XML \ubb38\uc11c\ub85c \uc791\uc131\n",
      "- \ub274\uc695 MTA(Metropolitan Transportation Authority)\ub294 \ubc84\uc2a4\uc640 \uc804\ucca0 \uc6b4\uc601\uc5d0 \uad00\ud55c \uc5ec\ub7ec \uac00\uc9c0 [\ub370\uc774\ud130 \uacf5\uac1c](http://www.mta.info/developers/download.html)\n",
      "- \uc0b4\ud3b4\ubcfc \uac83\uc740 \uc5ec\ub7ec XML \ud30c\uc77c\ub85c \uc81c\uacf5\ub418\ub294 \uc2e4\uc801 \uc790\ub8cc\n",
      "- \uc804\ucca0\uacfc \ubc84\uc2a4 \uc6b4\uc601\uc740 \ub9e4\uc6d4 \uc544\ub798\uc640 \ube44\uc2b7\ud55c \ub0b4\uc6a9\uc758 \uac01\uac01 \ub2e4\ub978 \ud30c\uc77c(Metro-North Railroad\uc758 \uacbd\uc6b0 Preformance_MNR.xml \uac19\uc740)\ub85c \uc81c\uacf5\n",
      "- Preformance_MNR.xml \uc774 \ud30c\uc77c\uc744 \ubabb \ucc3e\uc558\ub294\ub370 \uc784\uc8fc\uc601\ub2d8\uaed8\uc11c \ucc3e\uc544\uc11c \uc62c\ub824\uc8fc\uc168\uc2b5\ub2c8\ub2e4. github ch06 \ub514\ub809\ud1a0\ub9ac\uc5d0 \uc800\uc7a5\ud574 \ub450\uc5c8\uc2b5\ub2c8\ub2e4. \uadf8\ub7f0\ub370 \uacc4\uc18d \uc624\ub958\uac00 \ub098\ub294\uad70\uc694. < PERFORMANCE > \uc774 \ud0dc\uadf8 \ub54c\ubb38\uc778\ub4ef\ud55c\ub370. \ud574\uacb0\ucc45 \uc54c\uace0 \uacc4\uc2e0\ubd84\uc740 \uc880 \uc54c\ub824\uc8fc\uc138\uc694. \uacc4\uc18d \uc624\ub958\uac00 \ubc1c\uc0dd\ud558\uc5ec \ucc45\uc5d0 \ub098\uc640\uc788\ub294 \uac83\uc73c\ub85c \ud14c\uc2a4\ud2b8 \ud558\uc600\uc2b5\ub2c8\ub2e4."
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### Performance_MNR.xml\uc744 \uc5b4\ub5bb\uac8c \ubc1b\ub294\uc9c0 \ubaa8\ub974\uaca0\ub2e4.\n",
      "\n",
      "- \uc18c\uc2a4\ud30c\uc77c\uc744 6\uc7a5\uc744 \ub4a4\uc838\ubd10\ub3c4 \uc5c6\uace0\n",
      "- \ud648\ud398\uc774\uc9c0\uc5d0\ub294 \uc544\ub9c8 XML \uad6c\uc870\uac00 \ubc14\ub010\ub4ef \uc2f6\ub2e4.\n",
      "- \uadf8\ub798\uc11c \ucd5c\ud6c4\uc758 \uc218\ub2e8\uc73c\ub85c \uc5bc\ub9c8 \uc548\ub418\uc11c \uadf8\ub0e5 \ub0b4\uac00 \uc77c\uc77c\uc774 \ucce4\ub2e4.\n",
      "- XML\uc740 \uc5c4\uaca9\ud558\uae30 \ub54c\ubb38\uc5d0 \ud558\ub098\ub77c\ub3c4 \uc624\ud0c0\uac00 \uc788\uc73c\uba74 \uc624\ub958 \ubc1c\uc0dd\ud558\ubbc0\ub85c \uc8fc\uc758!"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%writefile ch06/Performance_MNR.xml\n",
      "<INDICATOR>\n",
      "    <INDICATOR_SEQ>373889</INDICATOR_SEQ>\n",
      "    <PARENT_SEQ></PARENT_SEQ>\n",
      "    <AGENCY_NAME>MEtro-North Railroad</AGENCY_NAME>\n",
      "    <INDICATOR_NAME>Escalator Availability</INDICATOR_NAME>\n",
      "    <DESCRIPTION>Percent of the time that escalators are operational systemwide. The availability rate is based on physical observations performed the morning of regular business days only. This is a new indicator the agency began reporting in 2009.</DESCRIPTION>\n",
      "    <PERIOD_YEAR>2011</PERIOD_YEAR>\n",
      "    <PERIOD_MONTH>12</PERIOD_MONTH>\n",
      "    <CATEGORY>Service Indicators</CATEGORY>\n",
      "    <FREQUENCY>M</FREQUENCY>\n",
      "    <DESIRED_CHANGE>U</DESIRED_CHANGE>\n",
      "    <INDICATOR_UNIT>%</INDICATOR_UNIT>\n",
      "    <DECIMAL_PLACES>1</DECIMAL_PLACES>\n",
      "    <YTD_TARGET>97.00</YTD_TARGET>\n",
      "    <YTD_ACTUAL></YTD_ACTUAL>\n",
      "    <MONTHLY_TARGET>97.00</MONTHLY_TARGET>\n",
      "    <MONTHLY_ACTUAL></MONTHLY_ACTUAL>\n",
      "</INDICATOR>"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Overwriting ch06/Performance_MNR.xml\n"
       ]
      }
     ],
     "prompt_number": 303
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from lxml import objectify\n",
      "import urllib2\n",
      "\n",
      "path = 'Performance_MNR.xml'\n",
      "# online_path = 'http://www.mta.info/developers/data/lirr/lirr_gtfs.xml'\n",
      "\n",
      "# data = urllib2.urlopen(online_path).read()\n",
      "# f = open(path, 'w')\n",
      "# f.write(data)\n",
      "# f.close()\n",
      "\n",
      "# objectify\ub97c \uc774\uc6a9\ud574\uc11c \ud30c\uc77c \ud30c\uc2f1\n",
      "parsed = objectify.parse(open(path))\n",
      "root = parsed.getroot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "IOError",
       "evalue": "[Errno 2] No such file or directory: 'Performance_MNR.xml'",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mIOError\u001b[0m                                   Traceback (most recent call last)",
        "\u001b[0;32m<ipython-input-304-a212823090af>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     12\u001b[0m \u001b[0;31m# objectify\ub97c \uc774\uc6a9\ud574\uc11c \ud30c\uc77c \ud30c\uc2f1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mparsed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobjectify\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     14\u001b[0m \u001b[0mroot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsed\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetroot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
        "\u001b[0;31mIOError\u001b[0m: [Errno 2] No such file or directory: 'Performance_MNR.xml'"
       ]
      }
     ],
     "prompt_number": 304
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data = []"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "skip_fields = ['PARENT_SEQ', 'INDICATOR_SEQ',\n",
      "               'DESIRED_CHANGE', 'DECIMAL_PLACES']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### root.INDICATOR\ub97c \ud1b5\ud574 \ubaa8\ub4e0 <INDICATOR> \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ub044\uc9d1\uc5b4 \ub0bc \uc218 \uc788\ub2e4\n",
      "\n",
      "- \uac01\uac01\uc758 \ud56d\ubaa9\uc5d0 \ub300\ud574 \uba87\uba87 \ud0dc\uadf8\ub294 \uc81c\uc678\ud558\uace0 \ud0dc\uadf8 \uc774\ub984(YTD_ACTUAL \uac19\uc740)\uc744 \ud0a4 \uac12\uc73c\ub85c \ud558\ub294 \uc0ac\uc804\uc744 \ub9cc\ub4e4\uc5b4 \ub0c4"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Collect one dict per <INDICATOR> record, keyed by child tag name.\n",
      "# With lxml.objectify, iterating `root` itself only yields the root element,\n",
      "# so the records have to be reached through root.INDICATOR.\n",
      "data = []  # rebuilt here so that re-running this cell never appends duplicates\n",
      "for elt in root.INDICATOR:\n",
      "    el_data = {}\n",
      "    # iterchildren() replaces the deprecated getchildren()\n",
      "    for child in elt.iterchildren():\n",
      "        if child.tag in skip_fields:\n",
      "            continue  # tags listed in skip_fields are left out of the record\n",
      "        el_data[child.tag] = child.pyval\n",
      "    data.append(el_data)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sanity check to compare against `data`: print every tag/value pair of each\n",
      "# <INDICATOR> record (root.INDICATOR, because iterating `root` yields only the root).\n",
      "for elt in root.INDICATOR:\n",
      "    for child in elt.iterchildren():\n",
      "        print child.tag, child.pyval"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### 5\uc7a5\uc5d0\uc11c \uc0ac\uc804 \ud615\uc2dd\uc740 DataFrame\uc73c\ub85c \ubcc0\ud658\ud560 \uc218 \uc788\ub2e4\ub294 \uac83 \ucc38\uace0"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "perf = DataFrame(data)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "perf"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "6.2 \uc774\uc9c4 \ub370\uc774\ud130 \ud615\uc2dd"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### \ub370\uc774\ud130\ub97c \ud6a8\uc728\uc801\uc73c\ub85c \uc800\uc7a5\ud558\ub294 \uac00\uc7a5 \uc190\uc26c\uc6b4 \ubc29\ubc95\n",
      "\n",
      "- \ud30c\uc774\uc36c\uc5d0 \uae30\ubcf8\uc73c\ub85c \ub0b4\uc7a5\ub418\uc5b4 \uc788\ub294 pickle \uc9c1\ub82c\ud654\ub97c \ud1b5\ud574 \ub370\uc774\ud130\ub97c \uc774\uc9c4 \ud615\uc2dd\uc73c\ub85c \uc800\uc7a5\ud558\ub294 \uac83\n",
      "- \ud3b8\ub9ac\ud558\uac8c\ub3c4 pandas\uc758 \uac1d\uccb4\ub294 \ubaa8\ub450 pickle\uc744 \uc774\uc6a9\ud574\uc11c \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\ub294 save(pandas 0.12+: to_pickle) \uba54\uc11c\ub4dc \uc788\uc74c"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame = pd.read_csv('ch06/ex1.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# DataFrame.save is deprecated since pandas 0.12; to_pickle writes the same pickle file.\n",
      "frame.to_pickle('ch06/frame_pickle')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# pd.load is deprecated since pandas 0.12; read_pickle is its replacement.\n",
      "# Only unpickle files you created yourself: unpickling can execute arbitrary code.\n",
      "pd.read_pickle('ch06/frame_pickle')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### pickle \uc0ac\uc6a9\uc2dc \uc8fc\uc758\uc0ac\ud56d\n",
      "\n",
      "- pickle\uc740 \uc624\ub798 \ubcf4\uad00\ud560 \ud544\uc694\uac00 \uc5c6\ub294 \ub370\uc774\ud130\uc5d0\ub9cc \ucd94\ucc9c\n",
      "- \uc624\ub79c \uc2dc\uac04\uc774 \uc9c0\ub098\ub3c4 \uc548\uc815\uc801\uc73c\ub85c \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud560 \uac70\ub77c\uace0 \ubcf4\uc7a5 \ubabb\ud568"
     ]
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "6.2.1 HDF5 \ud615\uc2dd \uc0ac\uc6a9\ud558\uae30"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "- \ub514\uc2a4\ud06c\uc5d0 \uc774\uc9c4 \ud615\uc2dd\uc73c\ub85c \uc800\uc7a5\ub41c \ub300\uc6a9\ub7c9\uc758 \uacfc\ud559 \uc790\ub8cc\ub97c \ud6a8\uc728\uc801\uc73c\ub85c \uc77d\uace0 \uc4f8 \uc218 \uc788\ub294 \ub2e4\uc591\ud55c \ub3c4\uad6c \uc874\uc7ac\n",
      "- \uc0b0\uc5c5 \uae30\uc900\uc5d0 \ub9de\ub294 \uc778\uae30 \ub77c\uc774\ube0c\ub7ec\ub9ac\uc911 \ud558\ub098\uac00 HDF5(Hierarchical Data Format), \uacc4\uce35\uc801 \ub370\uc774\ud130 \ud615\uc2dd\n",
      "- \ub0b4\ubd80\uc801\uc73c\ub85c \ud30c\uc77c \uc2dc\uc2a4\ud15c \uac19\uc740 \ub178\ub4dc \uad6c\uc870\n",
      "- \uc5ec\ub7ec \uac1c\uc758 \ub370\uc774\ud130\uc14b\uc744 \uc800\uc7a5\ud558\uace0 \ubd80\uac00 \uc815\ubcf4 \uae30\ub85d \uac00\ub2a5\n",
      "- \ub2e4\uc591\ud55c \uc555\ucd95 \uae30\uc220\uc744 \uc0ac\uc6a9\ud574\uc11c on-the-fly(\uc2e4\uc2dc\uac04) \uc555\ucd95 \uc9c0\uc6d0\n",
      "- \ubc18\ubcf5\ub418\ub294 \ud328\ud134\uc744 \uac00\uc9c4 \ub370\uc774\ud130 \uc880 \ub354 \ud6a8\uacfc\uc801 \uc800\uc7a5\n",
      "- \uba54\ubaa8\ub9ac\uc5d0 \ubaa8\ub450 \uc801\uc7ac\ud560 \uc218 \uc5c6\ub294 \uc5c4\uccad\ub098\uac8c \ud070 \ub370\uc774\ud130\ub97c \uc544\uc8fc \ud070 \ubc30\uc5f4\uc5d0\uc11c \ud544\uc694\ud55c \ub9cc\ud07c\uc758 \uc791\uc740 \ubd80\ubd84\ub4e4\ub9cc \ud6a8\uacfc\uc801\uc73c\ub85c \uc77d\uace0 \uc4f8 \uc218 \uc788\ub294 \ud6cc\ub96d\ud55c \uc120\ud0dd\n",
      "\n",
      "- PyTables: HDF5\ub97c \ucd94\uc0c1\ud654\ud558\uc5ec \uc5ec\ub7ec\uac00\uc9c0 \uc720\uc5f0\ud55c \ub370\uc774\ud130 \ucee8\ud14c\uc774\ub108\uc640 \ud14c\uc774\ube14 \uc0c9\uc778, \uc9c8\uc758 \uae30\ub2a5 \uadf8\ub9ac\uace0 \uc678\ubd80 \uba54\ubaa8\ub9ac \uc5f0\uc0b0(out-of-core, external memory algorithm) \uc9c0\uc6d0\n",
      "- h5py: \uc9c1\uc811\uc801\uc774\uc9c0\ub9cc \uace0\uc218\uc900\uc758 HDF5 API\uc5d0 \ub300\ud55c \uc778\ud130\ud398\uc774\uc2a4 \uc81c\uacf5\n",
      "- pandas\ub294 PyTables\ub97c \uc774\uc6a9\ud55c HDFStore\ub77c\ub294 \uac00\ubcbc\uc6b4 \uc0ac\uc804 \ud074\ub798\uc2a4\ub97c \ud1b5\ud574 pandas \uac1d\uccb4\ub97c \uc800\uc7a5"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### PyTables\ub97c \uc124\uce58 \uc548\ud588\uc744 \ub54c \ub098\uc624\ub294 \uc0c1\ud669\n",
      "\n",
      "- \uadf8\ub7fc \uc77c\ubc18\uc801\uc778 \uc0ac\uc6a9\uc790\ub4e4\uc740 numexpr \uc778\uac00? \uc774 \ub77c\uc774\ube0c\ub7ec\ub9ac\uac00 \ub610 \uc5c6\ub2e4\uace0 \ud55c\ub2e4. \uadf8\ub7fc \uc774 \ub77c\uc774\ube0c\ub7ec\ub9ac\ub97c \uba3c\uc800 \uc124\uce58\ud558\uace0 tables\ub97c \uc124\uce58\ud55c\ub2e4.\n",
      "\n",
      "- \uc124\uce58 \ubc29\ubc951: sudo easy_install tables(\uc624\ub958 \ud655\uc778)\n",
      "- \uc124\uce58 \ubc29\ubc952: sudo easy_install numexpr(\ud655\uc2e4\ud558\uc9c0 \uc54a\uc74c. \ud655\uc778 \uc694\ud568)\n",
      "- \uc124\uce58 \ubc29\ubc953: sudo easy_install tables"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "\n",
      "    ---------------------------------------------------------------------------\n",
      "    Exception                                 Traceback (most recent call last)\n",
      "    <ipython-input-135-35f4287dfd8f> in <module>()\n",
      "        ----> 1 store = pd.HDFStore('ch06/mydata.h5')\n",
      "\n",
      "    /Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/io/pytables.pyc in __init__(self, path, mode, complevel, complib, fletcher32, **kwargs)\n",
      "        343             import tables as _\n",
      "        344         except ImportError:  # pragma: no cover\n",
      "    --> 345             raise Exception('HDFStore requires PyTables')\n",
      "        346 \n",
      "        347         self._path = path\n",
      "\n",
      "    Exception: HDFStore requires PyTables"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### HDFStore\ub97c \uc0ac\uc6a9\ud558\uae30 \uc704\ud574\uc11c\ub294 PyTables \ub77c\uc774\ube0c\ub7ec\ub9ac\ub97c \uc124\uce58\ud574\uc57c \ud55c\ub2e4.\n",
      "\n",
      "- \uc774\ub7f0\uac8c \uc788\ub2e4\ub294 \uac83\uc744 indexing \ud574\ub450\uace0 \ub098\uc911\uc5d0 \ud544\uc694\ud558\uba74 \ucc3e\uc544\ubcf4\uc790!"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Requires PyTables (`tables`); install it first, then run this cell.\n",
      "\n",
      "store = pd.HDFStore('ch06/mydata.h5')\n",
      "try:\n",
      "    store['obj1'] = frame\n",
      "    store['obj1_col'] = frame['a']\n",
      "    print(store)  # a bare `store` in the middle of a cell is never displayed\n",
      "    obj1 = store['obj1']  # read back before the file is closed\n",
      "finally:\n",
      "    store.close()  # release the HDF5 file handle even if a write fails\n",
      "obj1"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \ub370\uc774\ud130 \ubd84\uc11d \ubb38\uc81c\n",
      "\n",
      "- \ub300\ubd80\ubd84 CPU\ubcf4\ub2e4\ub294 IO \uc131\ub2a5\uc5d0 \uc758\uc874\uc801\n",
      "- **HDF5\ub294 \ub370\uc774\ud130\ubca0\uc774\uc2a4\uac00 \uc544\ub2c8\ub2e4.** HDF5\ub294 **\ud55c \ubc88\ub9cc \uae30\ub85d**\ud558\uace0 **\uc5ec\ub7ec \ubc88 \uc790\uc8fc \uc77d\uc5b4\uc57c** \ud558\ub294 \ub370\uc774\ud130\uc5d0 \ucd5c\uc801\ud654\ub418\uc5b4 \uc788\ub2e4. \ub370\uc774\ud130\ub294 \uc544\ubb34\ub54c\ub098 \ud30c\uc77c\uc5d0 \ucd94\uac00\ud560 \uc218 \uc788\uc9c0\ub9cc \ub9cc\uc57d \uc5ec\ub7ec \uacf3\uc5d0\uc11c \ub3d9\uc2dc\uc5d0 \ud30c\uc77c\uc744 \uc4f4\ub2e4\uba74 \ud30c\uc77c\uc774 \uae68\uc9c0\ub294 \ubb38\uc81c\uac00 \ubc1c\uc0dd\ud560 \uc218 \uc788\ub2e4."
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### 6.2.2 \ub9c8\uc774\ud06c\ub85c\uc18c\ud504\ud2b8 \uc5d1\uc140 \ud30c\uc77c\uc5d0\uc11c \ub370\uc774\ud130 \uc77d\uc5b4\uc624\uae30\n",
      "\n",
      "- pandas\ub294 ExcelFile \ud074\ub798\uc2a4\ub97c \ud1b5\ud574 \ub9c8\uc774\ud06c\ub85c\uc18c\ud504\ud2b8 \uc5d1\uc140 2003 \uc774\ud6c4 \ubc84\uc804\uc758 \ub370\uc774\ud130\ub97c \uc77d\uae30 \uac00\ub2a5\n",
      "- \ub0b4\ubd80\uc801\uc73c\ub85c ExcelFile \ud074\ub798\uc2a4\ub294 xlrd, openpyxl \ud328\ud0a4\uc9c0 \ud65c\uc6a9. \uc0ac\uc6a9\ud558\uae30 \uc804\uc5d0 \uba3c\uc800 \uc124\uce58\n",
      "\n",
      "#### Excel \uc791\uc5c5\uc2dc \uc8fc\uc758\uc0ac\ud56d\n",
      "\n",
      "- **\ud604\uc5c5**\uc5d0\uc11c\ub294 Excel \uc5d0\uc11c \uc624\ub958\uac00 \ub9ce\uc774 \ubc1c\uc0dd\ud558\uae30 \ub54c\ubb38\uc5d0 **csv\ub85c \ubcc0\uacbd \ud6c4\uc5d0 \uc791\uc5c5**\ud55c\ub2e4\uace0 \ud55c\ub2e4. \uadf8\ub7ec\ub2c8 \uad73\uc774 excel \ud30c\uc77c\ub85c \ud558\uc5ec Error\ub97c \ub9cc\ub4e4\uc9c0 \ub9d0\uace0 \uc548\uc804\ud558\uac8c csv \ud30c\uc77c\ub85c \ubcc0\uacbd \ud6c4\uc5d0 \uc0ac\uc6a9\ud558\uc790.\n",
      "- \uc774\ub7f0\uac83\uc774 \uc788\ub2e4 \uc815\ub3c4\ub9cc \uc54c\uc544\ub193\uc790.\n",
      "- \uc5ec\uae30\ub294 \uadf8\ub0e5 skip \ud558\uaca0\ub2e4."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "xls_file = pd.ExcelFile('data.xls')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "table = xls_file.parse('Sheet1')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "6.3 HTML, \uc6f9 API\uc640 \ud568\uaed8 \uc0ac\uc6a9\ud558\uae30"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### [Requests: HTTP for Humans](http://docs.python-requests.org/en/latest/#)\n",
      "\n",
      "- urllib2\ubcf4\ub2e4 \ub354 \uac04\ud3b8\n",
      "- [similar code, without Requests.](https://gist.github.com/973705)\n",
      "\n",
      "### \ud2b8\uc704\ud130 \ubd84\uc11d \ubb38\uc81c \ubc1c\uc0dd\n",
      "\n",
      "- \ud2b8\uc704\ud130\ub294 \ucc98\uc74c\uc5d0 \uc544\ubb34\ub7f0 \uc778\uc99d\uc5c6\uc774 API\ub97c \uc81c\uacf5\ud558\ub2e4 \ub9dd \uacfc\ubd80\ud558\uac00 \ubc1c\uc0dd\ud558\uc790 OAuth \uc778\uc99d \ubc29\uc2dd\uc73c\ub85c \ubcc0\uacbd\n",
      "- [OAuth2 - API \uc778\uc99d\uc744 \uc704\ud55c \ub9cc\ub2a5\ub3c4\uad6c\uc0c1\uc790](http://www.slideshare.net/tebica/oauth2-api)\n",
      "- [Twitter API](https://dev.twitter.com/docs/auth/sign-twitter)\n",
      "- \uc9c0\uae08\uc740 \uc778\uc99d \ubb38\uc81c \ub54c\ubb38\uc5d0 pass \ud558\uaca0\uc74c"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import requests"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "url = 'http://search.twitter.com/search.json?q=python%20pandas'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "resp = requests.get(url)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "resp"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 305,
       "text": [
        "<Response [401]>"
       ]
      }
     ],
     "prompt_number": 305
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "resp.text"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 306,
       "text": [
        "u'{\"errors\":[{\"message\":\"The Twitter REST API v1 is no longer active. Please migrate to API v1.1. https://dev.twitter.com/docs/api/1.1/overview.\",\"code\":64}]}'"
       ]
      }
     ],
     "prompt_number": 306
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#### \uc870\uae08\ub9cc \uc218\uace0\ub97c \ud558\uba74 \ud3c9\ubc94\ud55c \uc6f9 API\ub97c \uc704\ud55c \uace0\uc218\uc900\uc758 \uc778\ud130\ud398\uc774\uc2a4\ub97c \ub9cc\ub4e4\uc5b4\uc11c DataFrame\uc5d0 \uc800\uc7a5\ud558\uace0 \uc27d\uac8c \ubd84\uc11d \uc791\uc5c5 \uc218\ud589 \uac00\ub2a5"
     ]
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "6.4 \ub370\uc774\ud130\ubca0\uc774\uc2a4\uc640 \ud568\uaed8 \uc0ac\uc6a9\ud558\uae30"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "- \ub300\ubd80\ubd84\uc758 \uc560\ud50c\ub9ac\ucf00\uc774\uc158\uc740 \ud14d\uc2a4\ud2b8 \ud30c\uc77c\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc624\uc9c0 \uc54a\uc74c\n",
      "- \uc65c\ub0d0\ud558\uba74 \ub300\uc6a9\ub7c9\uc758 \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\uae30\uc5d0 \ud14d\uc2a4\ud2b8 \ud30c\uc77c\uc740 \uc0c1\ub2f9\ud788 \ube44\ud6a8\uc728\uc801\n",
      "- SQL \uae30\ubc18\uc758 \uad00\uacc4\ud615 \ub370\uc774\ud130 \ubca0\uc774\uc2a4\uac00 \ub9ce\uc774 \uc0ac\uc6a9\ub428. MySQL \uac19\uc740\n",
      "- \ucd5c\uadfc \uc720\uba85\ud574\uc9c4 NoSQL\uc774\ub77c \ubd88\ub9ac\ub294 \ube44 SQL \uae30\ubc18\uc758 \ub370\uc774\ud130\ubca0\uc774\uc2a4\ub3c4 \ub9ce\uc774 \uc0ac\uc6a9\ub428\n",
      "- SQL vs NoSQL\uc740 \uc11c\ub85c \uac01\uac01\uc758 \uc7a5\uc810\uc744 \ud30c\uc545\ud558\uace0 \uc790\uc2e0\uc758 \uc5c5\ubb34\uc5d0 \ub9de\ub294 DB\ub97c \uc120\ud0dd\n",
      "- SQL\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc640\uc11c DataFrame\uc5d0 \uc800\uc7a5\ud558\ub294 \ubc29\ubc95\uc740 \uaf64 \uc9c1\uad00\uc801"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import sqlite3\n",
      "\n",
      "query = \"\"\"\n",
      "CREATE TABLE test\n",
      "(a VARCHAR(20), b VARCHAR(20),\n",
      "c REAL, d INTEGER\n",
      ");\"\"\"\n",
      "\n",
      "con = sqlite3.connect(':memory:')\n",
      "con.execute(query)\n",
      "con.commit()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 307
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data = [('Atlanta', 'Georgia', 1.25, 6),\n",
      "        ('Tallahassee', 'Florida', 2.6, 3),\n",
      "        ('Sacramento', 'California', 1.7, 5)]\n",
      "\n",
      "stmt = \"INSERT INTO test VALUES(?, ?, ?, ?)\"\n",
      "\n",
      "con.executemany(stmt, data)\n",
      "con.commit()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 308
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "\ub300\ubd80\ubd84\uc758 \ud30c\uc774\uc36c SQL \ub4dc\ub77c\uc774\ubc84(PyODBC, psycopg2, MySQLdb, pymssql \ub4f1)\ub294 \ud14c\uc774\ube14\uc5d0 \ub300\ud574 select \ucffc\ub9ac\ub97c \uc218\ud589\ud558\uba74 \ud29c\ud50c \ub9ac\uc2a4\ud2b8\ub97c \ubc18\ud658\ud55c\ub2e4"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor = con.execute('select * from test')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 309
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rows = cursor.fetchall()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 310
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rows"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 311,
       "text": [
        "[(u'Atlanta', u'Georgia', 1.25, 6),\n",
        " (u'Tallahassee', u'Florida', 2.6, 3),\n",
        " (u'Sacramento', u'California', 1.7, 5)]"
       ]
      }
     ],
     "prompt_number": 311
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "\ubc18\ud658\ub41c \ud29c\ud50c \ub9ac\uc2a4\ud2b8\ub97c DataFrame \uc0dd\uc131\uc790\uc5d0 \ubc14\ub85c \uc804\ub2ec\ud574\ub3c4 \ub418\uc9c0\ub9cc \uce7c\ub7fc\uc758 \uc774\ub984\uc744 \uc9c0\uc815\ud574\uc8fc\uba74 \ub354 \ud3b8\ud558\ub2e4. [cursor\uc758 description](https://code.google.com/p/pyodbc/wiki/Cursor) \uc18d\uc131\uc744 \ud65c\uc6a9\ud558\uc790.\n",
      "\n",
      "This read-only attribute is a list of 7-item tuples, each containing (name, type_code, display_size, internal_size, precision, scale, null_ok). pyodbc only provides values for name, type_code, internal_size, and null_ok. The other values are set to None.\n",
      "\n",
      "This attribute will be None for operations that do not return rows or if one of the execute methods has not been called.\n",
      "\n",
      "The type_code member is the class type used to create the Python objects when reading rows. For example, a varchar column's type will be str.\n",
      "\n",
      "#### cursor.description \uc774 \uc544\uc9c1\ub3c4 \ubb54\uc9c0 \uc798 \ubaa8\ub974\uaca0\ub2e4. \uc65c a, b, c, d\ub85c \uc815\ud574\uc9c4 \uac83\ub3c4 \uc798 \ubaa8\ub974\uaca0\uace0.."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor.description?"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 312
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "    Type:        tuple\n",
      "    String form: (('a', None, None, None, None, None, None), ('b', None, None, None, None, None, None), ('c', None, None, None, None, None, None), ('d', None, None, None, None, None, None))\n",
      "    Length:      4\n",
      "    Docstring:\n",
      "    tuple() -> empty tuple\n",
      "    tuple(iterable) -> tuple initialized from iterable's items\n",
      "\n",
      "    If the argument is a tuple, the return value is the same object."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor.description"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 313,
       "text": [
        "(('a', None, None, None, None, None, None),\n",
        " ('b', None, None, None, None, None, None),\n",
        " ('c', None, None, None, None, None, None),\n",
        " ('d', None, None, None, None, None, None))"
       ]
      }
     ],
     "prompt_number": 313
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Every entry of cursor.description is a 7-item tuple whose first item is the column name.\n",
      "tuple(col[0] for col in cursor.description)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 314,
       "text": [
        "('a', 'b', 'c', 'd')"
       ]
      }
     ],
     "prompt_number": 314
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Label the columns with the names reported by the cursor (item 0 of each description entry).\n",
      "DataFrame(rows, columns=[col[0] for col in cursor.description])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>     Atlanta</td>\n",
        "      <td>    Georgia</td>\n",
        "      <td> 1.25</td>\n",
        "      <td> 6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Tallahassee</td>\n",
        "      <td>    Florida</td>\n",
        "      <td> 2.60</td>\n",
        "      <td> 3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>  Sacramento</td>\n",
        "      <td> California</td>\n",
        "      <td> 1.70</td>\n",
        "      <td> 5</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 315,
       "text": [
        "             a           b     c  d\n",
        "0      Atlanta     Georgia  1.25  6\n",
        "1  Tallahassee     Florida  2.60  3\n",
        "2   Sacramento  California  1.70  5"
       ]
      }
     ],
     "prompt_number": 315
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Item 1 of each description entry is None here (see the cursor.description output),\n",
      "# so using it as the column labels gives empty headers.\n",
      "DataFrame(rows, columns=[col[1] for col in cursor.description])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>None</th>\n",
        "      <th>None</th>\n",
        "      <th>None</th>\n",
        "      <th>None</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>     Atlanta</td>\n",
        "      <td>    Georgia</td>\n",
        "      <td> 1.25</td>\n",
        "      <td> 6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Tallahassee</td>\n",
        "      <td>    Florida</td>\n",
        "      <td> 2.60</td>\n",
        "      <td> 3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>  Sacramento</td>\n",
        "      <td> California</td>\n",
        "      <td> 1.70</td>\n",
        "      <td> 5</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 316,
       "text": [
        "           NaN         NaN   NaN  NaN\n",
        "0      Atlanta     Georgia  1.25    6\n",
        "1  Tallahassee     Florida  2.60    3\n",
        "2   Sacramento  California  1.70    5"
       ]
      }
     ],
     "prompt_number": 316
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# readonly attribute \ub780\ub2e4.\n",
      "# \ub09c cursor.description\uc744 \uc218\uc815\ud574\uc11c \ub0b4\uac00 \uc6d0\ud558\ub294 \uceec\ub7fc\uac12\uc73c\ub85c \ubcc0\uacbd\ud558\ub824\uace0 \ud588\ub294\ub370..\n",
      "# \uadf8\ub7fc \uc5b4\ub5bb\uac8c \ubcc0\uacbd\uc744 \ud574\uc57c\ud558\uc9c0?\n",
      "cursor.description = '1'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "TypeError",
       "evalue": "readonly attribute",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
        "\u001b[0;32m<ipython-input-317-4bf5306f81d9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;31m# \ub09c cursor.description\uc744 \uc218\uc815\ud574\uc11c \ub0b4\uac00 \uc6d0\ud558\ub294 \uceec\ub7fc\uac12\uc73c\ub85c \ubcc0\uacbd\ud558\ub824\uace0 \ud588\ub294\ub370..\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;31m# \uadf8\ub7fc \uc5b4\ub5bb\uac8c \ubcc0\uacbd\uc744 \ud574\uc57c\ud558\uc9c0?\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mcursor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdescription\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'1'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[0;31mTypeError\u001b[0m: readonly attribute"
       ]
      }
     ],
     "prompt_number": 317
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# \uadf8\ub0e5 column\uc5d0 \ub0b4\uac00 \uc4f0\uace0 \uc2f6\uc740\uac83 \uc815\ud558\uba74 \ub418\ub124..\n",
      "DataFrame(rows, columns=['country', 'state', 'grade1', 'grade2'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>country</th>\n",
        "      <th>state</th>\n",
        "      <th>grade1</th>\n",
        "      <th>grade2</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>     Atlanta</td>\n",
        "      <td>    Georgia</td>\n",
        "      <td> 1.25</td>\n",
        "      <td> 6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Tallahassee</td>\n",
        "      <td>    Florida</td>\n",
        "      <td> 2.60</td>\n",
        "      <td> 3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>  Sacramento</td>\n",
        "      <td> California</td>\n",
        "      <td> 1.70</td>\n",
        "      <td> 5</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 318,
       "text": [
        "       country       state  grade1  grade2\n",
        "0      Atlanta     Georgia    1.25       6\n",
        "1  Tallahassee     Florida    2.60       3\n",
        "2   Sacramento  California    1.70       5"
       ]
      }
     ],
     "prompt_number": 318
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "- \ub370\uc774\ud130\ubca0\uc774\uc2a4\uc5d0 \ucffc\ub9ac\ub97c \ubcf4\ub0b4\ub824\uace0 \ub9e4\ubc88 \uc774\ub807\uac8c \ud558\ub294\uac74 \ub108\ubb34 \uadc0\ucc2e\uc74c\n",
      "- pandas.io.sql \ubaa8\ub4c8\uc758 read_frame \ud568\uc218\ub97c \uc774\uc6a9\ud558\uba74 \uac04\ud3b8\ud558\uac8c \ud574\uacb0\n",
      "- \uadf8\ub0e5 select \ucffc\ub9ac\ubb38\uacfc \ub370\uc774\ud130 \ubca0\uc774\uc2a4 \uc5f0\uacb0 \uac1d\uccb4(con)\ub9cc \ub118\uae30\uba74 \ub41c\ub2e4"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas.io.sql as sql"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 319
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "sql.read_frame('select * from test', con)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>a</th>\n",
        "      <th>b</th>\n",
        "      <th>c</th>\n",
        "      <th>d</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>     Atlanta</td>\n",
        "      <td>    Georgia</td>\n",
        "      <td> 1.25</td>\n",
        "      <td> 6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Tallahassee</td>\n",
        "      <td>    Florida</td>\n",
        "      <td> 2.60</td>\n",
        "      <td> 3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>  Sacramento</td>\n",
        "      <td> California</td>\n",
        "      <td> 1.70</td>\n",
        "      <td> 5</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 320,
       "text": [
        "             a           b     c  d\n",
        "0      Atlanta     Georgia  1.25  6\n",
        "1  Tallahassee     Florida  2.60  3\n",
        "2   Sacramento  California  1.70  5"
       ]
      }
     ],
     "prompt_number": 320
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "6.4.1 MongoDB\uc5d0 \ub370\uc774\ud130 \uc800\uc7a5\ud558\uace0 \ubd88\ub7ec\uc624\uae30"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "- NoSQL \ub370\uc774\ud130\ubca0\uc774\uc2a4\ub294 \ub9e4\uc6b0 \ub2e4\uc591\ud55c \ud615\ud0dc\n",
      "- \ubc84\ud074\ub9acDB\ub098 \ub3c4\ucfc4\uce90\ube44\ub2db \uac19\uc740 \uac83\uc740 \uc0ac\uc804\ucc98\ub7fc \ud0a4-\uac12\uc744 \uc800\uc7a5\ud558\uae30\ub3c4 \ud558\uace0\n",
      "- \ub610 \ub2e4\ub978 \uac83\uc740 \uae30\ubcf8 \uc800\uc7a5\uc18c\ub294 \uc0ac\uc804 \uac19\uc740 \uac1d\uccb4\ub97c \uc0ac\uc6a9\ud558\uba70 \ubb38\uc11c \uae30\ubc18\uc73c\ub85c \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\uae30\ub3c4 \ud55c\ub2e4.\n",
      "- \uc774 \ucc45\uc5d0\uc11c\ub294 [MongoDB](http://mongodb.org)\ub97c \uc608\uc81c\ub85c \uc120\ud0dd\n",
      "- MongoDB \uc11c\ubc84\ub97c \ub85c\uceec\uc5d0 \uc124\uce58\ud558\uace0 \uacf5\uc2dd \ub4dc\ub77c\uc774\ubc84\uc778 pymongo\ub97c \uc0ac\uc6a9\ud574\uc11c \uae30\ubcf8 \ud3ec\ud2b8 \ubc88\ud638\ub85c \uc5f0\uacb0\n",
      "- \ud604\uc7ac \ud544\uc790\uc758 \ucef4\ud4e8\ud130\uc5d0\ub294 \uc544\uc9c1 \uc124\uce58\ud558\uc9c0 \uc54a\uc74c. \uc774\ub7f0 \ud615\uc2dd\uc73c\ub85c \ud55c\ub2e4\ub294 \ub290\ub08c\ub9cc \uac00\uc9c0\uc790"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "-------\n",
      "\n",
      "### \uc2a4\ud130\ub514\ud560 \ub54c \ub098\uc654\ub358 \uc8fc\uc758\uc0ac\ud56d\ub4e4\n",
      "\n",
      "- \ud604\uc5c5\uc5d0\uc11c\ub294 read_csv\ub098 read_table\uc744 \ub9ce\uc774 \uc4f4\ub2e4.\n",
      "- \uc5d1\uc140 -> csv\ub85c \ubcc0\ud658 \ud6c4 \uc0ac\uc6a9\ud55c\ub2e4.\n",
      "- \uc65c\ub0d0\ud558\uba74 csv\uc5d0 \ud6e8\uc52c \uac15\ub825\ud55c \ub77c\uc774\ube0c\ub7ec\ub9ac\ub4e4\uc774 \uc788\ub294\ub370 \uad73\uc774 \uc5d1\uc140\uc744 \ud65c\uc6a9\ud560 \ud544\uc694\uac00 \uc5c6\ub2e4. \ub610\ud55c \uc5d1\uc140\uc740 \ubcf4\uc774\uc9c0 \uc54a\ub294 \uac12\ub4e4\uc774 \ub9ce\uc544 \uc624\ub958\uc758 \uc8fc\ubc94\n",
      "- sep\uc5d0 Regular Expression\uc744 \uc0ac\uc6a9\ud560 \uc218 \uc788\ub2e4. \n",
      "- [\uc5d1\uc140\uc5d0\uc11c XML \ub370\uc774\ud130 \uac00\uc838\uc624\uae30](http://office.microsoft.com/ko-kr/excel-help/HP010206405.aspx#BMimport_an_xml_file_as_an_xml_list_wit)\n",
      "\n",
      "------"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## [About my IPython in github](https://github.com/re4lfl0w/ipython)"
     ]
    }
   ],
   "metadata": {}
  }
 ]
}