{ "metadata": { "name": "", "signature": "sha256:3b7c0da066835df2d372d5f622fbdba586eba2c48933fc5c4abb3750e5aaa882" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "data = \"\"\"type,price,quantity\n", "Apples\n", "Cortland,0.30,24\n", "Red Delicious,0.40,24\n", "Oranges\n", "Navel,0.50,12\n", "\"\"\"" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "import petl.interactive as etl\n", "from petl.io import StringSource" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "tbl1 = (etl\n", " .fromcsv(StringSource(data))\n", ")\n", "tbl1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
typepricequantity
Apples
Cortland0.3024
Red Delicious0.4024
Oranges
Navel0.5012
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "+-----------------+---------+------------+\n", "| 'type' | 'price' | 'quantity' |\n", "+=================+=========+============+\n", "| 'Apples' | | |\n", "+-----------------+---------+------------+\n", "| 'Cortland' | '0.30' | '24' |\n", "+-----------------+---------+------------+\n", "| 'Red Delicious' | '0.40' | '24' |\n", "+-----------------+---------+------------+\n", "| 'Oranges' | | |\n", "+-----------------+---------+------------+\n", "| 'Navel' | '0.50' | '12' |\n", "+-----------------+---------+------------+" ] } ], "prompt_number": 3 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Option 1 - using existing petl functions" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def make_room_for_category(row):\n", " if len(row) == 1:\n", " return (row[0], 'X', 'X', 'X')\n", " else:\n", " return (None,) + tuple(row)\n", "\n", "tbl2 = tbl1.rowmap(make_room_for_category, fields=['category', 'type', 'price', 'quantity'])\n", "tbl2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
categorytypepricequantity
ApplesXXX
NoneCortland0.3024
NoneRed Delicious0.4024
OrangesXXX
NoneNavel0.5012
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ "+------------+-----------------+---------+------------+\n", "| 'category' | 'type' | 'price' | 'quantity' |\n", "+============+=================+=========+============+\n", "| 'Apples' | 'X' | 'X' | 'X' |\n", "+------------+-----------------+---------+------------+\n", "| None | 'Cortland' | '0.30' | '24' |\n", "+------------+-----------------+---------+------------+\n", "| None | 'Red Delicious' | '0.40' | '24' |\n", "+------------+-----------------+---------+------------+\n", "| 'Oranges' | 'X' | 'X' | 'X' |\n", "+------------+-----------------+---------+------------+\n", "| None | 'Navel' | '0.50' | '12' |\n", "+------------+-----------------+---------+------------+" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "tbl3 = tbl2.filldown()\n", "tbl3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
categorytypepricequantity
ApplesXXX
ApplesCortland0.3024
ApplesRed Delicious0.4024
OrangesXXX
OrangesNavel0.5012
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "+------------+-----------------+---------+------------+\n", "| 'category' | 'type' | 'price' | 'quantity' |\n", "+============+=================+=========+============+\n", "| 'Apples' | 'X' | 'X' | 'X' |\n", "+------------+-----------------+---------+------------+\n", "| 'Apples' | 'Cortland' | '0.30' | '24' |\n", "+------------+-----------------+---------+------------+\n", "| 'Apples' | 'Red Delicious' | '0.40' | '24' |\n", "+------------+-----------------+---------+------------+\n", "| 'Oranges' | 'X' | 'X' | 'X' |\n", "+------------+-----------------+---------+------------+\n", "| 'Oranges' | 'Navel' | '0.50' | '12' |\n", "+------------+-----------------+---------+------------+" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "tbl4 = tbl3.ne('type', 'X')\n", "tbl4" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
categorytypepricequantity
ApplesCortland0.3024
ApplesRed Delicious0.4024
OrangesNavel0.5012
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ "+------------+-----------------+---------+------------+\n", "| 'category' | 'type' | 'price' | 'quantity' |\n", "+============+=================+=========+============+\n", "| 'Apples' | 'Cortland' | '0.30' | '24' |\n", "+------------+-----------------+---------+------------+\n", "| 'Apples' | 'Red Delicious' | '0.40' | '24' |\n", "+------------+-----------------+---------+------------+\n", "| 'Oranges' | 'Navel' | '0.50' | '12' |\n", "+------------+-----------------+---------+------------+" ] } ], "prompt_number": 6 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Option 2 - custom transformer" ] }, { "cell_type": "code", "collapsed": false, "input": [ "class CustomTransformer(object):\n", " \n", " def __init__(self, source):\n", " self.source = source\n", " \n", " def __iter__(self):\n", " it = iter(self.source)\n", " \n", " # construct new header\n", " source_fields = it.next()\n", " out_fields = ('category',) + tuple(source_fields)\n", " yield out_fields\n", " \n", " # transform data\n", " current_category = None\n", " for row in it:\n", " if len(row) == 1:\n", " current_category = row[0]\n", " else:\n", " yield (current_category,) + tuple(row)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "tbl5 = CustomTransformer(tbl1)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "# just so it formats nicely as HTML in the notebook...\n", "etl.wrap(tbl5)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
categorytypepricequantity
ApplesCortland0.3024
ApplesRed Delicious0.4024
OrangesNavel0.5012
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "+------------+-----------------+---------+------------+\n", "| 'category' | 'type' | 'price' | 'quantity' |\n", "+============+=================+=========+============+\n", "| 'Apples' | 'Cortland' | '0.30' | '24' |\n", "+------------+-----------------+---------+------------+\n", "| 'Apples' | 'Red Delicious' | '0.40' | '24' |\n", "+------------+-----------------+---------+------------+\n", "| 'Oranges' | 'Navel' | '0.50' | '12' |\n", "+------------+-----------------+---------+------------+" ] } ], "prompt_number": 9 } ], "metadata": {} } ] }