{
"metadata": {
"name": "",
"signature": "sha256:3b7c0da066835df2d372d5f622fbdba586eba2c48933fc5c4abb3750e5aaa882"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = \"\"\"type,price,quantity\n",
"Apples\n",
"Cortland,0.30,24\n",
"Red Delicious,0.40,24\n",
"Oranges\n",
"Navel,0.50,12\n",
"\"\"\""
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import petl.interactive as etl\n",
"from petl.io import StringSource"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tbl1 = (etl\n",
" .fromcsv(StringSource(data))\n",
")\n",
"tbl1"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"
\r\n",
"\r\n",
"\r\n",
"type | \r\n",
"price | \r\n",
"quantity | \r\n",
"
\r\n",
"\r\n",
"\r\n",
"\r\n",
"Apples | \r\n",
"
\r\n",
"\r\n",
"Cortland | \r\n",
"0.30 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"Red Delicious | \r\n",
"0.40 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"Oranges | \r\n",
"
\r\n",
"\r\n",
"Navel | \r\n",
"0.50 | \r\n",
"12 | \r\n",
"
\r\n",
"\r\n",
"
\r\n"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"+-----------------+---------+------------+\n",
"| 'type' | 'price' | 'quantity' |\n",
"+=================+=========+============+\n",
"| 'Apples' | | |\n",
"+-----------------+---------+------------+\n",
"| 'Cortland' | '0.30' | '24' |\n",
"+-----------------+---------+------------+\n",
"| 'Red Delicious' | '0.40' | '24' |\n",
"+-----------------+---------+------------+\n",
"| 'Oranges' | | |\n",
"+-----------------+---------+------------+\n",
"| 'Navel' | '0.50' | '12' |\n",
"+-----------------+---------+------------+"
]
}
],
"prompt_number": 3
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Option 1 - using existing petl functions"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def make_room_for_category(row):\n",
" if len(row) == 1:\n",
" return (row[0], 'X', 'X', 'X')\n",
" else:\n",
" return (None,) + tuple(row)\n",
"\n",
"tbl2 = tbl1.rowmap(make_room_for_category, fields=['category', 'type', 'price', 'quantity'])\n",
"tbl2"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\r\n",
"\r\n",
"\r\n",
"category | \r\n",
"type | \r\n",
"price | \r\n",
"quantity | \r\n",
"
\r\n",
"\r\n",
"\r\n",
"\r\n",
"Apples | \r\n",
"X | \r\n",
"X | \r\n",
"X | \r\n",
"
\r\n",
"\r\n",
"None | \r\n",
"Cortland | \r\n",
"0.30 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"None | \r\n",
"Red Delicious | \r\n",
"0.40 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"Oranges | \r\n",
"X | \r\n",
"X | \r\n",
"X | \r\n",
"
\r\n",
"\r\n",
"None | \r\n",
"Navel | \r\n",
"0.50 | \r\n",
"12 | \r\n",
"
\r\n",
"\r\n",
"
\r\n"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"+------------+-----------------+---------+------------+\n",
"| 'category' | 'type' | 'price' | 'quantity' |\n",
"+============+=================+=========+============+\n",
"| 'Apples' | 'X' | 'X' | 'X' |\n",
"+------------+-----------------+---------+------------+\n",
"| None | 'Cortland' | '0.30' | '24' |\n",
"+------------+-----------------+---------+------------+\n",
"| None | 'Red Delicious' | '0.40' | '24' |\n",
"+------------+-----------------+---------+------------+\n",
"| 'Oranges' | 'X' | 'X' | 'X' |\n",
"+------------+-----------------+---------+------------+\n",
"| None | 'Navel' | '0.50' | '12' |\n",
"+------------+-----------------+---------+------------+"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tbl3 = tbl2.filldown()\n",
"tbl3"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\r\n",
"\r\n",
"\r\n",
"category | \r\n",
"type | \r\n",
"price | \r\n",
"quantity | \r\n",
"
\r\n",
"\r\n",
"\r\n",
"\r\n",
"Apples | \r\n",
"X | \r\n",
"X | \r\n",
"X | \r\n",
"
\r\n",
"\r\n",
"Apples | \r\n",
"Cortland | \r\n",
"0.30 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"Apples | \r\n",
"Red Delicious | \r\n",
"0.40 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"Oranges | \r\n",
"X | \r\n",
"X | \r\n",
"X | \r\n",
"
\r\n",
"\r\n",
"Oranges | \r\n",
"Navel | \r\n",
"0.50 | \r\n",
"12 | \r\n",
"
\r\n",
"\r\n",
"
\r\n"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"+------------+-----------------+---------+------------+\n",
"| 'category' | 'type' | 'price' | 'quantity' |\n",
"+============+=================+=========+============+\n",
"| 'Apples' | 'X' | 'X' | 'X' |\n",
"+------------+-----------------+---------+------------+\n",
"| 'Apples' | 'Cortland' | '0.30' | '24' |\n",
"+------------+-----------------+---------+------------+\n",
"| 'Apples' | 'Red Delicious' | '0.40' | '24' |\n",
"+------------+-----------------+---------+------------+\n",
"| 'Oranges' | 'X' | 'X' | 'X' |\n",
"+------------+-----------------+---------+------------+\n",
"| 'Oranges' | 'Navel' | '0.50' | '12' |\n",
"+------------+-----------------+---------+------------+"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tbl4 = tbl3.ne('type', 'X')\n",
"tbl4"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\r\n",
"\r\n",
"\r\n",
"category | \r\n",
"type | \r\n",
"price | \r\n",
"quantity | \r\n",
"
\r\n",
"\r\n",
"\r\n",
"\r\n",
"Apples | \r\n",
"Cortland | \r\n",
"0.30 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"Apples | \r\n",
"Red Delicious | \r\n",
"0.40 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"Oranges | \r\n",
"Navel | \r\n",
"0.50 | \r\n",
"12 | \r\n",
"
\r\n",
"\r\n",
"
\r\n"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"+------------+-----------------+---------+------------+\n",
"| 'category' | 'type' | 'price' | 'quantity' |\n",
"+============+=================+=========+============+\n",
"| 'Apples' | 'Cortland' | '0.30' | '24' |\n",
"+------------+-----------------+---------+------------+\n",
"| 'Apples' | 'Red Delicious' | '0.40' | '24' |\n",
"+------------+-----------------+---------+------------+\n",
"| 'Oranges' | 'Navel' | '0.50' | '12' |\n",
"+------------+-----------------+---------+------------+"
]
}
],
"prompt_number": 6
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Option 2 - custom transformer"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class CustomTransformer(object):\n",
" \n",
" def __init__(self, source):\n",
" self.source = source\n",
" \n",
" def __iter__(self):\n",
" it = iter(self.source)\n",
" \n",
" # construct new header\n",
" source_fields = it.next()\n",
" out_fields = ('category',) + tuple(source_fields)\n",
" yield out_fields\n",
" \n",
" # transform data\n",
" current_category = None\n",
" for row in it:\n",
" if len(row) == 1:\n",
" current_category = row[0]\n",
" else:\n",
" yield (current_category,) + tuple(row)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tbl5 = CustomTransformer(tbl1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# just so it formats nicely as HTML in the notebook...\n",
"etl.wrap(tbl5)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\r\n",
"\r\n",
"\r\n",
"category | \r\n",
"type | \r\n",
"price | \r\n",
"quantity | \r\n",
"
\r\n",
"\r\n",
"\r\n",
"\r\n",
"Apples | \r\n",
"Cortland | \r\n",
"0.30 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"Apples | \r\n",
"Red Delicious | \r\n",
"0.40 | \r\n",
"24 | \r\n",
"
\r\n",
"\r\n",
"Oranges | \r\n",
"Navel | \r\n",
"0.50 | \r\n",
"12 | \r\n",
"
\r\n",
"\r\n",
"
\r\n"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"+------------+-----------------+---------+------------+\n",
"| 'category' | 'type' | 'price' | 'quantity' |\n",
"+============+=================+=========+============+\n",
"| 'Apples' | 'Cortland' | '0.30' | '24' |\n",
"+------------+-----------------+---------+------------+\n",
"| 'Apples' | 'Red Delicious' | '0.40' | '24' |\n",
"+------------+-----------------+---------+------------+\n",
"| 'Oranges' | 'Navel' | '0.50' | '12' |\n",
"+------------+-----------------+---------+------------+"
]
}
],
"prompt_number": 9
}
],
"metadata": {}
}
]
}