{ "metadata": { "name": "", "signature": "sha256:de89820c5fab8cef559e00bba60995491ff1479456a94e72176224881326d1d8" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Notes supporting [issue #256](https://github.com/alimanfoo/petl/issues/256)." ] }, { "cell_type": "code", "collapsed": false, "input": [ "import petl.interactive as etl" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "t1 = etl.wrap([['foo', 'bar'], [1, 'a'], [2, 'b']])\n", "t1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
foobar
1a
2b
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 2, "text": [ "+-------+-------+\n", "| 'foo' | 'bar' |\n", "+=======+=======+\n", "| 1 | 'a' |\n", "+-------+-------+\n", "| 2 | 'b' |\n", "+-------+-------+" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "t2 = etl.wrap([['foo', 'bar'], [1, 'a'], [2, 'c']])\n", "t2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
foobar
1a
2c
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "+-------+-------+\n", "| 'foo' | 'bar' |\n", "+=======+=======+\n", "| 1 | 'a' |\n", "+-------+-------+\n", "| 2 | 'c' |\n", "+-------+-------+" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "t3 = etl.merge(t1, t2, key='foo')\n", "t3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
foobar
1a
2Conflict(['c', 'b'])
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "+-------+----------------------+\n", "| 'foo' | 'bar' |\n", "+=======+======================+\n", "| 1 | 'a' |\n", "+-------+----------------------+\n", "| 2 | Conflict(['c', 'b']) |\n", "+-------+----------------------+" ] } ], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, "source": [ "The problem with the above is that you cannot tell from inspecting *t3* alone which conflicting value comes from which source: the values inside a `Conflict` are not necessarily listed in source order (here 'c' from *t2* appears before 'b' from *t1*).\n", "\n", "A workaround, as suggested by [@pawl](https://github.com/pawl), is to tag each table with a *source* field, concatenate the tables, and then use the [*conflicts()*](http://petl.readthedocs.org/en/latest/#petl.conflicts) function with *source* excluded from the comparison, e.g.:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "t4 = (etl\n", " .cat(\n", " t1.addfield('source', 1),\n", " t2.addfield('source', 2)\n", " )\n", " .conflicts(key='foo', exclude='source')\n", ")\n", "t4" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
foobarsource
2b1
2c2
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "+-------+-------+----------+\n", "| 'foo' | 'bar' | 'source' |\n", "+=======+=======+==========+\n", "| 2 | 'b' | 1 |\n", "+-------+-------+----------+\n", "| 2 | 'c' | 2 |\n", "+-------+-------+----------+" ] } ], "prompt_number": 9 } ], "metadata": {} } ] }