{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Resolving conflicts" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There is currently no in-built support for resolving conflicts within a table using petl, however this notebook gives an example of a workaround strategy." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "sys.version_info(major=3, minor=4, micro=2, releaselevel='final', serial=0)" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sys\n", "sys.version_info" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'1.0.6'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import petl as etl\n", "etl.__version__" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
idnamevalueagemaster_age
1Tressa12034242
2Phil23997NoneNone
3DariusNone7878
4Delinda965016464
5Adelina965085050
\n" ], "text/plain": [ "+-----+-----------+---------+------+------------+\n", "| id | name | value | age | master_age |\n", "+=====+===========+=========+======+============+\n", "| '1' | 'Tressa' | '1203' | '42' | '42' |\n", "+-----+-----------+---------+------+------------+\n", "| '2' | 'Phil' | '23997' | None | None |\n", "+-----+-----------+---------+------+------------+\n", "| '3' | 'Darius' | None | '78' | '78' |\n", "+-----+-----------+---------+------+------------+\n", "| '4' | 'Delinda' | '96501' | '64' | '64' |\n", "+-----+-----------+---------+------+------------+\n", "| '5' | 'Adelina' | '96508' | '50' | '50' |\n", "+-----+-----------+---------+------+------------+" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_master = b\"\"\"id name value age\n", " 1 Tressa 1203 42\n", " 2 Phil 23997 \n", " 3 Darius . 78\n", " 4 Delinda 96501 64\n", " 5 Adelina 96508 50\n", "\"\"\"\n", "tbl_master = (\n", " etl\n", " .fromtext(etl.MemorySource(data_master))\n", " .split('lines', r'\\s+')\n", " .skip(1)\n", " .replaceall('.', None)\n", " .addfield('master_age', lambda row: row.age)\n", ")\n", "tbl_master" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
idnamevalueage
2PhilNone53
3Darius500076
\n" ], "text/plain": [ "+-----+----------+--------+------+\n", "| id | name | value | age |\n", "+=====+==========+========+======+\n", "| '2' | 'Phil' | None | '53' |\n", "+-----+----------+--------+------+\n", "| '3' | 'Darius' | '5000' | '76' |\n", "+-----+----------+--------+------+" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_other = b\"\"\"id name value age\n", " 2 Phil . 53\n", " 3 Darius 5000 76\n", "\"\"\"\n", "tbl_other = (\n", " etl\n", " .fromtext(etl.MemorySource(data_other))\n", " .split('lines', r'\\s+')\n", " .skip(1)\n", " .replaceall('.', None)\n", ")\n", "tbl_other" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
idnamevalueagemaster_age
1Tressa12034242
2Phil2399753None
3Darius5000Conflict({'76', '78'})78
4Delinda965016464
5Adelina965085050
\n" ], "text/plain": [ "+-----+-----------+---------+------------------------+------------+\n", "| id | name | value | age | master_age |\n", "+=====+===========+=========+========================+============+\n", "| '1' | 'Tressa' | '1203' | '42' | '42' |\n", "+-----+-----------+---------+------------------------+------------+\n", "| '2' | 'Phil' | '23997' | '53' | None |\n", "+-----+-----------+---------+------------------------+------------+\n", "| '3' | 'Darius' | '5000' | Conflict({'76', '78'}) | '78' |\n", "+-----+-----------+---------+------------------------+------------+\n", "| '4' | 'Delinda' | '96501' | '64' | '64' |\n", "+-----+-----------+---------+------------------------+------------+\n", "| '5' | 'Adelina' | '96508' | '50' | '50' |\n", "+-----+-----------+---------+------------------------+------------+" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tbl_merge = etl.merge(tbl_master, tbl_other, key='id')\n", "tbl_merge" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
idnamevalueage
1Tressa120342
2Phil2399753
3Darius500078
4Delinda9650164
5Adelina9650850
\n" ], "text/plain": [ "+-----+-----------+---------+------+\n", "| id | name | value | age |\n", "+=====+===========+=========+======+\n", "| '1' | 'Tressa' | '1203' | '42' |\n", "+-----+-----------+---------+------+\n", "| '2' | 'Phil' | '23997' | '53' |\n", "+-----+-----------+---------+------+\n", "| '3' | 'Darius' | '5000' | '78' |\n", "+-----+-----------+---------+------+\n", "| '4' | 'Delinda' | '96501' | '64' |\n", "+-----+-----------+---------+------+\n", "| '5' | 'Adelina' | '96508' | '50' |\n", "+-----+-----------+---------+------+" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tbl_merge_resolved = (\n", " tbl_merge\n", " .convert('age', lambda v, row: (row.master_age if isinstance(v, etl.Conflict) else v),\n", " pass_row=True)\n", " .cutout('master_age')\n", ")\n", "tbl_merge_resolved" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.2" } }, "nbformat": 4, "nbformat_minor": 0 }