{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Resolving conflicts"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There is currently no in-built support for resolving conflicts within a table using petl, however this notebook gives an example of a workaround strategy."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"sys.version_info(major=3, minor=4, micro=2, releaselevel='final', serial=0)"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sys\n",
"sys.version_info"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'1.0.6'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import petl as etl\n",
"etl.__version__"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"\n",
"id | \n",
"name | \n",
"value | \n",
"age | \n",
"master_age | \n",
"
\n",
"\n",
"\n",
"\n",
"1 | \n",
"Tressa | \n",
"1203 | \n",
"42 | \n",
"42 | \n",
"
\n",
"\n",
"2 | \n",
"Phil | \n",
"23997 | \n",
"None | \n",
"None | \n",
"
\n",
"\n",
"3 | \n",
"Darius | \n",
"None | \n",
"78 | \n",
"78 | \n",
"
\n",
"\n",
"4 | \n",
"Delinda | \n",
"96501 | \n",
"64 | \n",
"64 | \n",
"
\n",
"\n",
"5 | \n",
"Adelina | \n",
"96508 | \n",
"50 | \n",
"50 | \n",
"
\n",
"\n",
"
\n"
],
"text/plain": [
"+-----+-----------+---------+------+------------+\n",
"| id | name | value | age | master_age |\n",
"+=====+===========+=========+======+============+\n",
"| '1' | 'Tressa' | '1203' | '42' | '42' |\n",
"+-----+-----------+---------+------+------------+\n",
"| '2' | 'Phil' | '23997' | None | None |\n",
"+-----+-----------+---------+------+------------+\n",
"| '3' | 'Darius' | None | '78' | '78' |\n",
"+-----+-----------+---------+------+------------+\n",
"| '4' | 'Delinda' | '96501' | '64' | '64' |\n",
"+-----+-----------+---------+------+------------+\n",
"| '5' | 'Adelina' | '96508' | '50' | '50' |\n",
"+-----+-----------+---------+------+------------+"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_master = b\"\"\"id name value age\n",
" 1 Tressa 1203 42\n",
" 2 Phil 23997 \n",
" 3 Darius . 78\n",
" 4 Delinda 96501 64\n",
" 5 Adelina 96508 50\n",
"\"\"\"\n",
"tbl_master = (\n",
" etl\n",
" .fromtext(etl.MemorySource(data_master))\n",
" .split('lines', r'\\s+')\n",
" .skip(1)\n",
" .replaceall('.', None)\n",
" .addfield('master_age', lambda row: row.age)\n",
")\n",
"tbl_master"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
"id | \n",
"name | \n",
"value | \n",
"age | \n",
"
\n",
"\n",
"\n",
"\n",
"2 | \n",
"Phil | \n",
"None | \n",
"53 | \n",
"
\n",
"\n",
"3 | \n",
"Darius | \n",
"5000 | \n",
"76 | \n",
"
\n",
"\n",
"
\n"
],
"text/plain": [
"+-----+----------+--------+------+\n",
"| id | name | value | age |\n",
"+=====+==========+========+======+\n",
"| '2' | 'Phil' | None | '53' |\n",
"+-----+----------+--------+------+\n",
"| '3' | 'Darius' | '5000' | '76' |\n",
"+-----+----------+--------+------+"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_other = b\"\"\"id name value age\n",
" 2 Phil . 53\n",
" 3 Darius 5000 76\n",
"\"\"\"\n",
"tbl_other = (\n",
" etl\n",
" .fromtext(etl.MemorySource(data_other))\n",
" .split('lines', r'\\s+')\n",
" .skip(1)\n",
" .replaceall('.', None)\n",
")\n",
"tbl_other"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
"id | \n",
"name | \n",
"value | \n",
"age | \n",
"master_age | \n",
"
\n",
"\n",
"\n",
"\n",
"1 | \n",
"Tressa | \n",
"1203 | \n",
"42 | \n",
"42 | \n",
"
\n",
"\n",
"2 | \n",
"Phil | \n",
"23997 | \n",
"53 | \n",
"None | \n",
"
\n",
"\n",
"3 | \n",
"Darius | \n",
"5000 | \n",
"Conflict({'76', '78'}) | \n",
"78 | \n",
"
\n",
"\n",
"4 | \n",
"Delinda | \n",
"96501 | \n",
"64 | \n",
"64 | \n",
"
\n",
"\n",
"5 | \n",
"Adelina | \n",
"96508 | \n",
"50 | \n",
"50 | \n",
"
\n",
"\n",
"
\n"
],
"text/plain": [
"+-----+-----------+---------+------------------------+------------+\n",
"| id | name | value | age | master_age |\n",
"+=====+===========+=========+========================+============+\n",
"| '1' | 'Tressa' | '1203' | '42' | '42' |\n",
"+-----+-----------+---------+------------------------+------------+\n",
"| '2' | 'Phil' | '23997' | '53' | None |\n",
"+-----+-----------+---------+------------------------+------------+\n",
"| '3' | 'Darius' | '5000' | Conflict({'76', '78'}) | '78' |\n",
"+-----+-----------+---------+------------------------+------------+\n",
"| '4' | 'Delinda' | '96501' | '64' | '64' |\n",
"+-----+-----------+---------+------------------------+------------+\n",
"| '5' | 'Adelina' | '96508' | '50' | '50' |\n",
"+-----+-----------+---------+------------------------+------------+"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tbl_merge = etl.merge(tbl_master, tbl_other, key='id')\n",
"tbl_merge"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
"id | \n",
"name | \n",
"value | \n",
"age | \n",
"
\n",
"\n",
"\n",
"\n",
"1 | \n",
"Tressa | \n",
"1203 | \n",
"42 | \n",
"
\n",
"\n",
"2 | \n",
"Phil | \n",
"23997 | \n",
"53 | \n",
"
\n",
"\n",
"3 | \n",
"Darius | \n",
"5000 | \n",
"78 | \n",
"
\n",
"\n",
"4 | \n",
"Delinda | \n",
"96501 | \n",
"64 | \n",
"
\n",
"\n",
"5 | \n",
"Adelina | \n",
"96508 | \n",
"50 | \n",
"
\n",
"\n",
"
\n"
],
"text/plain": [
"+-----+-----------+---------+------+\n",
"| id | name | value | age |\n",
"+=====+===========+=========+======+\n",
"| '1' | 'Tressa' | '1203' | '42' |\n",
"+-----+-----------+---------+------+\n",
"| '2' | 'Phil' | '23997' | '53' |\n",
"+-----+-----------+---------+------+\n",
"| '3' | 'Darius' | '5000' | '78' |\n",
"+-----+-----------+---------+------+\n",
"| '4' | 'Delinda' | '96501' | '64' |\n",
"+-----+-----------+---------+------+\n",
"| '5' | 'Adelina' | '96508' | '50' |\n",
"+-----+-----------+---------+------+"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tbl_merge_resolved = (\n",
" tbl_merge\n",
" .convert('age', lambda v, row: (row.master_age if isinstance(v, etl.Conflict) else v),\n",
" pass_row=True)\n",
" .cutout('master_age')\n",
")\n",
"tbl_merge_resolved"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}