{
 "metadata": {
  "name": "",
  "signature": "sha256:e33329756d366a4e9d0643f56d7eaab5b311404c9a5bae0c66dc53fc988c46b2"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import petl.interactive as etl\n",
      "\n",
      "# Display the version string reported by the imported module.\n",
      "etl.__version__"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 1,
       "text": [
        "'0.26'"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Two small tables whose 'name' columns only partly agree\n",
      "# (Jenny/Jenni, James/Jomes). Every value is a string.\n",
      "table1 = (\n",
      "    ('name', 'kids'),\n",
      "    ('John', '1'),\n",
      "    ('Jenny', '2'),\n",
      "    ('James', '2'),\n",
      "    ('Joan', '4'),\n",
      ")\n",
      "\n",
      "table2 = (\n",
      "    ('name', 'age'),\n",
      "    ('John', '33'),\n",
      "    ('Jenni', ''),\n",
      "    ('Jomes', '20'),\n",
      "    ('Joan', ''),\n",
      ")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from fuzzywuzzy import fuzz"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Fuzzy-match the two tables on name: prefix each header so the columns\n",
      "# stay distinguishable, cross join the tables, score each pair of names\n",
      "# with fuzz.partial_ratio, and keep the rows whose score is >= 80.\n",
      "left = etl.wrap(table1).prefixheader('l_')\n",
      "right = etl.wrap(table2).prefixheader('r_')\n",
      "scored = left.crossjoin(right).addfield(\n",
      "    'fuzz', lambda row: fuzz.partial_ratio(row.l_name, row.r_name))\n",
      "table3 = scored.selectge('fuzz', 80)\n",
      "table3"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [ "
l_name | \r\n", "l_kids | \r\n", "r_name | \r\n", "r_age | \r\n", "fuzz | \r\n", "
---|---|---|---|---|
John | \r\n", "1 | \r\n", "John | \r\n", "33 | \r\n", "100 | \r\n", "
Jenny | \r\n", "2 | \r\n", "Jenni | \r\n", "\r\n", " | 80 | \r\n", "
James | \r\n", "2 | \r\n", "Jomes | \r\n", "20 | \r\n", "80 | \r\n", "
Joan | \r\n", "4 | \r\n", "Joan | \r\n", "\r\n", " | 100 | \r\n", "