{ "metadata": { "name": "", "signature": "sha256:e33329756d366a4e9d0643f56d7eaab5b311404c9a5bae0c66dc53fc988c46b2" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import petl.interactive as etl\n", "etl.__version__" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "'0.26'" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "table1 = (('name', 'kids'),\n", " ('John', '1'),\n", " ('Jenny', '2'),\n", " ('James', '2'),\n", " ('Joan', '4'))\n", "\n", "table2 = (('name', 'age'),\n", " ('John', '33'),\n", " ('Jenni', ''),\n", " ('Jomes', '20'),\n", " ('Joan', ''))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "from fuzzywuzzy import fuzz" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "table3 = (etl\n", " .wrap(table1)\n", " .prefixheader('l_')\n", " .crossjoin(etl.wrap(table2).prefixheader('r_'))\n", " .addfield('fuzz', lambda row: fuzz.partial_ratio(row.l_name, row.r_name))\n", " .selectge('fuzz', 80)\n", ")\n", "table3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [
"<table>\r\n",
"<thead>\r\n",
"<tr>\r\n", "<th>l_name</th>\r\n", "<th>l_kids</th>\r\n", "<th>r_name</th>\r\n", "<th>r_age</th>\r\n", "<th>fuzz</th>\r\n", "</tr>\r\n",
"</thead>\r\n",
"<tbody>\r\n",
"<tr>\r\n", "<td>John</td>\r\n", "<td>1</td>\r\n", "<td>John</td>\r\n", "<td>33</td>\r\n", "<td>100</td>\r\n", "</tr>\r\n",
"<tr>\r\n", "<td>Jenny</td>\r\n", "<td>2</td>\r\n", "<td>Jenni</td>\r\n", "<td></td>\r\n", "<td>80</td>\r\n", "</tr>\r\n",
"<tr>\r\n", "<td>James</td>\r\n", "<td>2</td>\r\n", "<td>Jomes</td>\r\n", "<td>20</td>\r\n", "<td>80</td>\r\n", "</tr>\r\n",
"<tr>\r\n", "<td>Joan</td>\r\n", "<td>4</td>\r\n", "<td>Joan</td>\r\n", "<td></td>\r\n", "<td>100</td>\r\n", "</tr>\r\n",
"</tbody>\r\n",
"</table>\r\n"
 ], "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ "+----------+----------+----------+---------+--------+\n", "| 'l_name' | 'l_kids' | 'r_name' | 'r_age' | 'fuzz' |\n", "+==========+==========+==========+=========+========+\n", "| 'John' | '1' | 'John' | '33' | 100 |\n", "+----------+----------+----------+---------+--------+\n", "| 'Jenny' | '2' | 'Jenni' | '' | 80 |\n", "+----------+----------+----------+---------+--------+\n", "| 'James' | '2' | 'Jomes' | '20' | 80 |\n", "+----------+----------+----------+---------+--------+\n", "| 'Joan' | '4' | 'Joan' | '' | 100 |\n", "+----------+----------+----------+---------+--------+" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 } ], "metadata": {} } ] }