{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This IPython notebook illustrates how to remove features from feature table.\n",
    "First, we need to import py_entitymatching package and other libraries as follows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import py_entitymatching package\n",
    "import py_entitymatching as em\n",
    "import os\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then, read the (sample) input tables for blocking purposes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Get the datasets directory\n",
    "datasets_dir = em.get_install_path() + os.sep + 'datasets'\n",
    "\n",
    "# Get the paths of the input tables\n",
    "path_A = datasets_dir + os.sep + 'person_table_A.csv'\n",
    "path_B = datasets_dir + os.sep + 'person_table_B.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the CSV files and set 'ID' as the key attribute\n",
    "A = em.read_csv_metadata(path_A, key='ID')\n",
    "B = em.read_csv_metadata(path_B, key='ID')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Get features (for blocking)\n",
    "feature_table = em.get_features_for_blocking(A, B, validate_inferred_attr_types=False)\n",
    "# Get features (for matching)\n",
    "# feature_table = em.get_features_for_matching(A, B)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Removing Features from Feature Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(feature_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_name</th>\n",
       "      <th>left_attribute</th>\n",
       "      <th>right_attribute</th>\n",
       "      <th>left_attr_tokenizer</th>\n",
       "      <th>right_attr_tokenizer</th>\n",
       "      <th>simfunction</th>\n",
       "      <th>function</th>\n",
       "      <th>function_source</th>\n",
       "      <th>is_auto_generated</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ID_ID_lev_dist</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>lev_dist</td>\n",
       "      <td>&lt;function ID_ID_lev_dist at 0x10b5987b8&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ID_ID_lev_sim</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>lev_sim</td>\n",
       "      <td>&lt;function ID_ID_lev_sim at 0x10f9b0620&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ID_ID_jar</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>jaro</td>\n",
       "      <td>&lt;function ID_ID_jar at 0x10f9b0950&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ID_ID_jwn</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>jaro_winkler</td>\n",
       "      <td>&lt;function ID_ID_jwn at 0x10f9b09d8&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ID_ID_exm</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>exact_match</td>\n",
       "      <td>&lt;function ID_ID_exm at 0x10f9b08c8&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     feature_name left_attribute right_attribute left_attr_tokenizer  \\\n",
       "0  ID_ID_lev_dist             ID              ID                None   \n",
       "1   ID_ID_lev_sim             ID              ID                None   \n",
       "2       ID_ID_jar             ID              ID                None   \n",
       "3       ID_ID_jwn             ID              ID                None   \n",
       "4       ID_ID_exm             ID              ID                None   \n",
       "\n",
       "  right_attr_tokenizer   simfunction  \\\n",
       "0                 None      lev_dist   \n",
       "1                 None       lev_sim   \n",
       "2                 None          jaro   \n",
       "3                 None  jaro_winkler   \n",
       "4                 None   exact_match   \n",
       "\n",
       "                                   function  \\\n",
       "0  <function ID_ID_lev_dist at 0x10b5987b8>   \n",
       "1   <function ID_ID_lev_sim at 0x10f9b0620>   \n",
       "2       <function ID_ID_jar at 0x10f9b0950>   \n",
       "3       <function ID_ID_jwn at 0x10f9b09d8>   \n",
       "4       <function ID_ID_exm at 0x10f9b08c8>   \n",
       "\n",
       "                                                                                       function_source  \\\n",
       "0  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "1  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "2  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "3  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "4  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "\n",
       "  is_auto_generated  \n",
       "0              True  \n",
       "1              True  \n",
       "2              True  \n",
       "3              True  \n",
       "4              True  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_table.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop first row\n",
    "feature_table = feature_table.drop(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_name</th>\n",
       "      <th>left_attribute</th>\n",
       "      <th>right_attribute</th>\n",
       "      <th>left_attr_tokenizer</th>\n",
       "      <th>right_attr_tokenizer</th>\n",
       "      <th>simfunction</th>\n",
       "      <th>function</th>\n",
       "      <th>function_source</th>\n",
       "      <th>is_auto_generated</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ID_ID_lev_sim</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>lev_sim</td>\n",
       "      <td>&lt;function ID_ID_lev_sim at 0x10f9b0620&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ID_ID_jar</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>jaro</td>\n",
       "      <td>&lt;function ID_ID_jar at 0x10f9b0950&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ID_ID_jwn</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>jaro_winkler</td>\n",
       "      <td>&lt;function ID_ID_jwn at 0x10f9b09d8&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ID_ID_exm</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>exact_match</td>\n",
       "      <td>&lt;function ID_ID_exm at 0x10f9b08c8&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>ID_ID_jac_qgm_3_qgm_3</td>\n",
       "      <td>ID</td>\n",
       "      <td>ID</td>\n",
       "      <td>qgm_3</td>\n",
       "      <td>qgm_3</td>\n",
       "      <td>jaccard</td>\n",
       "      <td>&lt;function ID_ID_jac_qgm_3_qgm_3 at 0x10f9b0a60&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            feature_name left_attribute right_attribute left_attr_tokenizer  \\\n",
       "1          ID_ID_lev_sim             ID              ID                None   \n",
       "2              ID_ID_jar             ID              ID                None   \n",
       "3              ID_ID_jwn             ID              ID                None   \n",
       "4              ID_ID_exm             ID              ID                None   \n",
       "5  ID_ID_jac_qgm_3_qgm_3             ID              ID               qgm_3   \n",
       "\n",
       "  right_attr_tokenizer   simfunction  \\\n",
       "1                 None       lev_sim   \n",
       "2                 None          jaro   \n",
       "3                 None  jaro_winkler   \n",
       "4                 None   exact_match   \n",
       "5                qgm_3       jaccard   \n",
       "\n",
       "                                          function  \\\n",
       "1          <function ID_ID_lev_sim at 0x10f9b0620>   \n",
       "2              <function ID_ID_jar at 0x10f9b0950>   \n",
       "3              <function ID_ID_jwn at 0x10f9b09d8>   \n",
       "4              <function ID_ID_exm at 0x10f9b08c8>   \n",
       "5  <function ID_ID_jac_qgm_3_qgm_3 at 0x10f9b0a60>   \n",
       "\n",
       "                                                                                       function_source  \\\n",
       "1  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "2  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "3  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "4  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "5  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "\n",
       "  is_auto_generated  \n",
       "1              True  \n",
       "2              True  \n",
       "3              True  \n",
       "4              True  \n",
       "5              True  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_table.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Remove all the features except involving name (Include only the features where the left attribute is name)\n",
    "feature_table = feature_table[feature_table.left_attribute=='name']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_name</th>\n",
       "      <th>left_attribute</th>\n",
       "      <th>right_attribute</th>\n",
       "      <th>left_attr_tokenizer</th>\n",
       "      <th>right_attr_tokenizer</th>\n",
       "      <th>simfunction</th>\n",
       "      <th>function</th>\n",
       "      <th>function_source</th>\n",
       "      <th>is_auto_generated</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>name_name_jac_qgm_3_qgm_3</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>qgm_3</td>\n",
       "      <td>qgm_3</td>\n",
       "      <td>jaccard</td>\n",
       "      <td>&lt;function name_name_jac_qgm_3_qgm_3 at 0x10f9b0ae8&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>name_name_cos_dlm_dc0_dlm_dc0</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>dlm_dc0</td>\n",
       "      <td>dlm_dc0</td>\n",
       "      <td>cosine</td>\n",
       "      <td>&lt;function name_name_cos_dlm_dc0_dlm_dc0 at 0x10f9b0b70&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>name_name_jac_dlm_dc0_dlm_dc0</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>dlm_dc0</td>\n",
       "      <td>dlm_dc0</td>\n",
       "      <td>jaccard</td>\n",
       "      <td>&lt;function name_name_jac_dlm_dc0_dlm_dc0 at 0x10f9b0bf8&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>name_name_mel</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>monge_elkan</td>\n",
       "      <td>&lt;function name_name_mel at 0x10f9b0c80&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>name_name_lev_dist</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>lev_dist</td>\n",
       "      <td>&lt;function name_name_lev_dist at 0x10f9b0d08&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>name_name_lev_sim</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>lev_sim</td>\n",
       "      <td>&lt;function name_name_lev_sim at 0x10f9b0d90&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>name_name_nmw</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>needleman_wunsch</td>\n",
       "      <td>&lt;function name_name_nmw at 0x10f9b0e18&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>name_name_sw</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>smith_waterman</td>\n",
       "      <td>&lt;function name_name_sw at 0x10f9b0ea0&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     feature_name left_attribute right_attribute  \\\n",
       "6       name_name_jac_qgm_3_qgm_3           name            name   \n",
       "7   name_name_cos_dlm_dc0_dlm_dc0           name            name   \n",
       "8   name_name_jac_dlm_dc0_dlm_dc0           name            name   \n",
       "9                   name_name_mel           name            name   \n",
       "10             name_name_lev_dist           name            name   \n",
       "11              name_name_lev_sim           name            name   \n",
       "12                  name_name_nmw           name            name   \n",
       "13                   name_name_sw           name            name   \n",
       "\n",
       "   left_attr_tokenizer right_attr_tokenizer       simfunction  \\\n",
       "6                qgm_3                qgm_3           jaccard   \n",
       "7              dlm_dc0              dlm_dc0            cosine   \n",
       "8              dlm_dc0              dlm_dc0           jaccard   \n",
       "9                 None                 None       monge_elkan   \n",
       "10                None                 None          lev_dist   \n",
       "11                None                 None           lev_sim   \n",
       "12                None                 None  needleman_wunsch   \n",
       "13                None                 None    smith_waterman   \n",
       "\n",
       "                                                   function  \\\n",
       "6       <function name_name_jac_qgm_3_qgm_3 at 0x10f9b0ae8>   \n",
       "7   <function name_name_cos_dlm_dc0_dlm_dc0 at 0x10f9b0b70>   \n",
       "8   <function name_name_jac_dlm_dc0_dlm_dc0 at 0x10f9b0bf8>   \n",
       "9                   <function name_name_mel at 0x10f9b0c80>   \n",
       "10             <function name_name_lev_dist at 0x10f9b0d08>   \n",
       "11              <function name_name_lev_sim at 0x10f9b0d90>   \n",
       "12                  <function name_name_nmw at 0x10f9b0e18>   \n",
       "13                   <function name_name_sw at 0x10f9b0ea0>   \n",
       "\n",
       "                                                                                        function_source  \\\n",
       "6   from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "7   from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "8   from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "9   from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "10  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "11  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "12  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "13  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "\n",
       "   is_auto_generated  \n",
       "6               True  \n",
       "7               True  \n",
       "8               True  \n",
       "9               True  \n",
       "10              True  \n",
       "11              True  \n",
       "12              True  \n",
       "13              True  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Remove all the features except involving jaccard (Include only the features where the sim function is jaccard)\n",
    "feature_table = feature_table[feature_table.simfunction=='jaccard']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_name</th>\n",
       "      <th>left_attribute</th>\n",
       "      <th>right_attribute</th>\n",
       "      <th>left_attr_tokenizer</th>\n",
       "      <th>right_attr_tokenizer</th>\n",
       "      <th>simfunction</th>\n",
       "      <th>function</th>\n",
       "      <th>function_source</th>\n",
       "      <th>is_auto_generated</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>name_name_jac_qgm_3_qgm_3</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>qgm_3</td>\n",
       "      <td>qgm_3</td>\n",
       "      <td>jaccard</td>\n",
       "      <td>&lt;function name_name_jac_qgm_3_qgm_3 at 0x10f9b0ae8&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>name_name_jac_dlm_dc0_dlm_dc0</td>\n",
       "      <td>name</td>\n",
       "      <td>name</td>\n",
       "      <td>dlm_dc0</td>\n",
       "      <td>dlm_dc0</td>\n",
       "      <td>jaccard</td>\n",
       "      <td>&lt;function name_name_jac_dlm_dc0_dlm_dc0 at 0x10f9b0bf8&gt;</td>\n",
       "      <td>from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    feature_name left_attribute right_attribute  \\\n",
       "6      name_name_jac_qgm_3_qgm_3           name            name   \n",
       "8  name_name_jac_dlm_dc0_dlm_dc0           name            name   \n",
       "\n",
       "  left_attr_tokenizer right_attr_tokenizer simfunction  \\\n",
       "6               qgm_3                qgm_3     jaccard   \n",
       "8             dlm_dc0              dlm_dc0     jaccard   \n",
       "\n",
       "                                                  function  \\\n",
       "6      <function name_name_jac_qgm_3_qgm_3 at 0x10f9b0ae8>   \n",
       "8  <function name_name_jac_dlm_dc0_dlm_dc0 at 0x10f9b0bf8>   \n",
       "\n",
       "                                                                                       function_source  \\\n",
       "6  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "8  from py_entitymatching.feature.simfunctions import *\\nfrom py_entitymatching.feature.tokenizers ...   \n",
       "\n",
       "  is_auto_generated  \n",
       "6              True  \n",
       "8              True  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_table"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}