{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Week3_Assignment.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "bs5dRVOjZ6pT",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 221
        },
        "outputId": "2801a080-a77a-4e62-d58c-732d318416b3"
      },
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/data/')\n",
        "data_dir = '/data/My Drive/Colab Notebooks/FEC dataset'\n",
        "!ls '/data/My Drive/Colab Notebooks/FEC dataset'\n",
        "!pip install matplotlib"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Mounted at /data/\n",
            "ccl20.zip\t     cm_header_file.csv  indiv_header_file.csv\n",
            "ccl_header_file.csv  cn20.zip\t\t pas220.zip\n",
            "CD_trends.xlsx\t     cn_header_file.csv  pas2_header_file.csv\n",
            "cm20.zip\t     indiv20.zip\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (3.2.2)\n",
            "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.4.7)\n",
            "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.8.1)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (0.10.0)\n",
            "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.18.5)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.2.0)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.1->matplotlib) (1.15.0)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "uivLBlKyuC2V"
      },
      "source": [
        "import zipfile\n",
        "zip = zipfile.ZipFile(data_dir+'/indiv20.zip')\n",
        "#zip.namelist()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "m-0PQq0Oufje",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 428
        },
        "outputId": "b79a482b-6444-49f8-f88a-d79975a0442c"
      },
      "source": [
        "import pandas as pd\n",
        "header = pd.read_csv(data_dir+'/indiv_header_file.csv')\n",
        "\n",
        "data=pd.read_csv(zip.open('by_date/itcont_2020_20200630_20300630.txt'), sep='|', names=header.columns)\n",
        "data.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (10,16,18,19) have mixed types.Specify dtype option on import or set low_memory=False.\n",
            "  interactivity=interactivity, compiler=compiler, result=result)\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>AMNDT_IND</th>\n",
              "      <th>RPT_TP</th>\n",
              "      <th>TRANSACTION_PGI</th>\n",
              "      <th>IMAGE_NUM</th>\n",
              "      <th>TRANSACTION_TP</th>\n",
              "      <th>ENTITY_TP</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>OTHER_ID</th>\n",
              "      <th>TRAN_ID</th>\n",
              "      <th>FILE_NUM</th>\n",
              "      <th>MEMO_CD</th>\n",
              "      <th>MEMO_TEXT</th>\n",
              "      <th>SUB_ID</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00363317</td>\n",
              "      <td>A</td>\n",
              "      <td>YE</td>\n",
              "      <td>P2020</td>\n",
              "      <td>202004199219743280</td>\n",
              "      <td>15E</td>\n",
              "      <td>IND</td>\n",
              "      <td>LITTLE, WILLIAM</td>\n",
              "      <td>NEW YORK</td>\n",
              "      <td>NY</td>\n",
              "      <td>1.0128e+08</td>\n",
              "      <td>NOT EMPLOYED</td>\n",
              "      <td>NOT EMPLOYED</td>\n",
              "      <td>12162020</td>\n",
              "      <td>500</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4017159</td>\n",
              "      <td>1402014</td>\n",
              "      <td>NaN</td>\n",
              "      <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n",
              "      <td>4042120201737536230</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>C00723122</td>\n",
              "      <td>A</td>\n",
              "      <td>YE</td>\n",
              "      <td>P2020</td>\n",
              "      <td>202007159244979799</td>\n",
              "      <td>15E</td>\n",
              "      <td>IND</td>\n",
              "      <td>STOWE, BARBARA</td>\n",
              "      <td>RESTON</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01942e+08</td>\n",
              "      <td>NOT EMPLOYED</td>\n",
              "      <td>NOT EMPLOYED</td>\n",
              "      <td>12282020</td>\n",
              "      <td>100</td>\n",
              "      <td>C00193433</td>\n",
              "      <td>4753483</td>\n",
              "      <td>1423440</td>\n",
              "      <td>NaN</td>\n",
              "      <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n",
              "      <td>4072620201794577716</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C00290825</td>\n",
              "      <td>A</td>\n",
              "      <td>YE</td>\n",
              "      <td>P2020</td>\n",
              "      <td>202004159216892816</td>\n",
              "      <td>15E</td>\n",
              "      <td>IND</td>\n",
              "      <td>MEHIEL, KAREN</td>\n",
              "      <td>NEW YORK</td>\n",
              "      <td>NY</td>\n",
              "      <td>1.01281e+08</td>\n",
              "      <td>KAMPACK, INC.</td>\n",
              "      <td>EXECUTIVE</td>\n",
              "      <td>12182020</td>\n",
              "      <td>2800</td>\n",
              "      <td>C00401224</td>\n",
              "      <td>3965375</td>\n",
              "      <td>1398991</td>\n",
              "      <td>NaN</td>\n",
              "      <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n",
              "      <td>4050620201741858091</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C00363317</td>\n",
              "      <td>A</td>\n",
              "      <td>M12</td>\n",
              "      <td>P2020</td>\n",
              "      <td>202004199219742982</td>\n",
              "      <td>15E</td>\n",
              "      <td>IND</td>\n",
              "      <td>LITTLE, WILLIAM</td>\n",
              "      <td>NEW YORK</td>\n",
              "      <td>NY</td>\n",
              "      <td>1.0128e+08</td>\n",
              "      <td>NOT EMPLOYED</td>\n",
              "      <td>NOT EMPLOYED</td>\n",
              "      <td>10302020</td>\n",
              "      <td>500</td>\n",
              "      <td>C00401224</td>\n",
              "      <td>4017173</td>\n",
              "      <td>1401993</td>\n",
              "      <td>NaN</td>\n",
              "      <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n",
              "      <td>4042120201737536220</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>C00589309</td>\n",
              "      <td>A</td>\n",
              "      <td>YE</td>\n",
              "      <td>P</td>\n",
              "      <td>202002209187171385</td>\n",
              "      <td>15E</td>\n",
              "      <td>IND</td>\n",
              "      <td>DAVIDSON, GREG</td>\n",
              "      <td>REDONDO BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.02782e+08</td>\n",
              "      <td>NORTHROP GRUMMAN</td>\n",
              "      <td>AEROSPACE MANAGER</td>\n",
              "      <td>12312020</td>\n",
              "      <td>100</td>\n",
              "      <td>C00401224</td>\n",
              "      <td>VVBX0QHNGR6</td>\n",
              "      <td>1385228</td>\n",
              "      <td>NaN</td>\n",
              "      <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n",
              "      <td>4022920201700018835</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "     CMTE_ID  ...               SUB_ID\n",
              "0  C00363317  ...  4042120201737536230\n",
              "1  C00723122  ...  4072620201794577716\n",
              "2  C00290825  ...  4050620201741858091\n",
              "3  C00363317  ...  4042120201737536220\n",
              "4  C00589309  ...  4022920201700018835\n",
              "\n",
              "[5 rows x 21 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 3
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6PB4UgTa1Bih",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "5339e7e6-3336-494c-edb9-cfd45fa8cee4"
      },
      "source": [
        "print(data['TRANSACTION_AMT'].max())"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "10000000\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "owm7xZS11HEB",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 326
        },
        "outputId": "6769a41a-fe29-4f5b-acab-71ab9a292fd8"
      },
      "source": [
        "sort_amt = data.sort_values(by='TRANSACTION_AMT', ascending=False)\n",
        "sort_amt.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>AMNDT_IND</th>\n",
              "      <th>RPT_TP</th>\n",
              "      <th>TRANSACTION_PGI</th>\n",
              "      <th>IMAGE_NUM</th>\n",
              "      <th>TRANSACTION_TP</th>\n",
              "      <th>ENTITY_TP</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>OTHER_ID</th>\n",
              "      <th>TRAN_ID</th>\n",
              "      <th>FILE_NUM</th>\n",
              "      <th>MEMO_CD</th>\n",
              "      <th>MEMO_TEXT</th>\n",
              "      <th>SUB_ID</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>990582</th>\n",
              "      <td>C00571703</td>\n",
              "      <td>N</td>\n",
              "      <td>M8</td>\n",
              "      <td>P</td>\n",
              "      <td>202008209266851913</td>\n",
              "      <td>10</td>\n",
              "      <td>IND</td>\n",
              "      <td>MELLON, TIMOTHY</td>\n",
              "      <td>SARATOGA</td>\n",
              "      <td>WY</td>\n",
              "      <td>823311500</td>\n",
              "      <td>SELF-EMPLOYED</td>\n",
              "      <td>INVESTMENTS</td>\n",
              "      <td>7092020</td>\n",
              "      <td>10000000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>SA11A.15446</td>\n",
              "      <td>1434706</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4090120201833903380</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>990568</th>\n",
              "      <td>C00571703</td>\n",
              "      <td>N</td>\n",
              "      <td>M8</td>\n",
              "      <td>P</td>\n",
              "      <td>202008209266851908</td>\n",
              "      <td>10</td>\n",
              "      <td>IND</td>\n",
              "      <td>SCHWARZMAN, STEPHEN A.</td>\n",
              "      <td>NEW YORK</td>\n",
              "      <td>NY</td>\n",
              "      <td>101543302</td>\n",
              "      <td>BLACKSTONE</td>\n",
              "      <td>CHAIRMAN &amp; CEO</td>\n",
              "      <td>7012020</td>\n",
              "      <td>10000000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>SA11A.15411</td>\n",
              "      <td>1434706</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4090120201833903366</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>469388</th>\n",
              "      <td>C00637512</td>\n",
              "      <td>N</td>\n",
              "      <td>M8</td>\n",
              "      <td>P</td>\n",
              "      <td>202008209266413693</td>\n",
              "      <td>10</td>\n",
              "      <td>ORG</td>\n",
              "      <td>AMERICA FIRST POLICIES, INC.</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>VA</td>\n",
              "      <td>22202</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>7202020</td>\n",
              "      <td>10000000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>SA11AI.165580</td>\n",
              "      <td>1434640</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4082920201831236982</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1151552</th>\n",
              "      <td>C00484642</td>\n",
              "      <td>N</td>\n",
              "      <td>M7</td>\n",
              "      <td>P</td>\n",
              "      <td>202007209260164631</td>\n",
              "      <td>10</td>\n",
              "      <td>ORG</td>\n",
              "      <td>MAJORITY FORWARD</td>\n",
              "      <td>WASHINGTON</td>\n",
              "      <td>DC</td>\n",
              "      <td>200055998</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>6302020</td>\n",
              "      <td>8000000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1973314</td>\n",
              "      <td>1427419</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4072920201808862242</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1351670</th>\n",
              "      <td>C00747246</td>\n",
              "      <td>N</td>\n",
              "      <td>Q2</td>\n",
              "      <td>P</td>\n",
              "      <td>202007159245095555</td>\n",
              "      <td>15</td>\n",
              "      <td>ORG</td>\n",
              "      <td>SIXTEEN THIRTY FUND</td>\n",
              "      <td>WASHINGTON</td>\n",
              "      <td>DC</td>\n",
              "      <td>200362605</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>6302020</td>\n",
              "      <td>5700000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>12295463</td>\n",
              "      <td>1423930</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4071720201791015689</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "           CMTE_ID AMNDT_IND RPT_TP  ... MEMO_CD  MEMO_TEXT               SUB_ID\n",
              "990582   C00571703         N     M8  ...     NaN        NaN  4090120201833903380\n",
              "990568   C00571703         N     M8  ...     NaN        NaN  4090120201833903366\n",
              "469388   C00637512         N     M8  ...     NaN        NaN  4082920201831236982\n",
              "1151552  C00484642         N     M7  ...     NaN        NaN  4072920201808862242\n",
              "1351670  C00747246         N     Q2  ...     NaN        NaN  4071720201791015689\n",
              "\n",
              "[5 rows x 21 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 5
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "WuJLWjlA29OT"
      },
      "source": [
        "df = pd.DataFrame(data, columns=['CMTE_ID', 'NAME', 'CITY', 'STATE', 'ZIP_CODE', 'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT', 'TRANSACTION_AMT'])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "I9CI-mdn29Fq",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 238
        },
        "outputId": "5c059bc9-6a43-4365-db85-a24049a7568b"
      },
      "source": [
        "from zipfile import ZipFile\n",
        "import pandas as pd\n",
        "header = pd.read_csv(data_dir+'/cn_header_file.csv')\n",
        "\n",
        "with ZipFile(data_dir+'/cn20.zip') as zip:\n",
        "  candidates = pd.read_csv(zip.open('cn.txt'), sep='|', names=header.columns)\n",
        "candidates.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_NAME</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>CAND_ELECTION_YR</th>\n",
              "      <th>CAND_OFFICE_ST</th>\n",
              "      <th>CAND_OFFICE</th>\n",
              "      <th>CAND_OFFICE_DISTRICT</th>\n",
              "      <th>CAND_ICI</th>\n",
              "      <th>CAND_STATUS</th>\n",
              "      <th>CAND_PCC</th>\n",
              "      <th>CAND_ST1</th>\n",
              "      <th>CAND_ST2</th>\n",
              "      <th>CAND_CITY</th>\n",
              "      <th>CAND_ST</th>\n",
              "      <th>CAND_ZIP</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>H0AK00105</td>\n",
              "      <td>LAMB, THOMAS</td>\n",
              "      <td>NNE</td>\n",
              "      <td>2020</td>\n",
              "      <td>AK</td>\n",
              "      <td>H</td>\n",
              "      <td>0.0</td>\n",
              "      <td>C</td>\n",
              "      <td>N</td>\n",
              "      <td>C00607515</td>\n",
              "      <td>1861 W LAKE LUCILLE DR</td>\n",
              "      <td>NaN</td>\n",
              "      <td>WASILLA</td>\n",
              "      <td>AK</td>\n",
              "      <td>99654.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>H0AK00113</td>\n",
              "      <td>TUGATUK, RAY SEAN</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>AK</td>\n",
              "      <td>H</td>\n",
              "      <td>0.0</td>\n",
              "      <td>C</td>\n",
              "      <td>N</td>\n",
              "      <td>NaN</td>\n",
              "      <td>PO BOX 172</td>\n",
              "      <td>NaN</td>\n",
              "      <td>MANAKOTAK</td>\n",
              "      <td>AK</td>\n",
              "      <td>99628.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>H0AK01046</td>\n",
              "      <td>CATALANO, THOMAS</td>\n",
              "      <td>OTH</td>\n",
              "      <td>2020</td>\n",
              "      <td>AK</td>\n",
              "      <td>H</td>\n",
              "      <td>0.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>N</td>\n",
              "      <td>NaN</td>\n",
              "      <td>188 WEST NORTHERN LIGHTS BOULEVARD</td>\n",
              "      <td>NaN</td>\n",
              "      <td>ANCHORAGE</td>\n",
              "      <td>AK</td>\n",
              "      <td>99503.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>H0AL01055</td>\n",
              "      <td>CARL, JERRY LEE, JR</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>AL</td>\n",
              "      <td>H</td>\n",
              "      <td>1.0</td>\n",
              "      <td>O</td>\n",
              "      <td>C</td>\n",
              "      <td>C00697789</td>\n",
              "      <td>PO BOX 852138</td>\n",
              "      <td>NaN</td>\n",
              "      <td>MOBILE</td>\n",
              "      <td>AL</td>\n",
              "      <td>36685.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>H0AL01063</td>\n",
              "      <td>LAMBERT, DOUGLAS WESTLEY III</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>AL</td>\n",
              "      <td>H</td>\n",
              "      <td>1.0</td>\n",
              "      <td>O</td>\n",
              "      <td>C</td>\n",
              "      <td>C00701557</td>\n",
              "      <td>7194 STILLWATER BLVD</td>\n",
              "      <td>NaN</td>\n",
              "      <td>SPANISH FORT</td>\n",
              "      <td>AL</td>\n",
              "      <td>36527.0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "     CAND_ID                     CAND_NAME  ... CAND_ST  CAND_ZIP\n",
              "0  H0AK00105                  LAMB, THOMAS  ...      AK   99654.0\n",
              "1  H0AK00113             TUGATUK, RAY SEAN  ...      AK   99628.0\n",
              "2  H0AK01046              CATALANO, THOMAS  ...      AK   99503.0\n",
              "3  H0AL01055           CARL, JERRY LEE, JR  ...      AL   36685.0\n",
              "4  H0AL01063  LAMBERT, DOUGLAS WESTLEY III  ...      AL   36527.0\n",
              "\n",
              "[5 rows x 15 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "uF9YJ-SQ6psu"
      },
      "source": [
        "candidates_final = pd.DataFrame(candidates, columns=['CAND_ID', 'CAND_PTY_AFFILIATION'])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "H4_26uJ23RXX",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        },
        "outputId": "543f7301-8524-4da0-85da-b3f3c2a4e6a2"
      },
      "source": [
        "header = pd.read_csv(data_dir+'/ccl_header_file.csv')\n",
        "\n",
        "with ZipFile(data_dir+'/ccl20.zip') as zip:\n",
        "  #print(zip.namelist())\n",
        "  linkage = pd.read_csv(zip.open('ccl.txt'), sep='|', names=header.columns)\n",
        "\n",
        "linkage.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_ELECTION_YR</th>\n",
              "      <th>FEC_ELECTION_YR</th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>CMTE_TP</th>\n",
              "      <th>CMTE_DSGN</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00713602</td>\n",
              "      <td>2019</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00712851</td>\n",
              "      <td>O</td>\n",
              "      <td>U</td>\n",
              "      <td>228963</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>H0AK00105</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00607515</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>229250</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>H0AL01055</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00697789</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>226125</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>H0AL01063</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00701557</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>227053</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>H0AL01071</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00701409</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>227054</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "     CAND_ID  CAND_ELECTION_YR  FEC_ELECTION_YR  ... CMTE_TP CMTE_DSGN LINKAGE_ID\n",
              "0  C00713602              2019             2020  ...       O         U     228963\n",
              "1  H0AK00105              2020             2020  ...       H         P     229250\n",
              "2  H0AL01055              2020             2020  ...       H         P     226125\n",
              "3  H0AL01063              2020             2020  ...       H         P     227053\n",
              "4  H0AL01071              2020             2020  ...       H         P     227054\n",
              "\n",
              "[5 rows x 7 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "wyK3OZ3y7Srb",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        },
        "outputId": "632f3820-90b7-4759-99e7-089714c8243b"
      },
      "source": [
        "df_merge = pd.merge(candidates_final, linkage, on='CAND_ID')\n",
        "df_merge.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>CAND_ELECTION_YR</th>\n",
              "      <th>FEC_ELECTION_YR</th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>CMTE_TP</th>\n",
              "      <th>CMTE_DSGN</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>H0AK00105</td>\n",
              "      <td>NNE</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00607515</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>229250</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>H0AL01055</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00697789</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>226125</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>H0AL01063</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00701557</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>227053</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>H0AL01071</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00701409</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>227054</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>H0AL01089</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>C00703066</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>227266</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "     CAND_ID CAND_PTY_AFFILIATION  ...  CMTE_DSGN  LINKAGE_ID\n",
              "0  H0AK00105                  NNE  ...          P      229250\n",
              "1  H0AL01055                  REP  ...          P      226125\n",
              "2  H0AL01063                  REP  ...          P      227053\n",
              "3  H0AL01071                  REP  ...          P      227054\n",
              "4  H0AL01089                  REP  ...          P      227266\n",
              "\n",
              "[5 rows x 8 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8i2m3TRG3QWd"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8ICMtX8B3TZi"
      },
      "source": [
        "# Drop rows that are missing EMPLOYER or OCCUPATION.\n",
        "# Rebinding the name instead of passing inplace=True avoids mutating an object\n",
        "# that other names or earlier cells may still reference.\n",
        "sort_amt = sort_amt.dropna(subset=[\"EMPLOYER\", \"OCCUPATION\"])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jNiWeB9J3TN7",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 343
        },
        "outputId": "7d11d265-7680-4c8f-ccba-dc19ec310ec3"
      },
      "source": [
        "# Preview the first five rows of sort_amt.\n",
        "sort_amt.head(n=5)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>AMNDT_IND</th>\n",
              "      <th>RPT_TP</th>\n",
              "      <th>TRANSACTION_PGI</th>\n",
              "      <th>IMAGE_NUM</th>\n",
              "      <th>TRANSACTION_TP</th>\n",
              "      <th>ENTITY_TP</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>OTHER_ID</th>\n",
              "      <th>TRAN_ID</th>\n",
              "      <th>FILE_NUM</th>\n",
              "      <th>MEMO_CD</th>\n",
              "      <th>MEMO_TEXT</th>\n",
              "      <th>SUB_ID</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>990582</th>\n",
              "      <td>C00571703</td>\n",
              "      <td>N</td>\n",
              "      <td>M8</td>\n",
              "      <td>P</td>\n",
              "      <td>202008209266851913</td>\n",
              "      <td>10</td>\n",
              "      <td>IND</td>\n",
              "      <td>MELLON, TIMOTHY</td>\n",
              "      <td>SARATOGA</td>\n",
              "      <td>WY</td>\n",
              "      <td>823311500</td>\n",
              "      <td>SELF-EMPLOYED</td>\n",
              "      <td>INVESTMENTS</td>\n",
              "      <td>7092020</td>\n",
              "      <td>10000000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>SA11A.15446</td>\n",
              "      <td>1434706</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4090120201833903380</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>990568</th>\n",
              "      <td>C00571703</td>\n",
              "      <td>N</td>\n",
              "      <td>M8</td>\n",
              "      <td>P</td>\n",
              "      <td>202008209266851908</td>\n",
              "      <td>10</td>\n",
              "      <td>IND</td>\n",
              "      <td>SCHWARZMAN, STEPHEN A.</td>\n",
              "      <td>NEW YORK</td>\n",
              "      <td>NY</td>\n",
              "      <td>101543302</td>\n",
              "      <td>BLACKSTONE</td>\n",
              "      <td>CHAIRMAN &amp; CEO</td>\n",
              "      <td>7012020</td>\n",
              "      <td>10000000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>SA11A.15411</td>\n",
              "      <td>1434706</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4090120201833903366</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>988418</th>\n",
              "      <td>C00547349</td>\n",
              "      <td>N</td>\n",
              "      <td>M8</td>\n",
              "      <td>P</td>\n",
              "      <td>202008209266445875</td>\n",
              "      <td>10</td>\n",
              "      <td>IND</td>\n",
              "      <td>STEYER, THOMAS F.</td>\n",
              "      <td>SAN FRANCISCO</td>\n",
              "      <td>CA</td>\n",
              "      <td>941049007</td>\n",
              "      <td>FAHR, LLC</td>\n",
              "      <td>FOUNDER</td>\n",
              "      <td>7012020</td>\n",
              "      <td>3479294</td>\n",
              "      <td>NaN</td>\n",
              "      <td>VNVNVHN8SQ0</td>\n",
              "      <td>1434668</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4082920201831239483</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1001457</th>\n",
              "      <td>C00495028</td>\n",
              "      <td>N</td>\n",
              "      <td>M8</td>\n",
              "      <td>P</td>\n",
              "      <td>202008209266639943</td>\n",
              "      <td>10</td>\n",
              "      <td>IND</td>\n",
              "      <td>SIMONS, JAMES H.</td>\n",
              "      <td>NEW YORK</td>\n",
              "      <td>NY</td>\n",
              "      <td>100107007</td>\n",
              "      <td>EUCLIDEAN CAPITAL</td>\n",
              "      <td>PRESIDENT</td>\n",
              "      <td>7152020</td>\n",
              "      <td>2500000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>VN8FNNJW723</td>\n",
              "      <td>1434687</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NON-CONTRIBUTION ACCOUNT</td>\n",
              "      <td>4090220201833936065</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>860246</th>\n",
              "      <td>C00620971</td>\n",
              "      <td>N</td>\n",
              "      <td>M8</td>\n",
              "      <td>P</td>\n",
              "      <td>202008209266126372</td>\n",
              "      <td>10</td>\n",
              "      <td>IND</td>\n",
              "      <td>STEYER, THOMAS</td>\n",
              "      <td>SAN FRANCISCO</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.41045e+08</td>\n",
              "      <td>FAHR LLC</td>\n",
              "      <td>PHILANTHROPY AND ADVOCACY</td>\n",
              "      <td>7242020</td>\n",
              "      <td>2500000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>VSH7WMSTV40</td>\n",
              "      <td>1434556</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>4090120201833903301</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "           CMTE_ID AMNDT_IND  ...                 MEMO_TEXT               SUB_ID\n",
              "990582   C00571703         N  ...                       NaN  4090120201833903380\n",
              "990568   C00571703         N  ...                       NaN  4090120201833903366\n",
              "988418   C00547349         N  ...                       NaN  4082920201831239483\n",
              "1001457  C00495028         N  ...  NON-CONTRIBUTION ACCOUNT  4090220201833936065\n",
              "860246   C00620971         N  ...                       NaN  4090120201833903301\n",
              "\n",
              "[5 rows x 21 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 25
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZpDCZF044orD",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 102
        },
        "outputId": "43935d99-c06b-4fcd-e723-4a5c5affe85d"
      },
      "source": [
        "# Summary statistics of EMPLOYER for rows whose OCCUPATION is exactly 'EXECUTIVE'.\n",
        "# A single .loc selection picks the rows and the column in one step.\n",
        "sort_amt.loc[sort_amt['OCCUPATION'] == 'EXECUTIVE', 'EMPLOYER'].describe()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "count                   6193\n",
              "unique                  2349\n",
              "top       SOUTHERN CA EDISON\n",
              "freq                     215\n",
              "Name: EMPLOYER, dtype: object"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 38
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hyiY1HCz4oaE"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "k0qWZ1iFAlCE",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        },
        "outputId": "490d0372-c2ff-4de9-8ad7-77783c72dd63"
      },
      "source": [
        "# Keep rows whose EMPLOYER already appeared in an earlier row of df.\n",
        "# duplicated(keep='first') leaves the first occurrence of each value unflagged,\n",
        "# so that first row is not part of the result.\n",
        "# Rows whose EMPLOYER is 'NOT EMPLOYED' or 'RETIRED' are excluded.\n",
        "df_newdup = df[\n",
        "    df['EMPLOYER'].duplicated(keep='first')\n",
        "    & ~df['EMPLOYER'].isin(['NOT EMPLOYED', 'RETIRED'])\n",
        "]\n",
        "\n",
        "df_newdup.head(n=5)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>C00706333</td>\n",
              "      <td>ALVAREZ, JACK</td>\n",
              "      <td>TRACY</td>\n",
              "      <td>CA</td>\n",
              "      <td>95304</td>\n",
              "      <td>ALVAREZ FARMS, INC.</td>\n",
              "      <td>PRESIDENT</td>\n",
              "      <td>9302020</td>\n",
              "      <td>2300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>C00706333</td>\n",
              "      <td>ALVAREZ, JACK</td>\n",
              "      <td>TRACY</td>\n",
              "      <td>CA</td>\n",
              "      <td>95304</td>\n",
              "      <td>ALVAREZ FARMS, INC.</td>\n",
              "      <td>PRESIDENT</td>\n",
              "      <td>9302020</td>\n",
              "      <td>200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>C00431932</td>\n",
              "      <td>COOPER, DAVID</td>\n",
              "      <td>NEW BRAUNFELS</td>\n",
              "      <td>TX</td>\n",
              "      <td>78132</td>\n",
              "      <td>OVINTIV SERVICES INC.</td>\n",
              "      <td>DRILLING COORDINATOR</td>\n",
              "      <td>6302020</td>\n",
              "      <td>104</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>C00431932</td>\n",
              "      <td>CURRAN, KENT</td>\n",
              "      <td>LITTLETON</td>\n",
              "      <td>CO</td>\n",
              "      <td>80127</td>\n",
              "      <td>OVINTIV SERVICES INC.</td>\n",
              "      <td>SENIOR LAND NEGOTIATOR</td>\n",
              "      <td>6302020</td>\n",
              "      <td>20</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>C00431932</td>\n",
              "      <td>DARLINGTON, BRUCE</td>\n",
              "      <td>SPRING</td>\n",
              "      <td>TX</td>\n",
              "      <td>77379</td>\n",
              "      <td>OVINTIV SERVICES INC.</td>\n",
              "      <td>SR. MANAGER, DRILLING &amp; COMPL</td>\n",
              "      <td>6302020</td>\n",
              "      <td>50</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CMTE_ID               NAME  ... TRANSACTION_DT TRANSACTION_AMT\n",
              "7   C00706333      ALVAREZ, JACK  ...        9302020            2300\n",
              "8   C00706333      ALVAREZ, JACK  ...        9302020             200\n",
              "13  C00431932      COOPER, DAVID  ...        6302020             104\n",
              "14  C00431932       CURRAN, KENT  ...        6302020              20\n",
              "15  C00431932  DARLINGTON, BRUCE  ...        6302020              50\n",
              "\n",
              "[5 rows x 9 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0OznZbEdSjB1",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "91335d8e-6ce1-44c8-c9ba-1a7856cacdcb"
      },
      "source": [
        "# Distinct EMPLOYER values among the df_newdup rows (a set, so display order is arbitrary).\n",
        "{employer for employer in df_newdup['EMPLOYER']}"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'CONSULTANT',\n",
              " 'BANNER HEALTH',\n",
              " 'YOUR PART-TIME CONTROLLER LLC',\n",
              " 'WRIGHT COLLEGE',\n",
              " 'II-VI INC.',\n",
              " 'AREAS APPRAISERS INC',\n",
              " 'SAIONTZ & KIRK, P.A.',\n",
              " \"UCSF BENIOFF CHILDREN'S HOSPITAL OAKLA\",\n",
              " 'NICKELSPORN &LUNDIN PC',\n",
              " 'LOCUS IMPACT INVESTING',\n",
              " 'GRAVLEE HOMES INC.',\n",
              " 'CENTURY CONTRACTORS',\n",
              " 'ONPOINT MARKETING INC.',\n",
              " 'GEORGE FOX UNIVERSITY',\n",
              " 'TURN IT OVER CLEANING',\n",
              " 'COLLIERS',\n",
              " 'PRIDGEON AND CLAY, INC.',\n",
              " 'STERLING REALTORS',\n",
              " 'MAINE STATE CU',\n",
              " 'CIVIL LIBERTIES LIST',\n",
              " 'CALIFORNIA',\n",
              " 'AMERICAN CONCRETE',\n",
              " 'GARRISON PC',\n",
              " 'SCHOOL OF ART INSTITUTE OF CHICAGO',\n",
              " 'WASHINGTON STATE HOSPITAL ASSOCIATION',\n",
              " 'JFNNJ',\n",
              " 'NAVAIR SETA (HOFFMAN ENGINEERING)',\n",
              " 'PEACH & LILY',\n",
              " 'EATON SALES',\n",
              " 'MILWAUKEE NEPHROLOGI',\n",
              " 'ADELPHI TECHNOLOGY INC.',\n",
              " 'DEANE DANCE CENTER',\n",
              " 'ODESSA FENCE',\n",
              " 'NXP SEMICONDUCTOR',\n",
              " 'NATIONAL FLATBED LLC',\n",
              " 'T.T.DUNPHY',\n",
              " 'KB DEVELOPMENT',\n",
              " 'SM CONSULTANT',\n",
              " 'PNWRCC',\n",
              " 'STORCH AMINI PC',\n",
              " 'MONTOGOMERY COUNTY GOVERNMENT',\n",
              " 'NIKE, INC',\n",
              " 'CAPE ELECTRIC',\n",
              " 'GD MISSION SYSTEMS INC',\n",
              " 'AKIN GUMP ET AL',\n",
              " 'MINUTEMAN POWER SERVICES LLC',\n",
              " 'BOS DAIRY, LLC',\n",
              " 'SILICONES PLUS INC',\n",
              " 'COCA-COLA CONSOLIDATED, INC.',\n",
              " 'CENTERSTONE',\n",
              " 'A. LEE KIRK ATTORNEY AT LAW',\n",
              " 'LMR FREIGHT',\n",
              " 'FOLEY ABBOTT LLC',\n",
              " 'WIND RIVER TRANSPORT',\n",
              " 'WARNER BROTHERS',\n",
              " 'MOSES & SINGER',\n",
              " 'STACKBIT',\n",
              " 'ROWPAR PHARMACEUTICALS',\n",
              " 'ASURINT',\n",
              " 'GUBB & BARSHAY',\n",
              " 'TYLER TECHNOLOGIES',\n",
              " 'YALE UNIVERSITY',\n",
              " 'CUNY / ISLG',\n",
              " 'OSCARRENDA CONT',\n",
              " 'HAHN & HAHN LLP',\n",
              " 'SBT',\n",
              " 'A BETTER CHANCE FOR OUR',\n",
              " 'GARMIN INTERNATIONAL',\n",
              " 'CA-LOTTS CREDIT & CAR SALES',\n",
              " 'EXXONMOBIL PRODUCTION US',\n",
              " 'BOSTON SYMPHONY ORCHESTRA',\n",
              " 'DECADES OF WHEELS LLC',\n",
              " 'SPORTS ROCKET INC',\n",
              " 'CTVHCS',\n",
              " 'ENTERTAINMENT ONE',\n",
              " 'GIDEON INFORMATICS INC',\n",
              " 'CRAYOLA LLC',\n",
              " 'ENERBANK USA',\n",
              " 'MAXIM CRANE WORKS, LP',\n",
              " 'UMASS MEDICAL SCHOOL',\n",
              " 'GROSSMAN IRON ANS STEEL CO',\n",
              " 'US CONCRETE, INC.',\n",
              " 'MARTIN M RON ASSOCIATES',\n",
              " 'OAKTON COMMUNITY COLLEGE',\n",
              " 'SANTA CRUZ IHSS',\n",
              " 'CENTRAL PACIFIC BANK',\n",
              " 'ICON VALUATION',\n",
              " 'CURETON MIDSTREAM',\n",
              " 'FIS GROU0',\n",
              " 'A TUMBLING T RANCHES',\n",
              " 'THOMAS MEDIA GROUP LLC',\n",
              " 'XCEL ENERGY',\n",
              " 'GSSM',\n",
              " 'I.S. ENVIRONMENTAL PROTECTION AGENCY',\n",
              " 'REESE NURSING SERVICE 51',\n",
              " 'IMPLUS LLC',\n",
              " 'MOORE CAPITAL MANAGEMENT',\n",
              " 'DONKAGELE FARMSINC.',\n",
              " 'PPMM',\n",
              " 'TTA APPRAISAL',\n",
              " 'CENTENNIAL INSURANCE AGENCY',\n",
              " 'KISABETH FURNITURE',\n",
              " 'INSTA LUBE PH CORP',\n",
              " 'UNVERSITY OF ALABAMA BIRMINGHA',\n",
              " \"ST. CATHERINE'S SCHOOL\",\n",
              " 'TATOOSH SEAFOODS',\n",
              " 'EMERGENCY PHYSICIAN',\n",
              " 'MERRILL BANK OF AMERICA',\n",
              " 'ONEAL AND ASSOCIATES',\n",
              " 'METROPOLITAN TRANSPORTATION AUTHORITY',\n",
              " 'NATHAN LITTAUER HOSPITAL',\n",
              " 'RETIRRD',\n",
              " 'ROPER ST FRANCIS HEALTHCARE',\n",
              " 'JGNEIL',\n",
              " 'PD&C',\n",
              " 'CVE',\n",
              " 'PIRE',\n",
              " 'SELF EMPLOYED - WOMAN OWNED SMALL BUSI',\n",
              " 'CUSHEES INC.',\n",
              " 'BEALLS',\n",
              " 'VALLEY PHYSICIANS ALLIANCE',\n",
              " 'FRIENDSHIP HOUSE',\n",
              " 'PATERSON CITY',\n",
              " 'INFO TECH, INC',\n",
              " 'ROSENDIN ELECTRIC',\n",
              " 'MCDERMOTT',\n",
              " 'GCCMHC',\n",
              " 'LOCKHART WORK PROGRAM FACILITY',\n",
              " 'FOOD LION, LLC',\n",
              " 'NTESS, LLC',\n",
              " 'ETRN - WAYNESBURG',\n",
              " 'UPPER IOWA UNIVERSITY',\n",
              " \"HOM SOTHEBY'S\",\n",
              " 'BAC LOCAL 01 MN',\n",
              " 'MURRAY IND',\n",
              " 'ARVEST BANK',\n",
              " 'GRIFFIN ELECTRIC.INC.',\n",
              " 'LAND TITLE',\n",
              " 'SAN PASQUAL BAND OF MISSION INDIANS',\n",
              " 'BLOOMER BIOTECH',\n",
              " 'GEORGIA-PACIFIC WOOD PRODUCTS LLC',\n",
              " \"ST. ANN'S WAREHOUSE\",\n",
              " 'COLTON JOINT UNIFIED',\n",
              " 'WINGATE WEST SPRINGFIELD',\n",
              " 'INSIGHTSQUARED',\n",
              " 'WASATCH DISTRIBUTING CO',\n",
              " 'LOGISTICS HEALTH INC',\n",
              " 'HOMES ARE US INC',\n",
              " 'MANPOWER',\n",
              " 'LOUISIANA ORTHOPEDIC SPECIALISTS',\n",
              " 'BHATE CONSTRUCTION',\n",
              " 'CORNUCOPIA CRUISE LI',\n",
              " 'WAKE FOREST',\n",
              " 'SALT RIVER PROJECT',\n",
              " 'ADVANCE FIRE SYSTEMS INC',\n",
              " 'THE WINDWARD SCHOOL',\n",
              " 'LIBERTY BANK',\n",
              " 'FAITH BAPTIST CHURCH',\n",
              " 'MORRIS AUTOMOTIVE MACHINE',\n",
              " 'SACTO. PUB. LIBRARY JOINT POWERS AUTH.',\n",
              " 'TRAILWEST BANK',\n",
              " 'A-1 AFFORDABLE SIGN CO.',\n",
              " 'TUMAC LUMBER CO',\n",
              " 'PINECONE APARTMENTS',\n",
              " 'APR SOULTIONS',\n",
              " 'VBCPS',\n",
              " 'QUORA, INC.',\n",
              " 'KOCH BUSINESS SOLUTIONS, LP',\n",
              " 'DRIGGERS SCHULTZ & HERBST',\n",
              " 'SVB FINANCIAL GROUP',\n",
              " 'SERRA & GARRITY PC',\n",
              " 'BUSINESS PERFORMANCE INC.',\n",
              " 'RTI-HS',\n",
              " 'HIGHLAND EXCAVATION',\n",
              " 'AMOS WILKINSON, CRNA',\n",
              " 'COMMUNICATIONS DIRECTOR',\n",
              " 'THE LIGHT SOURCE INC',\n",
              " 'MULLALLY DEVELOPMENT',\n",
              " 'SILICON LABS',\n",
              " 'BERNDT CPA LLC',\n",
              " 'CAREY INTERNATIONAL',\n",
              " 'ANJALEONI ENTERPRISES INC',\n",
              " 'HAMPSHIRE',\n",
              " 'AMICA',\n",
              " 'LIVINGMIND PROJECT, INC.',\n",
              " 'NICOR',\n",
              " 'STAR BODY AND PAINT',\n",
              " 'TARANTINO AUTO BODY',\n",
              " 'FPSR',\n",
              " 'AUTOMATE ASSOCIATES',\n",
              " 'DEMOCRATIC NATIONAL CONVENTION COMMITT',\n",
              " 'HOME & OFFICE CABINETRY',\n",
              " 'NUCOR STEEL FLORIDA INC.',\n",
              " 'THE PROPERTY SHOP',\n",
              " 'HOPKINS SCHOOL',\n",
              " 'SCRUBS ETC',\n",
              " 'ROCHESTER COMMUNITY SCHOOL DIS',\n",
              " 'BHE RENEWABLES, LLC',\n",
              " 'COMSEWOGUE SD',\n",
              " 'ZOGENIX INC.',\n",
              " 'NATIONAL AQUARIUMN',\n",
              " 'KIESEL LAW LLP',\n",
              " 'UNITARIAN UNIVERSITY',\n",
              " 'POSEF',\n",
              " 'CHENHALL SERVICES',\n",
              " 'STILLWATER PUBLIC SCHOOLS',\n",
              " 'GARCIA MARBLE & TILE',\n",
              " 'HENDERSON ENGINEERING CO.',\n",
              " 'ALLIANZ OF AMERICA CORP',\n",
              " 'FERMAN BMW',\n",
              " 'BRISBANE SCHOOL DISTRICT',\n",
              " 'DAWSON HOLDINGS INC',\n",
              " 'U. S. DEPT OF VETERANS AFFAIRS',\n",
              " 'EARLES ARCHITECTS AND ASSOCIATES',\n",
              " 'BLUFF POINT ASSOCIATES',\n",
              " 'OVESCO',\n",
              " 'RYAN COYLE',\n",
              " 'AMERICAN ENTERPRISE INV. SRVCS',\n",
              " 'VISITING NURSE ASSOCIATION',\n",
              " 'SMG',\n",
              " 'ASHNU INTERNATIONAL INC',\n",
              " 'MOLDEX METRIC',\n",
              " 'ROSWELL PARK CANCER INSTITUTE INC',\n",
              " 'PECCAINC',\n",
              " 'COEUR ALASKA',\n",
              " 'MRA LABRATORIES',\n",
              " 'PETERBOROUGH PLAYERS',\n",
              " 'AMERESCO',\n",
              " 'SUNY DOWNSTATE',\n",
              " 'BCBS',\n",
              " 'S M STOLLER',\n",
              " 'REAL ESTATE DEV CO',\n",
              " 'BAPTIST HEALTH',\n",
              " 'JONATHAN D. SALK M.D.',\n",
              " 'ALPHAPORT',\n",
              " 'PRECISION AUTOMOTIVE PLASTICS',\n",
              " 'CITY OF RIALTO',\n",
              " 'UMIVERSITY PF DENVER',\n",
              " 'SAN JUAN COLLEGE',\n",
              " 'SPENCER STUART',\n",
              " 'CHICAGO AREA LECET',\n",
              " \"WOMEN'S RESOURCE CENTER\",\n",
              " 'BAKER PERKINS',\n",
              " 'BOE REAL ESTATE',\n",
              " 'L.A.BELL MOTOR LINES INC.',\n",
              " 'CAPGEMINI AMERICA',\n",
              " 'ORION ENGINEERING CONSTRUCTION',\n",
              " 'GOSHEN FAMILY PHYSICIANS',\n",
              " 'ORANGE VILLAGE',\n",
              " 'SO TEX EXTERM',\n",
              " 'AIR PRODUCTS',\n",
              " 'MEDICAL GROUP',\n",
              " 'BOSTON CAPITAL',\n",
              " 'FOX NEWS NETWORK  LLC',\n",
              " 'LSPM',\n",
              " 'SUPERMICRO COMPUTER INC',\n",
              " 'REDD REALTY',\n",
              " 'CUMMINS INC.',\n",
              " 'CAREY PERKINS',\n",
              " 'RHAMILTON CONSULTING',\n",
              " 'UCS',\n",
              " 'SAINT MARYS COUNTY PUBLIC SCHOOLS',\n",
              " 'NYSOMS',\n",
              " 'CODESTREAM INC.',\n",
              " 'CONNER MKTG  SALES',\n",
              " 'BURGERBUSTERS INC',\n",
              " 'NEUROCRINE',\n",
              " 'FIRST AMERICAN',\n",
              " 'DURDEN  CONSTRUCTION',\n",
              " 'TRUCKERS INSURANCE ASSOCIATES, INC.',\n",
              " 'YOUNG SOMMER',\n",
              " 'BERNARDS APPRAISAL ASSCOCIATES',\n",
              " 'C.L. BARNHOUSE CO.',\n",
              " 'FIVES MACHINING SYSTEMS',\n",
              " 'RDO',\n",
              " 'NYEMASTER GOODE PC',\n",
              " 'UNVERSITY OF COLORADO BOULDER',\n",
              " 'JIM DOYLE & ASSOCIATES',\n",
              " 'POLING & CUTLER',\n",
              " 'URIST FINANCIAL AND RETIREMENT PLANNIN',\n",
              " 'COUNCIL FOR RESPONSIBLE NUTRIT',\n",
              " 'USD 289',\n",
              " 'ICANN',\n",
              " 'VAPOTHERM',\n",
              " 'SMITHFIELD FOODS',\n",
              " 'CROCKETT PROPERTIES',\n",
              " 'CELEBRATION CHURCH',\n",
              " 'COASTAL RESOURCES',\n",
              " 'PALM BEACH COUNTY FIRE RESCUE',\n",
              " 'TEK SYSTEMS',\n",
              " 'WABASH VALLEY POWER ASSN., INC.',\n",
              " 'KAIFER INS',\n",
              " 'CENTRA',\n",
              " 'PBS MENTAL HEALTH ASSOCIATES',\n",
              " 'FYZICAL',\n",
              " 'META HOUSING CORPORATION',\n",
              " 'FLATIRON WORKS',\n",
              " 'CENTER FOR ECONOMIC DEVELOPMENT LAW',\n",
              " 'OMAHA PUBLIC SCHOOL',\n",
              " 'CONSTELLATION',\n",
              " 'WESTERRA CREDIT UNION',\n",
              " 'BREYMAN PROPERTIES',\n",
              " 'XXX',\n",
              " 'HMHP',\n",
              " 'MARY KAY INC',\n",
              " 'THE STANDARD',\n",
              " 'U OF UTAH HEALTH HOSPITALS AND CLINICS',\n",
              " 'TAKEDA PHARMACEUTICALS U.S.A. INC.',\n",
              " 'MCDERMOTT WILL & EMERY',\n",
              " 'AYA HEALTHCARE',\n",
              " 'GRAMBLING STATE UNIVERSITY',\n",
              " 'DUKE CUSTOM FABRICATION',\n",
              " 'TETRATECH',\n",
              " 'DAI',\n",
              " 'AVIANDS',\n",
              " 'FIDES LLC',\n",
              " 'EDUCATION FIRST FCU',\n",
              " 'CEM',\n",
              " 'BHG RAND REALTY',\n",
              " 'COMPOSITE & CASTING SUPPLY INC',\n",
              " 'DESIGN VITTORPIA LLC',\n",
              " 'MAC ARTHUR FOUNDATIO',\n",
              " 'LA CANADA WEST',\n",
              " 'BARJAC INC',\n",
              " 'MORRIS DEV',\n",
              " 'BROOKS, WILBURN, & LOGAN CO',\n",
              " 'SALVATION ARMY AND',\n",
              " 'BRAUN & BRAUN',\n",
              " 'BUCHER  CHRISTIAN',\n",
              " 'VERITIV CORP',\n",
              " 'NANSEMOND PRE-CAST',\n",
              " 'JORDAN SCHOOL DISTRICT',\n",
              " 'CENTERSTAGE PRODUCTIONS',\n",
              " 'BTCO, INC.',\n",
              " 'SALEM CLINIC',\n",
              " 'RBC WEALTH MANAGEMENT',\n",
              " 'EMMANUEL MEDICAL',\n",
              " 'COMMUNITY GROUP INC',\n",
              " 'FINANCIAL BROKERAGE',\n",
              " 'SWISHER INTERNATIONAL, INC.',\n",
              " 'OPSALESINC',\n",
              " 'EXELTECH CONSULTING INC',\n",
              " 'OHIO CONFERENCE OF COMMUNITY DEVELOPME',\n",
              " 'THE CHAPIN SCHOOL',\n",
              " 'PHILLIP SAN SEBASTIAN',\n",
              " 'STATE OF VERMONT',\n",
              " 'RICK HAMM CONSTRUCTION',\n",
              " 'TIMMONS SHEET METAL',\n",
              " 'TVHO',\n",
              " 'UNITED TEACHERS LOS ANGELES',\n",
              " 'ST JOSEPH',\n",
              " 'WORCESTER PUBLIC SCHOOLS',\n",
              " 'LORDS VALLEY SELF STORAGE',\n",
              " 'FPN',\n",
              " 'MOUNT SINAI HOSPITAL MANHATTAN',\n",
              " 'KIDS DEVELOPMENTAL THERAPY',\n",
              " 'VETERANS AFFAIRS',\n",
              " \"MY FRIEND'S PLACE\",\n",
              " 'PAINT WIZARDS INC.',\n",
              " 'EDG CONSULTING ENGINEERS',\n",
              " 'FINISH KARE PRODUCTS',\n",
              " 'E-DEVELOPMENT INTERNATIONAL',\n",
              " 'JAMES F STEARNS CO',\n",
              " 'NUMERIX',\n",
              " 'PARK NICOLLET CLINIC',\n",
              " 'TUSCOLA ISD',\n",
              " 'INDEPENDENT REPAIR',\n",
              " 'KUMIN INSURANCE GROUP',\n",
              " 'COGHLAN CROWSON LLP',\n",
              " 'PASSAGE TO INDIA',\n",
              " 'PAWNEE HEALTH AND WELLNESS',\n",
              " 'M L BERGER & CO.',\n",
              " 'HP PRODUCTIONS',\n",
              " 'STRIBLING',\n",
              " 'ROBSON COMMUNITES',\n",
              " 'BANKERS FINANCIAL CORP',\n",
              " 'PEGASYSTEMS',\n",
              " 'AZ STAGE SOUND  LIGHTS',\n",
              " 'LAW OFFICE OF DALE WAGNER',\n",
              " 'BRAYTON PURCELL LLP',\n",
              " 'NATIVEENERGY',\n",
              " 'FULTON COUNTY',\n",
              " 'ENCORE',\n",
              " 'ROOFEX',\n",
              " 'GCEI',\n",
              " 'NEW YORK CITY POLICEPENSION FUND',\n",
              " 'AT&T CORP.',\n",
              " 'KIPP DC',\n",
              " 'PARKER REALTY & ASSOCIATES',\n",
              " 'AMA CONSULTING ENGINEERS',\n",
              " 'SCORP GROUP INC.',\n",
              " 'VILLAGE SUPERMARKETS DBA SHOPRITE',\n",
              " 'GREG COLEMAN LAW PC',\n",
              " 'SALESFORCE',\n",
              " 'RAPID CPAP LLC',\n",
              " 'ARTIST',\n",
              " \"READ N' POST\",\n",
              " 'MONIMEL CORP',\n",
              " 'ORANGE COUNTY COMMUNITY COLLEGE',\n",
              " 'C MYERS CORP',\n",
              " 'LIGHTNING ORCHARD',\n",
              " 'CUNNINGHAM JEWELERS',\n",
              " 'FRANKLIN MUTUAL INSURANCE COMPANY',\n",
              " 'PCSD',\n",
              " 'DOCTOR',\n",
              " 'CDFW',\n",
              " \"ST. DUNSTAN'S ANGLICAN CHURCH\",\n",
              " 'ACME SUPERMARKET',\n",
              " 'MENARDS',\n",
              " 'CLAREMONT',\n",
              " 'LAWSON, DAVIS, PICKREN & SEYDEL',\n",
              " 'CHRISTIAN LEADERS INSTITUTE',\n",
              " \"SJOERD'S PRO TOOLS\",\n",
              " 'WHITE HILL CHURCH OF BRETHREN',\n",
              " 'BURNS MCDONNELL ENGINEERING COMPANY I',\n",
              " 'MATANKY',\n",
              " 'WOMBLE BOND DICKINSON (US) LLP',\n",
              " 'LUIMAN REAL ESTATE INC',\n",
              " 'HERZOG TECHNOLOGIES, INC.',\n",
              " 'PHILIPS HEALTH SYSTEMS',\n",
              " 'BENDER ENGINEERING',\n",
              " 'MEV',\n",
              " 'FOX VALLEY IMAGING',\n",
              " 'METROPOLITAN BAPTIST CHURCH',\n",
              " 'ROSEMOUNT CENTER',\n",
              " 'GREATER LAWRENCE TECH SCHOOL',\n",
              " 'RE/MAX REALTY ASSOCIATES-CHA',\n",
              " 'MORRISON FOERSTER',\n",
              " 'THE CARLYLE GROUP INC.',\n",
              " 'SENATOR LEW FREDERICK',\n",
              " 'HUNGRY PLANET INTELLIGENCE',\n",
              " 'MULLIGAN SECURITY COMPANY',\n",
              " 'SNC-LAVALIN',\n",
              " 'BSC',\n",
              " 'PRA',\n",
              " 'CLEAN WATER OF VA',\n",
              " 'ASA STAFFING',\n",
              " 'M/E ENGINEERING',\n",
              " 'SERVICE EMPLOYEES INTERNATIONAL UNION',\n",
              " 'PRECISIONEFFECT',\n",
              " 'SEAWORLD CALIFORNIA',\n",
              " 'AFSCME CA LOC 3299',\n",
              " 'WILDWOOD',\n",
              " 'GE PLASTICS',\n",
              " 'US TRANSPORTATION',\n",
              " 'MONTEFIORE MEDICAL CENTER',\n",
              " 'PCG',\n",
              " 'CTS',\n",
              " 'CEDAR FALLS COMM SCHOOLS',\n",
              " 'MERCANTILE BANK',\n",
              " 'THE POKEMON COMPANY INTERNATIONAL',\n",
              " 'FIFTH STREET RENAISSANCE',\n",
              " 'METROPOLITAN NASHVILLE BD OF ED',\n",
              " 'SPRINGETTSBURY TOWNSHIP',\n",
              " 'GETTYSBURG COLLEGE',\n",
              " 'SSES',\n",
              " 'CONTINENTAL AUTOMOTIVE',\n",
              " 'AMERICAN INSTITUTES FOR REASEARCH',\n",
              " 'DEER VALLEY RESORT',\n",
              " 'CARGILLE-SACHER LABS, INC.',\n",
              " 'JP MORGAN',\n",
              " 'CARDIOVASCULAR',\n",
              " 'PERFORMANCE SYSTEMS',\n",
              " 'KLD',\n",
              " 'FLORIDA',\n",
              " 'THE ARLINGTON SLEEP DISORDER CENTER',\n",
              " 'DE WINNE CONSTRUCTION',\n",
              " 'CBRE, INC',\n",
              " 'FISHER PHILLIPS',\n",
              " 'IC MANAGE',\n",
              " 'DELANEY CORPORATE SERVICES',\n",
              " 'HOMESTEAD INC',\n",
              " 'KERING',\n",
              " 'ONEOK FIELD SERVICES COMPANY',\n",
              " 'COWLES PARKWAY FORD, INC.',\n",
              " 'GIM CAPITAL MANAGEMENT',\n",
              " 'STANFORD MEDICAL GROUP',\n",
              " 'KILLIAN &DONOHUE',\n",
              " 'JENSEN TRAVELON',\n",
              " 'WMLM',\n",
              " 'MATTESON MARINE SEV',\n",
              " 'CRAFT COFFEE',\n",
              " 'INSTANT CARE',\n",
              " 'NOT IN WORKFORCE',\n",
              " 'HIGH-MARK SYSTEMS',\n",
              " 'TRINSEO LLC',\n",
              " 'HOYT ARCHITECTS',\n",
              " 'TIVERITY CONSULTING',\n",
              " 'LED SUPPLY',\n",
              " 'MELINDA MOTLAGH',\n",
              " 'CALIFORNIA STATE UNIVERSITY LA',\n",
              " 'UNC CHAPEL HILL',\n",
              " 'CMC CONSTRUCTION',\n",
              " 'G M NORTHRUP CORP',\n",
              " 'GROW MARKETING',\n",
              " 'SWISSRAY CUSTOMER CARE LLC',\n",
              " 'GREECE CENTRAL SCHOOL DISTRICT',\n",
              " 'BEVERLY-HANKS & ASSOCIATES',\n",
              " 'ASG REAL ESTATE CO.',\n",
              " 'BACK TO THE PAST',\n",
              " 'CHARLOTTE MECKLENBURG SCHOOLS',\n",
              " \"CONNOLLY'S TOWING INC\",\n",
              " 'UNIVERSITY OF PITTSBURGH SCHOOL OF MED',\n",
              " 'DOCTORS MAKING HOUSECALLS',\n",
              " 'MINITAB',\n",
              " 'HDR ARCHITECTURE INC.',\n",
              " 'NAR',\n",
              " 'THE MONEY STORE',\n",
              " 'LAMAR STATE COLLEGE - PORT ARTHUR',\n",
              " 'GGUSD',\n",
              " 'SHERATON UNIVERSAL HOTEL',\n",
              " 'STACY AND BAKER LAW',\n",
              " 'GJAC',\n",
              " 'LOBIS TECHNOLOGY CONSULTANTS LLC',\n",
              " 'ACCRUENT',\n",
              " 'CANCIO NADAL & RIVERA LLC',\n",
              " 'OLD VINE MANAGEMENT GROUP',\n",
              " 'NATIONAL PATIENT ADVOCATE FOUNDATION',\n",
              " 'GARNET VALLEY SCHOOL DISTRICT',\n",
              " 'GUARANTEE INS AGCY',\n",
              " 'TRINITY CONSULTANTS',\n",
              " 'COOK COUNTY OF IL',\n",
              " 'AONL',\n",
              " 'NOSSAMAN LLP',\n",
              " 'BREAD FOR THE WORLD',\n",
              " 'FNC',\n",
              " 'NORTH SHORE SENIOR CENTER',\n",
              " \"HAY'S\",\n",
              " 'SELF ORIGINAL ARTISTS NYC',\n",
              " 'POWER SUPPLY',\n",
              " 'WIDGEON MGT CORP',\n",
              " 'RADIAN GUARANTY INC.',\n",
              " 'JENISON PUBLIC SCHOOLS',\n",
              " 'A PITTSBURGH PLUMBER LLC',\n",
              " 'PENASQUITOS PET CLINIC',\n",
              " 'NEA FED. GOVT. AGENCY',\n",
              " 'MA LEAGUE OF CHCS',\n",
              " 'STATE FARM INS.',\n",
              " 'KANYEZI AFRICA SAFARI',\n",
              " 'UFCW LOCAL NO. 328',\n",
              " 'ABLE ELECTRICAL SVC.',\n",
              " 'KAREN G BINDER',\n",
              " 'VALLEY EMERGENCY CARE',\n",
              " 'SUMMIT REHAB UPMC',\n",
              " 'THE FLORIDA AQUARIUM',\n",
              " 'BRUCE LEE',\n",
              " 'SOUND COMMUNITY SOLUTIONS',\n",
              " 'FOOD SCIENCES CORP.',\n",
              " 'JOHN MORRELL & COMPANY',\n",
              " 'UN ENVIRONMENT PROGRAMME',\n",
              " 'JJ MARQUIS ELECTRIC',\n",
              " 'COMMUNITIES ACTIVELY LIVING INDEPENDEN',\n",
              " 'USONIAN REALTY',\n",
              " 'ZUMIEZ',\n",
              " 'ROYAL FLEX CIRCUITS',\n",
              " 'COMMERCEHUB',\n",
              " 'GENESIS MEDICAL CENTR',\n",
              " \"CHILDREN'S HOSPITAL BOSTON\",\n",
              " 'INDATA CORPORATION',\n",
              " 'EPIC LLC',\n",
              " 'AUDERE PARTNERS',\n",
              " 'CLARK CONSTRUCTION',\n",
              " 'RJH SCIENTIFIC INC',\n",
              " 'TBWBHL',\n",
              " 'MUNGER TOLLES & OLSON',\n",
              " 'HERE',\n",
              " 'SAP NATIONAL SECURITY SER',\n",
              " 'FORTINET',\n",
              " 'CATHERINE WILCOX DDS',\n",
              " 'HEMCON MEDICAL TECHNOLOGIES INC',\n",
              " 'RAYA RADIOLOGY',\n",
              " 'BROWNSTEIN HYATT FARBER SCHRECK',\n",
              " 'BLRG',\n",
              " 'BASD',\n",
              " 'PARIS BRIDGE ACADEMY',\n",
              " 'HOME FURNITURE',\n",
              " 'JDS&A ADVISORS',\n",
              " 'NVI',\n",
              " 'DISNEY ANIMATION STUDIOS',\n",
              " 'TELLIGENT MASONRY LLC',\n",
              " 'REI',\n",
              " 'HOLLYWOOD CASINO',\n",
              " 'SAPPHIRE COMPUTERS INC.',\n",
              " 'SEABULK TANKERS, INC.',\n",
              " 'TAURIAINEN ENGINEERING',\n",
              " 'SIMPLYEZ HDM LLC',\n",
              " 'LAFAYETTE GENERAL HEALTH',\n",
              " 'WELLTOWER, INC.',\n",
              " 'KIRKLAND AND ELLIS',\n",
              " \"CABELA'S INC.\",\n",
              " 'VJSTURDIVANTINC',\n",
              " 'GARDEN CITY SCHOOLS DIST',\n",
              " 'SEARIVER MARITIME INC',\n",
              " 'MADISON FIRE DEPT.',\n",
              " 'POWERS MUSIC SCHOOL',\n",
              " 'LA MESA SPRING VALLEY SCHOOLS',\n",
              " 'BHHS REAL ESTATE',\n",
              " 'NEW ENGLAND GRANITE MARBLE',\n",
              " 'CAPROCK DAIRY',\n",
              " 'RACHIO',\n",
              " 'MCPHEE PLUMBING',\n",
              " 'TERRE HAUTE HEART CENTER',\n",
              " 'MORENO',\n",
              " 'SENTINELONE',\n",
              " 'BERRY PLASTICS',\n",
              " 'COSTAL CONNECTION',\n",
              " 'GLOBAL VILLAGE ACADEMY',\n",
              " 'MARK WINKLER',\n",
              " 'PENN STATE UNIVERSITY',\n",
              " 'COLUMBIA MUTUAL INSURANCE COMPANY',\n",
              " 'BOB BARKER',\n",
              " 'HATCHERY PLANNING',\n",
              " 'UMECO',\n",
              " 'COMMERCIAL DEVELOPER',\n",
              " 'TRUCK-TECH',\n",
              " 'NEDERLANDER ORGANIZATION',\n",
              " 'MURRAY & MURRAY',\n",
              " 'WJW ARCHITECTS',\n",
              " 'HOLMES MURPHY',\n",
              " 'PEOPLE READY',\n",
              " 'COLUMBUS STATE UNIVERSITY',\n",
              " 'CARAHSOFT',\n",
              " \"FEDERAL GOV'T\",\n",
              " 'REIW CONSULTING LLC',\n",
              " 'I&CO',\n",
              " 'CHURCH OF. HRIST',\n",
              " 'OCCUCARE INTERNATIONAL',\n",
              " 'BP AMERICA',\n",
              " 'TEMPEST CAPITAL LTD',\n",
              " 'WEST LAFAYETTE COM SCHOOL CORP',\n",
              " 'ALEXANDRIA REAL ESTATE',\n",
              " 'JHU/APPLIED PHYSICS LAB.',\n",
              " 'DESERT HOUSE OF PRAYER',\n",
              " 'UPDEGRAFF CLINIC',\n",
              " 'SHANTI POOLS LLC',\n",
              " 'MPL',\n",
              " 'GLENN MITCHELL INSURANCE',\n",
              " 'SWITCHBACK TRAVEL LLC',\n",
              " 'PUBLIC EDUCATION',\n",
              " 'SELFEMPLOMENT',\n",
              " 'BCD MEETINGS & EVENTS',\n",
              " 'COLUMBIA PRESBYTERIAN HOSPITAL',\n",
              " 'SANOFI PASTEUR',\n",
              " 'KOPPEL AND SCHER',\n",
              " 'APEX-STUDIO SUAREZ',\n",
              " 'DEPT OF THE AIR FORCE',\n",
              " 'DURANGO',\n",
              " 'IRAD SERVICES LLC',\n",
              " 'WINGATE AT WEST SPRINGFIELD',\n",
              " 'LWV-DENVER',\n",
              " 'DOSS REALTY GROUP',\n",
              " 'CAPSTAR ADVISORS',\n",
              " 'SCHOOL CITY OF HAMMOND',\n",
              " 'NORBORD',\n",
              " 'FAMILY HERITAGE',\n",
              " 'TRACTOR SUPPLY OMPANY',\n",
              " 'JAMESTOWN ASSOCIATES',\n",
              " 'PIEDMONT TRIAD ANESTHESIA, PA',\n",
              " 'LIONS SHARE FCU',\n",
              " 'LOCKARD, INC.',\n",
              " 'GREENFIELD POWER EQUIPMENT, INC.',\n",
              " 'LOCHEED MARTIN',\n",
              " 'NUCOR STEEL AUBURN, INC.',\n",
              " 'SLMC',\n",
              " 'HANES INC.',\n",
              " 'OHHP',\n",
              " 'LANCASTER GENERAL HE',\n",
              " 'TELEPHONICS SYSTEMS ENGINEERING GROUP',\n",
              " 'INTEGRA',\n",
              " 'RESMED',\n",
              " 'DISCOVERY INSTITUTE',\n",
              " 'STOCKHOLM UNIVERSITY',\n",
              " 'CENTURY 21 MEYER',\n",
              " 'JACKSONLEWIS(PARTNER)',\n",
              " 'US DOT',\n",
              " 'WOODS PRECISION PRODUCTS',\n",
              " 'ENGINEWORLD LLC',\n",
              " 'THE KIRLIN COMPANY',\n",
              " 'W.A. HYNES & CO.',\n",
              " 'MORRISON & FOERSTER, LLP',\n",
              " 'SYMMETROCM',\n",
              " 'AUBURN HOUSING AUTHORITY',\n",
              " 'CALPINE',\n",
              " 'TOTAL E&P USA',\n",
              " 'ECONOMIC POLICY INSTITUTE',\n",
              " 'NEVADA STATE MUSEUM',\n",
              " 'HUNTER COLLGE',\n",
              " 'CITY OF HOUSTON',\n",
              " 'COLORADO CARE ASSISTANCE',\n",
              " 'BEAUREGARD ELECTRIC CO-OP, INC.',\n",
              " 'DF LEVIN ASSOCIATES',\n",
              " 'SOCIAL CAPITAL GROUP LLC',\n",
              " 'MRS.',\n",
              " 'WIT CREEK PARTNERS',\n",
              " 'SHONDALAND',\n",
              " 'NETSAGE',\n",
              " 'BGR, INC.',\n",
              " 'VERIZON CORP',\n",
              " 'FRIENDS SCHOOL OF BALTIMORE',\n",
              " 'TAYLOR CORPORATION',\n",
              " 'KAMIN IND',\n",
              " 'PROVIDENCE ANESTHESIOLOGY ASSOCIATES,',\n",
              " 'TTUHSC',\n",
              " 'VERRILL DANA, LLP',\n",
              " 'EL CAMINO COLLEGE',\n",
              " 'METROMILE',\n",
              " 'ROPER AND ROPER',\n",
              " 'IGLER/PEARLMAN PA',\n",
              " 'PROQUEST',\n",
              " 'MIRAGE FINE FOODS, INC.',\n",
              " 'AMSTED INTERNATIONAL',\n",
              " 'SOUTHWEST FAMILY GUIDANCE CENTER',\n",
              " 'CITIZENS MEDICAL CENTER',\n",
              " 'FRESNO STATE',\n",
              " \"ST. MARY'S UNIVERSITY\",\n",
              " 'BLUE HERON WELLNESS',\n",
              " 'RINGCENTRAL',\n",
              " 'RUST COLLEGE',\n",
              " 'NEXTEER',\n",
              " 'VOL STATE CC',\n",
              " 'PEOPLES GROUP SELF-EMPLOYED',\n",
              " 'RIA',\n",
              " 'VIMAR',\n",
              " 'GREATSCAPES',\n",
              " 'DAILY JOURNAL',\n",
              " 'GOULD KILLIAN CPA GROUP',\n",
              " 'FREDRICK MANAGEMENT',\n",
              " 'STRONGHOLD',\n",
              " 'GENISIS HEALTHCARE',\n",
              " 'DEMOCRATIC INTELLIGENCE',\n",
              " 'STADIUM TOYOTA',\n",
              " 'LB CONSOLIDATED',\n",
              " 'THE STATE BANK OF FARIBAULT',\n",
              " 'U.S. AGENCY FOR INTERNATIONAL DEVELOPM',\n",
              " 'COOK COUNTY',\n",
              " 'SPARKS WILLSON PC',\n",
              " 'GDK CONSTRUCTION',\n",
              " 'US GOVT ACCOUNTABILITY OFFICE',\n",
              " 'CENTRA INC.',\n",
              " 'LAWWA',\n",
              " 'VERITE',\n",
              " 'MOLZEN CORBIN',\n",
              " '831 DON CUBERO AVE',\n",
              " 'IOWA TALENTED AND GIFTED ASSOCIATION',\n",
              " 'THE ROADRUNNER PRESS',\n",
              " 'ACME GLASS AND MIRROR',\n",
              " 'HABITAT AMERICA',\n",
              " 'POWERS LAW',\n",
              " 'EXPRESSO',\n",
              " 'CSU SAN MARCOS',\n",
              " 'BWXT',\n",
              " 'PREMIER RADIOLOGY',\n",
              " 'WA STATE NURSES ASSOCIATION',\n",
              " 'TOURISM ASSN.',\n",
              " 'EKLHEALTH LLC',\n",
              " 'RODAN+FIELDS',\n",
              " 'UFCW LOCAL NO. 876',\n",
              " 'FRIENDSHIP ACRES PARK INC',\n",
              " 'MOORE AND VAN ALLEN PLLC',\n",
              " 'SAGE V FOODS',\n",
              " 'DR. SUE CAREY PLLC',\n",
              " 'KELLY AUTOMOTIVE GROUP',\n",
              " 'EDX',\n",
              " 'AMHS',\n",
              " 'ESI TOTAL FUEL MANAGEMENT',\n",
              " 'PRIZE LOGIC',\n",
              " 'WINSTEAD PC',\n",
              " 'MEDSTAR GEORGETOWN UNIVERSITY HOSPITAL',\n",
              " 'COMCAST CORPORATION',\n",
              " 'MOSES & SINGER LLP',\n",
              " 'SANDHILLS COMMUNITY COLLEGE',\n",
              " 'MILLIKEN',\n",
              " 'VA DCR',\n",
              " 'GWATNEY CHEVROLET',\n",
              " 'ORTHOPEDIC SPINE THERAPY',\n",
              " 'BERING STRAITS NATIVE CORPORATION',\n",
              " 'SIKORSKY',\n",
              " 'GREVE FOUNDATION',\n",
              " 'SOLIC',\n",
              " 'LUKE',\n",
              " 'CH ROBINSON',\n",
              " 'UCDAVIS CANCER CENTER',\n",
              " 'JAMS INC.',\n",
              " 'MCCOOL FARM AND CATTLE',\n",
              " 'VASSAR ELECTRIC INC',\n",
              " 'NWP',\n",
              " 'COTRONICS CORPORATION',\n",
              " 'MOVEMENT FOR LIFE',\n",
              " 'GILBERT CONSTRUCTION',\n",
              " 'MOUNT VERNON CITY SD',\n",
              " 'CAS',\n",
              " 'NSWCLA',\n",
              " 'CATHOLIC DIOCESE OF ROCKFORD',\n",
              " 'NAP ENGINEERS',\n",
              " 'DIRECT MARKETING CONCEPTS, INC.',\n",
              " 'FMCSA',\n",
              " 'SCIENTIAE LLC',\n",
              " 'MODA HEALTH',\n",
              " 'FLORIDA HIGH SCHOOL ATHLETIC ASSOC.',\n",
              " 'SUITECX',\n",
              " 'EVANS LAW FIRM, INC.',\n",
              " 'COMMUNITY LEGAL AID SERVICES',\n",
              " 'TONI SHERMAN INTERIORS LLC',\n",
              " 'AGS CONSTRUCTION',\n",
              " 'CCRMC',\n",
              " 'MOLINA HEALTHCARE OF FL',\n",
              " 'REPEAT CONSULTANTS',\n",
              " 'EWL INC.',\n",
              " 'WILMERHALE',\n",
              " 'TOWNSEND REAL ESTATE',\n",
              " 'CENTINEL FINANCIAL GROUP',\n",
              " 'AZARA LLC',\n",
              " 'GEORGETOWN UNIVERSITY LAW CENTER',\n",
              " 'CROSSROADS ANESTHESIAP',\n",
              " 'AMPLITY HEALTH',\n",
              " 'IMAGE ONE CORP',\n",
              " 'TRADELINK LLC',\n",
              " 'GIBBON PUBLIC SCHOOLS',\n",
              " 'MERCY FAMILY CENTER',\n",
              " 'SILVERSAND SERVICES',\n",
              " 'CITY OF CHESAPEAKE',\n",
              " 'HOWMET AEROSPACE INC.',\n",
              " 'SOUTHERN TRUCK AND EQUIPMENT',\n",
              " 'UNIVERSITY OF TEXAS MEDICAL BRANCH AT',\n",
              " 'KORDICH CONSTRUCTION',\n",
              " 'ALTSHULER BERZON LLP',\n",
              " 'SUNTRUST ROBINSON HUMPHREY INC.',\n",
              " 'AHRENS COMPANIES',\n",
              " 'HAL SYSTEMS CORP',\n",
              " 'PACIFIC RIM CAPITAL, INC.',\n",
              " 'APF',\n",
              " 'PREMIER ASSET MGMT., INC.',\n",
              " 'TEAMSTERS LOCAL UNION 191',\n",
              " 'ADLER GIERSCH',\n",
              " 'SGF',\n",
              " 'MICHIGAN STATE UNIVERSITY',\n",
              " 'ALLIED BARTON',\n",
              " 'RAINFOCUS',\n",
              " 'D.U.E BRANDS',\n",
              " 'WHEATON COLLEGE NORTON MA',\n",
              " 'GBP CONTRACTING',\n",
              " 'MOORE PUBLIC SCHOOLS',\n",
              " 'AIR TRANSPORT ASSOCIATION, INC',\n",
              " 'SSCI',\n",
              " 'THE RUSSELL GROU UNITED LLC',\n",
              " 'SONOMA COUNTY REGIONAL PARKS FOUNDATIO',\n",
              " 'FTLF',\n",
              " 'DOOR 2 DOOR INCOME INC',\n",
              " 'PROFESSIONAL LOSS ADJUSTERS INC.',\n",
              " 'AMERICAN IRON & ALLOYS',\n",
              " 'LASHLY & BAER P.C.',\n",
              " 'UNIVERSITY OF CALIFORNIA, LA',\n",
              " 'PEARL PROPERTIES',\n",
              " 'MID MICH INS',\n",
              " 'BURROW JAN',\n",
              " 'PATRICIA FLORES',\n",
              " 'WARNER BROTHERS TELEVISION',\n",
              " 'MASSACHUSETTS MUTUAL LIFE INSURANCE CO',\n",
              " 'AMERICAN FEDERATION OF TEACHERS',\n",
              " \"ST.PETER'S EPISCOPAL CHURCH\",\n",
              " 'RANDOLPH-BROOKS FCU',\n",
              " 'UNIVERSITY OF MASS',\n",
              " 'EQT CORP.',\n",
              " 'HAMILTON CITY SD',\n",
              " 'NAPER ENTERPRISES',\n",
              " 'NEW MEXICO ORTHOPAEDICS',\n",
              " 'BAYVIEW LOAN SERVICING',\n",
              " 'PICASSO TILE',\n",
              " 'TERRY ROBERTS CONSULTING INC',\n",
              " 'CONFLUENCE DISTRIBUTION INC.',\n",
              " 'HBSPECIALTY FOODS',\n",
              " 'AVONWORTH',\n",
              " 'ASSOCIATED UNIVERSIT',\n",
              " 'FAUSTOLLEAN',\n",
              " 'AVMED',\n",
              " 'EJME',\n",
              " 'SUPERIOR AIR GROUND AMBLANCE',\n",
              " 'UBER(RIDESHARE OPERATOR)',\n",
              " 'MAGIC TOUCH PAINTING',\n",
              " 'CITY OF PHOENIX',\n",
              " 'GRANDVIEW RADIOLOGY',\n",
              " 'LUNDEBERG SCHOOL OF SEAMANSHIP',\n",
              " 'SCHEEN&SMITH PSC',\n",
              " 'LIBERAIL KENWORK',\n",
              " 'DUKE ENERGY OHIO, INC.',\n",
              " 'CITY OF HUNTINGTON WOODS',\n",
              " 'SPIRIT PHARMACEUTICALS LLC',\n",
              " 'WILEY,WILSON, INC.',\n",
              " 'MOUNT SINAI WEST',\n",
              " 'THE METHODIST HOSPITAL',\n",
              " 'PRIMERA ENGINEERS',\n",
              " 'TOUR-SARKISSIAN LAW OFFICES LLP',\n",
              " 'SIBCY CLINE',\n",
              " 'C.G. REIN DEVELOPMENT CO.',\n",
              " 'LAKE TRUCKING CO.',\n",
              " 'POPE, HARDWICKE',\n",
              " 'AEROSPACE CORPORATION',\n",
              " 'INNOVATIVE THERAPY CONCEPTS INC.',\n",
              " 'ASSOC RADIOLOSISTS',\n",
              " 'RADIANT REFINING',\n",
              " 'CAMPO SANTO PRODUCTIONS LLC',\n",
              " 'KANSAS CITY BALLET',\n",
              " 'NATIONALITIES SERVICE CENTER',\n",
              " 'AIRSWIFT',\n",
              " 'NEW HARVEST MINISTRIES INC.',\n",
              " 'EASTCHESTER FIRE DISTRICT',\n",
              " 'THERMOSEAL',\n",
              " 'ADVANTEDGE',\n",
              " 'NC DEPT. OF PUBLIC SAFETY',\n",
              " 'ACCUSTAR',\n",
              " 'EXECUTIVE ENERGY MANAGEMENT, LLC',\n",
              " 'GPG',\n",
              " 'IMEX MEDIA',\n",
              " 'NTP',\n",
              " 'SP MANAGEMENT',\n",
              " 'BROWN CAPITAL MANAGEMENT',\n",
              " 'CIGNA DENTAL HEALTH, INC.',\n",
              " 'CSI COMPANIES',\n",
              " 'OHIO EQUITIES INC.',\n",
              " 'THE RUBY BRINK',\n",
              " 'MVWSD',\n",
              " 'HEALTH CARE SERVICE CORP',\n",
              " 'GREAT PLAINS TECHNOLOGY CENTER',\n",
              " 'NEW TEACHER CENTER',\n",
              " 'ANYTIME PLUMBING INC',\n",
              " 'CALVO ENTERPRISES',\n",
              " 'ARCHDIOCESE OF NEWARK',\n",
              " 'UNIVERSITY OF DELAWARE THEATRE DEPARTM',\n",
              " 'GREEN HASSON JANKS',\n",
              " 'OAKLEIGH LTD.',\n",
              " 'UNIVERSITY OF BRISTOL',\n",
              " 'POLSINELLI',\n",
              " 'CHRISTIAN WORSHIP CENTER',\n",
              " 'BILL BRAVO AUTOMOTIVE PORTRAITS',\n",
              " 'JOHN DEERE FINANCIAL',\n",
              " 'CONDOMINIUM MGMT SVCS',\n",
              " 'SALTCHUK',\n",
              " 'JUST FOR SHOW INC.',\n",
              " 'OXFORD UNIVERSITY PRESS',\n",
              " 'CHARLES J GARRISON',\n",
              " 'LAWRENCE MEMORIALS HOSPITAL',\n",
              " 'JACKSON HEALTHCARE',\n",
              " 'SIERRA PACIFIC',\n",
              " 'NEW SOUTH RIVER BAPTIST ASSO',\n",
              " 'UNIVERSITY OF MAINE',\n",
              " 'ALPHA ELECTRIC CO',\n",
              " 'KEYIMPACT',\n",
              " 'IL. DEPT OF HUMAN SERVICES',\n",
              " 'PANJIVA',\n",
              " 'FACIAL PLASTIC SURGERY ASSOCIATES',\n",
              " 'GREEN MOUNTAIN TREATMENT CENTER',\n",
              " 'CINTERRA GROUP',\n",
              " 'NIWCC',\n",
              " 'SOLTAGE LLC',\n",
              " 'PEPSI COLA',\n",
              " 'RLA NATIONAL REHABILITATION CENTER',\n",
              " 'CARE HAWAII',\n",
              " 'IVAN & DAUGUSTINIS',\n",
              " 'ALLIANCE RADIOLOGY',\n",
              " 'UNIV. OF CALIFORNIA',\n",
              " 'PARKVIEW COMMUNITY HOSPITAL',\n",
              " 'SPORTS LEICHT RESTORATIONS INC',\n",
              " 'NONE RETIRED',\n",
              " 'RENVYLE PARTNERS',\n",
              " 'PORT APARTMENTS',\n",
              " 'ECD',\n",
              " 'MO. DMH DD',\n",
              " 'DECAHEALTH',\n",
              " 'NESS INC',\n",
              " 'NJ DEPARTMENT OF HEALTH',\n",
              " 'AV INC.',\n",
              " 'ALLIED UNIVERSAL SECURITY SERVICES',\n",
              " 'ONI RISK PARTNERS',\n",
              " 'GROVEPORT MADISON',\n",
              " 'CAMBREX CHARLES CITY INC.',\n",
              " 'GILROY UNIFIED SCHOOL DISTRICT',\n",
              " 'MJUSD',\n",
              " 'ILCJA&TP',\n",
              " '4J ENERGY LLC',\n",
              " 'HIGHLINE MEDICAL CENTERE',\n",
              " 'SHIELD RESTRATINTS',\n",
              " '8 MILE FARM',\n",
              " 'R DIXON SPEAS ASSOCIATES, INC.',\n",
              " 'RED HOT AHIR',\n",
              " 'UNITY',\n",
              " 'EAST BATON ROUGE PARISH SCHOOL',\n",
              " 'GP',\n",
              " 'CHOATE HALL & STEWART',\n",
              " 'THE PATRIOT FINANCIAL GROUP, LLC',\n",
              " 'ANALYSIS GROUP',\n",
              " 'MORRIS JAMES LLP',\n",
              " 'MORRIS TEAM REALTY, LLC',\n",
              " 'MAYER BROWN LLP',\n",
              " \"LABORERS' LOCAL 225\",\n",
              " 'FREEDOM MOBILITY',\n",
              " 'DOCTORS FOR EMERGENCY SERVICES',\n",
              " 'CUSTOM VAULT CORP',\n",
              " 'XDSI',\n",
              " 'YCSD',\n",
              " 'REHAB WITHOUT WALLS',\n",
              " ...}"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 24
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UTIiuGxW938o",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "7ef9a292-4ce5-4a57-c2c1-73d96de700e9"
      },
      "source": [
        "# Count distinct EMPLOYER values in one vectorized pass.\n",
        "# dropna=False keeps missing employers in the count as a single entry,\n",
        "# instead of relying on how a Python set happens to hash NaN objects.\n",
        "df_newdup['EMPLOYER'].nunique(dropna=False)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "65420"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BTZ4nqIiaA55",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 102
        },
        "outputId": "7bf205a2-b579-4044-b982-c41ec5790f23"
      },
      "source": [
        "# Drop rows with no EMPLOYER value.\n",
        "# Rebind rather than use inplace=True: the in-place call raised\n",
        "# SettingWithCopyWarning because df_newdup is a slice of another DataFrame.\n",
        "# .copy() gives an independent frame, and re-running this cell is a no-op.\n",
        "df_newdup = df_newdup.dropna(subset=[\"EMPLOYER\"]).copy()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
            "A value is trying to be set on a copy of a slice from a DataFrame\n",
            "\n",
            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
            "  \"\"\"Entry point for launching an IPython kernel.\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yUEct7Y5XyPf",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 979
        },
        "outputId": "b2bd54a6-52f1-43d2-cb48-71dd48bd9c81"
      },
      "source": [
        "# Select contributions whose EMPLOYER contains the literal text 'AEROSPACE CORPORATION'.\n",
        "# regex=False: the pattern is a plain substring, not a regular expression.\n",
        "# na=False: rows with a missing EMPLOYER count as non-matches, so the mask stays\n",
        "# boolean even if this cell runs before the missing values are dropped.\n",
        "is_aerospace = df_newdup['EMPLOYER'].str.contains('AEROSPACE CORPORATION', regex=False, na=False)\n",
        "df_aero = df_newdup[is_aerospace]\n",
        "df_aero"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>136824</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>DAVIS, LORRIE</td>\n",
              "      <td>LOS ANGELES</td>\n",
              "      <td>CA</td>\n",
              "      <td>900561529</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT ENGINEER</td>\n",
              "      <td>7112020</td>\n",
              "      <td>20</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>150573</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SIMPSON, MARK M</td>\n",
              "      <td>LONG BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>908083812</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7242020</td>\n",
              "      <td>200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>150574</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SIMPSON, MARK M</td>\n",
              "      <td>LONG BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>908083812</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7292020</td>\n",
              "      <td>200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>191693</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>YOUNG, KAROLYN</td>\n",
              "      <td>REDONDO BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.02771e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7252020</td>\n",
              "      <td>250</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>201669</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>STUTTERHEIM, KENNETH B.</td>\n",
              "      <td>PASADENA</td>\n",
              "      <td>MD</td>\n",
              "      <td>2.11223e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEERING SPECIALIST</td>\n",
              "      <td>7162020</td>\n",
              "      <td>250</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>246960</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>JAGER, AMY</td>\n",
              "      <td>INDIAN HARBOUR BEACH</td>\n",
              "      <td>FL</td>\n",
              "      <td>329373526</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7272020</td>\n",
              "      <td>15</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>404391</th>\n",
              "      <td>C00193433</td>\n",
              "      <td>SIMPSON, MARK M. MR.</td>\n",
              "      <td>LONG BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>90808</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7292020</td>\n",
              "      <td>200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>493316</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7232020</td>\n",
              "      <td>10</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>541906</th>\n",
              "      <td>C00000935</td>\n",
              "      <td>ALVAREZ, MANUEL</td>\n",
              "      <td>SAN PEDRO</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.07311e+08</td>\n",
              "      <td>AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7232020</td>\n",
              "      <td>35</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>547077</th>\n",
              "      <td>C00000935</td>\n",
              "      <td>GUNAY, DEVIN</td>\n",
              "      <td>LOS ANGELES</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.00347e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SOFTWARE ENGINEER</td>\n",
              "      <td>7132020</td>\n",
              "      <td>40</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>585671</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7042020</td>\n",
              "      <td>25</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>625961</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>8.09114e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>7302020</td>\n",
              "      <td>100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>627176</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7262020</td>\n",
              "      <td>10</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>920917</th>\n",
              "      <td>C00010603</td>\n",
              "      <td>MERRILL, ALBERT W</td>\n",
              "      <td>VENICE</td>\n",
              "      <td>CA</td>\n",
              "      <td>90291</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7262020</td>\n",
              "      <td>25</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>922676</th>\n",
              "      <td>C00010603</td>\n",
              "      <td>ESLINGER, SUELLEN</td>\n",
              "      <td>REDONDO BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.02782e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7262020</td>\n",
              "      <td>300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>934612</th>\n",
              "      <td>C00010603</td>\n",
              "      <td>MERRILL, ALBERT W</td>\n",
              "      <td>VENICE</td>\n",
              "      <td>CA</td>\n",
              "      <td>90291</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7312020</td>\n",
              "      <td>25</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>936453</th>\n",
              "      <td>C00010603</td>\n",
              "      <td>MERRILL, ALBERT W</td>\n",
              "      <td>VENICE</td>\n",
              "      <td>CA</td>\n",
              "      <td>90291</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7312020</td>\n",
              "      <td>25</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>938548</th>\n",
              "      <td>C00010603</td>\n",
              "      <td>MERRILL, ALBERT W</td>\n",
              "      <td>VENICE</td>\n",
              "      <td>CA</td>\n",
              "      <td>90291</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7262020</td>\n",
              "      <td>12</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>946916</th>\n",
              "      <td>C00010603</td>\n",
              "      <td>BYERS, MARK</td>\n",
              "      <td>SAN DIEGO</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.21096e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7122020</td>\n",
              "      <td>65</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>947929</th>\n",
              "      <td>C00010603</td>\n",
              "      <td>FRICKS, KATHRYN</td>\n",
              "      <td>GREENBELT</td>\n",
              "      <td>MD</td>\n",
              "      <td>2.07704e+08</td>\n",
              "      <td>AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7312020</td>\n",
              "      <td>500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>992812</th>\n",
              "      <td>C00484642</td>\n",
              "      <td>ALVAREZ, MANUEL</td>\n",
              "      <td>SAN PEDRO</td>\n",
              "      <td>CA</td>\n",
              "      <td>907311416</td>\n",
              "      <td>AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7242020</td>\n",
              "      <td>50</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1014099</th>\n",
              "      <td>C00484642</td>\n",
              "      <td>ALVAREZ, MANUEL</td>\n",
              "      <td>SAN PEDRO</td>\n",
              "      <td>CA</td>\n",
              "      <td>907311416</td>\n",
              "      <td>AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7152020</td>\n",
              "      <td>75</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1107728</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1109284</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1157904</th>\n",
              "      <td>C00003418</td>\n",
              "      <td>BAUER, SPENCER J. MR.</td>\n",
              "      <td>EL SEGUNDO</td>\n",
              "      <td>CA</td>\n",
              "      <td>902453728</td>\n",
              "      <td>AEROSPACE CORPORATION</td>\n",
              "      <td>DIRECTOR</td>\n",
              "      <td>6302020</td>\n",
              "      <td>50</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1213364</th>\n",
              "      <td>C00696526</td>\n",
              "      <td>HOLLANDER, SIDNEY</td>\n",
              "      <td>GLENDALE</td>\n",
              "      <td>AZ</td>\n",
              "      <td>853180038</td>\n",
              "      <td>AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7092020</td>\n",
              "      <td>250</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1254043</th>\n",
              "      <td>C00401224</td>\n",
              "      <td>WHITE, RUSSELL</td>\n",
              "      <td>FAIRFAX</td>\n",
              "      <td>VA</td>\n",
              "      <td>220305208</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SCIENTIST</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1363622</th>\n",
              "      <td>C00126847</td>\n",
              "      <td>SMITH, DARLENE</td>\n",
              "      <td>CHARLESTOWN</td>\n",
              "      <td>RI</td>\n",
              "      <td>02813</td>\n",
              "      <td>KAMAN AEROSPACE CORPORATION</td>\n",
              "      <td>VP GM AIR VEHICLES</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1460070</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1507410</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "           CMTE_ID                     NAME  ... TRANSACTION_DT TRANSACTION_AMT\n",
              "136824   C00703975            DAVIS, LORRIE  ...        7112020              20\n",
              "150573   C00703975          SIMPSON, MARK M  ...        7242020             200\n",
              "150574   C00703975          SIMPSON, MARK M  ...        7292020             200\n",
              "191693   C00703975           YOUNG, KAROLYN  ...        7252020             250\n",
              "201669   C00703975  STUTTERHEIM, KENNETH B.  ...        7162020             250\n",
              "246960   C00703975               JAGER, AMY  ...        7272020              15\n",
              "404391   C00193433     SIMPSON, MARK M. MR.  ...        7292020             200\n",
              "493316   C00075820    FARAGO, ZOLTAN L. MR.  ...        7232020              10\n",
              "541906   C00000935          ALVAREZ, MANUEL  ...        7232020              35\n",
              "547077   C00000935             GUNAY, DEVIN  ...        7132020              40\n",
              "585671   C00075820    FARAGO, ZOLTAN L. MR.  ...        7042020              25\n",
              "625961   C00075820       CINLEMIS, MICHELLE  ...        7302020             100\n",
              "627176   C00075820    FARAGO, ZOLTAN L. MR.  ...        7262020              10\n",
              "920917   C00010603        MERRILL, ALBERT W  ...        7262020              25\n",
              "922676   C00010603        ESLINGER, SUELLEN  ...        7262020             300\n",
              "934612   C00010603        MERRILL, ALBERT W  ...        7312020              25\n",
              "936453   C00010603        MERRILL, ALBERT W  ...        7312020              25\n",
              "938548   C00010603        MERRILL, ALBERT W  ...        7262020              12\n",
              "946916   C00010603              BYERS, MARK  ...        7122020              65\n",
              "947929   C00010603          FRICKS, KATHRYN  ...        7312020             500\n",
              "992812   C00484642          ALVAREZ, MANUEL  ...        7242020              50\n",
              "1014099  C00484642          ALVAREZ, MANUEL  ...        7152020              75\n",
              "1107728  C00075820       CINLEMIS, MICHELLE  ...        6302020             100\n",
              "1109284  C00075820       CINLEMIS, MICHELLE  ...        6302020             100\n",
              "1157904  C00003418    BAUER, SPENCER J. MR.  ...        6302020              50\n",
              "1213364  C00696526        HOLLANDER, SIDNEY  ...        7092020             250\n",
              "1254043  C00401224           WHITE, RUSSELL  ...        6302020             100\n",
              "1363622  C00126847           SMITH, DARLENE  ...        6302020             100\n",
              "1460070  C00694323       CINLEMIS, MICHELLE  ...        6302020             100\n",
              "1507410  C00694323       CINLEMIS, MICHELLE  ...        6302020             100\n",
              "\n",
              "[30 rows x 9 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 34
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "n1Em1PItYDEH",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 758
        },
        "outputId": "6fc5d917-59ed-4b2b-8b0e-aebf68ebcfb9"
      },
      "source": [
        "df_aero_merge = pd.merge(df_bob, df_merge, on='CMTE_ID')\n",
        "df_aero_merge"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>CAND_ELECTION_YR</th>\n",
              "      <th>FEC_ELECTION_YR</th>\n",
              "      <th>CMTE_TP</th>\n",
              "      <th>CMTE_DSGN</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>DAVIS, LORRIE</td>\n",
              "      <td>LOS ANGELES</td>\n",
              "      <td>CA</td>\n",
              "      <td>900561529</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT ENGINEER</td>\n",
              "      <td>7112020</td>\n",
              "      <td>20</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SIMPSON, MARK M</td>\n",
              "      <td>LONG BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>908083812</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7242020</td>\n",
              "      <td>200</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SIMPSON, MARK M</td>\n",
              "      <td>LONG BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>908083812</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7292020</td>\n",
              "      <td>200</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>YOUNG, KAROLYN</td>\n",
              "      <td>REDONDO BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.02771e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7252020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>STUTTERHEIM, KENNETH B.</td>\n",
              "      <td>PASADENA</td>\n",
              "      <td>MD</td>\n",
              "      <td>2.11223e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEERING SPECIALIST</td>\n",
              "      <td>7162020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>JAGER, AMY</td>\n",
              "      <td>INDIAN HARBOUR BEACH</td>\n",
              "      <td>FL</td>\n",
              "      <td>329373526</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7272020</td>\n",
              "      <td>15</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7232020</td>\n",
              "      <td>10</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>Y</td>\n",
              "      <td>U</td>\n",
              "      <td>232064</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7042020</td>\n",
              "      <td>25</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>Y</td>\n",
              "      <td>U</td>\n",
              "      <td>232064</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>8.09114e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>7302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>Y</td>\n",
              "      <td>U</td>\n",
              "      <td>232064</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7262020</td>\n",
              "      <td>10</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>Y</td>\n",
              "      <td>U</td>\n",
              "      <td>232064</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>Y</td>\n",
              "      <td>U</td>\n",
              "      <td>232064</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>Y</td>\n",
              "      <td>U</td>\n",
              "      <td>232064</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>C00696526</td>\n",
              "      <td>HOLLANDER, SIDNEY</td>\n",
              "      <td>GLENDALE</td>\n",
              "      <td>AZ</td>\n",
              "      <td>853180038</td>\n",
              "      <td>AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7092020</td>\n",
              "      <td>250</td>\n",
              "      <td>S0AZ00350</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>S</td>\n",
              "      <td>P</td>\n",
              "      <td>225862</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CMTE_ID                     NAME  ... CMTE_DSGN LINKAGE_ID\n",
              "0   C00703975            DAVIS, LORRIE  ...         P     227491\n",
              "1   C00703975          SIMPSON, MARK M  ...         P     227491\n",
              "2   C00703975          SIMPSON, MARK M  ...         P     227491\n",
              "3   C00703975           YOUNG, KAROLYN  ...         P     227491\n",
              "4   C00703975  STUTTERHEIM, KENNETH B.  ...         P     227491\n",
              "5   C00703975               JAGER, AMY  ...         P     227491\n",
              "6   C00075820    FARAGO, ZOLTAN L. MR.  ...         U     232064\n",
              "7   C00075820    FARAGO, ZOLTAN L. MR.  ...         U     232064\n",
              "8   C00075820       CINLEMIS, MICHELLE  ...         U     232064\n",
              "9   C00075820    FARAGO, ZOLTAN L. MR.  ...         U     232064\n",
              "10  C00075820       CINLEMIS, MICHELLE  ...         U     232064\n",
              "11  C00075820       CINLEMIS, MICHELLE  ...         U     232064\n",
              "12  C00696526        HOLLANDER, SIDNEY  ...         P     225862\n",
              "\n",
              "[13 rows x 16 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 35
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4qQlV6tnlega"
      },
      "source": [
        "# Congressional district label for each df_aero_merge row, listed in row order\n",
        "# (13 hand-entered values; the list is assigned positionally to the frame).\n",
        "CD = [\n",
        "    'CA-37', 'CA-47', 'CA-47', 'CA-33',\n",
        "    'MD-03', 'FL-08',\n",
        "    'VA-05', 'VA-05', 'CO-05', 'VA-05', 'CO-05', 'CO-05',\n",
        "    'AZ-07',\n",
        "]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PPqAAylhx0mj"
      },
      "source": [
        "# Store the hand-entered district labels in a 'CD' column. The list is matched to rows\n",
        "# by position, so CD must have exactly one entry per row of df_aero_merge.\n",
        "df_aero_merge['CD'] = CD "
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Ls7gegLCyafO"
      },
      "source": [
        "# Drop the CAND_ELECTION_YR, FEC_ELECTION_YR, CMTE_TP and CMTE_DSGN columns.\n",
        "# errors='ignore' keeps this cell re-runnable: the result is assigned back to the same\n",
        "# name, so a second run would otherwise raise KeyError for the already-removed columns.\n",
        "df_aero_merge = df_aero_merge.drop(\n",
        "    columns=['CAND_ELECTION_YR', 'FEC_ELECTION_YR', 'CMTE_TP', 'CMTE_DSGN'],\n",
        "    errors='ignore',\n",
        ")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Sn-H_tkYzBcc",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 673
        },
        "outputId": "b9c7ea40-2dcf-4b44-a00b-f4b7caf32df4"
      },
      "source": [
        "# Inner-join df_aero_merge to `trends` on the shared 'CD' key; with how='inner',\n",
        "# any row whose CD value has no match in `trends` is left out of the result.\n",
        "df_aero_final = df_aero_merge.merge(trends, how='inner', on='CD')\n",
        "df_aero_final"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "      <th>CD</th>\n",
              "      <th>Party</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>DAVIS, LORRIE</td>\n",
              "      <td>LOS ANGELES</td>\n",
              "      <td>CA</td>\n",
              "      <td>900561529</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT ENGINEER</td>\n",
              "      <td>7112020</td>\n",
              "      <td>20</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>CA-37</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SIMPSON, MARK M</td>\n",
              "      <td>LONG BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>908083812</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7242020</td>\n",
              "      <td>200</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>CA-47</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SIMPSON, MARK M</td>\n",
              "      <td>LONG BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>908083812</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7292020</td>\n",
              "      <td>200</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>CA-47</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>YOUNG, KAROLYN</td>\n",
              "      <td>REDONDO BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.02771e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7252020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>CA-33</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>STUTTERHEIM, KENNETH B.</td>\n",
              "      <td>PASADENA</td>\n",
              "      <td>MD</td>\n",
              "      <td>2.11223e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEERING SPECIALIST</td>\n",
              "      <td>7162020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MD-03</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>JAGER, AMY</td>\n",
              "      <td>INDIAN HARBOUR BEACH</td>\n",
              "      <td>FL</td>\n",
              "      <td>329373526</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7272020</td>\n",
              "      <td>15</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>FL-08</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7232020</td>\n",
              "      <td>10</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>VA-05</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7042020</td>\n",
              "      <td>25</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>VA-05</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7262020</td>\n",
              "      <td>10</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>VA-05</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>8.09114e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>7302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>CO-05</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>CO-05</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>CO-05</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>C00696526</td>\n",
              "      <td>HOLLANDER, SIDNEY</td>\n",
              "      <td>GLENDALE</td>\n",
              "      <td>AZ</td>\n",
              "      <td>853180038</td>\n",
              "      <td>AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7092020</td>\n",
              "      <td>250</td>\n",
              "      <td>S0AZ00350</td>\n",
              "      <td>DEM</td>\n",
              "      <td>225862</td>\n",
              "      <td>AZ-07</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CMTE_ID                     NAME  ...     CD Party\n",
              "0   C00703975            DAVIS, LORRIE  ...  CA-37   (D)\n",
              "1   C00703975          SIMPSON, MARK M  ...  CA-47   (D)\n",
              "2   C00703975          SIMPSON, MARK M  ...  CA-47   (D)\n",
              "3   C00703975           YOUNG, KAROLYN  ...  CA-33   (D)\n",
              "4   C00703975  STUTTERHEIM, KENNETH B.  ...  MD-03   (D)\n",
              "5   C00703975               JAGER, AMY  ...  FL-08   (R)\n",
              "6   C00075820    FARAGO, ZOLTAN L. MR.  ...  VA-05   (R)\n",
              "7   C00075820    FARAGO, ZOLTAN L. MR.  ...  VA-05   (R)\n",
              "8   C00075820    FARAGO, ZOLTAN L. MR.  ...  VA-05   (R)\n",
              "9   C00075820       CINLEMIS, MICHELLE  ...  CO-05   (R)\n",
              "10  C00075820       CINLEMIS, MICHELLE  ...  CO-05   (R)\n",
              "11  C00075820       CINLEMIS, MICHELLE  ...  CO-05   (R)\n",
              "12  C00696526        HOLLANDER, SIDNEY  ...  AZ-07   (D)\n",
              "\n",
              "[13 rows x 14 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 42
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "R1wEhe8Rzaa_"
      },
      "source": [
        "# INDEX is 1 where CAND_PTY_AFFILIATION equals 'DEM' and 0 for every other value.\n",
        "df_aero_final['INDEX'] = df_aero_final['CAND_PTY_AFFILIATION'].eq('DEM').astype(int)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Ix6G1cxzzuUA"
      },
      "source": [
        "# Add a constant column: every row gets INDEX_BOSS = 1.\n",
        "# NOTE(review): a constant column has zero variance, so it carries no information as the\n",
        "# regression target fitted later in this notebook - confirm this is intended.\n",
        "df_aero_final['INDEX_BOSS']=1"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dQqF_pwqldjH",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 673
        },
        "outputId": "4a60d88a-2393-487c-f12e-9f420683be30"
      },
      "source": [
        "# Display the joined frame to check the newly added INDEX and INDEX_BOSS columns.\n",
        "df_aero_final\n"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "      <th>CD</th>\n",
              "      <th>Party</th>\n",
              "      <th>INDEX</th>\n",
              "      <th>INDEX_BOSS</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>DAVIS, LORRIE</td>\n",
              "      <td>LOS ANGELES</td>\n",
              "      <td>CA</td>\n",
              "      <td>900561529</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT ENGINEER</td>\n",
              "      <td>7112020</td>\n",
              "      <td>20</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>CA-37</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SIMPSON, MARK M</td>\n",
              "      <td>LONG BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>908083812</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7242020</td>\n",
              "      <td>200</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>CA-47</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SIMPSON, MARK M</td>\n",
              "      <td>LONG BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>908083812</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7292020</td>\n",
              "      <td>200</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>CA-47</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>YOUNG, KAROLYN</td>\n",
              "      <td>REDONDO BEACH</td>\n",
              "      <td>CA</td>\n",
              "      <td>9.02771e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7252020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>CA-33</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>STUTTERHEIM, KENNETH B.</td>\n",
              "      <td>PASADENA</td>\n",
              "      <td>MD</td>\n",
              "      <td>2.11223e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEERING SPECIALIST</td>\n",
              "      <td>7162020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MD-03</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>JAGER, AMY</td>\n",
              "      <td>INDIAN HARBOUR BEACH</td>\n",
              "      <td>FL</td>\n",
              "      <td>329373526</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7272020</td>\n",
              "      <td>15</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>FL-08</td>\n",
              "      <td>(R)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7232020</td>\n",
              "      <td>10</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>VA-05</td>\n",
              "      <td>(R)</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7042020</td>\n",
              "      <td>25</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>VA-05</td>\n",
              "      <td>(R)</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>FARAGO, ZOLTAN L. MR.</td>\n",
              "      <td>BROAD RUN</td>\n",
              "      <td>VA</td>\n",
              "      <td>2.01372e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>PROJECT ENGINEER</td>\n",
              "      <td>7262020</td>\n",
              "      <td>10</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>VA-05</td>\n",
              "      <td>(R)</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>8.09114e+08</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>7302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>CO-05</td>\n",
              "      <td>(R)</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>CO-05</td>\n",
              "      <td>(R)</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>C00075820</td>\n",
              "      <td>CINLEMIS, MICHELLE</td>\n",
              "      <td>COLORADO SPRINGS</td>\n",
              "      <td>CO</td>\n",
              "      <td>809113801</td>\n",
              "      <td>THE AEROSPACE CORPORATION</td>\n",
              "      <td>SENIOR PROJECT LEADER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0NY27090</td>\n",
              "      <td>REP</td>\n",
              "      <td>232064</td>\n",
              "      <td>CO-05</td>\n",
              "      <td>(R)</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>C00696526</td>\n",
              "      <td>HOLLANDER, SIDNEY</td>\n",
              "      <td>GLENDALE</td>\n",
              "      <td>AZ</td>\n",
              "      <td>853180038</td>\n",
              "      <td>AEROSPACE CORPORATION</td>\n",
              "      <td>ENGINEER</td>\n",
              "      <td>7092020</td>\n",
              "      <td>250</td>\n",
              "      <td>S0AZ00350</td>\n",
              "      <td>DEM</td>\n",
              "      <td>225862</td>\n",
              "      <td>AZ-07</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CMTE_ID                     NAME  ... INDEX INDEX_BOSS\n",
              "0   C00703975            DAVIS, LORRIE  ...     1          1\n",
              "1   C00703975          SIMPSON, MARK M  ...     1          1\n",
              "2   C00703975          SIMPSON, MARK M  ...     1          1\n",
              "3   C00703975           YOUNG, KAROLYN  ...     1          1\n",
              "4   C00703975  STUTTERHEIM, KENNETH B.  ...     1          1\n",
              "5   C00703975               JAGER, AMY  ...     1          1\n",
              "6   C00075820    FARAGO, ZOLTAN L. MR.  ...     0          1\n",
              "7   C00075820    FARAGO, ZOLTAN L. MR.  ...     0          1\n",
              "8   C00075820    FARAGO, ZOLTAN L. MR.  ...     0          1\n",
              "9   C00075820       CINLEMIS, MICHELLE  ...     0          1\n",
              "10  C00075820       CINLEMIS, MICHELLE  ...     0          1\n",
              "11  C00075820       CINLEMIS, MICHELLE  ...     0          1\n",
              "12  C00696526        HOLLANDER, SIDNEY  ...     1          1\n",
              "\n",
              "[13 rows x 16 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 45
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "JZdHyMWo0Pbl"
      },
      "source": [
        "# Keep only the two indicator columns, in the order INDEX then INDEX_BOSS.\n",
        "subset2 = df_aero_final.loc[:, ['INDEX', 'INDEX_BOSS']]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4-m6o5ek0Pup"
      },
      "source": [
        "from sklearn.linear_model import LinearRegression"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UQeN6gFQ0CoX",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "b5f639bc-9878-4fd2-bfec-445ecc3aba55"
      },
      "source": [
        "from sklearn.preprocessing import MinMaxScaler\n",
        "\n",
        "# Min-max scale the two indicator columns of subset2 (fit and transform in one step).\n",
        "scaler1 = MinMaxScaler()\n",
        "inner_join_scaled = scaler1.fit_transform(subset2)\n",
        "\n",
        "# Column 0 is INDEX (predictor) and column 1 is INDEX_BOSS (target); the 0:1 / 1:2\n",
        "# slices keep both as 2-D (n_rows, 1) arrays.\n",
        "# NOTE(review): INDEX_BOSS is set to 1 for every row earlier in the notebook, so the\n",
        "# target here is constant - confirm this regression is meaningful.\n",
        "x = inner_join_scaled[:, 0:1]\n",
        "y = inner_join_scaled[:, 1:2]\n",
        "\n",
        "linear_regressor = LinearRegression()\n",
        "linear_regressor.fit(x, y)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 48
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "kdgNzW9Q0k7v"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vqZW4AL43o0r",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "outputId": "76681360-3984-4485-f175-2fa4f4713437"
      },
      "source": [
        "# Select the rows whose EMPLOYER text contains the literal substring 'AT&T'.\n",
        "# regex=False matches the text literally instead of as a regular expression; na=False\n",
        "# counts a missing EMPLOYER as a non-match, so the boolean mask never contains NaN\n",
        "# (a NaN in the mask would make the row selection raise).\n",
        "df4 = df_newdup[df_newdup['EMPLOYER'].str.contains('AT&T', regex=False, na=False)]\n",
        "df4"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>13062</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>FAVARA, RICHARD</td>\n",
              "      <td>FREEHOLD</td>\n",
              "      <td>NJ</td>\n",
              "      <td>7.72843e+07</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7082020</td>\n",
              "      <td>25</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13063</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>FAVARA, RICHARD</td>\n",
              "      <td>FREEHOLD</td>\n",
              "      <td>NJ</td>\n",
              "      <td>7.72843e+07</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7142020</td>\n",
              "      <td>17</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13064</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>FAVARA, RICHARD</td>\n",
              "      <td>FREEHOLD</td>\n",
              "      <td>NJ</td>\n",
              "      <td>7.72843e+07</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7192020</td>\n",
              "      <td>17</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13622</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EMERSON, TERRY</td>\n",
              "      <td>DALLAS</td>\n",
              "      <td>TX</td>\n",
              "      <td>7.52242e+08</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>PROJECT MANAGER</td>\n",
              "      <td>7012020</td>\n",
              "      <td>5</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13623</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EMERSON, TERRY</td>\n",
              "      <td>DALLAS</td>\n",
              "      <td>TX</td>\n",
              "      <td>7.52242e+08</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>PROJECT MANAGER</td>\n",
              "      <td>7162020</td>\n",
              "      <td>21</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1583204</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>HERNANDEZ, JOE</td>\n",
              "      <td>SAN BRUNO</td>\n",
              "      <td>CA</td>\n",
              "      <td>940661112</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>SPLICING TECHNICIAN</td>\n",
              "      <td>6302020</td>\n",
              "      <td>10</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1587874</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>HERNANDEZ, JOE</td>\n",
              "      <td>SAN BRUNO</td>\n",
              "      <td>CA</td>\n",
              "      <td>940661112</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>SPLICING TECHNICIAN</td>\n",
              "      <td>6302020</td>\n",
              "      <td>20</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1595694</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>HERNANDEZ, JOE</td>\n",
              "      <td>SAN BRUNO</td>\n",
              "      <td>CA</td>\n",
              "      <td>940661112</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>SPLICING TECHNICIAN</td>\n",
              "      <td>6302020</td>\n",
              "      <td>10</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1600117</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>HERNANDEZ, JOE</td>\n",
              "      <td>SAN BRUNO</td>\n",
              "      <td>CA</td>\n",
              "      <td>940661112</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>SPLICING TECHNICIAN</td>\n",
              "      <td>6302020</td>\n",
              "      <td>25</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1603559</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>ORTIZ, LISA</td>\n",
              "      <td>RIVERSIDE</td>\n",
              "      <td>CA</td>\n",
              "      <td>925035708</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>PM</td>\n",
              "      <td>6302020</td>\n",
              "      <td>35</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>4226 rows × 9 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "           CMTE_ID             NAME  ... TRANSACTION_DT TRANSACTION_AMT\n",
              "13062    C00703975  FAVARA, RICHARD  ...        7082020              25\n",
              "13063    C00703975  FAVARA, RICHARD  ...        7142020              17\n",
              "13064    C00703975  FAVARA, RICHARD  ...        7192020              17\n",
              "13622    C00703975   EMERSON, TERRY  ...        7012020               5\n",
              "13623    C00703975   EMERSON, TERRY  ...        7162020              21\n",
              "...            ...              ...  ...            ...             ...\n",
              "1583204  C00694323   HERNANDEZ, JOE  ...        6302020              10\n",
              "1587874  C00694323   HERNANDEZ, JOE  ...        6302020              20\n",
              "1595694  C00694323   HERNANDEZ, JOE  ...        6302020              10\n",
              "1600117  C00694323   HERNANDEZ, JOE  ...        6302020              25\n",
              "1603559  C00694323      ORTIZ, LISA  ...        6302020              35\n",
              "\n",
              "[4226 rows x 9 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 16
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "YvlPckei7uEA",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        },
        "outputId": "757de2e5-e546-4bd7-d851-5a318d8008a0"
      },
      "source": [
        "# Inner-join the rows in df4 with df_merge on the shared CMTE_ID key,\n",
        "# then preview the last five rows of the joined frame.\n",
        "df5 = df4.merge(df_merge, how='inner', on='CMTE_ID')\n",
        "df5.tail(5)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>CAND_ELECTION_YR</th>\n",
              "      <th>FEC_ELECTION_YR</th>\n",
              "      <th>CMTE_TP</th>\n",
              "      <th>CMTE_DSGN</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>243</th>\n",
              "      <td>C00711549</td>\n",
              "      <td>COLLINS, RICK</td>\n",
              "      <td>LAKEWOOD</td>\n",
              "      <td>WA</td>\n",
              "      <td>98498</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>SALES CONSULTANT</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>S0KY00339</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>S</td>\n",
              "      <td>P</td>\n",
              "      <td>228669</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>244</th>\n",
              "      <td>C00711549</td>\n",
              "      <td>NURSE, CHRIS</td>\n",
              "      <td>ROCKVILLE</td>\n",
              "      <td>MD</td>\n",
              "      <td>20850</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>MANAGER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>S0KY00339</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>S</td>\n",
              "      <td>P</td>\n",
              "      <td>228669</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>245</th>\n",
              "      <td>C00666040</td>\n",
              "      <td>HERNANDEZ, JOE</td>\n",
              "      <td>SAN BRUNO</td>\n",
              "      <td>CA</td>\n",
              "      <td>940661112</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>SPLICING TECHNICIAN</td>\n",
              "      <td>6302020</td>\n",
              "      <td>10</td>\n",
              "      <td>S8AZ00221</td>\n",
              "      <td>REP</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>S</td>\n",
              "      <td>P</td>\n",
              "      <td>224208</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>246</th>\n",
              "      <td>C00736876</td>\n",
              "      <td>BENTON, WANDETTA</td>\n",
              "      <td>DULUTH</td>\n",
              "      <td>GA</td>\n",
              "      <td>300978117</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>NETWORK TECH</td>\n",
              "      <td>6302020</td>\n",
              "      <td>25</td>\n",
              "      <td>S0GA00559</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>S</td>\n",
              "      <td>P</td>\n",
              "      <td>231982</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>247</th>\n",
              "      <td>C00736876</td>\n",
              "      <td>BENTON, WANDETTA</td>\n",
              "      <td>DULUTH</td>\n",
              "      <td>GA</td>\n",
              "      <td>300978117</td>\n",
              "      <td>AT&amp;T</td>\n",
              "      <td>NETWORK TECH</td>\n",
              "      <td>6302020</td>\n",
              "      <td>25</td>\n",
              "      <td>S0GA00559</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>S</td>\n",
              "      <td>P</td>\n",
              "      <td>231982</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "       CMTE_ID              NAME       CITY  ... CMTE_TP CMTE_DSGN LINKAGE_ID\n",
              "243  C00711549     COLLINS, RICK   LAKEWOOD  ...       S         P     228669\n",
              "244  C00711549      NURSE, CHRIS  ROCKVILLE  ...       S         P     228669\n",
              "245  C00666040    HERNANDEZ, JOE  SAN BRUNO  ...       S         P     224208\n",
              "246  C00736876  BENTON, WANDETTA     DULUTH  ...       S         P     231982\n",
              "247  C00736876  BENTON, WANDETTA     DULUTH  ...       S         P     231982\n",
              "\n",
              "[5 rows x 16 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 19
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ptU7ClSQEgW3",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 402
        },
        "outputId": "add4cc5d-fe11-4bd7-ecab-c93ceb212d1a"
      },
      "source": [
        "# Keep only rows whose EMPLOYER contains BIOGEN as a whole word.\n",
        "# A plain substring test also matches unrelated employers such as\n",
        "# 'PMC BIOGENIX INC.'; the \\b word boundaries exclude those.\n",
        "# na=False makes a missing EMPLOYER count as a non-match.\n",
        "biogen_mask = df_newdup['EMPLOYER'].str.contains(r'\\bBIOGEN\\b', na=False)\n",
        "df_biogen = df_newdup[biogen_mask]\n",
        "df_biogen"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>71874</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MARX, ISAAC</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47438e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>CHEMIST</td>\n",
              "      <td>7182020</td>\n",
              "      <td>250</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>125267</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7292020</td>\n",
              "      <td>50</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>125819</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7152020</td>\n",
              "      <td>50</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>125820</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7222020</td>\n",
              "      <td>50</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>128132</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7082020</td>\n",
              "      <td>50</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1576084</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>CHECKAN, RICHARD</td>\n",
              "      <td>FUQUAY VARINA</td>\n",
              "      <td>NC</td>\n",
              "      <td>275267624</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ENGINEERING</td>\n",
              "      <td>6302020</td>\n",
              "      <td>5</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1582011</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>CHECKAN, RICHARD</td>\n",
              "      <td>FUQUAY VARINA</td>\n",
              "      <td>NC</td>\n",
              "      <td>275267624</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ENGINEERING</td>\n",
              "      <td>6302020</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1591754</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>CHECKAN, RICHARD</td>\n",
              "      <td>FUQUAY VARINA</td>\n",
              "      <td>NC</td>\n",
              "      <td>275267624</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ENGINEERING</td>\n",
              "      <td>6302020</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1600172</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>CHECKAN, RICHARD</td>\n",
              "      <td>FUQUAY VARINA</td>\n",
              "      <td>NC</td>\n",
              "      <td>275267624</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ENGINEERING</td>\n",
              "      <td>6302020</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1603569</th>\n",
              "      <td>C00694323</td>\n",
              "      <td>CHECKAN, RICHARD</td>\n",
              "      <td>FUQUAY VARINA</td>\n",
              "      <td>NC</td>\n",
              "      <td>275267624</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ENGINEERING</td>\n",
              "      <td>6302020</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>98 rows × 9 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "           CMTE_ID               NAME  ... TRANSACTION_DT TRANSACTION_AMT\n",
              "71874    C00703975        MARX, ISAAC  ...        7182020             250\n",
              "125267   C00703975  EDMONDSON, FRAZOR  ...        7292020              50\n",
              "125819   C00703975  EDMONDSON, FRAZOR  ...        7152020              50\n",
              "125820   C00703975  EDMONDSON, FRAZOR  ...        7222020              50\n",
              "128132   C00703975  EDMONDSON, FRAZOR  ...        7082020              50\n",
              "...            ...                ...  ...            ...             ...\n",
              "1576084  C00694323   CHECKAN, RICHARD  ...        6302020               5\n",
              "1582011  C00694323   CHECKAN, RICHARD  ...        6302020               1\n",
              "1591754  C00694323   CHECKAN, RICHARD  ...        6302020               1\n",
              "1600172  C00694323   CHECKAN, RICHARD  ...        6302020               1\n",
              "1603569  C00694323   CHECKAN, RICHARD  ...        6302020               1\n",
              "\n",
              "[98 rows x 9 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 21
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gZz7kaXIEfwZ",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "18a45932-d5ca-4591-824b-41403a07cdf3"
      },
      "source": [
        "# Inner-join the rows in df_biogen with df_merge on the shared CMTE_ID key\n",
        "# and display the joined frame.\n",
        "df6 = df_biogen.merge(df_merge, how='inner', on='CMTE_ID')\n",
        "df6"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>CAND_ELECTION_YR</th>\n",
              "      <th>FEC_ELECTION_YR</th>\n",
              "      <th>CMTE_TP</th>\n",
              "      <th>CMTE_DSGN</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MARX, ISAAC</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47438e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>CHEMIST</td>\n",
              "      <td>7182020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7292020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7152020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7222020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7082020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>DILLEY, ANNE</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47648e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>EPIDEMIOLOGIST</td>\n",
              "      <td>7052020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7182020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7252020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>VANDER STOEP, STEPHEN</td>\n",
              "      <td>BOSTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.12925e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7172020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>7172020</td>\n",
              "      <td>20</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7042020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7112020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>LYKINS, JIM</td>\n",
              "      <td>DUPONT</td>\n",
              "      <td>WA</td>\n",
              "      <td>9.83277e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7282020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MALDONADO, REBECCA</td>\n",
              "      <td>SAN ANTONIO</td>\n",
              "      <td>TX</td>\n",
              "      <td>782491598</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7032020</td>\n",
              "      <td>150</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>HOWE, MICHAEL</td>\n",
              "      <td>CANTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.02116e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7162020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>16</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>17</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>6302020</td>\n",
              "      <td>15</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>P</td>\n",
              "      <td>P</td>\n",
              "      <td>227491</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>18</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>8052020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>230605</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>19</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>02131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>230605</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20</th>\n",
              "      <td>C00745687</td>\n",
              "      <td>GRIFFITH, LISA</td>\n",
              "      <td>CAMBRIDGE</td>\n",
              "      <td>MA</td>\n",
              "      <td>021394369</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>MARKETING</td>\n",
              "      <td>7132020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MA04267</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>233009</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>21</th>\n",
              "      <td>C00196774</td>\n",
              "      <td>LOVEDAY, KENNETH S.</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>BIOLOGIST</td>\n",
              "      <td>8042020</td>\n",
              "      <td>250</td>\n",
              "      <td>S4MA00028</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>S</td>\n",
              "      <td>P</td>\n",
              "      <td>222822</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>22</th>\n",
              "      <td>C00666149</td>\n",
              "      <td>LOVEDAY, KENNETH S</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>DIRECTOR</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H8NM02248</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>223821</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>23</th>\n",
              "      <td>C00500843</td>\n",
              "      <td>FLANNELLY-KING, SHANE</td>\n",
              "      <td>SOMERVILLE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.14421e+07</td>\n",
              "      <td>BIOGEN IDEC</td>\n",
              "      <td>BUSINESS ANALYST</td>\n",
              "      <td>6302020</td>\n",
              "      <td>250</td>\n",
              "      <td>S2MA00170</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2024</td>\n",
              "      <td>2020</td>\n",
              "      <td>S</td>\n",
              "      <td>P</td>\n",
              "      <td>222817</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>24</th>\n",
              "      <td>C00649376</td>\n",
              "      <td>NEWLAND, BART G.</td>\n",
              "      <td>BELMONT</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.4784e+07</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H8GA07201</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>224868</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25</th>\n",
              "      <td>C00701599</td>\n",
              "      <td>SEGAL, KATE</td>\n",
              "      <td>BATTLE CREEK</td>\n",
              "      <td>MI</td>\n",
              "      <td>4.90159e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>GOVERNMENT AFFAIRS</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MI06152</td>\n",
              "      <td>DEM</td>\n",
              "      <td>2020</td>\n",
              "      <td>2020</td>\n",
              "      <td>H</td>\n",
              "      <td>P</td>\n",
              "      <td>227095</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CMTE_ID                   NAME  ... CMTE_DSGN LINKAGE_ID\n",
              "0   C00703975            MARX, ISAAC  ...         P     227491\n",
              "1   C00703975      EDMONDSON, FRAZOR  ...         P     227491\n",
              "2   C00703975      EDMONDSON, FRAZOR  ...         P     227491\n",
              "3   C00703975      EDMONDSON, FRAZOR  ...         P     227491\n",
              "4   C00703975      EDMONDSON, FRAZOR  ...         P     227491\n",
              "5   C00703975           DILLEY, ANNE  ...         P     227491\n",
              "6   C00703975       SMIRNAKIS, KAREN  ...         P     227491\n",
              "7   C00703975       SMIRNAKIS, KAREN  ...         P     227491\n",
              "8   C00703975       SMIRNAKIS, KAREN  ...         P     227491\n",
              "9   C00703975  VANDER STOEP, STEPHEN  ...         P     227491\n",
              "10  C00703975          THOMAS, DONNA  ...         P     227491\n",
              "11  C00703975       SMIRNAKIS, KAREN  ...         P     227491\n",
              "12  C00703975       SMIRNAKIS, KAREN  ...         P     227491\n",
              "13  C00703975            LYKINS, JIM  ...         P     227491\n",
              "14  C00703975     MALDONADO, REBECCA  ...         P     227491\n",
              "15  C00703975          HOWE, MICHAEL  ...         P     227491\n",
              "16  C00703975       SMIRNAKIS, KAREN  ...         P     227491\n",
              "17  C00703975          THOMAS, DONNA  ...         P     227491\n",
              "18  C00727149         GATES, CYNTHIA  ...         P     230605\n",
              "19  C00727149         GATES, CYNTHIA  ...         P     230605\n",
              "20  C00745687         GRIFFITH, LISA  ...         P     233009\n",
              "21  C00196774    LOVEDAY, KENNETH S.  ...         P     222822\n",
              "22  C00666149     LOVEDAY, KENNETH S  ...         P     223821\n",
              "23  C00500843  FLANNELLY-KING, SHANE  ...         P     222817\n",
              "24  C00649376       NEWLAND, BART G.  ...         P     224868\n",
              "25  C00701599            SEGAL, KATE  ...         P     227095\n",
              "\n",
              "[26 rows x 16 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 22
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "tYTdUPrlFKJn",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 50
        },
        "outputId": "d5fbed25-b24a-452c-8a0f-caa683c07d94"
      },
      "source": [
        "# Transaction dates of the df6 rows whose OCCUPATION contains 'DIRECTOR'.\n",
        "# na=False counts a missing occupation as a non-match, so the mask stays\n",
        "# strictly boolean and the row selection cannot fail on NaN.\n",
        "df6.loc[df6['OCCUPATION'].str.contains('DIRECTOR', na=False), 'TRANSACTION_DT']"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "22    6302020\n",
              "Name: TRANSACTION_DT, dtype: int64"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 31
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4nct8sKi6ZRo",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 134
        },
        "outputId": "531fd6c2-e159-4311-98b3-a6c5766583dc"
      },
      "source": [
        "# Transaction dates of the df6 rows whose OCCUPATION contains 'VP'.\n",
        "# na=False counts a missing occupation as a non-match, so the mask stays\n",
        "# strictly boolean and the row selection cannot fail on NaN.\n",
        "df6.loc[df6['OCCUPATION'].str.contains('VP', na=False), 'TRANSACTION_DT']"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "6     7302020\n",
              "7     7182020\n",
              "8     7252020\n",
              "11    7042020\n",
              "12    7112020\n",
              "16    6302020\n",
              "Name: TRANSACTION_DT, dtype: int64"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 32
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TM9ctAKSHBGV"
      },
      "source": [
        "CD = ['MA-05', 'MA-03', 'MA-03', 'MA-03', 'MA-03', 'MA-05', 'MA-05', 'MA-05', 'MA-05', 'MA-07', 'TN-09', 'MA-05', 'MA-05', 'WA-10', 'TX-20', 'MA-08', 'MA-05', 'TN-09', 'MA-07', 'MA-07', 'MA-05', 'MA-04', 'MA-04', 'MA-07', 'MA-05', 'MI-03']"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "uqfKk_S8Hgr_"
      },
      "source": [
        "# The labels in CD are matched to rows purely by position, so check the\n",
        "# lengths agree before attaching them.\n",
        "assert len(CD) == len(df6), 'CD must supply exactly one label per df6 row'\n",
        "# assign() builds a new frame instead of writing into the existing one, so\n",
        "# re-running this cell is safe and no SettingWithCopyWarning is raised if\n",
        "# df6 was produced by filtering another frame.\n",
        "df6 = df6.assign(CD=CD)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "eJkDmYpJ6ZBr",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "deaf95cb-90c6-4a6e-86bd-6f9a89bd20e4"
      },
      "source": [
        "# Build df7 from df6 without the election-year and committee\n",
        "# type/designation columns, then display it.\n",
        "df7 = df6.drop(\n",
        "    ['CAND_ELECTION_YR', 'FEC_ELECTION_YR', 'CMTE_TP', 'CMTE_DSGN'],\n",
        "    axis='columns',\n",
        ")\n",
        "df7"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "      <th>CD</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MARX, ISAAC</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47438e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>CHEMIST</td>\n",
              "      <td>7182020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7292020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7152020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7222020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7082020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>DILLEY, ANNE</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47648e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>EPIDEMIOLOGIST</td>\n",
              "      <td>7052020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7182020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7252020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>VANDER STOEP, STEPHEN</td>\n",
              "      <td>BOSTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.12925e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7172020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>7172020</td>\n",
              "      <td>20</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TN-09</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7042020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7112020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>LYKINS, JIM</td>\n",
              "      <td>DUPONT</td>\n",
              "      <td>WA</td>\n",
              "      <td>9.83277e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7282020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>WA-10</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MALDONADO, REBECCA</td>\n",
              "      <td>SAN ANTONIO</td>\n",
              "      <td>TX</td>\n",
              "      <td>782491598</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7032020</td>\n",
              "      <td>150</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TX-20</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>HOWE, MICHAEL</td>\n",
              "      <td>CANTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.02116e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7162020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-08</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>16</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>17</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>6302020</td>\n",
              "      <td>15</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TN-09</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>18</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>8052020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>230605</td>\n",
              "      <td>MA-07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>19</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>02131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>230605</td>\n",
              "      <td>MA-07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20</th>\n",
              "      <td>C00745687</td>\n",
              "      <td>GRIFFITH, LISA</td>\n",
              "      <td>CAMBRIDGE</td>\n",
              "      <td>MA</td>\n",
              "      <td>021394369</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>MARKETING</td>\n",
              "      <td>7132020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MA04267</td>\n",
              "      <td>DEM</td>\n",
              "      <td>233009</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>21</th>\n",
              "      <td>C00196774</td>\n",
              "      <td>LOVEDAY, KENNETH S.</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>BIOLOGIST</td>\n",
              "      <td>8042020</td>\n",
              "      <td>250</td>\n",
              "      <td>S4MA00028</td>\n",
              "      <td>DEM</td>\n",
              "      <td>222822</td>\n",
              "      <td>MA-04</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>22</th>\n",
              "      <td>C00666149</td>\n",
              "      <td>LOVEDAY, KENNETH S</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>DIRECTOR</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H8NM02248</td>\n",
              "      <td>DEM</td>\n",
              "      <td>223821</td>\n",
              "      <td>MA-04</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>23</th>\n",
              "      <td>C00500843</td>\n",
              "      <td>FLANNELLY-KING, SHANE</td>\n",
              "      <td>SOMERVILLE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.14421e+07</td>\n",
              "      <td>BIOGEN IDEC</td>\n",
              "      <td>BUSINESS ANALYST</td>\n",
              "      <td>6302020</td>\n",
              "      <td>250</td>\n",
              "      <td>S2MA00170</td>\n",
              "      <td>DEM</td>\n",
              "      <td>222817</td>\n",
              "      <td>MA-07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>24</th>\n",
              "      <td>C00649376</td>\n",
              "      <td>NEWLAND, BART G.</td>\n",
              "      <td>BELMONT</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.4784e+07</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H8GA07201</td>\n",
              "      <td>DEM</td>\n",
              "      <td>224868</td>\n",
              "      <td>MA-05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25</th>\n",
              "      <td>C00701599</td>\n",
              "      <td>SEGAL, KATE</td>\n",
              "      <td>BATTLE CREEK</td>\n",
              "      <td>MI</td>\n",
              "      <td>4.90159e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>GOVERNMENT AFFAIRS</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MI06152</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227095</td>\n",
              "      <td>MI-03</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CMTE_ID                   NAME  ... LINKAGE_ID     CD\n",
              "0   C00703975            MARX, ISAAC  ...     227491  MA-05\n",
              "1   C00703975      EDMONDSON, FRAZOR  ...     227491  MA-03\n",
              "2   C00703975      EDMONDSON, FRAZOR  ...     227491  MA-03\n",
              "3   C00703975      EDMONDSON, FRAZOR  ...     227491  MA-03\n",
              "4   C00703975      EDMONDSON, FRAZOR  ...     227491  MA-03\n",
              "5   C00703975           DILLEY, ANNE  ...     227491  MA-05\n",
              "6   C00703975       SMIRNAKIS, KAREN  ...     227491  MA-05\n",
              "7   C00703975       SMIRNAKIS, KAREN  ...     227491  MA-05\n",
              "8   C00703975       SMIRNAKIS, KAREN  ...     227491  MA-05\n",
              "9   C00703975  VANDER STOEP, STEPHEN  ...     227491  MA-07\n",
              "10  C00703975          THOMAS, DONNA  ...     227491  TN-09\n",
              "11  C00703975       SMIRNAKIS, KAREN  ...     227491  MA-05\n",
              "12  C00703975       SMIRNAKIS, KAREN  ...     227491  MA-05\n",
              "13  C00703975            LYKINS, JIM  ...     227491  WA-10\n",
              "14  C00703975     MALDONADO, REBECCA  ...     227491  TX-20\n",
              "15  C00703975          HOWE, MICHAEL  ...     227491  MA-08\n",
              "16  C00703975       SMIRNAKIS, KAREN  ...     227491  MA-05\n",
              "17  C00703975          THOMAS, DONNA  ...     227491  TN-09\n",
              "18  C00727149         GATES, CYNTHIA  ...     230605  MA-07\n",
              "19  C00727149         GATES, CYNTHIA  ...     230605  MA-07\n",
              "20  C00745687         GRIFFITH, LISA  ...     233009  MA-05\n",
              "21  C00196774    LOVEDAY, KENNETH S.  ...     222822  MA-04\n",
              "22  C00666149     LOVEDAY, KENNETH S  ...     223821  MA-04\n",
              "23  C00500843  FLANNELLY-KING, SHANE  ...     222817  MA-07\n",
              "24  C00649376       NEWLAND, BART G.  ...     224868  MA-05\n",
              "25  C00701599            SEGAL, KATE  ...     227095  MI-03\n",
              "\n",
              "[26 rows x 13 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 42
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Mw4bIeWOOoaE"
      },
      "source": [
        "trends = pd.read_excel(data_dir+'/CD_trends.xlsx')"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VMqR9ED6OoOW",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        },
        "outputId": "6b45fe0d-9ae7-49d0-e8fc-0febc8350755"
      },
      "source": [
        "trends.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CD</th>\n",
              "      <th>Party</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>AK-AL</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>AL-01</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>AL-02</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>AL-03</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>AL-04</td>\n",
              "      <td>(R)</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CD Party\n",
              "0  AK-AL   (R)\n",
              "1  AL-01   (R)\n",
              "2  AL-02   (R)\n",
              "3  AL-03   (R)\n",
              "4  AL-04   (R)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 40
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qg5DNqhaGyzR",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "d37d3813-eaeb-4d10-b202-c12a21ec5f6e"
      },
      "source": [
        "inner_join = pd.merge(df7,  \n",
        "                      trends,  \n",
        "                      on ='CD',  \n",
        "                      how ='inner') \n",
        "inner_join "
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "      <th>CD</th>\n",
              "      <th>Party</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MARX, ISAAC</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47438e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>CHEMIST</td>\n",
              "      <td>7182020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>DILLEY, ANNE</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47648e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>EPIDEMIOLOGIST</td>\n",
              "      <td>7052020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7182020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7252020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7042020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7112020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>C00745687</td>\n",
              "      <td>GRIFFITH, LISA</td>\n",
              "      <td>CAMBRIDGE</td>\n",
              "      <td>MA</td>\n",
              "      <td>021394369</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>MARKETING</td>\n",
              "      <td>7132020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MA04267</td>\n",
              "      <td>DEM</td>\n",
              "      <td>233009</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>C00649376</td>\n",
              "      <td>NEWLAND, BART G.</td>\n",
              "      <td>BELMONT</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.4784e+07</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H8GA07201</td>\n",
              "      <td>DEM</td>\n",
              "      <td>224868</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7292020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7152020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7222020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7082020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>VANDER STOEP, STEPHEN</td>\n",
              "      <td>BOSTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.12925e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7172020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>8052020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>230605</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>16</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>02131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>230605</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>17</th>\n",
              "      <td>C00500843</td>\n",
              "      <td>FLANNELLY-KING, SHANE</td>\n",
              "      <td>SOMERVILLE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.14421e+07</td>\n",
              "      <td>BIOGEN IDEC</td>\n",
              "      <td>BUSINESS ANALYST</td>\n",
              "      <td>6302020</td>\n",
              "      <td>250</td>\n",
              "      <td>S2MA00170</td>\n",
              "      <td>DEM</td>\n",
              "      <td>222817</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>18</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>7172020</td>\n",
              "      <td>20</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TN-09</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>19</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>6302020</td>\n",
              "      <td>15</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TN-09</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>LYKINS, JIM</td>\n",
              "      <td>DUPONT</td>\n",
              "      <td>WA</td>\n",
              "      <td>9.83277e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7282020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>WA-10</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>21</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MALDONADO, REBECCA</td>\n",
              "      <td>SAN ANTONIO</td>\n",
              "      <td>TX</td>\n",
              "      <td>782491598</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7032020</td>\n",
              "      <td>150</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TX-20</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>22</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>HOWE, MICHAEL</td>\n",
              "      <td>CANTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.02116e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7162020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-08</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>23</th>\n",
              "      <td>C00196774</td>\n",
              "      <td>LOVEDAY, KENNETH S.</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>BIOLOGIST</td>\n",
              "      <td>8042020</td>\n",
              "      <td>250</td>\n",
              "      <td>S4MA00028</td>\n",
              "      <td>DEM</td>\n",
              "      <td>222822</td>\n",
              "      <td>MA-04</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>24</th>\n",
              "      <td>C00666149</td>\n",
              "      <td>LOVEDAY, KENNETH S</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>DIRECTOR</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H8NM02248</td>\n",
              "      <td>DEM</td>\n",
              "      <td>223821</td>\n",
              "      <td>MA-04</td>\n",
              "      <td>(D)</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25</th>\n",
              "      <td>C00701599</td>\n",
              "      <td>SEGAL, KATE</td>\n",
              "      <td>BATTLE CREEK</td>\n",
              "      <td>MI</td>\n",
              "      <td>4.90159e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>GOVERNMENT AFFAIRS</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MI06152</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227095</td>\n",
              "      <td>MI-03</td>\n",
              "      <td>(L)</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CMTE_ID                   NAME          CITY  ... LINKAGE_ID     CD Party\n",
              "0   C00703975            MARX, ISAAC     ARLINGTON  ...     227491  MA-05   (D)\n",
              "1   C00703975           DILLEY, ANNE     ARLINGTON  ...     227491  MA-05   (D)\n",
              "2   C00703975       SMIRNAKIS, KAREN        WESTON  ...     227491  MA-05   (D)\n",
              "3   C00703975       SMIRNAKIS, KAREN        WESTON  ...     227491  MA-05   (D)\n",
              "4   C00703975       SMIRNAKIS, KAREN        WESTON  ...     227491  MA-05   (D)\n",
              "5   C00703975       SMIRNAKIS, KAREN        WESTON  ...     227491  MA-05   (D)\n",
              "6   C00703975       SMIRNAKIS, KAREN        WESTON  ...     227491  MA-05   (D)\n",
              "7   C00703975       SMIRNAKIS, KAREN        WESTON  ...     227491  MA-05   (D)\n",
              "8   C00745687         GRIFFITH, LISA     CAMBRIDGE  ...     233009  MA-05   (D)\n",
              "9   C00649376       NEWLAND, BART G.       BELMONT  ...     224868  MA-05   (D)\n",
              "10  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...     227491  MA-03   (D)\n",
              "11  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...     227491  MA-03   (D)\n",
              "12  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...     227491  MA-03   (D)\n",
              "13  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...     227491  MA-03   (D)\n",
              "14  C00703975  VANDER STOEP, STEPHEN        BOSTON  ...     227491  MA-07   (D)\n",
              "15  C00727149         GATES, CYNTHIA    ROSLINDALE  ...     230605  MA-07   (D)\n",
              "16  C00727149         GATES, CYNTHIA    ROSLINDALE  ...     230605  MA-07   (D)\n",
              "17  C00500843  FLANNELLY-KING, SHANE    SOMERVILLE  ...     222817  MA-07   (D)\n",
              "18  C00703975          THOMAS, DONNA       MEMPHIS  ...     227491  TN-09   (D)\n",
              "19  C00703975          THOMAS, DONNA       MEMPHIS  ...     227491  TN-09   (D)\n",
              "20  C00703975            LYKINS, JIM        DUPONT  ...     227491  WA-10   (D)\n",
              "21  C00703975     MALDONADO, REBECCA   SAN ANTONIO  ...     227491  TX-20   (D)\n",
              "22  C00703975          HOWE, MICHAEL        CANTON  ...     227491  MA-08   (D)\n",
              "23  C00196774    LOVEDAY, KENNETH S.     BROOKLINE  ...     222822  MA-04   (D)\n",
              "24  C00666149     LOVEDAY, KENNETH S     BROOKLINE  ...     223821  MA-04   (D)\n",
              "25  C00701599            SEGAL, KATE  BATTLE CREEK  ...     227095  MI-03   (L)\n",
              "\n",
              "[26 rows x 14 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 44
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "a2v3lRkYTjag"
      },
      "source": [
        "inner_join['INDEX']= [1 if x =='DEM' else 0 for x in inner_join['CAND_PTY_AFFILIATION']] \n",
        "  "
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Objszz9NSHe1",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "a33bc488-4ac4-42af-c4a1-bfeeda4caad7"
      },
      "source": [
        "inner_join"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "      <th>CD</th>\n",
              "      <th>Party</th>\n",
              "      <th>INDEX</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MARX, ISAAC</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47438e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>CHEMIST</td>\n",
              "      <td>7182020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>DILLEY, ANNE</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47648e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>EPIDEMIOLOGIST</td>\n",
              "      <td>7052020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7182020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7252020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7042020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7112020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>C00745687</td>\n",
              "      <td>GRIFFITH, LISA</td>\n",
              "      <td>CAMBRIDGE</td>\n",
              "      <td>MA</td>\n",
              "      <td>021394369</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>MARKETING</td>\n",
              "      <td>7132020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MA04267</td>\n",
              "      <td>DEM</td>\n",
              "      <td>233009</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>C00649376</td>\n",
              "      <td>NEWLAND, BART G.</td>\n",
              "      <td>BELMONT</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.4784e+07</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H8GA07201</td>\n",
              "      <td>DEM</td>\n",
              "      <td>224868</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7292020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7152020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7222020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7082020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>VANDER STOEP, STEPHEN</td>\n",
              "      <td>BOSTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.12925e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7172020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>8052020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>230605</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>16</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>02131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>230605</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>17</th>\n",
              "      <td>C00500843</td>\n",
              "      <td>FLANNELLY-KING, SHANE</td>\n",
              "      <td>SOMERVILLE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.14421e+07</td>\n",
              "      <td>BIOGEN IDEC</td>\n",
              "      <td>BUSINESS ANALYST</td>\n",
              "      <td>6302020</td>\n",
              "      <td>250</td>\n",
              "      <td>S2MA00170</td>\n",
              "      <td>DEM</td>\n",
              "      <td>222817</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>18</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>7172020</td>\n",
              "      <td>20</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TN-09</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>19</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>6302020</td>\n",
              "      <td>15</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TN-09</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>LYKINS, JIM</td>\n",
              "      <td>DUPONT</td>\n",
              "      <td>WA</td>\n",
              "      <td>9.83277e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7282020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>WA-10</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>21</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MALDONADO, REBECCA</td>\n",
              "      <td>SAN ANTONIO</td>\n",
              "      <td>TX</td>\n",
              "      <td>782491598</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7032020</td>\n",
              "      <td>150</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TX-20</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>22</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>HOWE, MICHAEL</td>\n",
              "      <td>CANTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.02116e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7162020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-08</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>23</th>\n",
              "      <td>C00196774</td>\n",
              "      <td>LOVEDAY, KENNETH S.</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>BIOLOGIST</td>\n",
              "      <td>8042020</td>\n",
              "      <td>250</td>\n",
              "      <td>S4MA00028</td>\n",
              "      <td>DEM</td>\n",
              "      <td>222822</td>\n",
              "      <td>MA-04</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>24</th>\n",
              "      <td>C00666149</td>\n",
              "      <td>LOVEDAY, KENNETH S</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>DIRECTOR</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H8NM02248</td>\n",
              "      <td>DEM</td>\n",
              "      <td>223821</td>\n",
              "      <td>MA-04</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25</th>\n",
              "      <td>C00701599</td>\n",
              "      <td>SEGAL, KATE</td>\n",
              "      <td>BATTLE CREEK</td>\n",
              "      <td>MI</td>\n",
              "      <td>4.90159e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>GOVERNMENT AFFAIRS</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MI06152</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227095</td>\n",
              "      <td>MI-03</td>\n",
              "      <td>(L)</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CMTE_ID                   NAME          CITY  ...     CD Party INDEX\n",
              "0   C00703975            MARX, ISAAC     ARLINGTON  ...  MA-05   (D)     1\n",
              "1   C00703975           DILLEY, ANNE     ARLINGTON  ...  MA-05   (D)     1\n",
              "2   C00703975       SMIRNAKIS, KAREN        WESTON  ...  MA-05   (D)     1\n",
              "3   C00703975       SMIRNAKIS, KAREN        WESTON  ...  MA-05   (D)     1\n",
              "4   C00703975       SMIRNAKIS, KAREN        WESTON  ...  MA-05   (D)     1\n",
              "5   C00703975       SMIRNAKIS, KAREN        WESTON  ...  MA-05   (D)     1\n",
              "6   C00703975       SMIRNAKIS, KAREN        WESTON  ...  MA-05   (D)     1\n",
              "7   C00703975       SMIRNAKIS, KAREN        WESTON  ...  MA-05   (D)     1\n",
              "8   C00745687         GRIFFITH, LISA     CAMBRIDGE  ...  MA-05   (D)     1\n",
              "9   C00649376       NEWLAND, BART G.       BELMONT  ...  MA-05   (D)     1\n",
              "10  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...  MA-03   (D)     1\n",
              "11  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...  MA-03   (D)     1\n",
              "12  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...  MA-03   (D)     1\n",
              "13  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...  MA-03   (D)     1\n",
              "14  C00703975  VANDER STOEP, STEPHEN        BOSTON  ...  MA-07   (D)     1\n",
              "15  C00727149         GATES, CYNTHIA    ROSLINDALE  ...  MA-07   (D)     1\n",
              "16  C00727149         GATES, CYNTHIA    ROSLINDALE  ...  MA-07   (D)     1\n",
              "17  C00500843  FLANNELLY-KING, SHANE    SOMERVILLE  ...  MA-07   (D)     1\n",
              "18  C00703975          THOMAS, DONNA       MEMPHIS  ...  TN-09   (D)     1\n",
              "19  C00703975          THOMAS, DONNA       MEMPHIS  ...  TN-09   (D)     1\n",
              "20  C00703975            LYKINS, JIM        DUPONT  ...  WA-10   (D)     1\n",
              "21  C00703975     MALDONADO, REBECCA   SAN ANTONIO  ...  TX-20   (D)     1\n",
              "22  C00703975          HOWE, MICHAEL        CANTON  ...  MA-08   (D)     1\n",
              "23  C00196774    LOVEDAY, KENNETH S.     BROOKLINE  ...  MA-04   (D)     1\n",
              "24  C00666149     LOVEDAY, KENNETH S     BROOKLINE  ...  MA-04   (D)     1\n",
              "25  C00701599            SEGAL, KATE  BATTLE CREEK  ...  MI-03   (L)     1\n",
              "\n",
              "[26 rows x 15 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 47
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XL7glHAoSHM3"
      },
      "source": [
        "# Add a constant INDEX_BOSS column, set to 1 on every row of inner_join.\n",
        "# NOTE(review): the intended meaning of this flag is not visible in this cell; confirm against later cells.\n",
        "inner_join['INDEX_BOSS'] = 1"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "u2BIIZl3RYg1",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "a0f10c04-3eb4-4294-a054-bb8f4275471e"
      },
      "source": [
        "# Display the frame again to verify the INDEX_BOSS column added in the previous cell.\n",
        "inner_join"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>CMTE_ID</th>\n",
              "      <th>NAME</th>\n",
              "      <th>CITY</th>\n",
              "      <th>STATE</th>\n",
              "      <th>ZIP_CODE</th>\n",
              "      <th>EMPLOYER</th>\n",
              "      <th>OCCUPATION</th>\n",
              "      <th>TRANSACTION_DT</th>\n",
              "      <th>TRANSACTION_AMT</th>\n",
              "      <th>CAND_ID</th>\n",
              "      <th>CAND_PTY_AFFILIATION</th>\n",
              "      <th>LINKAGE_ID</th>\n",
              "      <th>CD</th>\n",
              "      <th>Party</th>\n",
              "      <th>INDEX</th>\n",
              "      <th>INDEX_BOSS</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MARX, ISAAC</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47438e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>CHEMIST</td>\n",
              "      <td>7182020</td>\n",
              "      <td>250</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>DILLEY, ANNE</td>\n",
              "      <td>ARLINGTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.47648e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>EPIDEMIOLOGIST</td>\n",
              "      <td>7052020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7182020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>24931439</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7252020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7042020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>7112020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>SMIRNAKIS, KAREN</td>\n",
              "      <td>WESTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.49314e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>C00745687</td>\n",
              "      <td>GRIFFITH, LISA</td>\n",
              "      <td>CAMBRIDGE</td>\n",
              "      <td>MA</td>\n",
              "      <td>021394369</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>MARKETING</td>\n",
              "      <td>7132020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MA04267</td>\n",
              "      <td>DEM</td>\n",
              "      <td>233009</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>C00649376</td>\n",
              "      <td>NEWLAND, BART G.</td>\n",
              "      <td>BELMONT</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.4784e+07</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H8GA07201</td>\n",
              "      <td>DEM</td>\n",
              "      <td>224868</td>\n",
              "      <td>MA-05</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7292020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7152020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7222020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>EDMONDSON, FRAZOR</td>\n",
              "      <td>MARLBOROUGH</td>\n",
              "      <td>MA</td>\n",
              "      <td>1.75267e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7082020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-03</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>VANDER STOEP, STEPHEN</td>\n",
              "      <td>BOSTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.12925e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7172020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>8052020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>230605</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>16</th>\n",
              "      <td>C00727149</td>\n",
              "      <td>GATES, CYNTHIA</td>\n",
              "      <td>ROSLINDALE</td>\n",
              "      <td>MA</td>\n",
              "      <td>02131</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>REGULATORY MEDICAL WRITER</td>\n",
              "      <td>6302020</td>\n",
              "      <td>100</td>\n",
              "      <td>H0MA08045</td>\n",
              "      <td>DEM</td>\n",
              "      <td>230605</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>17</th>\n",
              "      <td>C00500843</td>\n",
              "      <td>FLANNELLY-KING, SHANE</td>\n",
              "      <td>SOMERVILLE</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.14421e+07</td>\n",
              "      <td>BIOGEN IDEC</td>\n",
              "      <td>BUSINESS ANALYST</td>\n",
              "      <td>6302020</td>\n",
              "      <td>250</td>\n",
              "      <td>S2MA00170</td>\n",
              "      <td>DEM</td>\n",
              "      <td>222817</td>\n",
              "      <td>MA-07</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>18</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>7172020</td>\n",
              "      <td>20</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TN-09</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>19</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>THOMAS, DONNA</td>\n",
              "      <td>MEMPHIS</td>\n",
              "      <td>TN</td>\n",
              "      <td>3.81155e+08</td>\n",
              "      <td>PMC BIOGENIX INC.</td>\n",
              "      <td>CUSTOMER SERVICE</td>\n",
              "      <td>6302020</td>\n",
              "      <td>15</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TN-09</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>LYKINS, JIM</td>\n",
              "      <td>DUPONT</td>\n",
              "      <td>WA</td>\n",
              "      <td>9.83277e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7282020</td>\n",
              "      <td>50</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>WA-10</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>21</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>MALDONADO, REBECCA</td>\n",
              "      <td>SAN ANTONIO</td>\n",
              "      <td>TX</td>\n",
              "      <td>782491598</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>SALES</td>\n",
              "      <td>7032020</td>\n",
              "      <td>150</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>TX-20</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>22</th>\n",
              "      <td>C00703975</td>\n",
              "      <td>HOWE, MICHAEL</td>\n",
              "      <td>CANTON</td>\n",
              "      <td>MA</td>\n",
              "      <td>2.02116e+07</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>ATTORNEY</td>\n",
              "      <td>7162020</td>\n",
              "      <td>100</td>\n",
              "      <td>P80000722</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227491</td>\n",
              "      <td>MA-08</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>23</th>\n",
              "      <td>C00196774</td>\n",
              "      <td>LOVEDAY, KENNETH S.</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>BIOLOGIST</td>\n",
              "      <td>8042020</td>\n",
              "      <td>250</td>\n",
              "      <td>S4MA00028</td>\n",
              "      <td>DEM</td>\n",
              "      <td>222822</td>\n",
              "      <td>MA-04</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>24</th>\n",
              "      <td>C00666149</td>\n",
              "      <td>LOVEDAY, KENNETH S</td>\n",
              "      <td>BROOKLINE</td>\n",
              "      <td>MA</td>\n",
              "      <td>024465827</td>\n",
              "      <td>BIOGEN INC</td>\n",
              "      <td>DIRECTOR</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H8NM02248</td>\n",
              "      <td>DEM</td>\n",
              "      <td>223821</td>\n",
              "      <td>MA-04</td>\n",
              "      <td>(D)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25</th>\n",
              "      <td>C00701599</td>\n",
              "      <td>SEGAL, KATE</td>\n",
              "      <td>BATTLE CREEK</td>\n",
              "      <td>MI</td>\n",
              "      <td>4.90159e+08</td>\n",
              "      <td>BIOGEN</td>\n",
              "      <td>GOVERNMENT AFFAIRS</td>\n",
              "      <td>6302020</td>\n",
              "      <td>500</td>\n",
              "      <td>H0MI06152</td>\n",
              "      <td>DEM</td>\n",
              "      <td>227095</td>\n",
              "      <td>MI-03</td>\n",
              "      <td>(L)</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "      CMTE_ID                   NAME          CITY  ... Party INDEX INDEX_BOSS\n",
              "0   C00703975            MARX, ISAAC     ARLINGTON  ...   (D)     1          1\n",
              "1   C00703975           DILLEY, ANNE     ARLINGTON  ...   (D)     1          1\n",
              "2   C00703975       SMIRNAKIS, KAREN        WESTON  ...   (D)     1          1\n",
              "3   C00703975       SMIRNAKIS, KAREN        WESTON  ...   (D)     1          1\n",
              "4   C00703975       SMIRNAKIS, KAREN        WESTON  ...   (D)     1          1\n",
              "5   C00703975       SMIRNAKIS, KAREN        WESTON  ...   (D)     1          1\n",
              "6   C00703975       SMIRNAKIS, KAREN        WESTON  ...   (D)     1          1\n",
              "7   C00703975       SMIRNAKIS, KAREN        WESTON  ...   (D)     1          1\n",
              "8   C00745687         GRIFFITH, LISA     CAMBRIDGE  ...   (D)     1          1\n",
              "9   C00649376       NEWLAND, BART G.       BELMONT  ...   (D)     1          1\n",
              "10  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...   (D)     1          1\n",
              "11  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...   (D)     1          1\n",
              "12  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...   (D)     1          1\n",
              "13  C00703975      EDMONDSON, FRAZOR   MARLBOROUGH  ...   (D)     1          1\n",
              "14  C00703975  VANDER STOEP, STEPHEN        BOSTON  ...   (D)     1          1\n",
              "15  C00727149         GATES, CYNTHIA    ROSLINDALE  ...   (D)     1          1\n",
              "16  C00727149         GATES, CYNTHIA    ROSLINDALE  ...   (D)     1          1\n",
              "17  C00500843  FLANNELLY-KING, SHANE    SOMERVILLE  ...   (D)     1          1\n",
              "18  C00703975          THOMAS, DONNA       MEMPHIS  ...   (D)     1          1\n",
              "19  C00703975          THOMAS, DONNA       MEMPHIS  ...   (D)     1          1\n",
              "20  C00703975            LYKINS, JIM        DUPONT  ...   (D)     1          1\n",
              "21  C00703975     MALDONADO, REBECCA   SAN ANTONIO  ...   (D)     1          1\n",
              "22  C00703975          HOWE, MICHAEL        CANTON  ...   (D)     1          1\n",
              "23  C00196774    LOVEDAY, KENNETH S.     BROOKLINE  ...   (D)     1          1\n",
              "24  C00666149     LOVEDAY, KENNETH S     BROOKLINE  ...   (D)     1          1\n",
              "25  C00701599            SEGAL, KATE  BATTLE CREEK  ...   (L)     1          1\n",
              "\n",
              "[26 rows x 16 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 49
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rW1ZZvZwYZ2s"
      },
      "source": [
        "subset2 = inner_join[['INDEX','INDEX_BOSS']]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "M7vLCa_-dmhg"
      },
      "source": [
        "from sklearn.linear_model import LinearRegression"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "apb76xJfYP-w",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "bd90bb55-85ee-4d15-b585-3c3b1406d16c"
      },
      "source": [
        "linear_regressor = LinearRegression()\n",
        "from sklearn.preprocessing import MinMaxScaler\n",
        "scaler1 = MinMaxScaler()\n",
        "scaler1.fit(subset2)\n",
        "inner_join_scaled=scaler1.transform(subset2)\n",
        "\n",
        "x = inner_join_scaled[:,0].reshape(-1,1)\n",
        "y = inner_join_scaled[:,1].reshape(-1,1)\n",
        "\n",
        "linear_regressor.fit(x, y)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 55
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Q33krPq74eVs"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}