{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Week3_Assignment.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [
{ "cell_type": "code", "metadata": { "id": "bs5dRVOjZ6pT", "colab": { "base_uri": "https://localhost:8080/", "height": 221 }, "outputId": "2801a080-a77a-4e62-d58c-732d318416b3" }, "source": [
"# Mount Google Drive and point data_dir at the folder that holds the FEC bulk files.\n",
"from google.colab import drive\n",
"drive.mount('/data/')\n",
"data_dir = '/data/My Drive/Colab Notebooks/FEC dataset'\n",
"# List the folder through data_dir (IPython expands {data_dir}) so the path is written only once.\n",
"!ls '{data_dir}'\n",
"!pip install matplotlib" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Mounted at /data/\n", "ccl20.zip\t cm_header_file.csv indiv_header_file.csv\n", "ccl_header_file.csv cn20.zip\t\t pas220.zip\n", "CD_trends.xlsx\t cn_header_file.csv pas2_header_file.csv\n", "cm20.zip\t indiv20.zip\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (3.2.2)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.4.7)\n", "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.8.1)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (0.10.0)\n", "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.18.5)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.2.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.1->matplotlib) (1.15.0)\n" ], "name": "stdout" } ] },
{ "cell_type": "code", "metadata": { "id": "uivLBlKyuC2V" }, "source": [
"import zipfile\n",
"\n",
"# Keep only the archive path here. The archive is opened and closed by the cell that reads it,\n",
"# so no file handle is left open and the builtin zip() is not shadowed.\n",
"indiv_zip_path = data_dir+'/indiv20.zip'" ], "execution_count": null,
"outputs": [] },
{ "cell_type": "code", "metadata": { "id": "m-0PQq0Oufje", "colab": { "base_uri": "https://localhost:8080/", "height": 428 }, "outputId": "b79a482b-6444-49f8-f88a-d79975a0442c" }, "source": [
"import pandas as pd\n",
"\n",
"# The data file has no header row; the column names ship in a separate one-line CSV.\n",
"header = pd.read_csv(data_dir+'/indiv_header_file.csv')\n",
"\n",
"# Columns 10, 16, 18 and 19 (ZIP_CODE, TRAN_ID, MEMO_CD, MEMO_TEXT) contain mixed types.\n",
"# Reading them as text stops pandas from guessing a dtype per chunk (the DtypeWarning)\n",
"# and keeps ZIP codes from being turned into floats.\n",
"# TRANSACTION_DT is left as parsed so later cells see the same values as before.\n",
"with zipfile.ZipFile(indiv_zip_path) as indiv_zip:\n",
"    with indiv_zip.open('by_date/itcont_2020_20200630_20300630.txt') as contributions_file:\n",
"        data = pd.read_csv(contributions_file, sep='|', names=header.columns,\n",
"                           dtype={'ZIP_CODE': str, 'TRAN_ID': str, 'MEMO_CD': str, 'MEMO_TEXT': str})\n",
"data.head()" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (10,16,18,19) have mixed types.Specify dtype option on import or set low_memory=False.\n", " interactivity=interactivity, compiler=compiler, result=result)\n" ], "name": "stderr" }, { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>AMNDT_IND</th>\n", " <th>RPT_TP</th>\n", " <th>TRANSACTION_PGI</th>\n", " <th>IMAGE_NUM</th>\n", " <th>TRANSACTION_TP</th>\n", " <th>ENTITY_TP</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>OTHER_ID</th>\n", " <th>TRAN_ID</th>\n", " <th>FILE_NUM</th>\n", " <th>MEMO_CD</th>\n", " <th>MEMO_TEXT</th>\n", " <th>SUB_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00363317</td>\n", " <td>A</td>\n", " <td>YE</td>\n", " <td>P2020</td>\n", " <td>202004199219743280</td>\n", " <td>15E</td>\n", " <td>IND</td>\n", " <td>LITTLE, WILLIAM</td>\n", " 
<td>NEW YORK</td>\n", " <td>NY</td>\n", " <td>1.0128e+08</td>\n", " <td>NOT EMPLOYED</td>\n", " <td>NOT EMPLOYED</td>\n", " <td>12162020</td>\n", " <td>500</td>\n", " <td>NaN</td>\n", " <td>4017159</td>\n", " <td>1402014</td>\n", " <td>NaN</td>\n", " <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n", " <td>4042120201737536230</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00723122</td>\n", " <td>A</td>\n", " <td>YE</td>\n", " <td>P2020</td>\n", " <td>202007159244979799</td>\n", " <td>15E</td>\n", " <td>IND</td>\n", " <td>STOWE, BARBARA</td>\n", " <td>RESTON</td>\n", " <td>VA</td>\n", " <td>2.01942e+08</td>\n", " <td>NOT EMPLOYED</td>\n", " <td>NOT EMPLOYED</td>\n", " <td>12282020</td>\n", " <td>100</td>\n", " <td>C00193433</td>\n", " <td>4753483</td>\n", " <td>1423440</td>\n", " <td>NaN</td>\n", " <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n", " <td>4072620201794577716</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00290825</td>\n", " <td>A</td>\n", " <td>YE</td>\n", " <td>P2020</td>\n", " <td>202004159216892816</td>\n", " <td>15E</td>\n", " <td>IND</td>\n", " <td>MEHIEL, KAREN</td>\n", " <td>NEW YORK</td>\n", " <td>NY</td>\n", " <td>1.01281e+08</td>\n", " <td>KAMPACK, INC.</td>\n", " <td>EXECUTIVE</td>\n", " <td>12182020</td>\n", " <td>2800</td>\n", " <td>C00401224</td>\n", " <td>3965375</td>\n", " <td>1398991</td>\n", " <td>NaN</td>\n", " <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n", " <td>4050620201741858091</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00363317</td>\n", " <td>A</td>\n", " <td>M12</td>\n", " <td>P2020</td>\n", " <td>202004199219742982</td>\n", " <td>15E</td>\n", " <td>IND</td>\n", " <td>LITTLE, WILLIAM</td>\n", " <td>NEW YORK</td>\n", " <td>NY</td>\n", " <td>1.0128e+08</td>\n", " <td>NOT EMPLOYED</td>\n", " <td>NOT EMPLOYED</td>\n", " <td>10302020</td>\n", " <td>500</td>\n", " <td>C00401224</td>\n", " <td>4017173</td>\n", " <td>1401993</td>\n", " <td>NaN</td>\n", " <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n", " 
<td>4042120201737536220</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00589309</td>\n", " <td>A</td>\n", " <td>YE</td>\n", " <td>P</td>\n", " <td>202002209187171385</td>\n", " <td>15E</td>\n", " <td>IND</td>\n", " <td>DAVIDSON, GREG</td>\n", " <td>REDONDO BEACH</td>\n", " <td>CA</td>\n", " <td>9.02782e+08</td>\n", " <td>NORTHROP GRUMMAN</td>\n", " <td>AEROSPACE MANAGER</td>\n", " <td>12312020</td>\n", " <td>100</td>\n", " <td>C00401224</td>\n", " <td>VVBX0QHNGR6</td>\n", " <td>1385228</td>\n", " <td>NaN</td>\n", " <td>* EARMARKED CONTRIBUTION: SEE BELOW</td>\n", " <td>4022920201700018835</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID ... SUB_ID\n", "0 C00363317 ... 4042120201737536230\n", "1 C00723122 ... 4072620201794577716\n", "2 C00290825 ... 4050620201741858091\n", "3 C00363317 ... 4042120201737536220\n", "4 C00589309 ... 4022920201700018835\n", "\n", "[5 rows x 21 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 3 } ] },
{ "cell_type": "code", "metadata": { "id": "6PB4UgTa1Bih", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "5339e7e6-3336-494c-edb9-cfd45fa8cee4" }, "source": [
"# Largest single TRANSACTION_AMT value in the loaded file.\n",
"print(data.loc[:, 'TRANSACTION_AMT'].max())" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "10000000\n" ], "name": "stdout" } ] },
{ "cell_type": "code", "metadata": { "id": "owm7xZS11HEB", "colab": { "base_uri": "https://localhost:8080/", "height": 326 }, "outputId": "6769a41a-fe29-4f5b-acab-71ab9a292fd8" }, "source": [
"# Order every row from the largest amount down, then preview the top of the ranking.\n",
"sort_amt = data.sort_values('TRANSACTION_AMT', ascending=False)\n",
"sort_amt.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", 
"</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>AMNDT_IND</th>\n", " <th>RPT_TP</th>\n", " <th>TRANSACTION_PGI</th>\n", " <th>IMAGE_NUM</th>\n", " <th>TRANSACTION_TP</th>\n", " <th>ENTITY_TP</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>OTHER_ID</th>\n", " <th>TRAN_ID</th>\n", " <th>FILE_NUM</th>\n", " <th>MEMO_CD</th>\n", " <th>MEMO_TEXT</th>\n", " <th>SUB_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>990582</th>\n", " <td>C00571703</td>\n", " <td>N</td>\n", " <td>M8</td>\n", " <td>P</td>\n", " <td>202008209266851913</td>\n", " <td>10</td>\n", " <td>IND</td>\n", " <td>MELLON, TIMOTHY</td>\n", " <td>SARATOGA</td>\n", " <td>WY</td>\n", " <td>823311500</td>\n", " <td>SELF-EMPLOYED</td>\n", " <td>INVESTMENTS</td>\n", " <td>7092020</td>\n", " <td>10000000</td>\n", " <td>NaN</td>\n", " <td>SA11A.15446</td>\n", " <td>1434706</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4090120201833903380</td>\n", " </tr>\n", " <tr>\n", " <th>990568</th>\n", " <td>C00571703</td>\n", " <td>N</td>\n", " <td>M8</td>\n", " <td>P</td>\n", " <td>202008209266851908</td>\n", " <td>10</td>\n", " <td>IND</td>\n", " <td>SCHWARZMAN, STEPHEN A.</td>\n", " <td>NEW YORK</td>\n", " <td>NY</td>\n", " <td>101543302</td>\n", " <td>BLACKSTONE</td>\n", " <td>CHAIRMAN & CEO</td>\n", " <td>7012020</td>\n", " <td>10000000</td>\n", " <td>NaN</td>\n", " <td>SA11A.15411</td>\n", " <td>1434706</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4090120201833903366</td>\n", " </tr>\n", " <tr>\n", " <th>469388</th>\n", " <td>C00637512</td>\n", " <td>N</td>\n", " <td>M8</td>\n", " <td>P</td>\n", " <td>202008209266413693</td>\n", " <td>10</td>\n", " <td>ORG</td>\n", " <td>AMERICA FIRST POLICIES, INC.</td>\n", " 
<td>ARLINGTON</td>\n", " <td>VA</td>\n", " <td>22202</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>7202020</td>\n", " <td>10000000</td>\n", " <td>NaN</td>\n", " <td>SA11AI.165580</td>\n", " <td>1434640</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4082920201831236982</td>\n", " </tr>\n", " <tr>\n", " <th>1151552</th>\n", " <td>C00484642</td>\n", " <td>N</td>\n", " <td>M7</td>\n", " <td>P</td>\n", " <td>202007209260164631</td>\n", " <td>10</td>\n", " <td>ORG</td>\n", " <td>MAJORITY FORWARD</td>\n", " <td>WASHINGTON</td>\n", " <td>DC</td>\n", " <td>200055998</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>6302020</td>\n", " <td>8000000</td>\n", " <td>NaN</td>\n", " <td>1973314</td>\n", " <td>1427419</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4072920201808862242</td>\n", " </tr>\n", " <tr>\n", " <th>1351670</th>\n", " <td>C00747246</td>\n", " <td>N</td>\n", " <td>Q2</td>\n", " <td>P</td>\n", " <td>202007159245095555</td>\n", " <td>15</td>\n", " <td>ORG</td>\n", " <td>SIXTEEN THIRTY FUND</td>\n", " <td>WASHINGTON</td>\n", " <td>DC</td>\n", " <td>200362605</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>6302020</td>\n", " <td>5700000</td>\n", " <td>NaN</td>\n", " <td>12295463</td>\n", " <td>1423930</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4071720201791015689</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID AMNDT_IND RPT_TP ... MEMO_CD MEMO_TEXT SUB_ID\n", "990582 C00571703 N M8 ... NaN NaN 4090120201833903380\n", "990568 C00571703 N M8 ... NaN NaN 4090120201833903366\n", "469388 C00637512 N M8 ... NaN NaN 4082920201831236982\n", "1151552 C00484642 N M7 ... NaN NaN 4072920201808862242\n", "1351670 C00747246 N Q2 ... 
NaN NaN 4071720201791015689\n", "\n", "[5 rows x 21 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 5 } ] },
{ "cell_type": "code", "metadata": { "id": "WuJLWjlA29OT" }, "source": [
"# Keep only the contributor-level columns; .copy() makes df independent of data.\n",
"df = data[['CMTE_ID', 'NAME', 'CITY', 'STATE', 'ZIP_CODE', 'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT', 'TRANSACTION_AMT']].copy()" ], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "I9CI-mdn29Fq", "colab": { "base_uri": "https://localhost:8080/", "height": 238 }, "outputId": "5c059bc9-6a43-4365-db85-a24049a7568b" }, "source": [
"from zipfile import ZipFile\n",
"import pandas as pd\n",
"\n",
"# Column names for cn.txt come from the separate header CSV.\n",
"header = pd.read_csv(data_dir+'/cn_header_file.csv')\n",
"\n",
"# Bind the archive to its own name rather than `zip`, so the builtin zip() and any other\n",
"# variable called `zip` in the notebook are left untouched when this cell runs.\n",
"with ZipFile(data_dir+'/cn20.zip') as cn_zip:\n",
"    # Close the member stream as soon as pandas has consumed it.\n",
"    with cn_zip.open('cn.txt') as cn_file:\n",
"        candidates = pd.read_csv(cn_file, sep='|', names=header.columns)\n",
"candidates.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_NAME</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>CAND_OFFICE_ST</th>\n", " <th>CAND_OFFICE</th>\n", " <th>CAND_OFFICE_DISTRICT</th>\n", " <th>CAND_ICI</th>\n", " <th>CAND_STATUS</th>\n", " <th>CAND_PCC</th>\n", " <th>CAND_ST1</th>\n", " <th>CAND_ST2</th>\n", " <th>CAND_CITY</th>\n", " <th>CAND_ST</th>\n", " <th>CAND_ZIP</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>H0AK00105</td>\n", " <td>LAMB, THOMAS</td>\n", " <td>NNE</td>\n", " <td>2020</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " 
<td>C00607515</td>\n", " <td>1861 W LAKE LUCILLE DR</td>\n", " <td>NaN</td>\n", " <td>WASILLA</td>\n", " <td>AK</td>\n", " <td>99654.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>H0AK00113</td>\n", " <td>TUGATUK, RAY SEAN</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>PO BOX 172</td>\n", " <td>NaN</td>\n", " <td>MANAKOTAK</td>\n", " <td>AK</td>\n", " <td>99628.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>H0AK01046</td>\n", " <td>CATALANO, THOMAS</td>\n", " <td>OTH</td>\n", " <td>2020</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>188 WEST NORTHERN LIGHTS BOULEVARD</td>\n", " <td>NaN</td>\n", " <td>ANCHORAGE</td>\n", " <td>AK</td>\n", " <td>99503.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>H0AL01055</td>\n", " <td>CARL, JERRY LEE, JR</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00697789</td>\n", " <td>PO BOX 852138</td>\n", " <td>NaN</td>\n", " <td>MOBILE</td>\n", " <td>AL</td>\n", " <td>36685.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>H0AL01063</td>\n", " <td>LAMBERT, DOUGLAS WESTLEY III</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00701557</td>\n", " <td>7194 STILLWATER BLVD</td>\n", " <td>NaN</td>\n", " <td>SPANISH FORT</td>\n", " <td>AL</td>\n", " <td>36527.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "0 H0AK00105 LAMB, THOMAS ... AK 99654.0\n", "1 H0AK00113 TUGATUK, RAY SEAN ... AK 99628.0\n", "2 H0AK01046 CATALANO, THOMAS ... AK 99503.0\n", "3 H0AL01055 CARL, JERRY LEE, JR ... AL 36685.0\n", "4 H0AL01063 LAMBERT, DOUGLAS WESTLEY III ... 
AL 36527.0\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 6 } ] },
{ "cell_type": "code", "metadata": { "id": "uF9YJ-SQ6psu" }, "source": [
"# Only the candidate id and party affiliation are kept for the join; .copy() detaches the result.\n",
"candidates_final = candidates[['CAND_ID', 'CAND_PTY_AFFILIATION']].copy()" ], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "H4_26uJ23RXX", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "543f7301-8524-4da0-85da-b3f3c2a4e6a2" }, "source": [
"# Column names for ccl.txt come from the separate header CSV.\n",
"header = pd.read_csv(data_dir+'/ccl_header_file.csv')\n",
"\n",
"# Bind the archive to its own name rather than `zip`, so the builtin zip() and any other\n",
"# variable called `zip` in the notebook are left untouched when this cell runs.\n",
"with ZipFile(data_dir+'/ccl20.zip') as ccl_zip:\n",
"    # Close the member stream as soon as pandas has consumed it.\n",
"    with ccl_zip.open('ccl.txt') as ccl_file:\n",
"        linkage = pd.read_csv(ccl_file, sep='|', names=header.columns)\n",
"\n",
"linkage.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>FEC_ELECTION_YR</th>\n", " <th>CMTE_ID</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>LINKAGE_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00713602</td>\n", " <td>2019</td>\n", " <td>2020</td>\n", " <td>C00712851</td>\n", " <td>O</td>\n", " <td>U</td>\n", " <td>228963</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>H0AK00105</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00607515</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>229250</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>H0AL01055</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00697789</td>\n", " <td>H</td>\n", " <td>P</td>\n", " 
<td>226125</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>H0AL01063</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00701557</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227053</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>H0AL01071</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00701409</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227054</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_ELECTION_YR FEC_ELECTION_YR ... CMTE_TP CMTE_DSGN LINKAGE_ID\n", "0 C00713602 2019 2020 ... O U 228963\n", "1 H0AK00105 2020 2020 ... H P 229250\n", "2 H0AL01055 2020 2020 ... H P 226125\n", "3 H0AL01063 2020 2020 ... H P 227053\n", "4 H0AL01071 2020 2020 ... H P 227054\n", "\n", "[5 rows x 7 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 8 } ] },
{ "cell_type": "code", "metadata": { "id": "wyK3OZ3y7Srb", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "632f3820-90b7-4759-99e7-089714c8243b" }, "source": [
"# Inner join on CAND_ID: attach each candidate's party affiliation to its linkage rows.\n",
"df_merge = candidates_final.merge(linkage, how='inner', on='CAND_ID')\n",
"df_merge.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>FEC_ELECTION_YR</th>\n", " <th>CMTE_ID</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>LINKAGE_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>H0AK00105</td>\n", " <td>NNE</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00607515</td>\n", " 
<td>H</td>\n", " <td>P</td>\n", " <td>229250</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>H0AL01055</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00697789</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>226125</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>H0AL01063</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00701557</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227053</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>H0AL01071</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00701409</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227054</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>H0AL01089</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00703066</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227266</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_PTY_AFFILIATION ... CMTE_DSGN LINKAGE_ID\n", "0 H0AK00105 NNE ... P 229250\n", "1 H0AL01055 REP ... P 226125\n", "2 H0AL01063 REP ... P 227053\n", "3 H0AL01071 REP ... P 227054\n", "4 H0AL01089 REP ... 
P 227266\n", "\n", "[5 rows x 8 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 9 } ] },
{ "cell_type": "code", "metadata": { "id": "8i2m3TRG3QWd" }, "source": [ "" ], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "8ICMtX8B3TZi" }, "source": [
"# Discard rows that lack an EMPLOYER or an OCCUPATION value; sort_amt keeps its name.\n",
"sort_amt = sort_amt.dropna(subset=[\"EMPLOYER\", \"OCCUPATION\"])" ], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "jNiWeB9J3TN7", "colab": { "base_uri": "https://localhost:8080/", "height": 343 }, "outputId": "7d11d265-7680-4c8f-ccba-dc19ec310ec3" }, "source": [
"# Preview the ranking again now that incomplete rows are gone.\n",
"sort_amt.head(5)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>AMNDT_IND</th>\n", " <th>RPT_TP</th>\n", " <th>TRANSACTION_PGI</th>\n", " <th>IMAGE_NUM</th>\n", " <th>TRANSACTION_TP</th>\n", " <th>ENTITY_TP</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>OTHER_ID</th>\n", " <th>TRAN_ID</th>\n", " <th>FILE_NUM</th>\n", " <th>MEMO_CD</th>\n", " <th>MEMO_TEXT</th>\n", " <th>SUB_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>990582</th>\n", " <td>C00571703</td>\n", " <td>N</td>\n", " <td>M8</td>\n", " <td>P</td>\n", " <td>202008209266851913</td>\n", " <td>10</td>\n", " <td>IND</td>\n", " <td>MELLON, TIMOTHY</td>\n", " <td>SARATOGA</td>\n", " <td>WY</td>\n", " <td>823311500</td>\n", " <td>SELF-EMPLOYED</td>\n", " 
<td>INVESTMENTS</td>\n", " <td>7092020</td>\n", " <td>10000000</td>\n", " <td>NaN</td>\n", " <td>SA11A.15446</td>\n", " <td>1434706</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4090120201833903380</td>\n", " </tr>\n", " <tr>\n", " <th>990568</th>\n", " <td>C00571703</td>\n", " <td>N</td>\n", " <td>M8</td>\n", " <td>P</td>\n", " <td>202008209266851908</td>\n", " <td>10</td>\n", " <td>IND</td>\n", " <td>SCHWARZMAN, STEPHEN A.</td>\n", " <td>NEW YORK</td>\n", " <td>NY</td>\n", " <td>101543302</td>\n", " <td>BLACKSTONE</td>\n", " <td>CHAIRMAN & CEO</td>\n", " <td>7012020</td>\n", " <td>10000000</td>\n", " <td>NaN</td>\n", " <td>SA11A.15411</td>\n", " <td>1434706</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4090120201833903366</td>\n", " </tr>\n", " <tr>\n", " <th>988418</th>\n", " <td>C00547349</td>\n", " <td>N</td>\n", " <td>M8</td>\n", " <td>P</td>\n", " <td>202008209266445875</td>\n", " <td>10</td>\n", " <td>IND</td>\n", " <td>STEYER, THOMAS F.</td>\n", " <td>SAN FRANCISCO</td>\n", " <td>CA</td>\n", " <td>941049007</td>\n", " <td>FAHR, LLC</td>\n", " <td>FOUNDER</td>\n", " <td>7012020</td>\n", " <td>3479294</td>\n", " <td>NaN</td>\n", " <td>VNVNVHN8SQ0</td>\n", " <td>1434668</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4082920201831239483</td>\n", " </tr>\n", " <tr>\n", " <th>1001457</th>\n", " <td>C00495028</td>\n", " <td>N</td>\n", " <td>M8</td>\n", " <td>P</td>\n", " <td>202008209266639943</td>\n", " <td>10</td>\n", " <td>IND</td>\n", " <td>SIMONS, JAMES H.</td>\n", " <td>NEW YORK</td>\n", " <td>NY</td>\n", " <td>100107007</td>\n", " <td>EUCLIDEAN CAPITAL</td>\n", " <td>PRESIDENT</td>\n", " <td>7152020</td>\n", " <td>2500000</td>\n", " <td>NaN</td>\n", " <td>VN8FNNJW723</td>\n", " <td>1434687</td>\n", " <td>NaN</td>\n", " <td>NON-CONTRIBUTION ACCOUNT</td>\n", " <td>4090220201833936065</td>\n", " </tr>\n", " <tr>\n", " <th>860246</th>\n", " <td>C00620971</td>\n", " <td>N</td>\n", " <td>M8</td>\n", " <td>P</td>\n", " 
<td>202008209266126372</td>\n", " <td>10</td>\n", " <td>IND</td>\n", " <td>STEYER, THOMAS</td>\n", " <td>SAN FRANCISCO</td>\n", " <td>CA</td>\n", " <td>9.41045e+08</td>\n", " <td>FAHR LLC</td>\n", " <td>PHILANTHROPY AND ADVOCACY</td>\n", " <td>7242020</td>\n", " <td>2500000</td>\n", " <td>NaN</td>\n", " <td>VSH7WMSTV40</td>\n", " <td>1434556</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4090120201833903301</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID AMNDT_IND ... MEMO_TEXT SUB_ID\n", "990582 C00571703 N ... NaN 4090120201833903380\n", "990568 C00571703 N ... NaN 4090120201833903366\n", "988418 C00547349 N ... NaN 4082920201831239483\n", "1001457 C00495028 N ... NON-CONTRIBUTION ACCOUNT 4090220201833936065\n", "860246 C00620971 N ... NaN 4090120201833903301\n", "\n", "[5 rows x 21 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 25 } ] },
{ "cell_type": "code", "metadata": { "id": "ZpDCZF044orD", "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "outputId": "43935d99-c06b-4fcd-e723-4a5c5affe85d" }, "source": [
"# Summarise the employers of rows whose occupation is exactly 'EXECUTIVE'.\n",
"# A single .loc selection replaces the chained [mask][column] lookup.\n",
"sort_amt.loc[sort_amt['OCCUPATION'] == 'EXECUTIVE', 'EMPLOYER'].describe()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "count 6193\n", "unique 2349\n", "top SOUTHERN CA EDISON\n", "freq 215\n", "Name: EMPLOYER, dtype: object" ] }, "metadata": { "tags": [] }, "execution_count": 38 } ] },
{ "cell_type": "code", "metadata": { "id": "hyiY1HCz4oaE" }, "source": [ "" ], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "k0qWZ1iFAlCE", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "490d0372-c2ff-4de9-8ad7-77783c72dd63" }, "source": [
"# Rows whose employer already appeared earlier in df: duplicated() leaves the first\n",
"# occurrence of each value unmarked. The 'NOT EMPLOYED' / 'RETIRED' placeholders are skipped.\n",
"# duplicated() treats missing values as equal to each other, so rows without an employer\n",
"# are excluded explicitly; otherwise NaN would be reported as a repeated employer.\n",
"df_newdup = df[df['EMPLOYER'].notna() &\n",
"               df['EMPLOYER'].duplicated() &\n",
"               ~df['EMPLOYER'].isin(['NOT EMPLOYED', 'RETIRED'])]\n",
"\n",
"df_newdup.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { 
"text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>7</th>\n", " <td>C00706333</td>\n", " <td>ALVAREZ, JACK</td>\n", " <td>TRACY</td>\n", " <td>CA</td>\n", " <td>95304</td>\n", " <td>ALVAREZ FARMS, INC.</td>\n", " <td>PRESIDENT</td>\n", " <td>9302020</td>\n", " <td>2300</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>C00706333</td>\n", " <td>ALVAREZ, JACK</td>\n", " <td>TRACY</td>\n", " <td>CA</td>\n", " <td>95304</td>\n", " <td>ALVAREZ FARMS, INC.</td>\n", " <td>PRESIDENT</td>\n", " <td>9302020</td>\n", " <td>200</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>C00431932</td>\n", " <td>COOPER, DAVID</td>\n", " <td>NEW BRAUNFELS</td>\n", " <td>TX</td>\n", " <td>78132</td>\n", " <td>OVINTIV SERVICES INC.</td>\n", " <td>DRILLING COORDINATOR</td>\n", " <td>6302020</td>\n", " <td>104</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>C00431932</td>\n", " <td>CURRAN, KENT</td>\n", " <td>LITTLETON</td>\n", " <td>CO</td>\n", " <td>80127</td>\n", " <td>OVINTIV SERVICES INC.</td>\n", " <td>SENIOR LAND NEGOTIATOR</td>\n", " <td>6302020</td>\n", " <td>20</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>C00431932</td>\n", " <td>DARLINGTON, BRUCE</td>\n", " <td>SPRING</td>\n", " <td>TX</td>\n", " <td>77379</td>\n", " <td>OVINTIV SERVICES INC.</td>\n", " <td>SR. 
MANAGER, DRILLING & COMPL</td>\n", " <td>6302020</td>\n", " <td>50</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME ... TRANSACTION_DT TRANSACTION_AMT\n", "7 C00706333 ALVAREZ, JACK ... 9302020 2300\n", "8 C00706333 ALVAREZ, JACK ... 9302020 200\n", "13 C00431932 COOPER, DAVID ... 6302020 104\n", "14 C00431932 CURRAN, KENT ... 6302020 20\n", "15 C00431932 DARLINGTON, BRUCE ... 6302020 50\n", "\n", "[5 rows x 9 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 12 } ] },
{ "cell_type": "code", "metadata": { "id": "0OznZbEdSjB1" }, "source": [ "# Preview the 25 most frequent EMPLOYER values (missing values included)\n", "# instead of dumping the whole set: the output stays bounded and is sorted by count.\n", "df_newdup['EMPLOYER'].value_counts(dropna=False).head(25)" ], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "UTIiuGxW938o", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "7ef9a292-4ce5-4a57-c2c1-73d96de700e9" }, "source": [ "# Number of distinct values in the EMPLOYER column.\n", "len(set(df_newdup['EMPLOYER']))" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "65420" ] }, "metadata": { "tags": [] }, "execution_count": 13 } ] },
{ "cell_type": "code", "metadata": { "id": "BTZ4nqIiaA55" }, "source": [ "# Drop rows without an employer. Rebinding the name (rather than calling\n", "# dropna(inplace=True) on a frame that is itself a slice of another frame)\n", "# avoids pandas' SettingWithCopyWarning.\n", "df_newdup = df_newdup.dropna(subset=[\"EMPLOYER\"])" ], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "yUEct7Y5XyPf", "colab": { "base_uri": "https://localhost:8080/", "height": 979 }, "outputId": "b2bd54a6-52f1-43d2-cb48-71dd48bd9c81" }, "source": [ "# Literal substring match on EMPLOYER: this also selects employers that merely\n", "# contain the phrase (e.g. 'KAMAN AEROSPACE CORPORATION').\n", "# na=False treats a missing employer as a non-match; .copy() makes df_aero an\n", "# independent frame rather than a slice of df_newdup.\n", "is_aero = df_newdup['EMPLOYER'].str.contains('AEROSPACE CORPORATION', regex=False, na=False)\n", "df_aero = df_newdup[is_aero].copy()\n", "df_aero" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>136824</th>\n", " <td>C00703975</td>\n", " <td>DAVIS, LORRIE</td>\n", " <td>LOS ANGELES</td>\n", " <td>CA</td>\n", " <td>900561529</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT ENGINEER</td>\n", " <td>7112020</td>\n", " <td>20</td>\n", "
</tr>\n", " <tr>\n", " <th>150573</th>\n", " <td>C00703975</td>\n", " <td>SIMPSON, MARK M</td>\n", " <td>LONG BEACH</td>\n", " <td>CA</td>\n", " <td>908083812</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7242020</td>\n", " <td>200</td>\n", " </tr>\n", " <tr>\n", " <th>150574</th>\n", " <td>C00703975</td>\n", " <td>SIMPSON, MARK M</td>\n", " <td>LONG BEACH</td>\n", " <td>CA</td>\n", " <td>908083812</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7292020</td>\n", " <td>200</td>\n", " </tr>\n", " <tr>\n", " <th>191693</th>\n", " <td>C00703975</td>\n", " <td>YOUNG, KAROLYN</td>\n", " <td>REDONDO BEACH</td>\n", " <td>CA</td>\n", " <td>9.02771e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7252020</td>\n", " <td>250</td>\n", " </tr>\n", " <tr>\n", " <th>201669</th>\n", " <td>C00703975</td>\n", " <td>STUTTERHEIM, KENNETH B.</td>\n", " <td>PASADENA</td>\n", " <td>MD</td>\n", " <td>2.11223e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEERING SPECIALIST</td>\n", " <td>7162020</td>\n", " <td>250</td>\n", " </tr>\n", " <tr>\n", " <th>246960</th>\n", " <td>C00703975</td>\n", " <td>JAGER, AMY</td>\n", " <td>INDIAN HARBOUR BEACH</td>\n", " <td>FL</td>\n", " <td>329373526</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7272020</td>\n", " <td>15</td>\n", " </tr>\n", " <tr>\n", " <th>404391</th>\n", " <td>C00193433</td>\n", " <td>SIMPSON, MARK M. MR.</td>\n", " <td>LONG BEACH</td>\n", " <td>CA</td>\n", " <td>90808</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7292020</td>\n", " <td>200</td>\n", " </tr>\n", " <tr>\n", " <th>493316</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. 
MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7232020</td>\n", " <td>10</td>\n", " </tr>\n", " <tr>\n", " <th>541906</th>\n", " <td>C00000935</td>\n", " <td>ALVAREZ, MANUEL</td>\n", " <td>SAN PEDRO</td>\n", " <td>CA</td>\n", " <td>9.07311e+08</td>\n", " <td>AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7232020</td>\n", " <td>35</td>\n", " </tr>\n", " <tr>\n", " <th>547077</th>\n", " <td>C00000935</td>\n", " <td>GUNAY, DEVIN</td>\n", " <td>LOS ANGELES</td>\n", " <td>CA</td>\n", " <td>9.00347e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SOFTWARE ENGINEER</td>\n", " <td>7132020</td>\n", " <td>40</td>\n", " </tr>\n", " <tr>\n", " <th>585671</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7042020</td>\n", " <td>25</td>\n", " </tr>\n", " <tr>\n", " <th>625961</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>8.09114e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>7302020</td>\n", " <td>100</td>\n", " </tr>\n", " <tr>\n", " <th>627176</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. 
MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7262020</td>\n", " <td>10</td>\n", " </tr>\n", " <tr>\n", " <th>920917</th>\n", " <td>C00010603</td>\n", " <td>MERRILL, ALBERT W</td>\n", " <td>VENICE</td>\n", " <td>CA</td>\n", " <td>90291</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7262020</td>\n", " <td>25</td>\n", " </tr>\n", " <tr>\n", " <th>922676</th>\n", " <td>C00010603</td>\n", " <td>ESLINGER, SUELLEN</td>\n", " <td>REDONDO BEACH</td>\n", " <td>CA</td>\n", " <td>9.02782e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7262020</td>\n", " <td>300</td>\n", " </tr>\n", " <tr>\n", " <th>934612</th>\n", " <td>C00010603</td>\n", " <td>MERRILL, ALBERT W</td>\n", " <td>VENICE</td>\n", " <td>CA</td>\n", " <td>90291</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7312020</td>\n", " <td>25</td>\n", " </tr>\n", " <tr>\n", " <th>936453</th>\n", " <td>C00010603</td>\n", " <td>MERRILL, ALBERT W</td>\n", " <td>VENICE</td>\n", " <td>CA</td>\n", " <td>90291</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7312020</td>\n", " <td>25</td>\n", " </tr>\n", " <tr>\n", " <th>938548</th>\n", " <td>C00010603</td>\n", " <td>MERRILL, ALBERT W</td>\n", " <td>VENICE</td>\n", " <td>CA</td>\n", " <td>90291</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7262020</td>\n", " <td>12</td>\n", " </tr>\n", " <tr>\n", " <th>946916</th>\n", " <td>C00010603</td>\n", " <td>BYERS, MARK</td>\n", " <td>SAN DIEGO</td>\n", " <td>CA</td>\n", " <td>9.21096e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7122020</td>\n", " <td>65</td>\n", " </tr>\n", " <tr>\n", " <th>947929</th>\n", " <td>C00010603</td>\n", " <td>FRICKS, KATHRYN</td>\n", " <td>GREENBELT</td>\n", " <td>MD</td>\n", " 
<td>2.07704e+08</td>\n", " <td>AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7312020</td>\n", " <td>500</td>\n", " </tr>\n", " <tr>\n", " <th>992812</th>\n", " <td>C00484642</td>\n", " <td>ALVAREZ, MANUEL</td>\n", " <td>SAN PEDRO</td>\n", " <td>CA</td>\n", " <td>907311416</td>\n", " <td>AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7242020</td>\n", " <td>50</td>\n", " </tr>\n", " <tr>\n", " <th>1014099</th>\n", " <td>C00484642</td>\n", " <td>ALVAREZ, MANUEL</td>\n", " <td>SAN PEDRO</td>\n", " <td>CA</td>\n", " <td>907311416</td>\n", " <td>AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7152020</td>\n", " <td>75</td>\n", " </tr>\n", " <tr>\n", " <th>1107728</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " </tr>\n", " <tr>\n", " <th>1109284</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " </tr>\n", " <tr>\n", " <th>1157904</th>\n", " <td>C00003418</td>\n", " <td>BAUER, SPENCER J. 
MR.</td>\n", " <td>EL SEGUNDO</td>\n", " <td>CA</td>\n", " <td>902453728</td>\n", " <td>AEROSPACE CORPORATION</td>\n", " <td>DIRECTOR</td>\n", " <td>6302020</td>\n", " <td>50</td>\n", " </tr>\n", " <tr>\n", " <th>1213364</th>\n", " <td>C00696526</td>\n", " <td>HOLLANDER, SIDNEY</td>\n", " <td>GLENDALE</td>\n", " <td>AZ</td>\n", " <td>853180038</td>\n", " <td>AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7092020</td>\n", " <td>250</td>\n", " </tr>\n", " <tr>\n", " <th>1254043</th>\n", " <td>C00401224</td>\n", " <td>WHITE, RUSSELL</td>\n", " <td>FAIRFAX</td>\n", " <td>VA</td>\n", " <td>220305208</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SCIENTIST</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " </tr>\n", " <tr>\n", " <th>1363622</th>\n", " <td>C00126847</td>\n", " <td>SMITH, DARLENE</td>\n", " <td>CHARLESTOWN</td>\n", " <td>RI</td>\n", " <td>02813</td>\n", " <td>KAMAN AEROSPACE CORPORATION</td>\n", " <td>VP GM AIR VEHICLES</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " </tr>\n", " <tr>\n", " <th>1460070</th>\n", " <td>C00694323</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " </tr>\n", " <tr>\n", " <th>1507410</th>\n", " <td>C00694323</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME ... TRANSACTION_DT TRANSACTION_AMT\n", "136824 C00703975 DAVIS, LORRIE ... 7112020 20\n", "150573 C00703975 SIMPSON, MARK M ... 7242020 200\n", "150574 C00703975 SIMPSON, MARK M ... 7292020 200\n", "191693 C00703975 YOUNG, KAROLYN ... 7252020 250\n", "201669 C00703975 STUTTERHEIM, KENNETH B. ... 
7162020 250\n", "246960 C00703975 JAGER, AMY ... 7272020 15\n", "404391 C00193433 SIMPSON, MARK M. MR. ... 7292020 200\n", "493316 C00075820 FARAGO, ZOLTAN L. MR. ... 7232020 10\n", "541906 C00000935 ALVAREZ, MANUEL ... 7232020 35\n", "547077 C00000935 GUNAY, DEVIN ... 7132020 40\n", "585671 C00075820 FARAGO, ZOLTAN L. MR. ... 7042020 25\n", "625961 C00075820 CINLEMIS, MICHELLE ... 7302020 100\n", "627176 C00075820 FARAGO, ZOLTAN L. MR. ... 7262020 10\n", "920917 C00010603 MERRILL, ALBERT W ... 7262020 25\n", "922676 C00010603 ESLINGER, SUELLEN ... 7262020 300\n", "934612 C00010603 MERRILL, ALBERT W ... 7312020 25\n", "936453 C00010603 MERRILL, ALBERT W ... 7312020 25\n", "938548 C00010603 MERRILL, ALBERT W ... 7262020 12\n", "946916 C00010603 BYERS, MARK ... 7122020 65\n", "947929 C00010603 FRICKS, KATHRYN ... 7312020 500\n", "992812 C00484642 ALVAREZ, MANUEL ... 7242020 50\n", "1014099 C00484642 ALVAREZ, MANUEL ... 7152020 75\n", "1107728 C00075820 CINLEMIS, MICHELLE ... 6302020 100\n", "1109284 C00075820 CINLEMIS, MICHELLE ... 6302020 100\n", "1157904 C00003418 BAUER, SPENCER J. MR. ... 6302020 50\n", "1213364 C00696526 HOLLANDER, SIDNEY ... 7092020 250\n", "1254043 C00401224 WHITE, RUSSELL ... 6302020 100\n", "1363622 C00126847 SMITH, DARLENE ... 6302020 100\n", "1460070 C00694323 CINLEMIS, MICHELLE ... 6302020 100\n", "1507410 C00694323 CINLEMIS, MICHELLE ... 
6302020 100\n", "\n", "[30 rows x 9 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 34 } ] }, { "cell_type": "code", "metadata": { "id": "n1Em1PItYDEH", "colab": { "base_uri": "https://localhost:8080/", "height": 758 }, "outputId": "6fc5d917-59ed-4b2b-8b0e-aebf68ebcfb9" }, "source": [ "# Join each row of df_bob to the df_merge rows that share its CMTE_ID (inner join).\n", "df_aero_merge = df_bob.merge(df_merge, how='inner', on='CMTE_ID')\n", "df_aero_merge" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>FEC_ELECTION_YR</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>LINKAGE_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00703975</td>\n", " <td>DAVIS, LORRIE</td>\n", " <td>LOS ANGELES</td>\n", " <td>CA</td>\n", " <td>900561529</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT ENGINEER</td>\n", " <td>7112020</td>\n", " <td>20</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00703975</td>\n", " <td>SIMPSON, MARK M</td>\n", " <td>LONG BEACH</td>\n", " <td>CA</td>\n", " <td>908083812</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7242020</td>\n", " 
<td>200</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00703975</td>\n", " <td>SIMPSON, MARK M</td>\n", " <td>LONG BEACH</td>\n", " <td>CA</td>\n", " <td>908083812</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7292020</td>\n", " <td>200</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00703975</td>\n", " <td>YOUNG, KAROLYN</td>\n", " <td>REDONDO BEACH</td>\n", " <td>CA</td>\n", " <td>9.02771e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7252020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00703975</td>\n", " <td>STUTTERHEIM, KENNETH B.</td>\n", " <td>PASADENA</td>\n", " <td>MD</td>\n", " <td>2.11223e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEERING SPECIALIST</td>\n", " <td>7162020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>C00703975</td>\n", " <td>JAGER, AMY</td>\n", " <td>INDIAN HARBOUR BEACH</td>\n", " <td>FL</td>\n", " <td>329373526</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7272020</td>\n", " <td>15</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. 
MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7232020</td>\n", " <td>10</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>Y</td>\n", " <td>U</td>\n", " <td>232064</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7042020</td>\n", " <td>25</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>Y</td>\n", " <td>U</td>\n", " <td>232064</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>8.09114e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>7302020</td>\n", " <td>100</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>Y</td>\n", " <td>U</td>\n", " <td>232064</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. 
MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7262020</td>\n", " <td>10</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>Y</td>\n", " <td>U</td>\n", " <td>232064</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>Y</td>\n", " <td>U</td>\n", " <td>232064</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>Y</td>\n", " <td>U</td>\n", " <td>232064</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>C00696526</td>\n", " <td>HOLLANDER, SIDNEY</td>\n", " <td>GLENDALE</td>\n", " <td>AZ</td>\n", " <td>853180038</td>\n", " <td>AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7092020</td>\n", " <td>250</td>\n", " <td>S0AZ00350</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>S</td>\n", " <td>P</td>\n", " <td>225862</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME ... CMTE_DSGN LINKAGE_ID\n", "0 C00703975 DAVIS, LORRIE ... P 227491\n", "1 C00703975 SIMPSON, MARK M ... P 227491\n", "2 C00703975 SIMPSON, MARK M ... P 227491\n", "3 C00703975 YOUNG, KAROLYN ... P 227491\n", "4 C00703975 STUTTERHEIM, KENNETH B. ... P 227491\n", "5 C00703975 JAGER, AMY ... 
P 227491\n", "6 C00075820 FARAGO, ZOLTAN L. MR. ... U 232064\n", "7 C00075820 FARAGO, ZOLTAN L. MR. ... U 232064\n", "8 C00075820 CINLEMIS, MICHELLE ... U 232064\n", "9 C00075820 FARAGO, ZOLTAN L. MR. ... U 232064\n", "10 C00075820 CINLEMIS, MICHELLE ... U 232064\n", "11 C00075820 CINLEMIS, MICHELLE ... U 232064\n", "12 C00696526 HOLLANDER, SIDNEY ... P 225862\n", "\n", "[13 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 35 } ] }, { "cell_type": "code", "metadata": { "id": "4qQlV6tnlega" }, "source": [ "# One district code per row of df_aero_merge, in row order; the next cell\n", "# assigns the list positionally, so its length and order must match the frame.\n", "CD = ['CA-37', 'CA-47', 'CA-47', 'CA-33', 'MD-03', 'FL-08', 'VA-05', 'VA-05', 'CO-05', 'VA-05', 'CO-05', 'CO-05', 'AZ-07']" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "PPqAAylhx0mj" }, "source": [ "df_aero_merge['CD'] = CD" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Ls7gegLCyafO" }, "source": [ "# errors='ignore' keeps this cell re-runnable: df_aero_merge is overwritten here,\n", "# so on a second run these columns are already gone and a plain drop would raise KeyError.\n", "df_aero_merge = df_aero_merge.drop(\n", "    columns=['CAND_ELECTION_YR', 'FEC_ELECTION_YR', 'CMTE_TP', 'CMTE_DSGN'],\n", "    errors='ignore',\n", ")" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Sn-H_tkYzBcc", "colab": { "base_uri": "https://localhost:8080/", "height": 673 }, "outputId": "b9c7ea40-2dcf-4b44-a00b-f4b7caf32df4" }, "source": [ "# Inner join on CD: rows whose CD has no match in trends are dropped from the result.\n", "df_aero_final = pd.merge(df_aero_merge, trends, on='CD', how='inner')\n", "df_aero_final" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", 
" <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>LINKAGE_ID</th>\n", " <th>CD</th>\n", " <th>Party</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00703975</td>\n", " <td>DAVIS, LORRIE</td>\n", " <td>LOS ANGELES</td>\n", " <td>CA</td>\n", " <td>900561529</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT ENGINEER</td>\n", " <td>7112020</td>\n", " <td>20</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>CA-37</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00703975</td>\n", " <td>SIMPSON, MARK M</td>\n", " <td>LONG BEACH</td>\n", " <td>CA</td>\n", " <td>908083812</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7242020</td>\n", " <td>200</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>CA-47</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00703975</td>\n", " <td>SIMPSON, MARK M</td>\n", " <td>LONG BEACH</td>\n", " <td>CA</td>\n", " <td>908083812</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7292020</td>\n", " <td>200</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>CA-47</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00703975</td>\n", " <td>YOUNG, KAROLYN</td>\n", " <td>REDONDO BEACH</td>\n", " <td>CA</td>\n", " <td>9.02771e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7252020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>CA-33</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00703975</td>\n", " <td>STUTTERHEIM, KENNETH B.</td>\n", " <td>PASADENA</td>\n", " <td>MD</td>\n", " <td>2.11223e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " 
<td>ENGINEERING SPECIALIST</td>\n", " <td>7162020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MD-03</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>C00703975</td>\n", " <td>JAGER, AMY</td>\n", " <td>INDIAN HARBOUR BEACH</td>\n", " <td>FL</td>\n", " <td>329373526</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7272020</td>\n", " <td>15</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>FL-08</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7232020</td>\n", " <td>10</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>VA-05</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7042020</td>\n", " <td>25</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>VA-05</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. 
MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7262020</td>\n", " <td>10</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>VA-05</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>8.09114e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>7302020</td>\n", " <td>100</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>CO-05</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>CO-05</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>CO-05</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>C00696526</td>\n", " <td>HOLLANDER, SIDNEY</td>\n", " <td>GLENDALE</td>\n", " <td>AZ</td>\n", " <td>853180038</td>\n", " <td>AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7092020</td>\n", " <td>250</td>\n", " <td>S0AZ00350</td>\n", " <td>DEM</td>\n", " <td>225862</td>\n", " <td>AZ-07</td>\n", " <td>(D)</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME ... 
CD Party\n", "0 C00703975 DAVIS, LORRIE ... CA-37 (D)\n", "1 C00703975 SIMPSON, MARK M ... CA-47 (D)\n", "2 C00703975 SIMPSON, MARK M ... CA-47 (D)\n", "3 C00703975 YOUNG, KAROLYN ... CA-33 (D)\n", "4 C00703975 STUTTERHEIM, KENNETH B. ... MD-03 (D)\n", "5 C00703975 JAGER, AMY ... FL-08 (R)\n", "6 C00075820 FARAGO, ZOLTAN L. MR. ... VA-05 (R)\n", "7 C00075820 FARAGO, ZOLTAN L. MR. ... VA-05 (R)\n", "8 C00075820 FARAGO, ZOLTAN L. MR. ... VA-05 (R)\n", "9 C00075820 CINLEMIS, MICHELLE ... CO-05 (R)\n", "10 C00075820 CINLEMIS, MICHELLE ... CO-05 (R)\n", "11 C00075820 CINLEMIS, MICHELLE ... CO-05 (R)\n", "12 C00696526 HOLLANDER, SIDNEY ... AZ-07 (D)\n", "\n", "[13 rows x 14 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 42 } ] }, { "cell_type": "code", "metadata": { "id": "R1wEhe8Rzaa_" }, "source": [ "# INDEX is 1 where CAND_PTY_AFFILIATION equals 'DEM' and 0 for every other value.\n", "df_aero_final['INDEX'] = df_aero_final['CAND_PTY_AFFILIATION'].eq('DEM').astype(int)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Ix6G1cxzzuUA" }, "source": [ "# Constant column: every row gets the value 1.\n", "df_aero_final['INDEX_BOSS'] = 1" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "dQqF_pwqldjH", "colab": { "base_uri": "https://localhost:8080/", "height": 673 }, "outputId": "4a60d88a-2393-487c-f12e-9f420683be30" }, "source": [ "df_aero_final" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", 
<th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>LINKAGE_ID</th>\n", " <th>CD</th>\n", " <th>Party</th>\n", " <th>INDEX</th>\n", " <th>INDEX_BOSS</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00703975</td>\n", " <td>DAVIS, LORRIE</td>\n", " <td>LOS ANGELES</td>\n", " <td>CA</td>\n", " <td>900561529</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT ENGINEER</td>\n", " <td>7112020</td>\n", " <td>20</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>CA-37</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00703975</td>\n", " <td>SIMPSON, MARK M</td>\n", " <td>LONG BEACH</td>\n", " <td>CA</td>\n", " <td>908083812</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7242020</td>\n", " <td>200</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>CA-47</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00703975</td>\n", " <td>SIMPSON, MARK M</td>\n", " <td>LONG BEACH</td>\n", " <td>CA</td>\n", " <td>908083812</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7292020</td>\n", " <td>200</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>CA-47</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00703975</td>\n", " <td>YOUNG, KAROLYN</td>\n", " <td>REDONDO BEACH</td>\n", " <td>CA</td>\n", " <td>9.02771e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7252020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>CA-33</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00703975</td>\n", " <td>STUTTERHEIM, KENNETH B.</td>\n", " 
<td>PASADENA</td>\n", " <td>MD</td>\n", " <td>2.11223e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEERING SPECIALIST</td>\n", " <td>7162020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MD-03</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>C00703975</td>\n", " <td>JAGER, AMY</td>\n", " <td>INDIAN HARBOUR BEACH</td>\n", " <td>FL</td>\n", " <td>329373526</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7272020</td>\n", " <td>15</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>FL-08</td>\n", " <td>(R)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7232020</td>\n", " <td>10</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>VA-05</td>\n", " <td>(R)</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7042020</td>\n", " <td>25</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>VA-05</td>\n", " <td>(R)</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>C00075820</td>\n", " <td>FARAGO, ZOLTAN L. 
MR.</td>\n", " <td>BROAD RUN</td>\n", " <td>VA</td>\n", " <td>2.01372e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>PROJECT ENGINEER</td>\n", " <td>7262020</td>\n", " <td>10</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>VA-05</td>\n", " <td>(R)</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>8.09114e+08</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>7302020</td>\n", " <td>100</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>CO-05</td>\n", " <td>(R)</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>CO-05</td>\n", " <td>(R)</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>C00075820</td>\n", " <td>CINLEMIS, MICHELLE</td>\n", " <td>COLORADO SPRINGS</td>\n", " <td>CO</td>\n", " <td>809113801</td>\n", " <td>THE AEROSPACE CORPORATION</td>\n", " <td>SENIOR PROJECT LEADER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0NY27090</td>\n", " <td>REP</td>\n", " <td>232064</td>\n", " <td>CO-05</td>\n", " <td>(R)</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>C00696526</td>\n", " <td>HOLLANDER, SIDNEY</td>\n", " <td>GLENDALE</td>\n", " <td>AZ</td>\n", " <td>853180038</td>\n", " <td>AEROSPACE CORPORATION</td>\n", " <td>ENGINEER</td>\n", " <td>7092020</td>\n", " <td>250</td>\n", " <td>S0AZ00350</td>\n", " <td>DEM</td>\n", " <td>225862</td>\n", " <td>AZ-07</td>\n", 
" <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME ... INDEX INDEX_BOSS\n", "0 C00703975 DAVIS, LORRIE ... 1 1\n", "1 C00703975 SIMPSON, MARK M ... 1 1\n", "2 C00703975 SIMPSON, MARK M ... 1 1\n", "3 C00703975 YOUNG, KAROLYN ... 1 1\n", "4 C00703975 STUTTERHEIM, KENNETH B. ... 1 1\n", "5 C00703975 JAGER, AMY ... 1 1\n", "6 C00075820 FARAGO, ZOLTAN L. MR. ... 0 1\n", "7 C00075820 FARAGO, ZOLTAN L. MR. ... 0 1\n", "8 C00075820 FARAGO, ZOLTAN L. MR. ... 0 1\n", "9 C00075820 CINLEMIS, MICHELLE ... 0 1\n", "10 C00075820 CINLEMIS, MICHELLE ... 0 1\n", "11 C00075820 CINLEMIS, MICHELLE ... 0 1\n", "12 C00696526 HOLLANDER, SIDNEY ... 1 1\n", "\n", "[13 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 45 } ] }, { "cell_type": "code", "metadata": { "id": "JZdHyMWo0Pbl" }, "source": [ "# Keep only the two indicator columns that feed the regression.\n", "subset2 = df_aero_final[['INDEX', 'INDEX_BOSS']]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "4-m6o5ek0Pup" }, "source": [ "from sklearn.linear_model import LinearRegression" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "UQeN6gFQ0CoX", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "b5f639bc-9878-4fd2-bfec-445ecc3aba55" }, "source": [ "from sklearn.preprocessing import MinMaxScaler\n", "\n", "# Scale both columns with one fitted MinMaxScaler.\n", "# NOTE(review): INDEX_BOSS is assigned the constant 1 for every row, so the\n", "# target column has no variance - confirm this regression is what is intended.\n", "scaler1 = MinMaxScaler()\n", "inner_join_scaled = scaler1.fit_transform(subset2)\n", "\n", "# Column 0 (INDEX) is the feature, column 1 (INDEX_BOSS) the target; both stay 2-D.\n", "x = inner_join_scaled[:, 0:1]\n", "y = inner_join_scaled[:, 1:2]\n", "\n", "linear_regressor = LinearRegression()\n", "linear_regressor.fit(x, y)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" ] }, "metadata": { "tags": [] }, "execution_count": 48 } ] }, { "cell_type": "code", "metadata": { "id": "kdgNzW9Q0k7v" }, "source": 
[ "" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "vqZW4AL43o0r", "colab": { "base_uri": "https://localhost:8080/", "height": 419 }, "outputId": "76681360-3984-4485-f175-2fa4f4713437" }, "source": [ "# Rows whose EMPLOYER contains the literal text 'AT&T'.\n", "# regex=False matches the text as-is; na=False counts a missing EMPLOYER as a\n", "# non-match instead of yielding a NaN mask that cannot be used for indexing.\n", "df4 = df_newdup[df_newdup['EMPLOYER'].str.contains('AT&T', regex=False, na=False)]\n", "df4" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>13062</th>\n", " <td>C00703975</td>\n", " <td>FAVARA, RICHARD</td>\n", " <td>FREEHOLD</td>\n", " <td>NJ</td>\n", " <td>7.72843e+07</td>\n", " <td>AT&T</td>\n", " <td>SALES</td>\n", " <td>7082020</td>\n", " <td>25</td>\n", " </tr>\n", " <tr>\n", " <th>13063</th>\n", " <td>C00703975</td>\n", " <td>FAVARA, RICHARD</td>\n", " <td>FREEHOLD</td>\n", " <td>NJ</td>\n", " <td>7.72843e+07</td>\n", " <td>AT&T</td>\n", " <td>SALES</td>\n", " <td>7142020</td>\n", " <td>17</td>\n", " </tr>\n", " <tr>\n", " <th>13064</th>\n", " <td>C00703975</td>\n", " <td>FAVARA, RICHARD</td>\n", " <td>FREEHOLD</td>\n", " <td>NJ</td>\n", " <td>7.72843e+07</td>\n", " <td>AT&T</td>\n", " <td>SALES</td>\n", " <td>7192020</td>\n", " <td>17</td>\n", " </tr>\n", " <tr>\n", " <th>13622</th>\n", " <td>C00703975</td>\n", " <td>EMERSON, TERRY</td>\n", " <td>DALLAS</td>\n", " <td>TX</td>\n", " <td>7.52242e+08</td>\n", " <td>AT&T</td>\n", " <td>PROJECT 
MANAGER</td>\n", " <td>7012020</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>13623</th>\n", " <td>C00703975</td>\n", " <td>EMERSON, TERRY</td>\n", " <td>DALLAS</td>\n", " <td>TX</td>\n", " <td>7.52242e+08</td>\n", " <td>AT&T</td>\n", " <td>PROJECT MANAGER</td>\n", " <td>7162020</td>\n", " <td>21</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>1583204</th>\n", " <td>C00694323</td>\n", " <td>HERNANDEZ, JOE</td>\n", " <td>SAN BRUNO</td>\n", " <td>CA</td>\n", " <td>940661112</td>\n", " <td>AT&T</td>\n", " <td>SPLICING TECHNICIAN</td>\n", " <td>6302020</td>\n", " <td>10</td>\n", " </tr>\n", " <tr>\n", " <th>1587874</th>\n", " <td>C00694323</td>\n", " <td>HERNANDEZ, JOE</td>\n", " <td>SAN BRUNO</td>\n", " <td>CA</td>\n", " <td>940661112</td>\n", " <td>AT&T</td>\n", " <td>SPLICING TECHNICIAN</td>\n", " <td>6302020</td>\n", " <td>20</td>\n", " </tr>\n", " <tr>\n", " <th>1595694</th>\n", " <td>C00694323</td>\n", " <td>HERNANDEZ, JOE</td>\n", " <td>SAN BRUNO</td>\n", " <td>CA</td>\n", " <td>940661112</td>\n", " <td>AT&T</td>\n", " <td>SPLICING TECHNICIAN</td>\n", " <td>6302020</td>\n", " <td>10</td>\n", " </tr>\n", " <tr>\n", " <th>1600117</th>\n", " <td>C00694323</td>\n", " <td>HERNANDEZ, JOE</td>\n", " <td>SAN BRUNO</td>\n", " <td>CA</td>\n", " <td>940661112</td>\n", " <td>AT&T</td>\n", " <td>SPLICING TECHNICIAN</td>\n", " <td>6302020</td>\n", " <td>25</td>\n", " </tr>\n", " <tr>\n", " <th>1603559</th>\n", " <td>C00694323</td>\n", " <td>ORTIZ, LISA</td>\n", " <td>RIVERSIDE</td>\n", " <td>CA</td>\n", " <td>925035708</td>\n", " <td>AT&T</td>\n", " <td>PM</td>\n", " <td>6302020</td>\n", " <td>35</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>4226 rows × 9 columns</p>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME ... 
TRANSACTION_DT TRANSACTION_AMT\n", "13062 C00703975 FAVARA, RICHARD ... 7082020 25\n", "13063 C00703975 FAVARA, RICHARD ... 7142020 17\n", "13064 C00703975 FAVARA, RICHARD ... 7192020 17\n", "13622 C00703975 EMERSON, TERRY ... 7012020 5\n", "13623 C00703975 EMERSON, TERRY ... 7162020 21\n", "... ... ... ... ... ...\n", "1583204 C00694323 HERNANDEZ, JOE ... 6302020 10\n", "1587874 C00694323 HERNANDEZ, JOE ... 6302020 20\n", "1595694 C00694323 HERNANDEZ, JOE ... 6302020 10\n", "1600117 C00694323 HERNANDEZ, JOE ... 6302020 25\n", "1603559 C00694323 ORTIZ, LISA ... 6302020 35\n", "\n", "[4226 rows x 9 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 16 } ] }, { "cell_type": "code", "metadata": { "id": "YvlPckei7uEA", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "757de2e5-e546-4bd7-d851-5a318d8008a0" }, "source": [ "# Inner join on CMTE_ID: only rows of df4 whose committee also appears in df_merge are kept.\n", "df5 = df4.merge(df_merge, how='inner', on='CMTE_ID')\n", "df5.tail()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>FEC_ELECTION_YR</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>LINKAGE_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>243</th>\n", " <td>C00711549</td>\n", " <td>COLLINS, RICK</td>\n", " <td>LAKEWOOD</td>\n", " <td>WA</td>\n", "
<td>98498</td>\n", " <td>AT&T</td>\n", " <td>SALES CONSULTANT</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>S0KY00339</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>S</td>\n", " <td>P</td>\n", " <td>228669</td>\n", " </tr>\n", " <tr>\n", " <th>244</th>\n", " <td>C00711549</td>\n", " <td>NURSE, CHRIS</td>\n", " <td>ROCKVILLE</td>\n", " <td>MD</td>\n", " <td>20850</td>\n", " <td>AT&T</td>\n", " <td>MANAGER</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>S0KY00339</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>S</td>\n", " <td>P</td>\n", " <td>228669</td>\n", " </tr>\n", " <tr>\n", " <th>245</th>\n", " <td>C00666040</td>\n", " <td>HERNANDEZ, JOE</td>\n", " <td>SAN BRUNO</td>\n", " <td>CA</td>\n", " <td>940661112</td>\n", " <td>AT&T</td>\n", " <td>SPLICING TECHNICIAN</td>\n", " <td>6302020</td>\n", " <td>10</td>\n", " <td>S8AZ00221</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>S</td>\n", " <td>P</td>\n", " <td>224208</td>\n", " </tr>\n", " <tr>\n", " <th>246</th>\n", " <td>C00736876</td>\n", " <td>BENTON, WANDETTA</td>\n", " <td>DULUTH</td>\n", " <td>GA</td>\n", " <td>300978117</td>\n", " <td>AT&T</td>\n", " <td>NETWORK TECH</td>\n", " <td>6302020</td>\n", " <td>25</td>\n", " <td>S0GA00559</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>S</td>\n", " <td>P</td>\n", " <td>231982</td>\n", " </tr>\n", " <tr>\n", " <th>247</th>\n", " <td>C00736876</td>\n", " <td>BENTON, WANDETTA</td>\n", " <td>DULUTH</td>\n", " <td>GA</td>\n", " <td>300978117</td>\n", " <td>AT&T</td>\n", " <td>NETWORK TECH</td>\n", " <td>6302020</td>\n", " <td>25</td>\n", " <td>S0GA00559</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>S</td>\n", " <td>P</td>\n", " <td>231982</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME CITY ... CMTE_TP CMTE_DSGN LINKAGE_ID\n", "243 C00711549 COLLINS, RICK LAKEWOOD ... 
S P 228669\n", "244 C00711549 NURSE, CHRIS ROCKVILLE ... S P 228669\n", "245 C00666040 HERNANDEZ, JOE SAN BRUNO ... S P 224208\n", "246 C00736876 BENTON, WANDETTA DULUTH ... S P 231982\n", "247 C00736876 BENTON, WANDETTA DULUTH ... S P 231982\n", "\n", "[5 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 19 } ] }, { "cell_type": "code", "metadata": { "id": "ptU7ClSQEgW3", "colab": { "base_uri": "https://localhost:8080/", "height": 402 }, "outputId": "add4cc5d-fe11-4bd7-ecab-c93ceb212d1a" }, "source": [ "# Contributions whose EMPLOYER contains the literal text 'BIOGEN'.\n", "# This is a substring match, so longer names such as 'BIOGEN INC', 'BIOGEN IDEC' and\n", "# 'PMC BIOGENIX INC.' are selected as well.\n", "# NOTE(review): 'PMC BIOGENIX INC.' looks like a different company. The hand-written CD list\n", "# further down has one entry per merged row, so tightening this filter means updating that list too.\n", "# regex=False matches the text verbatim; na=False counts a missing employer as no match.\n", "df_biogen = df_newdup[df_newdup['EMPLOYER'].str.contains('BIOGEN', regex=False, na=False)]\n", "df_biogen" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>71874</th>\n", " <td>C00703975</td>\n", " <td>MARX, ISAAC</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47438e+07</td>\n", " <td>BIOGEN</td>\n", " <td>CHEMIST</td>\n", " <td>7182020</td>\n", " <td>250</td>\n", " </tr>\n", " <tr>\n", " <th>125267</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7292020</td>\n", " <td>50</td>\n", " </tr>\n", " <tr>\n", " <th>125819</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", "
<td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7152020</td>\n", " <td>50</td>\n", " </tr>\n", " <tr>\n", " <th>125820</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7222020</td>\n", " <td>50</td>\n", " </tr>\n", " <tr>\n", " <th>128132</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7082020</td>\n", " <td>50</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>1576084</th>\n", " <td>C00694323</td>\n", " <td>CHECKAN, RICHARD</td>\n", " <td>FUQUAY VARINA</td>\n", " <td>NC</td>\n", " <td>275267624</td>\n", " <td>BIOGEN</td>\n", " <td>ENGINEERING</td>\n", " <td>6302020</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>1582011</th>\n", " <td>C00694323</td>\n", " <td>CHECKAN, RICHARD</td>\n", " <td>FUQUAY VARINA</td>\n", " <td>NC</td>\n", " <td>275267624</td>\n", " <td>BIOGEN</td>\n", " <td>ENGINEERING</td>\n", " <td>6302020</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1591754</th>\n", " <td>C00694323</td>\n", " <td>CHECKAN, RICHARD</td>\n", " <td>FUQUAY VARINA</td>\n", " <td>NC</td>\n", " <td>275267624</td>\n", " <td>BIOGEN</td>\n", " <td>ENGINEERING</td>\n", " <td>6302020</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1600172</th>\n", " <td>C00694323</td>\n", " <td>CHECKAN, RICHARD</td>\n", " <td>FUQUAY VARINA</td>\n", " <td>NC</td>\n", " <td>275267624</td>\n", " <td>BIOGEN</td>\n", " <td>ENGINEERING</td>\n", " <td>6302020</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1603569</th>\n", " <td>C00694323</td>\n", " 
<td>CHECKAN, RICHARD</td>\n", " <td>FUQUAY VARINA</td>\n", " <td>NC</td>\n", " <td>275267624</td>\n", " <td>BIOGEN</td>\n", " <td>ENGINEERING</td>\n", " <td>6302020</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>98 rows × 9 columns</p>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME ... TRANSACTION_DT TRANSACTION_AMT\n", "71874 C00703975 MARX, ISAAC ... 7182020 250\n", "125267 C00703975 EDMONDSON, FRAZOR ... 7292020 50\n", "125819 C00703975 EDMONDSON, FRAZOR ... 7152020 50\n", "125820 C00703975 EDMONDSON, FRAZOR ... 7222020 50\n", "128132 C00703975 EDMONDSON, FRAZOR ... 7082020 50\n", "... ... ... ... ... ...\n", "1576084 C00694323 CHECKAN, RICHARD ... 6302020 5\n", "1582011 C00694323 CHECKAN, RICHARD ... 6302020 1\n", "1591754 C00694323 CHECKAN, RICHARD ... 6302020 1\n", "1600172 C00694323 CHECKAN, RICHARD ... 6302020 1\n", "1603569 C00694323 CHECKAN, RICHARD ... 6302020 1\n", "\n", "[98 rows x 9 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 21 } ] }, { "cell_type": "code", "metadata": { "id": "gZz7kaXIEfwZ", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "18a45932-d5ca-4591-824b-41403a07cdf3" }, "source": [ "# Inner join on CMTE_ID: only rows of df_biogen whose committee also appears in df_merge are kept.\n", "df6 = df_biogen.merge(df_merge, how='inner', on='CMTE_ID')\n", "df6" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", "
<th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>FEC_ELECTION_YR</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>LINKAGE_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00703975</td>\n", " <td>MARX, ISAAC</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47438e+07</td>\n", " <td>BIOGEN</td>\n", " <td>CHEMIST</td>\n", " <td>7182020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7292020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7152020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7222020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " 
<td>7082020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>C00703975</td>\n", " <td>DILLEY, ANNE</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47648e+07</td>\n", " <td>BIOGEN</td>\n", " <td>EPIDEMIOLOGIST</td>\n", " <td>7052020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7182020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7252020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>C00703975</td>\n", " <td>VANDER STOEP, STEPHEN</td>\n", " <td>BOSTON</td>\n", " <td>MA</td>\n", " <td>2.12925e+07</td>\n", " 
<td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7172020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " <td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>7172020</td>\n", " <td>20</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7042020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7112020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>C00703975</td>\n", " <td>LYKINS, JIM</td>\n", " <td>DUPONT</td>\n", " <td>WA</td>\n", " <td>9.83277e+08</td>\n", " <td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7282020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>C00703975</td>\n", " <td>MALDONADO, REBECCA</td>\n", " <td>SAN ANTONIO</td>\n", " <td>TX</td>\n", " 
<td>782491598</td>\n", " <td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7032020</td>\n", " <td>150</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>C00703975</td>\n", " <td>HOWE, MICHAEL</td>\n", " <td>CANTON</td>\n", " <td>MA</td>\n", " <td>2.02116e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7162020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>6302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " <td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>6302020</td>\n", " <td>15</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>P</td>\n", " <td>P</td>\n", " <td>227491</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " <td>MA</td>\n", " <td>2131</td>\n", " <td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>8052020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>230605</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " 
<td>MA</td>\n", " <td>02131</td>\n", " <td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>230605</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>C00745687</td>\n", " <td>GRIFFITH, LISA</td>\n", " <td>CAMBRIDGE</td>\n", " <td>MA</td>\n", " <td>021394369</td>\n", " <td>BIOGEN</td>\n", " <td>MARKETING</td>\n", " <td>7132020</td>\n", " <td>500</td>\n", " <td>H0MA04267</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>233009</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>C00196774</td>\n", " <td>LOVEDAY, KENNETH S.</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN</td>\n", " <td>BIOLOGIST</td>\n", " <td>8042020</td>\n", " <td>250</td>\n", " <td>S4MA00028</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>S</td>\n", " <td>P</td>\n", " <td>222822</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>C00666149</td>\n", " <td>LOVEDAY, KENNETH S</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN INC</td>\n", " <td>DIRECTOR</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H8NM02248</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>223821</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>C00500843</td>\n", " <td>FLANNELLY-KING, SHANE</td>\n", " <td>SOMERVILLE</td>\n", " <td>MA</td>\n", " <td>2.14421e+07</td>\n", " <td>BIOGEN IDEC</td>\n", " <td>BUSINESS ANALYST</td>\n", " <td>6302020</td>\n", " <td>250</td>\n", " <td>S2MA00170</td>\n", " <td>DEM</td>\n", " <td>2024</td>\n", " <td>2020</td>\n", " <td>S</td>\n", " <td>P</td>\n", " <td>222817</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>C00649376</td>\n", " <td>NEWLAND, BART G.</td>\n", " 
<td>BELMONT</td>\n", " <td>MA</td>\n", " <td>2.4784e+07</td>\n", " <td>BIOGEN INC</td>\n", " <td>ATTORNEY</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H8GA07201</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>224868</td>\n", " </tr>\n", " <tr>\n", " <th>25</th>\n", " <td>C00701599</td>\n", " <td>SEGAL, KATE</td>\n", " <td>BATTLE CREEK</td>\n", " <td>MI</td>\n", " <td>4.90159e+08</td>\n", " <td>BIOGEN</td>\n", " <td>GOVERNMENT AFFAIRS</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H0MI06152</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227095</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME ... CMTE_DSGN LINKAGE_ID\n", "0 C00703975 MARX, ISAAC ... P 227491\n", "1 C00703975 EDMONDSON, FRAZOR ... P 227491\n", "2 C00703975 EDMONDSON, FRAZOR ... P 227491\n", "3 C00703975 EDMONDSON, FRAZOR ... P 227491\n", "4 C00703975 EDMONDSON, FRAZOR ... P 227491\n", "5 C00703975 DILLEY, ANNE ... P 227491\n", "6 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "7 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "8 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "9 C00703975 VANDER STOEP, STEPHEN ... P 227491\n", "10 C00703975 THOMAS, DONNA ... P 227491\n", "11 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "12 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "13 C00703975 LYKINS, JIM ... P 227491\n", "14 C00703975 MALDONADO, REBECCA ... P 227491\n", "15 C00703975 HOWE, MICHAEL ... P 227491\n", "16 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "17 C00703975 THOMAS, DONNA ... P 227491\n", "18 C00727149 GATES, CYNTHIA ... P 230605\n", "19 C00727149 GATES, CYNTHIA ... P 230605\n", "20 C00745687 GRIFFITH, LISA ... P 233009\n", "21 C00196774 LOVEDAY, KENNETH S. ... P 222822\n", "22 C00666149 LOVEDAY, KENNETH S ... P 223821\n", "23 C00500843 FLANNELLY-KING, SHANE ... P 222817\n", "24 C00649376 NEWLAND, BART G. ... 
P 224868\n", "25 C00701599 SEGAL, KATE ... P 227095\n", "\n", "[26 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 22 } ] }, { "cell_type": "code", "metadata": { "id": "tYTdUPrlFKJn", "colab": { "base_uri": "https://localhost:8080/", "height": 50 }, "outputId": "d5fbed25-b24a-452c-8a0f-caa683c07d94" }, "source": [ "# TRANSACTION_DT of the rows whose OCCUPATION contains 'DIRECTOR'.\n", "# One .loc[rows, column] lookup replaces the chained df[mask][col]; na=False keeps NaN out of the mask.\n", "df6.loc[df6['OCCUPATION'].str.contains('DIRECTOR', regex=False, na=False), 'TRANSACTION_DT']" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "22 6302020\n", "Name: TRANSACTION_DT, dtype: int64" ] }, "metadata": { "tags": [] }, "execution_count": 31 } ] }, { "cell_type": "code", "metadata": { "id": "4nct8sKi6ZRo", "colab": { "base_uri": "https://localhost:8080/", "height": 134 }, "outputId": "531fd6c2-e159-4311-98b3-a6c5766583dc" }, "source": [ "# TRANSACTION_DT of the rows whose OCCUPATION contains 'VP'.\n", "df6.loc[df6['OCCUPATION'].str.contains('VP', regex=False, na=False), 'TRANSACTION_DT']" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "6 7302020\n", "7 7182020\n", "8 7252020\n", "11 7042020\n", "12 7112020\n", "16 6302020\n", "Name: TRANSACTION_DT, dtype: int64" ] }, "metadata": { "tags": [] }, "execution_count": 32 } ] }, { "cell_type": "code", "metadata": { "id": "TM9ctAKSHBGV" }, "source": [ "# Hand-assigned congressional district for each row of df6, listed in df6 row order (one entry per row).\n", "CD = ['MA-05', 'MA-03', 'MA-03', 'MA-03', 'MA-03', 'MA-05', 'MA-05', 'MA-05', 'MA-05', 'MA-07', 'TN-09', 'MA-05', 'MA-05', 'WA-10', 'TX-20', 'MA-08', 'MA-05', 'TN-09', 'MA-07', 'MA-07', 'MA-05', 'MA-04', 'MA-04', 'MA-07', 'MA-05', 'MI-03']" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "uqfKk_S8Hgr_" }, "source": [ "# CD is matched to df6 purely by position, so fail with a clear message if the two ever get out of step.\n", "assert len(CD) == len(df6), f'CD has {len(CD)} entries but df6 has {len(df6)} rows'\n", "df6['CD'] = CD" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "eJkDmYpJ6ZBr", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "deaf95cb-90c6-4a6e-86bd-6f9a89bd20e4" }, "source": [ "# Drop the election-year and committee type/designation columns; df6 itself is left unchanged.\n", "df7 = df6.drop(columns=['CAND_ELECTION_YR', 'FEC_ELECTION_YR', 'CMTE_TP', 'CMTE_DSGN'])\n", "df7" ], "execution_count": null, "outputs":
[ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>LINKAGE_ID</th>\n", " <th>CD</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00703975</td>\n", " <td>MARX, ISAAC</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47438e+07</td>\n", " <td>BIOGEN</td>\n", " <td>CHEMIST</td>\n", " <td>7182020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7292020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7152020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " 
<td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7222020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7082020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>C00703975</td>\n", " <td>DILLEY, ANNE</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47648e+07</td>\n", " <td>BIOGEN</td>\n", " <td>EPIDEMIOLOGIST</td>\n", " <td>7052020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7182020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7252020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " 
<td>C00703975</td>\n", " <td>VANDER STOEP, STEPHEN</td>\n", " <td>BOSTON</td>\n", " <td>MA</td>\n", " <td>2.12925e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7172020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-07</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " <td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>7172020</td>\n", " <td>20</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TN-09</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7042020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7112020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>C00703975</td>\n", " <td>LYKINS, JIM</td>\n", " <td>DUPONT</td>\n", " <td>WA</td>\n", " <td>9.83277e+08</td>\n", " <td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7282020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>WA-10</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>C00703975</td>\n", " <td>MALDONADO, REBECCA</td>\n", " <td>SAN ANTONIO</td>\n", " <td>TX</td>\n", " <td>782491598</td>\n", " <td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7032020</td>\n", " <td>150</td>\n", " <td>P80000722</td>\n", " 
<td>DEM</td>\n", " <td>227491</td>\n", " <td>TX-20</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>C00703975</td>\n", " <td>HOWE, MICHAEL</td>\n", " <td>CANTON</td>\n", " <td>MA</td>\n", " <td>2.02116e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7162020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-08</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>6302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " <td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>6302020</td>\n", " <td>15</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TN-09</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " <td>MA</td>\n", " <td>2131</td>\n", " <td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>8052020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>230605</td>\n", " <td>MA-07</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " <td>MA</td>\n", " <td>02131</td>\n", " <td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>230605</td>\n", " <td>MA-07</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>C00745687</td>\n", " <td>GRIFFITH, LISA</td>\n", " <td>CAMBRIDGE</td>\n", " <td>MA</td>\n", " <td>021394369</td>\n", " 
<td>BIOGEN</td>\n", " <td>MARKETING</td>\n", " <td>7132020</td>\n", " <td>500</td>\n", " <td>H0MA04267</td>\n", " <td>DEM</td>\n", " <td>233009</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>C00196774</td>\n", " <td>LOVEDAY, KENNETH S.</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN</td>\n", " <td>BIOLOGIST</td>\n", " <td>8042020</td>\n", " <td>250</td>\n", " <td>S4MA00028</td>\n", " <td>DEM</td>\n", " <td>222822</td>\n", " <td>MA-04</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>C00666149</td>\n", " <td>LOVEDAY, KENNETH S</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN INC</td>\n", " <td>DIRECTOR</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H8NM02248</td>\n", " <td>DEM</td>\n", " <td>223821</td>\n", " <td>MA-04</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>C00500843</td>\n", " <td>FLANNELLY-KING, SHANE</td>\n", " <td>SOMERVILLE</td>\n", " <td>MA</td>\n", " <td>2.14421e+07</td>\n", " <td>BIOGEN IDEC</td>\n", " <td>BUSINESS ANALYST</td>\n", " <td>6302020</td>\n", " <td>250</td>\n", " <td>S2MA00170</td>\n", " <td>DEM</td>\n", " <td>222817</td>\n", " <td>MA-07</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>C00649376</td>\n", " <td>NEWLAND, BART G.</td>\n", " <td>BELMONT</td>\n", " <td>MA</td>\n", " <td>2.4784e+07</td>\n", " <td>BIOGEN INC</td>\n", " <td>ATTORNEY</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H8GA07201</td>\n", " <td>DEM</td>\n", " <td>224868</td>\n", " <td>MA-05</td>\n", " </tr>\n", " <tr>\n", " <th>25</th>\n", " <td>C00701599</td>\n", " <td>SEGAL, KATE</td>\n", " <td>BATTLE CREEK</td>\n", " <td>MI</td>\n", " <td>4.90159e+08</td>\n", " <td>BIOGEN</td>\n", " <td>GOVERNMENT AFFAIRS</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H0MI06152</td>\n", " <td>DEM</td>\n", " <td>227095</td>\n", " <td>MI-03</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " 
CMTE_ID NAME ... LINKAGE_ID CD\n", "0 C00703975 MARX, ISAAC ... 227491 MA-05\n", "1 C00703975 EDMONDSON, FRAZOR ... 227491 MA-03\n", "2 C00703975 EDMONDSON, FRAZOR ... 227491 MA-03\n", "3 C00703975 EDMONDSON, FRAZOR ... 227491 MA-03\n", "4 C00703975 EDMONDSON, FRAZOR ... 227491 MA-03\n", "5 C00703975 DILLEY, ANNE ... 227491 MA-05\n", "6 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "7 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "8 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "9 C00703975 VANDER STOEP, STEPHEN ... 227491 MA-07\n", "10 C00703975 THOMAS, DONNA ... 227491 TN-09\n", "11 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "12 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "13 C00703975 LYKINS, JIM ... 227491 WA-10\n", "14 C00703975 MALDONADO, REBECCA ... 227491 TX-20\n", "15 C00703975 HOWE, MICHAEL ... 227491 MA-08\n", "16 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "17 C00703975 THOMAS, DONNA ... 227491 TN-09\n", "18 C00727149 GATES, CYNTHIA ... 230605 MA-07\n", "19 C00727149 GATES, CYNTHIA ... 230605 MA-07\n", "20 C00745687 GRIFFITH, LISA ... 233009 MA-05\n", "21 C00196774 LOVEDAY, KENNETH S. ... 222822 MA-04\n", "22 C00666149 LOVEDAY, KENNETH S ... 223821 MA-04\n", "23 C00500843 FLANNELLY-KING, SHANE ... 222817 MA-07\n", "24 C00649376 NEWLAND, BART G. ... 224868 MA-05\n", "25 C00701599 SEGAL, KATE ... 
227095 MI-03\n", "\n", "[26 rows x 13 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 42 } ] }, { "cell_type": "code", "metadata": { "id": "Mw4bIeWOOoaE" }, "source": [ "# Load the per-district table (columns CD and Party) from the data folder.\n", "trends = pd.read_excel(data_dir + '/CD_trends.xlsx')" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "VMqR9ED6OoOW", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "6b45fe0d-9ae7-49d0-e8fc-0febc8350755" }, "source": [ "trends.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CD</th>\n", " <th>Party</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>AK-AL</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>AL-01</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>AL-02</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>AL-03</td>\n", " <td>(R)</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>AL-04</td>\n", " <td>(R)</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CD Party\n", "0 AK-AL (R)\n", "1 AL-01 (R)\n", "2 AL-02 (R)\n", "3 AL-03 (R)\n", "4 AL-04 (R)" ] }, "metadata": { "tags": [] }, "execution_count": 40 } ] }, { "cell_type": "code", "metadata": { "id": "qg5DNqhaGyzR", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "d37d3813-eaeb-4d10-b202-c12a21ec5f6e" }, "source": [ "# Add the Party column from trends to the rows of df7, matching on CD.\n", "# Inner join: rows whose CD has no match in trends are dropped.\n", "inner_join = df7.merge(trends, how='inner', on='CD')\n", "inner_join" ], "execution_count": null, "outputs": 
[ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>LINKAGE_ID</th>\n", " <th>CD</th>\n", " <th>Party</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00703975</td>\n", " <td>MARX, ISAAC</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47438e+07</td>\n", " <td>BIOGEN</td>\n", " <td>CHEMIST</td>\n", " <td>7182020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00703975</td>\n", " <td>DILLEY, ANNE</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47648e+07</td>\n", " <td>BIOGEN</td>\n", " <td>EPIDEMIOLOGIST</td>\n", " <td>7052020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, 
KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7182020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7252020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7042020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7112020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>6302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>C00745687</td>\n", " <td>GRIFFITH, LISA</td>\n", " <td>CAMBRIDGE</td>\n", " <td>MA</td>\n", " <td>021394369</td>\n", " 
<td>BIOGEN</td>\n", " <td>MARKETING</td>\n", " <td>7132020</td>\n", " <td>500</td>\n", " <td>H0MA04267</td>\n", " <td>DEM</td>\n", " <td>233009</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>C00649376</td>\n", " <td>NEWLAND, BART G.</td>\n", " <td>BELMONT</td>\n", " <td>MA</td>\n", " <td>2.4784e+07</td>\n", " <td>BIOGEN INC</td>\n", " <td>ATTORNEY</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H8GA07201</td>\n", " <td>DEM</td>\n", " <td>224868</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7292020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7152020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7222020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7082020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " 
<td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>C00703975</td>\n", " <td>VANDER STOEP, STEPHEN</td>\n", " <td>BOSTON</td>\n", " <td>MA</td>\n", " <td>2.12925e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7172020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " <td>MA</td>\n", " <td>2131</td>\n", " <td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>8052020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>230605</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " <td>MA</td>\n", " <td>02131</td>\n", " <td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>230605</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>C00500843</td>\n", " <td>FLANNELLY-KING, SHANE</td>\n", " <td>SOMERVILLE</td>\n", " <td>MA</td>\n", " <td>2.14421e+07</td>\n", " <td>BIOGEN IDEC</td>\n", " <td>BUSINESS ANALYST</td>\n", " <td>6302020</td>\n", " <td>250</td>\n", " <td>S2MA00170</td>\n", " <td>DEM</td>\n", " <td>222817</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " <td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>7172020</td>\n", " <td>20</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TN-09</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " 
<td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>6302020</td>\n", " <td>15</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TN-09</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>C00703975</td>\n", " <td>LYKINS, JIM</td>\n", " <td>DUPONT</td>\n", " <td>WA</td>\n", " <td>9.83277e+08</td>\n", " <td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7282020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>WA-10</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>C00703975</td>\n", " <td>MALDONADO, REBECCA</td>\n", " <td>SAN ANTONIO</td>\n", " <td>TX</td>\n", " <td>782491598</td>\n", " <td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7032020</td>\n", " <td>150</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TX-20</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>C00703975</td>\n", " <td>HOWE, MICHAEL</td>\n", " <td>CANTON</td>\n", " <td>MA</td>\n", " <td>2.02116e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7162020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-08</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>C00196774</td>\n", " <td>LOVEDAY, KENNETH S.</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN</td>\n", " <td>BIOLOGIST</td>\n", " <td>8042020</td>\n", " <td>250</td>\n", " <td>S4MA00028</td>\n", " <td>DEM</td>\n", " <td>222822</td>\n", " <td>MA-04</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>C00666149</td>\n", " <td>LOVEDAY, KENNETH S</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN INC</td>\n", " <td>DIRECTOR</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H8NM02248</td>\n", " <td>DEM</td>\n", " 
<td>223821</td>\n", " <td>MA-04</td>\n", " <td>(D)</td>\n", " </tr>\n", " <tr>\n", " <th>25</th>\n", " <td>C00701599</td>\n", " <td>SEGAL, KATE</td>\n", " <td>BATTLE CREEK</td>\n", " <td>MI</td>\n", " <td>4.90159e+08</td>\n", " <td>BIOGEN</td>\n", " <td>GOVERNMENT AFFAIRS</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H0MI06152</td>\n", " <td>DEM</td>\n", " <td>227095</td>\n", " <td>MI-03</td>\n", " <td>(L)</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME CITY ... LINKAGE_ID CD Party\n", "0 C00703975 MARX, ISAAC ARLINGTON ... 227491 MA-05 (D)\n", "1 C00703975 DILLEY, ANNE ARLINGTON ... 227491 MA-05 (D)\n", "2 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "3 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "4 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "5 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "6 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "7 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "8 C00745687 GRIFFITH, LISA CAMBRIDGE ... 233009 MA-05 (D)\n", "9 C00649376 NEWLAND, BART G. BELMONT ... 224868 MA-05 (D)\n", "10 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... 227491 MA-03 (D)\n", "11 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... 227491 MA-03 (D)\n", "12 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... 227491 MA-03 (D)\n", "13 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... 227491 MA-03 (D)\n", "14 C00703975 VANDER STOEP, STEPHEN BOSTON ... 227491 MA-07 (D)\n", "15 C00727149 GATES, CYNTHIA ROSLINDALE ... 230605 MA-07 (D)\n", "16 C00727149 GATES, CYNTHIA ROSLINDALE ... 230605 MA-07 (D)\n", "17 C00500843 FLANNELLY-KING, SHANE SOMERVILLE ... 222817 MA-07 (D)\n", "18 C00703975 THOMAS, DONNA MEMPHIS ... 227491 TN-09 (D)\n", "19 C00703975 THOMAS, DONNA MEMPHIS ... 227491 TN-09 (D)\n", "20 C00703975 LYKINS, JIM DUPONT ... 227491 WA-10 (D)\n", "21 C00703975 MALDONADO, REBECCA SAN ANTONIO ... 
227491 TX-20 (D)\n", "22 C00703975 HOWE, MICHAEL CANTON ... 227491 MA-08 (D)\n", "23 C00196774 LOVEDAY, KENNETH S. BROOKLINE ... 222822 MA-04 (D)\n", "24 C00666149 LOVEDAY, KENNETH S BROOKLINE ... 223821 MA-04 (D)\n", "25 C00701599 SEGAL, KATE BATTLE CREEK ... 227095 MI-03 (L)\n", "\n", "[26 rows x 14 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 44 } ] }, { "cell_type": "code", "metadata": { "id": "a2v3lRkYTjag" }, "source": [ "# INDEX is 1 where CAND_PTY_AFFILIATION equals 'DEM' and 0 otherwise.\n", "# Vectorized comparison instead of a Python loop; missing values compare unequal, so they map to 0.\n", "inner_join['INDEX'] = inner_join['CAND_PTY_AFFILIATION'].eq('DEM').astype(int)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Objszz9NSHe1", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "a33bc488-4ac4-42af-c4a1-bfeeda4caad7" }, "source": [ "inner_join" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>LINKAGE_ID</th>\n", " <th>CD</th>\n", " <th>Party</th>\n", " <th>INDEX</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00703975</td>\n", " <td>MARX, ISAAC</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47438e+07</td>\n", " <td>BIOGEN</td>\n", " <td>CHEMIST</td>\n", " <td>7182020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " 
<td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00703975</td>\n", " <td>DILLEY, ANNE</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47648e+07</td>\n", " <td>BIOGEN</td>\n", " <td>EPIDEMIOLOGIST</td>\n", " <td>7052020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7182020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7252020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7042020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", 
" <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7112020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>6302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>C00745687</td>\n", " <td>GRIFFITH, LISA</td>\n", " <td>CAMBRIDGE</td>\n", " <td>MA</td>\n", " <td>021394369</td>\n", " <td>BIOGEN</td>\n", " <td>MARKETING</td>\n", " <td>7132020</td>\n", " <td>500</td>\n", " <td>H0MA04267</td>\n", " <td>DEM</td>\n", " <td>233009</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>C00649376</td>\n", " <td>NEWLAND, BART G.</td>\n", " <td>BELMONT</td>\n", " <td>MA</td>\n", " <td>2.4784e+07</td>\n", " <td>BIOGEN INC</td>\n", " <td>ATTORNEY</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H8GA07201</td>\n", " <td>DEM</td>\n", " <td>224868</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7292020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " 
<td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7152020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7222020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7082020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>C00703975</td>\n", " <td>VANDER STOEP, STEPHEN</td>\n", " <td>BOSTON</td>\n", " <td>MA</td>\n", " <td>2.12925e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7172020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " <td>MA</td>\n", " <td>2131</td>\n", " <td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>8052020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>230605</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " 
<td>MA</td>\n", " <td>02131</td>\n", " <td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>230605</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>C00500843</td>\n", " <td>FLANNELLY-KING, SHANE</td>\n", " <td>SOMERVILLE</td>\n", " <td>MA</td>\n", " <td>2.14421e+07</td>\n", " <td>BIOGEN IDEC</td>\n", " <td>BUSINESS ANALYST</td>\n", " <td>6302020</td>\n", " <td>250</td>\n", " <td>S2MA00170</td>\n", " <td>DEM</td>\n", " <td>222817</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " <td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>7172020</td>\n", " <td>20</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TN-09</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " <td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>6302020</td>\n", " <td>15</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TN-09</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>C00703975</td>\n", " <td>LYKINS, JIM</td>\n", " <td>DUPONT</td>\n", " <td>WA</td>\n", " <td>9.83277e+08</td>\n", " <td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7282020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>WA-10</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>C00703975</td>\n", " <td>MALDONADO, REBECCA</td>\n", " <td>SAN ANTONIO</td>\n", " <td>TX</td>\n", " <td>782491598</td>\n", " 
<td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7032020</td>\n", " <td>150</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TX-20</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>C00703975</td>\n", " <td>HOWE, MICHAEL</td>\n", " <td>CANTON</td>\n", " <td>MA</td>\n", " <td>2.02116e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7162020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-08</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>C00196774</td>\n", " <td>LOVEDAY, KENNETH S.</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN</td>\n", " <td>BIOLOGIST</td>\n", " <td>8042020</td>\n", " <td>250</td>\n", " <td>S4MA00028</td>\n", " <td>DEM</td>\n", " <td>222822</td>\n", " <td>MA-04</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>C00666149</td>\n", " <td>LOVEDAY, KENNETH S</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN INC</td>\n", " <td>DIRECTOR</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H8NM02248</td>\n", " <td>DEM</td>\n", " <td>223821</td>\n", " <td>MA-04</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>25</th>\n", " <td>C00701599</td>\n", " <td>SEGAL, KATE</td>\n", " <td>BATTLE CREEK</td>\n", " <td>MI</td>\n", " <td>4.90159e+08</td>\n", " <td>BIOGEN</td>\n", " <td>GOVERNMENT AFFAIRS</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H0MI06152</td>\n", " <td>DEM</td>\n", " <td>227095</td>\n", " <td>MI-03</td>\n", " <td>(L)</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME CITY ... CD Party INDEX\n", "0 C00703975 MARX, ISAAC ARLINGTON ... MA-05 (D) 1\n", "1 C00703975 DILLEY, ANNE ARLINGTON ... MA-05 (D) 1\n", "2 C00703975 SMIRNAKIS, KAREN WESTON ... 
MA-05 (D) 1\n", "3 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "4 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "5 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "6 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "7 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "8 C00745687 GRIFFITH, LISA CAMBRIDGE ... MA-05 (D) 1\n", "9 C00649376 NEWLAND, BART G. BELMONT ... MA-05 (D) 1\n", "10 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... MA-03 (D) 1\n", "11 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... MA-03 (D) 1\n", "12 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... MA-03 (D) 1\n", "13 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... MA-03 (D) 1\n", "14 C00703975 VANDER STOEP, STEPHEN BOSTON ... MA-07 (D) 1\n", "15 C00727149 GATES, CYNTHIA ROSLINDALE ... MA-07 (D) 1\n", "16 C00727149 GATES, CYNTHIA ROSLINDALE ... MA-07 (D) 1\n", "17 C00500843 FLANNELLY-KING, SHANE SOMERVILLE ... MA-07 (D) 1\n", "18 C00703975 THOMAS, DONNA MEMPHIS ... TN-09 (D) 1\n", "19 C00703975 THOMAS, DONNA MEMPHIS ... TN-09 (D) 1\n", "20 C00703975 LYKINS, JIM DUPONT ... WA-10 (D) 1\n", "21 C00703975 MALDONADO, REBECCA SAN ANTONIO ... TX-20 (D) 1\n", "22 C00703975 HOWE, MICHAEL CANTON ... MA-08 (D) 1\n", "23 C00196774 LOVEDAY, KENNETH S. BROOKLINE ... MA-04 (D) 1\n", "24 C00666149 LOVEDAY, KENNETH S BROOKLINE ... MA-04 (D) 1\n", "25 C00701599 SEGAL, KATE BATTLE CREEK ... 
MI-03 (L) 1\n", "\n", "[26 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 47 } ] }, { "cell_type": "code", "metadata": { "id": "XL7glHAoSHM3" }, "source": [ "# Every row gets the constant INDEX_BOSS = 1.\n", "# NOTE(review): presumably the employer-side counterpart of INDEX on the same 0/1 scale -- confirm the intended meaning.\n", "inner_join.loc[:, 'INDEX_BOSS'] = 1" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "u2BIIZl3RYg1", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "a0f10c04-3eb4-4294-a054-bb8f4275471e" }, "source": [ "inner_join" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>LINKAGE_ID</th>\n", " <th>CD</th>\n", " <th>Party</th>\n", " <th>INDEX</th>\n", " <th>INDEX_BOSS</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00703975</td>\n", " <td>MARX, ISAAC</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47438e+07</td>\n", " <td>BIOGEN</td>\n", " <td>CHEMIST</td>\n", " <td>7182020</td>\n", " <td>250</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00703975</td>\n", " <td>DILLEY, ANNE</td>\n", " <td>ARLINGTON</td>\n", " <td>MA</td>\n", " <td>2.47648e+07</td>\n", " <td>BIOGEN</td>\n", " <td>EPIDEMIOLOGIST</td>\n", " <td>7052020</td>\n", " 
<td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7182020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>24931439</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7252020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7042020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " 
<td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>7112020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>C00703975</td>\n", " <td>SMIRNAKIS, KAREN</td>\n", " <td>WESTON</td>\n", " <td>MA</td>\n", " <td>2.49314e+07</td>\n", " <td>BIOGEN</td>\n", " <td>VP HEAD OF GLOBAL MEDICAL SAFETY</td>\n", " <td>6302020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>C00745687</td>\n", " <td>GRIFFITH, LISA</td>\n", " <td>CAMBRIDGE</td>\n", " <td>MA</td>\n", " <td>021394369</td>\n", " <td>BIOGEN</td>\n", " <td>MARKETING</td>\n", " <td>7132020</td>\n", " <td>500</td>\n", " <td>H0MA04267</td>\n", " <td>DEM</td>\n", " <td>233009</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>C00649376</td>\n", " <td>NEWLAND, BART G.</td>\n", " <td>BELMONT</td>\n", " <td>MA</td>\n", " <td>2.4784e+07</td>\n", " <td>BIOGEN INC</td>\n", " <td>ATTORNEY</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H8GA07201</td>\n", " <td>DEM</td>\n", " <td>224868</td>\n", " <td>MA-05</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7292020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", 
" <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7152020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7222020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>C00703975</td>\n", " <td>EDMONDSON, FRAZOR</td>\n", " <td>MARLBOROUGH</td>\n", " <td>MA</td>\n", " <td>1.75267e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7082020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-03</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>C00703975</td>\n", " <td>VANDER STOEP, STEPHEN</td>\n", " <td>BOSTON</td>\n", " <td>MA</td>\n", " <td>2.12925e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7172020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " <td>MA</td>\n", " <td>2131</td>\n", " <td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>8052020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>230605</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>C00727149</td>\n", " <td>GATES, CYNTHIA</td>\n", " <td>ROSLINDALE</td>\n", " <td>MA</td>\n", " <td>02131</td>\n", " 
<td>BIOGEN</td>\n", " <td>REGULATORY MEDICAL WRITER</td>\n", " <td>6302020</td>\n", " <td>100</td>\n", " <td>H0MA08045</td>\n", " <td>DEM</td>\n", " <td>230605</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>C00500843</td>\n", " <td>FLANNELLY-KING, SHANE</td>\n", " <td>SOMERVILLE</td>\n", " <td>MA</td>\n", " <td>2.14421e+07</td>\n", " <td>BIOGEN IDEC</td>\n", " <td>BUSINESS ANALYST</td>\n", " <td>6302020</td>\n", " <td>250</td>\n", " <td>S2MA00170</td>\n", " <td>DEM</td>\n", " <td>222817</td>\n", " <td>MA-07</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " <td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>7172020</td>\n", " <td>20</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TN-09</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>C00703975</td>\n", " <td>THOMAS, DONNA</td>\n", " <td>MEMPHIS</td>\n", " <td>TN</td>\n", " <td>3.81155e+08</td>\n", " <td>PMC BIOGENIX INC.</td>\n", " <td>CUSTOMER SERVICE</td>\n", " <td>6302020</td>\n", " <td>15</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TN-09</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>C00703975</td>\n", " <td>LYKINS, JIM</td>\n", " <td>DUPONT</td>\n", " <td>WA</td>\n", " <td>9.83277e+08</td>\n", " <td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7282020</td>\n", " <td>50</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>WA-10</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>C00703975</td>\n", " <td>MALDONADO, REBECCA</td>\n", " <td>SAN ANTONIO</td>\n", " 
<td>TX</td>\n", " <td>782491598</td>\n", " <td>BIOGEN</td>\n", " <td>SALES</td>\n", " <td>7032020</td>\n", " <td>150</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>TX-20</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>C00703975</td>\n", " <td>HOWE, MICHAEL</td>\n", " <td>CANTON</td>\n", " <td>MA</td>\n", " <td>2.02116e+07</td>\n", " <td>BIOGEN</td>\n", " <td>ATTORNEY</td>\n", " <td>7162020</td>\n", " <td>100</td>\n", " <td>P80000722</td>\n", " <td>DEM</td>\n", " <td>227491</td>\n", " <td>MA-08</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>C00196774</td>\n", " <td>LOVEDAY, KENNETH S.</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN</td>\n", " <td>BIOLOGIST</td>\n", " <td>8042020</td>\n", " <td>250</td>\n", " <td>S4MA00028</td>\n", " <td>DEM</td>\n", " <td>222822</td>\n", " <td>MA-04</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>C00666149</td>\n", " <td>LOVEDAY, KENNETH S</td>\n", " <td>BROOKLINE</td>\n", " <td>MA</td>\n", " <td>024465827</td>\n", " <td>BIOGEN INC</td>\n", " <td>DIRECTOR</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H8NM02248</td>\n", " <td>DEM</td>\n", " <td>223821</td>\n", " <td>MA-04</td>\n", " <td>(D)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>25</th>\n", " <td>C00701599</td>\n", " <td>SEGAL, KATE</td>\n", " <td>BATTLE CREEK</td>\n", " <td>MI</td>\n", " <td>4.90159e+08</td>\n", " <td>BIOGEN</td>\n", " <td>GOVERNMENT AFFAIRS</td>\n", " <td>6302020</td>\n", " <td>500</td>\n", " <td>H0MI06152</td>\n", " <td>DEM</td>\n", " <td>227095</td>\n", " <td>MI-03</td>\n", " <td>(L)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME CITY ... 
Party INDEX INDEX_BOSS\n", "0 C00703975 MARX, ISAAC ARLINGTON ... (D) 1 1\n", "1 C00703975 DILLEY, ANNE ARLINGTON ... (D) 1 1\n", "2 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "3 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "4 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "5 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "6 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "7 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "8 C00745687 GRIFFITH, LISA CAMBRIDGE ... (D) 1 1\n", "9 C00649376 NEWLAND, BART G. BELMONT ... (D) 1 1\n", "10 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... (D) 1 1\n", "11 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... (D) 1 1\n", "12 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... (D) 1 1\n", "13 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... (D) 1 1\n", "14 C00703975 VANDER STOEP, STEPHEN BOSTON ... (D) 1 1\n", "15 C00727149 GATES, CYNTHIA ROSLINDALE ... (D) 1 1\n", "16 C00727149 GATES, CYNTHIA ROSLINDALE ... (D) 1 1\n", "17 C00500843 FLANNELLY-KING, SHANE SOMERVILLE ... (D) 1 1\n", "18 C00703975 THOMAS, DONNA MEMPHIS ... (D) 1 1\n", "19 C00703975 THOMAS, DONNA MEMPHIS ... (D) 1 1\n", "20 C00703975 LYKINS, JIM DUPONT ... (D) 1 1\n", "21 C00703975 MALDONADO, REBECCA SAN ANTONIO ... (D) 1 1\n", "22 C00703975 HOWE, MICHAEL CANTON ... (D) 1 1\n", "23 C00196774 LOVEDAY, KENNETH S. BROOKLINE ... (D) 1 1\n", "24 C00666149 LOVEDAY, KENNETH S BROOKLINE ... (D) 1 1\n", "25 C00701599 SEGAL, KATE BATTLE CREEK ... 
(L) 1 1\n", "\n", "[26 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 49 } ] }, { "cell_type": "code", "metadata": { "id": "rW1ZZvZwYZ2s" }, "source": [ "# Keep only the two columns used by the regression cell below.\n", "subset2 = inner_join[['INDEX','INDEX_BOSS']]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "M7vLCa_-dmhg" }, "source": [ "# scikit-learn pieces used by the regression cell below, imported together.\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.preprocessing import MinMaxScaler" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "apb76xJfYP-w", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "bd90bb55-85ee-4d15-b585-3c3b1406d16c" }, "source": [ "# Min-max scale both columns of subset2 in a single fit_transform pass.\n", "scaler1 = MinMaxScaler()\n", "inner_join_scaled = scaler1.fit_transform(subset2)\n", "\n", "# Column 0 is INDEX (feature) and column 1 is INDEX_BOSS (target);\n", "# reshape(-1, 1) gives the 2-D column arrays passed to fit().\n", "x = inner_join_scaled[:, 0].reshape(-1, 1)\n", "y = inner_join_scaled[:, 1].reshape(-1, 1)\n", "\n", "# NOTE(review): INDEX_BOSS is assigned the constant 1 in an earlier cell and\n", "# INDEX is 1 on every displayed row, so both scaled columns appear to be\n", "# constant and this fit cannot estimate a meaningful slope - confirm the\n", "# intended feature and target columns.\n", "linear_regressor = LinearRegression()\n", "linear_regressor.fit(x, y)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" ] }, "metadata": { "tags": [] }, "execution_count": 55 } ] } ] }