{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Week3_Assignment.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "code", "metadata": { "id": "bs5dRVOjZ6pT", "colab": { "base_uri": "https://localhost:8080/", "height": 221 }, "outputId": "2801a080-a77a-4e62-d58c-732d318416b3" }, "source": [ "from google.colab import drive\n", "drive.mount('/data/')\n", "data_dir = '/data/My Drive/Colab Notebooks/FEC dataset'\n", "!ls '/data/My Drive/Colab Notebooks/FEC dataset'\n", "!pip install matplotlib" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Mounted at /data/\n", "ccl20.zip\t cm_header_file.csv indiv_header_file.csv\n", "ccl_header_file.csv cn20.zip\t\t pas220.zip\n", "CD_trends.xlsx\t cn_header_file.csv pas2_header_file.csv\n", "cm20.zip\t indiv20.zip\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (3.2.2)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.4.7)\n", "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.8.1)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (0.10.0)\n", "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.18.5)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.2.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.1->matplotlib) (1.15.0)\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "uivLBlKyuC2V" }, "source": [ "import zipfile\n", "zip = zipfile.ZipFile(data_dir+'/indiv20.zip')\n", "#zip.namelist()" ], "execution_count": null, 
"outputs": [] }, { "cell_type": "code", "metadata": { "id": "m-0PQq0Oufje", "colab": { "base_uri": "https://localhost:8080/", "height": 428 }, "outputId": "b79a482b-6444-49f8-f88a-d79975a0442c" }, "source": [ "import pandas as pd\n", "header = pd.read_csv(data_dir+'/indiv_header_file.csv')\n", "\n", "data=pd.read_csv(zip.open('by_date/itcont_2020_20200630_20300630.txt'), sep='|', names=header.columns)\n", "data.head()" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (10,16,18,19) have mixed types.Specify dtype option on import or set low_memory=False.\n", " interactivity=interactivity, compiler=compiler, result=result)\n" ], "name": "stderr" }, { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDAMNDT_INDRPT_TPTRANSACTION_PGIIMAGE_NUMTRANSACTION_TPENTITY_TPNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTOTHER_IDTRAN_IDFILE_NUMMEMO_CDMEMO_TEXTSUB_ID
0C00363317AYEP202020200419921974328015EINDLITTLE, WILLIAMNEW YORKNY1.0128e+08NOT EMPLOYEDNOT EMPLOYED12162020500NaN40171591402014NaN* EARMARKED CONTRIBUTION: SEE BELOW4042120201737536230
1C00723122AYEP202020200715924497979915EINDSTOWE, BARBARARESTONVA2.01942e+08NOT EMPLOYEDNOT EMPLOYED12282020100C0019343347534831423440NaN* EARMARKED CONTRIBUTION: SEE BELOW4072620201794577716
2C00290825AYEP202020200415921689281615EINDMEHIEL, KARENNEW YORKNY1.01281e+08KAMPACK, INC.EXECUTIVE121820202800C0040122439653751398991NaN* EARMARKED CONTRIBUTION: SEE BELOW4050620201741858091
3C00363317AM12P202020200419921974298215EINDLITTLE, WILLIAMNEW YORKNY1.0128e+08NOT EMPLOYEDNOT EMPLOYED10302020500C0040122440171731401993NaN* EARMARKED CONTRIBUTION: SEE BELOW4042120201737536220
4C00589309AYEP20200220918717138515EINDDAVIDSON, GREGREDONDO BEACHCA9.02782e+08NORTHROP GRUMMANAEROSPACE MANAGER12312020100C00401224VVBX0QHNGR61385228NaN* EARMARKED CONTRIBUTION: SEE BELOW4022920201700018835
\n", "
" ], "text/plain": [ " CMTE_ID ... SUB_ID\n", "0 C00363317 ... 4042120201737536230\n", "1 C00723122 ... 4072620201794577716\n", "2 C00290825 ... 4050620201741858091\n", "3 C00363317 ... 4042120201737536220\n", "4 C00589309 ... 4022920201700018835\n", "\n", "[5 rows x 21 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 3 } ] }, { "cell_type": "code", "metadata": { "id": "6PB4UgTa1Bih", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "5339e7e6-3336-494c-edb9-cfd45fa8cee4" }, "source": [ "print(data['TRANSACTION_AMT'].max())" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "10000000\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "owm7xZS11HEB", "colab": { "base_uri": "https://localhost:8080/", "height": 326 }, "outputId": "6769a41a-fe29-4f5b-acab-71ab9a292fd8" }, "source": [ "sort_amt = data.sort_values(by='TRANSACTION_AMT', ascending=False)\n", "sort_amt.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDAMNDT_INDRPT_TPTRANSACTION_PGIIMAGE_NUMTRANSACTION_TPENTITY_TPNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTOTHER_IDTRAN_IDFILE_NUMMEMO_CDMEMO_TEXTSUB_ID
990582C00571703NM8P20200820926685191310INDMELLON, TIMOTHYSARATOGAWY823311500SELF-EMPLOYEDINVESTMENTS709202010000000NaNSA11A.154461434706NaNNaN4090120201833903380
990568C00571703NM8P20200820926685190810INDSCHWARZMAN, STEPHEN A.NEW YORKNY101543302BLACKSTONECHAIRMAN & CEO701202010000000NaNSA11A.154111434706NaNNaN4090120201833903366
469388C00637512NM8P20200820926641369310ORGAMERICA FIRST POLICIES, INC.ARLINGTONVA22202NaNNaN720202010000000NaNSA11AI.1655801434640NaNNaN4082920201831236982
1151552C00484642NM7P20200720926016463110ORGMAJORITY FORWARDWASHINGTONDC200055998NaNNaN63020208000000NaN19733141427419NaNNaN4072920201808862242
1351670C00747246NQ2P20200715924509555515ORGSIXTEEN THIRTY FUNDWASHINGTONDC200362605NaNNaN63020205700000NaN122954631423930NaNNaN4071720201791015689
\n", "
" ], "text/plain": [ " CMTE_ID AMNDT_IND RPT_TP ... MEMO_CD MEMO_TEXT SUB_ID\n", "990582 C00571703 N M8 ... NaN NaN 4090120201833903380\n", "990568 C00571703 N M8 ... NaN NaN 4090120201833903366\n", "469388 C00637512 N M8 ... NaN NaN 4082920201831236982\n", "1151552 C00484642 N M7 ... NaN NaN 4072920201808862242\n", "1351670 C00747246 N Q2 ... NaN NaN 4071720201791015689\n", "\n", "[5 rows x 21 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 5 } ] }, { "cell_type": "code", "metadata": { "id": "WuJLWjlA29OT" }, "source": [ "df = pd.DataFrame(data, columns=['CMTE_ID', 'NAME', 'CITY', 'STATE', 'ZIP_CODE', 'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT', 'TRANSACTION_AMT'])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "I9CI-mdn29Fq", "colab": { "base_uri": "https://localhost:8080/", "height": 238 }, "outputId": "5c059bc9-6a43-4365-db85-a24049a7568b" }, "source": [ "from zipfile import ZipFile\n", "import pandas as pd\n", "header = pd.read_csv(data_dir+'/cn_header_file.csv')\n", "\n", "with ZipFile(data_dir+'/cn20.zip') as zip:\n", " candidates = pd.read_csv(zip.open('cn.txt'), sep='|', names=header.columns)\n", "candidates.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_NAMECAND_PTY_AFFILIATIONCAND_ELECTION_YRCAND_OFFICE_STCAND_OFFICECAND_OFFICE_DISTRICTCAND_ICICAND_STATUSCAND_PCCCAND_ST1CAND_ST2CAND_CITYCAND_STCAND_ZIP
0H0AK00105LAMB, THOMASNNE2020AKH0.0CNC006075151861 W LAKE LUCILLE DRNaNWASILLAAK99654.0
1H0AK00113TUGATUK, RAY SEANDEM2020AKH0.0CNNaNPO BOX 172NaNMANAKOTAKAK99628.0
2H0AK01046CATALANO, THOMASOTH2020AKH0.0NaNNNaN188 WEST NORTHERN LIGHTS BOULEVARDNaNANCHORAGEAK99503.0
3H0AL01055CARL, JERRY LEE, JRREP2020ALH1.0OCC00697789PO BOX 852138NaNMOBILEAL36685.0
4H0AL01063LAMBERT, DOUGLAS WESTLEY IIIREP2020ALH1.0OCC007015577194 STILLWATER BLVDNaNSPANISH FORTAL36527.0
\n", "
" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "0 H0AK00105 LAMB, THOMAS ... AK 99654.0\n", "1 H0AK00113 TUGATUK, RAY SEAN ... AK 99628.0\n", "2 H0AK01046 CATALANO, THOMAS ... AK 99503.0\n", "3 H0AL01055 CARL, JERRY LEE, JR ... AL 36685.0\n", "4 H0AL01063 LAMBERT, DOUGLAS WESTLEY III ... AL 36527.0\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 6 } ] }, { "cell_type": "code", "metadata": { "id": "uF9YJ-SQ6psu" }, "source": [ "candidates_final = pd.DataFrame(candidates, columns=['CAND_ID', 'CAND_PTY_AFFILIATION'])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "H4_26uJ23RXX", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "543f7301-8524-4da0-85da-b3f3c2a4e6a2" }, "source": [ "header = pd.read_csv(data_dir+'/ccl_header_file.csv')\n", "\n", "with ZipFile(data_dir+'/ccl20.zip') as zip:\n", " #print(zip.namelist())\n", " linkage = pd.read_csv(zip.open('ccl.txt'), sep='|', names=header.columns)\n", "\n", "linkage.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_ELECTION_YRFEC_ELECTION_YRCMTE_IDCMTE_TPCMTE_DSGNLINKAGE_ID
0C0071360220192020C00712851OU228963
1H0AK0010520202020C00607515HP229250
2H0AL0105520202020C00697789HP226125
3H0AL0106320202020C00701557HP227053
4H0AL0107120202020C00701409HP227054
\n", "
" ], "text/plain": [ " CAND_ID CAND_ELECTION_YR FEC_ELECTION_YR ... CMTE_TP CMTE_DSGN LINKAGE_ID\n", "0 C00713602 2019 2020 ... O U 228963\n", "1 H0AK00105 2020 2020 ... H P 229250\n", "2 H0AL01055 2020 2020 ... H P 226125\n", "3 H0AL01063 2020 2020 ... H P 227053\n", "4 H0AL01071 2020 2020 ... H P 227054\n", "\n", "[5 rows x 7 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 8 } ] }, { "cell_type": "code", "metadata": { "id": "wyK3OZ3y7Srb", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "632f3820-90b7-4759-99e7-089714c8243b" }, "source": [ "df_merge = pd.merge(candidates_final, linkage, on='CAND_ID')\n", "df_merge.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_PTY_AFFILIATIONCAND_ELECTION_YRFEC_ELECTION_YRCMTE_IDCMTE_TPCMTE_DSGNLINKAGE_ID
0H0AK00105NNE20202020C00607515HP229250
1H0AL01055REP20202020C00697789HP226125
2H0AL01063REP20202020C00701557HP227053
3H0AL01071REP20202020C00701409HP227054
4H0AL01089REP20202020C00703066HP227266
\n", "
" ], "text/plain": [ " CAND_ID CAND_PTY_AFFILIATION ... CMTE_DSGN LINKAGE_ID\n", "0 H0AK00105 NNE ... P 229250\n", "1 H0AL01055 REP ... P 226125\n", "2 H0AL01063 REP ... P 227053\n", "3 H0AL01071 REP ... P 227054\n", "4 H0AL01089 REP ... P 227266\n", "\n", "[5 rows x 8 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 9 } ] }, { "cell_type": "code", "metadata": { "id": "8i2m3TRG3QWd" }, "source": [ "" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "8ICMtX8B3TZi" }, "source": [ "sort_amt.dropna(subset = [\"EMPLOYER\", \"OCCUPATION\"], inplace=True)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "jNiWeB9J3TN7", "colab": { "base_uri": "https://localhost:8080/", "height": 343 }, "outputId": "7d11d265-7680-4c8f-ccba-dc19ec310ec3" }, "source": [ "sort_amt.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDAMNDT_INDRPT_TPTRANSACTION_PGIIMAGE_NUMTRANSACTION_TPENTITY_TPNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTOTHER_IDTRAN_IDFILE_NUMMEMO_CDMEMO_TEXTSUB_ID
990582C00571703NM8P20200820926685191310INDMELLON, TIMOTHYSARATOGAWY823311500SELF-EMPLOYEDINVESTMENTS709202010000000NaNSA11A.154461434706NaNNaN4090120201833903380
990568C00571703NM8P20200820926685190810INDSCHWARZMAN, STEPHEN A.NEW YORKNY101543302BLACKSTONECHAIRMAN & CEO701202010000000NaNSA11A.154111434706NaNNaN4090120201833903366
988418C00547349NM8P20200820926644587510INDSTEYER, THOMAS F.SAN FRANCISCOCA941049007FAHR, LLCFOUNDER70120203479294NaNVNVNVHN8SQ01434668NaNNaN4082920201831239483
1001457C00495028NM8P20200820926663994310INDSIMONS, JAMES H.NEW YORKNY100107007EUCLIDEAN CAPITALPRESIDENT71520202500000NaNVN8FNNJW7231434687NaNNON-CONTRIBUTION ACCOUNT4090220201833936065
860246C00620971NM8P20200820926612637210INDSTEYER, THOMASSAN FRANCISCOCA9.41045e+08FAHR LLCPHILANTHROPY AND ADVOCACY72420202500000NaNVSH7WMSTV401434556NaNNaN4090120201833903301
\n", "
" ], "text/plain": [ " CMTE_ID AMNDT_IND ... MEMO_TEXT SUB_ID\n", "990582 C00571703 N ... NaN 4090120201833903380\n", "990568 C00571703 N ... NaN 4090120201833903366\n", "988418 C00547349 N ... NaN 4082920201831239483\n", "1001457 C00495028 N ... NON-CONTRIBUTION ACCOUNT 4090220201833936065\n", "860246 C00620971 N ... NaN 4090120201833903301\n", "\n", "[5 rows x 21 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 25 } ] }, { "cell_type": "code", "metadata": { "id": "ZpDCZF044orD", "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "outputId": "43935d99-c06b-4fcd-e723-4a5c5affe85d" }, "source": [ "sort_amt[sort_amt['OCCUPATION']=='EXECUTIVE']['EMPLOYER'].describe()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "count 6193\n", "unique 2349\n", "top SOUTHERN CA EDISON\n", "freq 215\n", "Name: EMPLOYER, dtype: object" ] }, "metadata": { "tags": [] }, "execution_count": 38 } ] }, { "cell_type": "code", "metadata": { "id": "hyiY1HCz4oaE" }, "source": [ "" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "k0qWZ1iFAlCE", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "490d0372-c2ff-4de9-8ad7-77783c72dd63" }, "source": [ "df_newdup = df[(df['EMPLOYER'].duplicated()) &\n", " (df['EMPLOYER']!='NOT EMPLOYED') &\n", " (df['EMPLOYER']!='RETIRED')]\n", "\n", "df_newdup.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMT
7C00706333ALVAREZ, JACKTRACYCA95304ALVAREZ FARMS, INC.PRESIDENT93020202300
8C00706333ALVAREZ, JACKTRACYCA95304ALVAREZ FARMS, INC.PRESIDENT9302020200
13C00431932COOPER, DAVIDNEW BRAUNFELSTX78132OVINTIV SERVICES INC.DRILLING COORDINATOR6302020104
14C00431932CURRAN, KENTLITTLETONCO80127OVINTIV SERVICES INC.SENIOR LAND NEGOTIATOR630202020
15C00431932DARLINGTON, BRUCESPRINGTX77379OVINTIV SERVICES INC.SR. MANAGER, DRILLING & COMPL630202050
\n", "
" ], "text/plain": [ " CMTE_ID NAME ... TRANSACTION_DT TRANSACTION_AMT\n", "7 C00706333 ALVAREZ, JACK ... 9302020 2300\n", "8 C00706333 ALVAREZ, JACK ... 9302020 200\n", "13 C00431932 COOPER, DAVID ... 6302020 104\n", "14 C00431932 CURRAN, KENT ... 6302020 20\n", "15 C00431932 DARLINGTON, BRUCE ... 6302020 50\n", "\n", "[5 rows x 9 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 12 } ] }, { "cell_type": "code", "metadata": { "id": "0OznZbEdSjB1", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "91335d8e-6ce1-44c8-c9ba-1a7856cacdcb" }, "source": [ "set(df_newdup['EMPLOYER'])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'CONSULTANT',\n", " 'BANNER HEALTH',\n", " 'YOUR PART-TIME CONTROLLER LLC',\n", " 'WRIGHT COLLEGE',\n", " 'II-VI INC.',\n", " 'AREAS APPRAISERS INC',\n", " 'SAIONTZ & KIRK, P.A.',\n", " \"UCSF BENIOFF CHILDREN'S HOSPITAL OAKLA\",\n", " 'NICKELSPORN &LUNDIN PC',\n", " 'LOCUS IMPACT INVESTING',\n", " 'GRAVLEE HOMES INC.',\n", " 'CENTURY CONTRACTORS',\n", " 'ONPOINT MARKETING INC.',\n", " 'GEORGE FOX UNIVERSITY',\n", " 'TURN IT OVER CLEANING',\n", " 'COLLIERS',\n", " 'PRIDGEON AND CLAY, INC.',\n", " 'STERLING REALTORS',\n", " 'MAINE STATE CU',\n", " 'CIVIL LIBERTIES LIST',\n", " 'CALIFORNIA',\n", " 'AMERICAN CONCRETE',\n", " 'GARRISON PC',\n", " 'SCHOOL OF ART INSTITUTE OF CHICAGO',\n", " 'WASHINGTON STATE HOSPITAL ASSOCIATION',\n", " 'JFNNJ',\n", " 'NAVAIR SETA (HOFFMAN ENGINEERING)',\n", " 'PEACH & LILY',\n", " 'EATON SALES',\n", " 'MILWAUKEE NEPHROLOGI',\n", " 'ADELPHI TECHNOLOGY INC.',\n", " 'DEANE DANCE CENTER',\n", " 'ODESSA FENCE',\n", " 'NXP SEMICONDUCTOR',\n", " 'NATIONAL FLATBED LLC',\n", " 'T.T.DUNPHY',\n", " 'KB DEVELOPMENT',\n", " 'SM CONSULTANT',\n", " 'PNWRCC',\n", " 'STORCH AMINI PC',\n", " 'MONTOGOMERY COUNTY GOVERNMENT',\n", " 'NIKE, INC',\n", " 'CAPE ELECTRIC',\n", " 'GD MISSION SYSTEMS INC',\n", " 'AKIN GUMP ET AL',\n", " 
'MINUTEMAN POWER SERVICES LLC',\n", " 'BOS DAIRY, LLC',\n", " 'SILICONES PLUS INC',\n", " 'COCA-COLA CONSOLIDATED, INC.',\n", " 'CENTERSTONE',\n", " 'A. LEE KIRK ATTORNEY AT LAW',\n", " 'LMR FREIGHT',\n", " 'FOLEY ABBOTT LLC',\n", " 'WIND RIVER TRANSPORT',\n", " 'WARNER BROTHERS',\n", " 'MOSES & SINGER',\n", " 'STACKBIT',\n", " 'ROWPAR PHARMACEUTICALS',\n", " 'ASURINT',\n", " 'GUBB & BARSHAY',\n", " 'TYLER TECHNOLOGIES',\n", " 'YALE UNIVERSITY',\n", " 'CUNY / ISLG',\n", " 'OSCARRENDA CONT',\n", " 'HAHN & HAHN LLP',\n", " 'SBT',\n", " 'A BETTER CHANCE FOR OUR',\n", " 'GARMIN INTERNATIONAL',\n", " 'CA-LOTTS CREDIT & CAR SALES',\n", " 'EXXONMOBIL PRODUCTION US',\n", " 'BOSTON SYMPHONY ORCHESTRA',\n", " 'DECADES OF WHEELS LLC',\n", " 'SPORTS ROCKET INC',\n", " 'CTVHCS',\n", " 'ENTERTAINMENT ONE',\n", " 'GIDEON INFORMATICS INC',\n", " 'CRAYOLA LLC',\n", " 'ENERBANK USA',\n", " 'MAXIM CRANE WORKS, LP',\n", " 'UMASS MEDICAL SCHOOL',\n", " 'GROSSMAN IRON ANS STEEL CO',\n", " 'US CONCRETE, INC.',\n", " 'MARTIN M RON ASSOCIATES',\n", " 'OAKTON COMMUNITY COLLEGE',\n", " 'SANTA CRUZ IHSS',\n", " 'CENTRAL PACIFIC BANK',\n", " 'ICON VALUATION',\n", " 'CURETON MIDSTREAM',\n", " 'FIS GROU0',\n", " 'A TUMBLING T RANCHES',\n", " 'THOMAS MEDIA GROUP LLC',\n", " 'XCEL ENERGY',\n", " 'GSSM',\n", " 'I.S. ENVIRONMENTAL PROTECTION AGENCY',\n", " 'REESE NURSING SERVICE 51',\n", " 'IMPLUS LLC',\n", " 'MOORE CAPITAL MANAGEMENT',\n", " 'DONKAGELE FARMSINC.',\n", " 'PPMM',\n", " 'TTA APPRAISAL',\n", " 'CENTENNIAL INSURANCE AGENCY',\n", " 'KISABETH FURNITURE',\n", " 'INSTA LUBE PH CORP',\n", " 'UNVERSITY OF ALABAMA BIRMINGHA',\n", " \"ST. 
CATHERINE'S SCHOOL\",\n", " 'TATOOSH SEAFOODS',\n", " 'EMERGENCY PHYSICIAN',\n", " 'MERRILL BANK OF AMERICA',\n", " 'ONEAL AND ASSOCIATES',\n", " 'METROPOLITAN TRANSPORTATION AUTHORITY',\n", " 'NATHAN LITTAUER HOSPITAL',\n", " 'RETIRRD',\n", " 'ROPER ST FRANCIS HEALTHCARE',\n", " 'JGNEIL',\n", " 'PD&C',\n", " 'CVE',\n", " 'PIRE',\n", " 'SELF EMPLOYED - WOMAN OWNED SMALL BUSI',\n", " 'CUSHEES INC.',\n", " 'BEALLS',\n", " 'VALLEY PHYSICIANS ALLIANCE',\n", " 'FRIENDSHIP HOUSE',\n", " 'PATERSON CITY',\n", " 'INFO TECH, INC',\n", " 'ROSENDIN ELECTRIC',\n", " 'MCDERMOTT',\n", " 'GCCMHC',\n", " 'LOCKHART WORK PROGRAM FACILITY',\n", " 'FOOD LION, LLC',\n", " 'NTESS, LLC',\n", " 'ETRN - WAYNESBURG',\n", " 'UPPER IOWA UNIVERSITY',\n", " \"HOM SOTHEBY'S\",\n", " 'BAC LOCAL 01 MN',\n", " 'MURRAY IND',\n", " 'ARVEST BANK',\n", " 'GRIFFIN ELECTRIC.INC.',\n", " 'LAND TITLE',\n", " 'SAN PASQUAL BAND OF MISSION INDIANS',\n", " 'BLOOMER BIOTECH',\n", " 'GEORGIA-PACIFIC WOOD PRODUCTS LLC',\n", " \"ST. ANN'S WAREHOUSE\",\n", " 'COLTON JOINT UNIFIED',\n", " 'WINGATE WEST SPRINGFIELD',\n", " 'INSIGHTSQUARED',\n", " 'WASATCH DISTRIBUTING CO',\n", " 'LOGISTICS HEALTH INC',\n", " 'HOMES ARE US INC',\n", " 'MANPOWER',\n", " 'LOUISIANA ORTHOPEDIC SPECIALISTS',\n", " 'BHATE CONSTRUCTION',\n", " 'CORNUCOPIA CRUISE LI',\n", " 'WAKE FOREST',\n", " 'SALT RIVER PROJECT',\n", " 'ADVANCE FIRE SYSTEMS INC',\n", " 'THE WINDWARD SCHOOL',\n", " 'LIBERTY BANK',\n", " 'FAITH BAPTIST CHURCH',\n", " 'MORRIS AUTOMOTIVE MACHINE',\n", " 'SACTO. PUB. 
LIBRARY JOINT POWERS AUTH.',\n", " 'TRAILWEST BANK',\n", " 'A-1 AFFORDABLE SIGN CO.',\n", " 'TUMAC LUMBER CO',\n", " 'PINECONE APARTMENTS',\n", " 'APR SOULTIONS',\n", " 'VBCPS',\n", " 'QUORA, INC.',\n", " 'KOCH BUSINESS SOLUTIONS, LP',\n", " 'DRIGGERS SCHULTZ & HERBST',\n", " 'SVB FINANCIAL GROUP',\n", " 'SERRA & GARRITY PC',\n", " 'BUSINESS PERFORMANCE INC.',\n", " 'RTI-HS',\n", " 'HIGHLAND EXCAVATION',\n", " 'AMOS WILKINSON, CRNA',\n", " 'COMMUNICATIONS DIRECTOR',\n", " 'THE LIGHT SOURCE INC',\n", " 'MULLALLY DEVELOPMENT',\n", " 'SILICON LABS',\n", " 'BERNDT CPA LLC',\n", " 'CAREY INTERNATIONAL',\n", " 'ANJALEONI ENTERPRISES INC',\n", " 'HAMPSHIRE',\n", " 'AMICA',\n", " 'LIVINGMIND PROJECT, INC.',\n", " 'NICOR',\n", " 'STAR BODY AND PAINT',\n", " 'TARANTINO AUTO BODY',\n", " 'FPSR',\n", " 'AUTOMATE ASSOCIATES',\n", " 'DEMOCRATIC NATIONAL CONVENTION COMMITT',\n", " 'HOME & OFFICE CABINETRY',\n", " 'NUCOR STEEL FLORIDA INC.',\n", " 'THE PROPERTY SHOP',\n", " 'HOPKINS SCHOOL',\n", " 'SCRUBS ETC',\n", " 'ROCHESTER COMMUNITY SCHOOL DIS',\n", " 'BHE RENEWABLES, LLC',\n", " 'COMSEWOGUE SD',\n", " 'ZOGENIX INC.',\n", " 'NATIONAL AQUARIUMN',\n", " 'KIESEL LAW LLP',\n", " 'UNITARIAN UNIVERSITY',\n", " 'POSEF',\n", " 'CHENHALL SERVICES',\n", " 'STILLWATER PUBLIC SCHOOLS',\n", " 'GARCIA MARBLE & TILE',\n", " 'HENDERSON ENGINEERING CO.',\n", " 'ALLIANZ OF AMERICA CORP',\n", " 'FERMAN BMW',\n", " 'BRISBANE SCHOOL DISTRICT',\n", " 'DAWSON HOLDINGS INC',\n", " 'U. S. DEPT OF VETERANS AFFAIRS',\n", " 'EARLES ARCHITECTS AND ASSOCIATES',\n", " 'BLUFF POINT ASSOCIATES',\n", " 'OVESCO',\n", " 'RYAN COYLE',\n", " 'AMERICAN ENTERPRISE INV. 
SRVCS',\n", " 'VISITING NURSE ASSOCIATION',\n", " 'SMG',\n", " 'ASHNU INTERNATIONAL INC',\n", " 'MOLDEX METRIC',\n", " 'ROSWELL PARK CANCER INSTITUTE INC',\n", " 'PECCAINC',\n", " 'COEUR ALASKA',\n", " 'MRA LABRATORIES',\n", " 'PETERBOROUGH PLAYERS',\n", " 'AMERESCO',\n", " 'SUNY DOWNSTATE',\n", " 'BCBS',\n", " 'S M STOLLER',\n", " 'REAL ESTATE DEV CO',\n", " 'BAPTIST HEALTH',\n", " 'JONATHAN D. SALK M.D.',\n", " 'ALPHAPORT',\n", " 'PRECISION AUTOMOTIVE PLASTICS',\n", " 'CITY OF RIALTO',\n", " 'UMIVERSITY PF DENVER',\n", " 'SAN JUAN COLLEGE',\n", " 'SPENCER STUART',\n", " 'CHICAGO AREA LECET',\n", " \"WOMEN'S RESOURCE CENTER\",\n", " 'BAKER PERKINS',\n", " 'BOE REAL ESTATE',\n", " 'L.A.BELL MOTOR LINES INC.',\n", " 'CAPGEMINI AMERICA',\n", " 'ORION ENGINEERING CONSTRUCTION',\n", " 'GOSHEN FAMILY PHYSICIANS',\n", " 'ORANGE VILLAGE',\n", " 'SO TEX EXTERM',\n", " 'AIR PRODUCTS',\n", " 'MEDICAL GROUP',\n", " 'BOSTON CAPITAL',\n", " 'FOX NEWS NETWORK LLC',\n", " 'LSPM',\n", " 'SUPERMICRO COMPUTER INC',\n", " 'REDD REALTY',\n", " 'CUMMINS INC.',\n", " 'CAREY PERKINS',\n", " 'RHAMILTON CONSULTING',\n", " 'UCS',\n", " 'SAINT MARYS COUNTY PUBLIC SCHOOLS',\n", " 'NYSOMS',\n", " 'CODESTREAM INC.',\n", " 'CONNER MKTG SALES',\n", " 'BURGERBUSTERS INC',\n", " 'NEUROCRINE',\n", " 'FIRST AMERICAN',\n", " 'DURDEN CONSTRUCTION',\n", " 'TRUCKERS INSURANCE ASSOCIATES, INC.',\n", " 'YOUNG SOMMER',\n", " 'BERNARDS APPRAISAL ASSCOCIATES',\n", " 'C.L. 
BARNHOUSE CO.',\n", " 'FIVES MACHINING SYSTEMS',\n", " 'RDO',\n", " 'NYEMASTER GOODE PC',\n", " 'UNVERSITY OF COLORADO BOULDER',\n", " 'JIM DOYLE & ASSOCIATES',\n", " 'POLING & CUTLER',\n", " 'URIST FINANCIAL AND RETIREMENT PLANNIN',\n", " 'COUNCIL FOR RESPONSIBLE NUTRIT',\n", " 'USD 289',\n", " 'ICANN',\n", " 'VAPOTHERM',\n", " 'SMITHFIELD FOODS',\n", " 'CROCKETT PROPERTIES',\n", " 'CELEBRATION CHURCH',\n", " 'COASTAL RESOURCES',\n", " 'PALM BEACH COUNTY FIRE RESCUE',\n", " 'TEK SYSTEMS',\n", " 'WABASH VALLEY POWER ASSN., INC.',\n", " 'KAIFER INS',\n", " 'CENTRA',\n", " 'PBS MENTAL HEALTH ASSOCIATES',\n", " 'FYZICAL',\n", " 'META HOUSING CORPORATION',\n", " 'FLATIRON WORKS',\n", " 'CENTER FOR ECONOMIC DEVELOPMENT LAW',\n", " 'OMAHA PUBLIC SCHOOL',\n", " 'CONSTELLATION',\n", " 'WESTERRA CREDIT UNION',\n", " 'BREYMAN PROPERTIES',\n", " 'XXX',\n", " 'HMHP',\n", " 'MARY KAY INC',\n", " 'THE STANDARD',\n", " 'U OF UTAH HEALTH HOSPITALS AND CLINICS',\n", " 'TAKEDA PHARMACEUTICALS U.S.A. INC.',\n", " 'MCDERMOTT WILL & EMERY',\n", " 'AYA HEALTHCARE',\n", " 'GRAMBLING STATE UNIVERSITY',\n", " 'DUKE CUSTOM FABRICATION',\n", " 'TETRATECH',\n", " 'DAI',\n", " 'AVIANDS',\n", " 'FIDES LLC',\n", " 'EDUCATION FIRST FCU',\n", " 'CEM',\n", " 'BHG RAND REALTY',\n", " 'COMPOSITE & CASTING SUPPLY INC',\n", " 'DESIGN VITTORPIA LLC',\n", " 'MAC ARTHUR FOUNDATIO',\n", " 'LA CANADA WEST',\n", " 'BARJAC INC',\n", " 'MORRIS DEV',\n", " 'BROOKS, WILBURN, & LOGAN CO',\n", " 'SALVATION ARMY AND',\n", " 'BRAUN & BRAUN',\n", " 'BUCHER CHRISTIAN',\n", " 'VERITIV CORP',\n", " 'NANSEMOND PRE-CAST',\n", " 'JORDAN SCHOOL DISTRICT',\n", " 'CENTERSTAGE PRODUCTIONS',\n", " 'BTCO, INC.',\n", " 'SALEM CLINIC',\n", " 'RBC WEALTH MANAGEMENT',\n", " 'EMMANUEL MEDICAL',\n", " 'COMMUNITY GROUP INC',\n", " 'FINANCIAL BROKERAGE',\n", " 'SWISHER INTERNATIONAL, INC.',\n", " 'OPSALESINC',\n", " 'EXELTECH CONSULTING INC',\n", " 'OHIO CONFERENCE OF COMMUNITY DEVELOPME',\n", " 'THE CHAPIN SCHOOL',\n", " 'PHILLIP SAN 
SEBASTIAN',\n", " 'STATE OF VERMONT',\n", " 'RICK HAMM CONSTRUCTION',\n", " 'TIMMONS SHEET METAL',\n", " 'TVHO',\n", " 'UNITED TEACHERS LOS ANGELES',\n", " 'ST JOSEPH',\n", " 'WORCESTER PUBLIC SCHOOLS',\n", " 'LORDS VALLEY SELF STORAGE',\n", " 'FPN',\n", " 'MOUNT SINAI HOSPITAL MANHATTAN',\n", " 'KIDS DEVELOPMENTAL THERAPY',\n", " 'VETERANS AFFAIRS',\n", " \"MY FRIEND'S PLACE\",\n", " 'PAINT WIZARDS INC.',\n", " 'EDG CONSULTING ENGINEERS',\n", " 'FINISH KARE PRODUCTS',\n", " 'E-DEVELOPMENT INTERNATIONAL',\n", " 'JAMES F STEARNS CO',\n", " 'NUMERIX',\n", " 'PARK NICOLLET CLINIC',\n", " 'TUSCOLA ISD',\n", " 'INDEPENDENT REPAIR',\n", " 'KUMIN INSURANCE GROUP',\n", " 'COGHLAN CROWSON LLP',\n", " 'PASSAGE TO INDIA',\n", " 'PAWNEE HEALTH AND WELLNESS',\n", " 'M L BERGER & CO.',\n", " 'HP PRODUCTIONS',\n", " 'STRIBLING',\n", " 'ROBSON COMMUNITES',\n", " 'BANKERS FINANCIAL CORP',\n", " 'PEGASYSTEMS',\n", " 'AZ STAGE SOUND LIGHTS',\n", " 'LAW OFFICE OF DALE WAGNER',\n", " 'BRAYTON PURCELL LLP',\n", " 'NATIVEENERGY',\n", " 'FULTON COUNTY',\n", " 'ENCORE',\n", " 'ROOFEX',\n", " 'GCEI',\n", " 'NEW YORK CITY POLICEPENSION FUND',\n", " 'AT&T CORP.',\n", " 'KIPP DC',\n", " 'PARKER REALTY & ASSOCIATES',\n", " 'AMA CONSULTING ENGINEERS',\n", " 'SCORP GROUP INC.',\n", " 'VILLAGE SUPERMARKETS DBA SHOPRITE',\n", " 'GREG COLEMAN LAW PC',\n", " 'SALESFORCE',\n", " 'RAPID CPAP LLC',\n", " 'ARTIST',\n", " \"READ N' POST\",\n", " 'MONIMEL CORP',\n", " 'ORANGE COUNTY COMMUNITY COLLEGE',\n", " 'C MYERS CORP',\n", " 'LIGHTNING ORCHARD',\n", " 'CUNNINGHAM JEWELERS',\n", " 'FRANKLIN MUTUAL INSURANCE COMPANY',\n", " 'PCSD',\n", " 'DOCTOR',\n", " 'CDFW',\n", " \"ST. 
DUNSTAN'S ANGLICAN CHURCH\",\n", " 'ACME SUPERMARKET',\n", " 'MENARDS',\n", " 'CLAREMONT',\n", " 'LAWSON, DAVIS, PICKREN & SEYDEL',\n", " 'CHRISTIAN LEADERS INSTITUTE',\n", " \"SJOERD'S PRO TOOLS\",\n", " 'WHITE HILL CHURCH OF BRETHREN',\n", " 'BURNS MCDONNELL ENGINEERING COMPANY I',\n", " 'MATANKY',\n", " 'WOMBLE BOND DICKINSON (US) LLP',\n", " 'LUIMAN REAL ESTATE INC',\n", " 'HERZOG TECHNOLOGIES, INC.',\n", " 'PHILIPS HEALTH SYSTEMS',\n", " 'BENDER ENGINEERING',\n", " 'MEV',\n", " 'FOX VALLEY IMAGING',\n", " 'METROPOLITAN BAPTIST CHURCH',\n", " 'ROSEMOUNT CENTER',\n", " 'GREATER LAWRENCE TECH SCHOOL',\n", " 'RE/MAX REALTY ASSOCIATES-CHA',\n", " 'MORRISON FOERSTER',\n", " 'THE CARLYLE GROUP INC.',\n", " 'SENATOR LEW FREDERICK',\n", " 'HUNGRY PLANET INTELLIGENCE',\n", " 'MULLIGAN SECURITY COMPANY',\n", " 'SNC-LAVALIN',\n", " 'BSC',\n", " 'PRA',\n", " 'CLEAN WATER OF VA',\n", " 'ASA STAFFING',\n", " 'M/E ENGINEERING',\n", " 'SERVICE EMPLOYEES INTERNATIONAL UNION',\n", " 'PRECISIONEFFECT',\n", " 'SEAWORLD CALIFORNIA',\n", " 'AFSCME CA LOC 3299',\n", " 'WILDWOOD',\n", " 'GE PLASTICS',\n", " 'US TRANSPORTATION',\n", " 'MONTEFIORE MEDICAL CENTER',\n", " 'PCG',\n", " 'CTS',\n", " 'CEDAR FALLS COMM SCHOOLS',\n", " 'MERCANTILE BANK',\n", " 'THE POKEMON COMPANY INTERNATIONAL',\n", " 'FIFTH STREET RENAISSANCE',\n", " 'METROPOLITAN NASHVILLE BD OF ED',\n", " 'SPRINGETTSBURY TOWNSHIP',\n", " 'GETTYSBURG COLLEGE',\n", " 'SSES',\n", " 'CONTINENTAL AUTOMOTIVE',\n", " 'AMERICAN INSTITUTES FOR REASEARCH',\n", " 'DEER VALLEY RESORT',\n", " 'CARGILLE-SACHER LABS, INC.',\n", " 'JP MORGAN',\n", " 'CARDIOVASCULAR',\n", " 'PERFORMANCE SYSTEMS',\n", " 'KLD',\n", " 'FLORIDA',\n", " 'THE ARLINGTON SLEEP DISORDER CENTER',\n", " 'DE WINNE CONSTRUCTION',\n", " 'CBRE, INC',\n", " 'FISHER PHILLIPS',\n", " 'IC MANAGE',\n", " 'DELANEY CORPORATE SERVICES',\n", " 'HOMESTEAD INC',\n", " 'KERING',\n", " 'ONEOK FIELD SERVICES COMPANY',\n", " 'COWLES PARKWAY FORD, INC.',\n", " 'GIM CAPITAL 
MANAGEMENT',\n", " 'STANFORD MEDICAL GROUP',\n", " 'KILLIAN &DONOHUE',\n", " 'JENSEN TRAVELON',\n", " 'WMLM',\n", " 'MATTESON MARINE SEV',\n", " 'CRAFT COFFEE',\n", " 'INSTANT CARE',\n", " 'NOT IN WORKFORCE',\n", " 'HIGH-MARK SYSTEMS',\n", " 'TRINSEO LLC',\n", " 'HOYT ARCHITECTS',\n", " 'TIVERITY CONSULTING',\n", " 'LED SUPPLY',\n", " 'MELINDA MOTLAGH',\n", " 'CALIFORNIA STATE UNIVERSITY LA',\n", " 'UNC CHAPEL HILL',\n", " 'CMC CONSTRUCTION',\n", " 'G M NORTHRUP CORP',\n", " 'GROW MARKETING',\n", " 'SWISSRAY CUSTOMER CARE LLC',\n", " 'GREECE CENTRAL SCHOOL DISTRICT',\n", " 'BEVERLY-HANKS & ASSOCIATES',\n", " 'ASG REAL ESTATE CO.',\n", " 'BACK TO THE PAST',\n", " 'CHARLOTTE MECKLENBURG SCHOOLS',\n", " \"CONNOLLY'S TOWING INC\",\n", " 'UNIVERSITY OF PITTSBURGH SCHOOL OF MED',\n", " 'DOCTORS MAKING HOUSECALLS',\n", " 'MINITAB',\n", " 'HDR ARCHITECTURE INC.',\n", " 'NAR',\n", " 'THE MONEY STORE',\n", " 'LAMAR STATE COLLEGE - PORT ARTHUR',\n", " 'GGUSD',\n", " 'SHERATON UNIVERSAL HOTEL',\n", " 'STACY AND BAKER LAW',\n", " 'GJAC',\n", " 'LOBIS TECHNOLOGY CONSULTANTS LLC',\n", " 'ACCRUENT',\n", " 'CANCIO NADAL & RIVERA LLC',\n", " 'OLD VINE MANAGEMENT GROUP',\n", " 'NATIONAL PATIENT ADVOCATE FOUNDATION',\n", " 'GARNET VALLEY SCHOOL DISTRICT',\n", " 'GUARANTEE INS AGCY',\n", " 'TRINITY CONSULTANTS',\n", " 'COOK COUNTY OF IL',\n", " 'AONL',\n", " 'NOSSAMAN LLP',\n", " 'BREAD FOR THE WORLD',\n", " 'FNC',\n", " 'NORTH SHORE SENIOR CENTER',\n", " \"HAY'S\",\n", " 'SELF ORIGINAL ARTISTS NYC',\n", " 'POWER SUPPLY',\n", " 'WIDGEON MGT CORP',\n", " 'RADIAN GUARANTY INC.',\n", " 'JENISON PUBLIC SCHOOLS',\n", " 'A PITTSBURGH PLUMBER LLC',\n", " 'PENASQUITOS PET CLINIC',\n", " 'NEA FED. GOVT. AGENCY',\n", " 'MA LEAGUE OF CHCS',\n", " 'STATE FARM INS.',\n", " 'KANYEZI AFRICA SAFARI',\n", " 'UFCW LOCAL NO. 
328',\n", " 'ABLE ELECTRICAL SVC.',\n", " 'KAREN G BINDER',\n", " 'VALLEY EMERGENCY CARE',\n", " 'SUMMIT REHAB UPMC',\n", " 'THE FLORIDA AQUARIUM',\n", " 'BRUCE LEE',\n", " 'SOUND COMMUNITY SOLUTIONS',\n", " 'FOOD SCIENCES CORP.',\n", " 'JOHN MORRELL & COMPANY',\n", " 'UN ENVIRONMENT PROGRAMME',\n", " 'JJ MARQUIS ELECTRIC',\n", " 'COMMUNITIES ACTIVELY LIVING INDEPENDEN',\n", " 'USONIAN REALTY',\n", " 'ZUMIEZ',\n", " 'ROYAL FLEX CIRCUITS',\n", " 'COMMERCEHUB',\n", " 'GENESIS MEDICAL CENTR',\n", " \"CHILDREN'S HOSPITAL BOSTON\",\n", " 'INDATA CORPORATION',\n", " 'EPIC LLC',\n", " 'AUDERE PARTNERS',\n", " 'CLARK CONSTRUCTION',\n", " 'RJH SCIENTIFIC INC',\n", " 'TBWBHL',\n", " 'MUNGER TOLLES & OLSON',\n", " 'HERE',\n", " 'SAP NATIONAL SECURITY SER',\n", " 'FORTINET',\n", " 'CATHERINE WILCOX DDS',\n", " 'HEMCON MEDICAL TECHNOLOGIES INC',\n", " 'RAYA RADIOLOGY',\n", " 'BROWNSTEIN HYATT FARBER SCHRECK',\n", " 'BLRG',\n", " 'BASD',\n", " 'PARIS BRIDGE ACADEMY',\n", " 'HOME FURNITURE',\n", " 'JDS&A ADVISORS',\n", " 'NVI',\n", " 'DISNEY ANIMATION STUDIOS',\n", " 'TELLIGENT MASONRY LLC',\n", " 'REI',\n", " 'HOLLYWOOD CASINO',\n", " 'SAPPHIRE COMPUTERS INC.',\n", " 'SEABULK TANKERS, INC.',\n", " 'TAURIAINEN ENGINEERING',\n", " 'SIMPLYEZ HDM LLC',\n", " 'LAFAYETTE GENERAL HEALTH',\n", " 'WELLTOWER, INC.',\n", " 'KIRKLAND AND ELLIS',\n", " \"CABELA'S INC.\",\n", " 'VJSTURDIVANTINC',\n", " 'GARDEN CITY SCHOOLS DIST',\n", " 'SEARIVER MARITIME INC',\n", " 'MADISON FIRE DEPT.',\n", " 'POWERS MUSIC SCHOOL',\n", " 'LA MESA SPRING VALLEY SCHOOLS',\n", " 'BHHS REAL ESTATE',\n", " 'NEW ENGLAND GRANITE MARBLE',\n", " 'CAPROCK DAIRY',\n", " 'RACHIO',\n", " 'MCPHEE PLUMBING',\n", " 'TERRE HAUTE HEART CENTER',\n", " 'MORENO',\n", " 'SENTINELONE',\n", " 'BERRY PLASTICS',\n", " 'COSTAL CONNECTION',\n", " 'GLOBAL VILLAGE ACADEMY',\n", " 'MARK WINKLER',\n", " 'PENN STATE UNIVERSITY',\n", " 'COLUMBIA MUTUAL INSURANCE COMPANY',\n", " 'BOB BARKER',\n", " 'HATCHERY PLANNING',\n", " 'UMECO',\n", " 
'COMMERCIAL DEVELOPER',\n", " 'TRUCK-TECH',\n", " 'NEDERLANDER ORGANIZATION',\n", " 'MURRAY & MURRAY',\n", " 'WJW ARCHITECTS',\n", " 'HOLMES MURPHY',\n", " 'PEOPLE READY',\n", " 'COLUMBUS STATE UNIVERSITY',\n", " 'CARAHSOFT',\n", " \"FEDERAL GOV'T\",\n", " 'REIW CONSULTING LLC',\n", " 'I&CO',\n", " 'CHURCH OF. HRIST',\n", " 'OCCUCARE INTERNATIONAL',\n", " 'BP AMERICA',\n", " 'TEMPEST CAPITAL LTD',\n", " 'WEST LAFAYETTE COM SCHOOL CORP',\n", " 'ALEXANDRIA REAL ESTATE',\n", " 'JHU/APPLIED PHYSICS LAB.',\n", " 'DESERT HOUSE OF PRAYER',\n", " 'UPDEGRAFF CLINIC',\n", " 'SHANTI POOLS LLC',\n", " 'MPL',\n", " 'GLENN MITCHELL INSURANCE',\n", " 'SWITCHBACK TRAVEL LLC',\n", " 'PUBLIC EDUCATION',\n", " 'SELFEMPLOMENT',\n", " 'BCD MEETINGS & EVENTS',\n", " 'COLUMBIA PRESBYTERIAN HOSPITAL',\n", " 'SANOFI PASTEUR',\n", " 'KOPPEL AND SCHER',\n", " 'APEX-STUDIO SUAREZ',\n", " 'DEPT OF THE AIR FORCE',\n", " 'DURANGO',\n", " 'IRAD SERVICES LLC',\n", " 'WINGATE AT WEST SPRINGFIELD',\n", " 'LWV-DENVER',\n", " 'DOSS REALTY GROUP',\n", " 'CAPSTAR ADVISORS',\n", " 'SCHOOL CITY OF HAMMOND',\n", " 'NORBORD',\n", " 'FAMILY HERITAGE',\n", " 'TRACTOR SUPPLY OMPANY',\n", " 'JAMESTOWN ASSOCIATES',\n", " 'PIEDMONT TRIAD ANESTHESIA, PA',\n", " 'LIONS SHARE FCU',\n", " 'LOCKARD, INC.',\n", " 'GREENFIELD POWER EQUIPMENT, INC.',\n", " 'LOCHEED MARTIN',\n", " 'NUCOR STEEL AUBURN, INC.',\n", " 'SLMC',\n", " 'HANES INC.',\n", " 'OHHP',\n", " 'LANCASTER GENERAL HE',\n", " 'TELEPHONICS SYSTEMS ENGINEERING GROUP',\n", " 'INTEGRA',\n", " 'RESMED',\n", " 'DISCOVERY INSTITUTE',\n", " 'STOCKHOLM UNIVERSITY',\n", " 'CENTURY 21 MEYER',\n", " 'JACKSONLEWIS(PARTNER)',\n", " 'US DOT',\n", " 'WOODS PRECISION PRODUCTS',\n", " 'ENGINEWORLD LLC',\n", " 'THE KIRLIN COMPANY',\n", " 'W.A. 
HYNES & CO.',\n", " 'MORRISON & FOERSTER, LLP',\n", " 'SYMMETROCM',\n", " 'AUBURN HOUSING AUTHORITY',\n", " 'CALPINE',\n", " 'TOTAL E&P USA',\n", " 'ECONOMIC POLICY INSTITUTE',\n", " 'NEVADA STATE MUSEUM',\n", " 'HUNTER COLLGE',\n", " 'CITY OF HOUSTON',\n", " 'COLORADO CARE ASSISTANCE',\n", " 'BEAUREGARD ELECTRIC CO-OP, INC.',\n", " 'DF LEVIN ASSOCIATES',\n", " 'SOCIAL CAPITAL GROUP LLC',\n", " 'MRS.',\n", " 'WIT CREEK PARTNERS',\n", " 'SHONDALAND',\n", " 'NETSAGE',\n", " 'BGR, INC.',\n", " 'VERIZON CORP',\n", " 'FRIENDS SCHOOL OF BALTIMORE',\n", " 'TAYLOR CORPORATION',\n", " 'KAMIN IND',\n", " 'PROVIDENCE ANESTHESIOLOGY ASSOCIATES,',\n", " 'TTUHSC',\n", " 'VERRILL DANA, LLP',\n", " 'EL CAMINO COLLEGE',\n", " 'METROMILE',\n", " 'ROPER AND ROPER',\n", " 'IGLER/PEARLMAN PA',\n", " 'PROQUEST',\n", " 'MIRAGE FINE FOODS, INC.',\n", " 'AMSTED INTERNATIONAL',\n", " 'SOUTHWEST FAMILY GUIDANCE CENTER',\n", " 'CITIZENS MEDICAL CENTER',\n", " 'FRESNO STATE',\n", " \"ST. MARY'S UNIVERSITY\",\n", " 'BLUE HERON WELLNESS',\n", " 'RINGCENTRAL',\n", " 'RUST COLLEGE',\n", " 'NEXTEER',\n", " 'VOL STATE CC',\n", " 'PEOPLES GROUP SELF-EMPLOYED',\n", " 'RIA',\n", " 'VIMAR',\n", " 'GREATSCAPES',\n", " 'DAILY JOURNAL',\n", " 'GOULD KILLIAN CPA GROUP',\n", " 'FREDRICK MANAGEMENT',\n", " 'STRONGHOLD',\n", " 'GENISIS HEALTHCARE',\n", " 'DEMOCRATIC INTELLIGENCE',\n", " 'STADIUM TOYOTA',\n", " 'LB CONSOLIDATED',\n", " 'THE STATE BANK OF FARIBAULT',\n", " 'U.S. 
AGENCY FOR INTERNATIONAL DEVELOPM',\n", " 'COOK COUNTY',\n", " 'SPARKS WILLSON PC',\n", " 'GDK CONSTRUCTION',\n", " 'US GOVT ACCOUNTABILITY OFFICE',\n", " 'CENTRA INC.',\n", " 'LAWWA',\n", " 'VERITE',\n", " 'MOLZEN CORBIN',\n", " '831 DON CUBERO AVE',\n", " 'IOWA TALENTED AND GIFTED ASSOCIATION',\n", " 'THE ROADRUNNER PRESS',\n", " 'ACME GLASS AND MIRROR',\n", " 'HABITAT AMERICA',\n", " 'POWERS LAW',\n", " 'EXPRESSO',\n", " 'CSU SAN MARCOS',\n", " 'BWXT',\n", " 'PREMIER RADIOLOGY',\n", " 'WA STATE NURSES ASSOCIATION',\n", " 'TOURISM ASSN.',\n", " 'EKLHEALTH LLC',\n", " 'RODAN+FIELDS',\n", " 'UFCW LOCAL NO. 876',\n", " 'FRIENDSHIP ACRES PARK INC',\n", " 'MOORE AND VAN ALLEN PLLC',\n", " 'SAGE V FOODS',\n", " 'DR. SUE CAREY PLLC',\n", " 'KELLY AUTOMOTIVE GROUP',\n", " 'EDX',\n", " 'AMHS',\n", " 'ESI TOTAL FUEL MANAGEMENT',\n", " 'PRIZE LOGIC',\n", " 'WINSTEAD PC',\n", " 'MEDSTAR GEORGETOWN UNIVERSITY HOSPITAL',\n", " 'COMCAST CORPORATION',\n", " 'MOSES & SINGER LLP',\n", " 'SANDHILLS COMMUNITY COLLEGE',\n", " 'MILLIKEN',\n", " 'VA DCR',\n", " 'GWATNEY CHEVROLET',\n", " 'ORTHOPEDIC SPINE THERAPY',\n", " 'BERING STRAITS NATIVE CORPORATION',\n", " 'SIKORSKY',\n", " 'GREVE FOUNDATION',\n", " 'SOLIC',\n", " 'LUKE',\n", " 'CH ROBINSON',\n", " 'UCDAVIS CANCER CENTER',\n", " 'JAMS INC.',\n", " 'MCCOOL FARM AND CATTLE',\n", " 'VASSAR ELECTRIC INC',\n", " 'NWP',\n", " 'COTRONICS CORPORATION',\n", " 'MOVEMENT FOR LIFE',\n", " 'GILBERT CONSTRUCTION',\n", " 'MOUNT VERNON CITY SD',\n", " 'CAS',\n", " 'NSWCLA',\n", " 'CATHOLIC DIOCESE OF ROCKFORD',\n", " 'NAP ENGINEERS',\n", " 'DIRECT MARKETING CONCEPTS, INC.',\n", " 'FMCSA',\n", " 'SCIENTIAE LLC',\n", " 'MODA HEALTH',\n", " 'FLORIDA HIGH SCHOOL ATHLETIC ASSOC.',\n", " 'SUITECX',\n", " 'EVANS LAW FIRM, INC.',\n", " 'COMMUNITY LEGAL AID SERVICES',\n", " 'TONI SHERMAN INTERIORS LLC',\n", " 'AGS CONSTRUCTION',\n", " 'CCRMC',\n", " 'MOLINA HEALTHCARE OF FL',\n", " 'REPEAT CONSULTANTS',\n", " 'EWL INC.',\n", " 'WILMERHALE',\n", " 
'TOWNSEND REAL ESTATE',\n", " 'CENTINEL FINANCIAL GROUP',\n", " 'AZARA LLC',\n", " 'GEORGETOWN UNIVERSITY LAW CENTER',\n", " 'CROSSROADS ANESTHESIAP',\n", " 'AMPLITY HEALTH',\n", " 'IMAGE ONE CORP',\n", " 'TRADELINK LLC',\n", " 'GIBBON PUBLIC SCHOOLS',\n", " 'MERCY FAMILY CENTER',\n", " 'SILVERSAND SERVICES',\n", " 'CITY OF CHESAPEAKE',\n", " 'HOWMET AEROSPACE INC.',\n", " 'SOUTHERN TRUCK AND EQUIPMENT',\n", " 'UNIVERSITY OF TEXAS MEDICAL BRANCH AT',\n", " 'KORDICH CONSTRUCTION',\n", " 'ALTSHULER BERZON LLP',\n", " 'SUNTRUST ROBINSON HUMPHREY INC.',\n", " 'AHRENS COMPANIES',\n", " 'HAL SYSTEMS CORP',\n", " 'PACIFIC RIM CAPITAL, INC.',\n", " 'APF',\n", " 'PREMIER ASSET MGMT., INC.',\n", " 'TEAMSTERS LOCAL UNION 191',\n", " 'ADLER GIERSCH',\n", " 'SGF',\n", " 'MICHIGAN STATE UNIVERSITY',\n", " 'ALLIED BARTON',\n", " 'RAINFOCUS',\n", " 'D.U.E BRANDS',\n", " 'WHEATON COLLEGE NORTON MA',\n", " 'GBP CONTRACTING',\n", " 'MOORE PUBLIC SCHOOLS',\n", " 'AIR TRANSPORT ASSOCIATION, INC',\n", " 'SSCI',\n", " 'THE RUSSELL GROU UNITED LLC',\n", " 'SONOMA COUNTY REGIONAL PARKS FOUNDATIO',\n", " 'FTLF',\n", " 'DOOR 2 DOOR INCOME INC',\n", " 'PROFESSIONAL LOSS ADJUSTERS INC.',\n", " 'AMERICAN IRON & ALLOYS',\n", " 'LASHLY & BAER P.C.',\n", " 'UNIVERSITY OF CALIFORNIA, LA',\n", " 'PEARL PROPERTIES',\n", " 'MID MICH INS',\n", " 'BURROW JAN',\n", " 'PATRICIA FLORES',\n", " 'WARNER BROTHERS TELEVISION',\n", " 'MASSACHUSETTS MUTUAL LIFE INSURANCE CO',\n", " 'AMERICAN FEDERATION OF TEACHERS',\n", " \"ST.PETER'S EPISCOPAL CHURCH\",\n", " 'RANDOLPH-BROOKS FCU',\n", " 'UNIVERSITY OF MASS',\n", " 'EQT CORP.',\n", " 'HAMILTON CITY SD',\n", " 'NAPER ENTERPRISES',\n", " 'NEW MEXICO ORTHOPAEDICS',\n", " 'BAYVIEW LOAN SERVICING',\n", " 'PICASSO TILE',\n", " 'TERRY ROBERTS CONSULTING INC',\n", " 'CONFLUENCE DISTRIBUTION INC.',\n", " 'HBSPECIALTY FOODS',\n", " 'AVONWORTH',\n", " 'ASSOCIATED UNIVERSIT',\n", " 'FAUSTOLLEAN',\n", " 'AVMED',\n", " 'EJME',\n", " 'SUPERIOR AIR GROUND AMBLANCE',\n", " 
'UBER(RIDESHARE OPERATOR)',\n", " 'MAGIC TOUCH PAINTING',\n", " 'CITY OF PHOENIX',\n", " 'GRANDVIEW RADIOLOGY',\n", " 'LUNDEBERG SCHOOL OF SEAMANSHIP',\n", " 'SCHEEN&SMITH PSC',\n", " 'LIBERAIL KENWORK',\n", " 'DUKE ENERGY OHIO, INC.',\n", " 'CITY OF HUNTINGTON WOODS',\n", " 'SPIRIT PHARMACEUTICALS LLC',\n", " 'WILEY,WILSON, INC.',\n", " 'MOUNT SINAI WEST',\n", " 'THE METHODIST HOSPITAL',\n", " 'PRIMERA ENGINEERS',\n", " 'TOUR-SARKISSIAN LAW OFFICES LLP',\n", " 'SIBCY CLINE',\n", " 'C.G. REIN DEVELOPMENT CO.',\n", " 'LAKE TRUCKING CO.',\n", " 'POPE, HARDWICKE',\n", " 'AEROSPACE CORPORATION',\n", " 'INNOVATIVE THERAPY CONCEPTS INC.',\n", " 'ASSOC RADIOLOSISTS',\n", " 'RADIANT REFINING',\n", " 'CAMPO SANTO PRODUCTIONS LLC',\n", " 'KANSAS CITY BALLET',\n", " 'NATIONALITIES SERVICE CENTER',\n", " 'AIRSWIFT',\n", " 'NEW HARVEST MINISTRIES INC.',\n", " 'EASTCHESTER FIRE DISTRICT',\n", " 'THERMOSEAL',\n", " 'ADVANTEDGE',\n", " 'NC DEPT. OF PUBLIC SAFETY',\n", " 'ACCUSTAR',\n", " 'EXECUTIVE ENERGY MANAGEMENT, LLC',\n", " 'GPG',\n", " 'IMEX MEDIA',\n", " 'NTP',\n", " 'SP MANAGEMENT',\n", " 'BROWN CAPITAL MANAGEMENT',\n", " 'CIGNA DENTAL HEALTH, INC.',\n", " 'CSI COMPANIES',\n", " 'OHIO EQUITIES INC.',\n", " 'THE RUBY BRINK',\n", " 'MVWSD',\n", " 'HEALTH CARE SERVICE CORP',\n", " 'GREAT PLAINS TECHNOLOGY CENTER',\n", " 'NEW TEACHER CENTER',\n", " 'ANYTIME PLUMBING INC',\n", " 'CALVO ENTERPRISES',\n", " 'ARCHDIOCESE OF NEWARK',\n", " 'UNIVERSITY OF DELAWARE THEATRE DEPARTM',\n", " 'GREEN HASSON JANKS',\n", " 'OAKLEIGH LTD.',\n", " 'UNIVERSITY OF BRISTOL',\n", " 'POLSINELLI',\n", " 'CHRISTIAN WORSHIP CENTER',\n", " 'BILL BRAVO AUTOMOTIVE PORTRAITS',\n", " 'JOHN DEERE FINANCIAL',\n", " 'CONDOMINIUM MGMT SVCS',\n", " 'SALTCHUK',\n", " 'JUST FOR SHOW INC.',\n", " 'OXFORD UNIVERSITY PRESS',\n", " 'CHARLES J GARRISON',\n", " 'LAWRENCE MEMORIALS HOSPITAL',\n", " 'JACKSON HEALTHCARE',\n", " 'SIERRA PACIFIC',\n", " 'NEW SOUTH RIVER BAPTIST ASSO',\n", " 'UNIVERSITY OF MAINE',\n", " 
'ALPHA ELECTRIC CO',\n", " 'KEYIMPACT',\n", " 'IL. DEPT OF HUMAN SERVICES',\n", " 'PANJIVA',\n", " 'FACIAL PLASTIC SURGERY ASSOCIATES',\n", " 'GREEN MOUNTAIN TREATMENT CENTER',\n", " 'CINTERRA GROUP',\n", " 'NIWCC',\n", " 'SOLTAGE LLC',\n", " 'PEPSI COLA',\n", " 'RLA NATIONAL REHABILITATION CENTER',\n", " 'CARE HAWAII',\n", " 'IVAN & DAUGUSTINIS',\n", " 'ALLIANCE RADIOLOGY',\n", " 'UNIV. OF CALIFORNIA',\n", " 'PARKVIEW COMMUNITY HOSPITAL',\n", " 'SPORTS LEICHT RESTORATIONS INC',\n", " 'NONE RETIRED',\n", " 'RENVYLE PARTNERS',\n", " 'PORT APARTMENTS',\n", " 'ECD',\n", " 'MO. DMH DD',\n", " 'DECAHEALTH',\n", " 'NESS INC',\n", " 'NJ DEPARTMENT OF HEALTH',\n", " 'AV INC.',\n", " 'ALLIED UNIVERSAL SECURITY SERVICES',\n", " 'ONI RISK PARTNERS',\n", " 'GROVEPORT MADISON',\n", " 'CAMBREX CHARLES CITY INC.',\n", " 'GILROY UNIFIED SCHOOL DISTRICT',\n", " 'MJUSD',\n", " 'ILCJA&TP',\n", " '4J ENERGY LLC',\n", " 'HIGHLINE MEDICAL CENTERE',\n", " 'SHIELD RESTRATINTS',\n", " '8 MILE FARM',\n", " 'R DIXON SPEAS ASSOCIATES, INC.',\n", " 'RED HOT AHIR',\n", " 'UNITY',\n", " 'EAST BATON ROUGE PARISH SCHOOL',\n", " 'GP',\n", " 'CHOATE HALL & STEWART',\n", " 'THE PATRIOT FINANCIAL GROUP, LLC',\n", " 'ANALYSIS GROUP',\n", " 'MORRIS JAMES LLP',\n", " 'MORRIS TEAM REALTY, LLC',\n", " 'MAYER BROWN LLP',\n", " \"LABORERS' LOCAL 225\",\n", " 'FREEDOM MOBILITY',\n", " 'DOCTORS FOR EMERGENCY SERVICES',\n", " 'CUSTOM VAULT CORP',\n", " 'XDSI',\n", " 'YCSD',\n", " 'REHAB WITHOUT WALLS',\n", " ...}" ] }, "metadata": { "tags": [] }, "execution_count": 24 } ] }, { "cell_type": "code", "metadata": { "id": "UTIiuGxW938o", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "7ef9a292-4ce5-4a57-c2c1-73d96de700e9" }, "source": [ "len(set(df_newdup['EMPLOYER']))" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "65420" ] }, "metadata": { "tags": [] }, "execution_count": 13 } ] }, { "cell_type": "code", "metadata": { "id": 
"BTZ4nqIiaA55", "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "outputId": "7bf205a2-b579-4044-b982-c41ec5790f23" }, "source": [ "df_newdup.dropna(subset = [\"EMPLOYER\"], inplace=True)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " \"\"\"Entry point for launching an IPython kernel.\n" ], "name": "stderr" } ] }, { "cell_type": "code", "metadata": { "id": "yUEct7Y5XyPf", "colab": { "base_uri": "https://localhost:8080/", "height": 979 }, "outputId": "b2bd54a6-52f1-43d2-cb48-71dd48bd9c81" }, "source": [ "df_aero = df_newdup[df_newdup['EMPLOYER'].str.contains('AEROSPACE CORPORATION')]\n", "df_aero" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMT
136824C00703975DAVIS, LORRIELOS ANGELESCA900561529THE AEROSPACE CORPORATIONSENIOR PROJECT ENGINEER711202020
150573C00703975SIMPSON, MARK MLONG BEACHCA908083812THE AEROSPACE CORPORATIONENGINEER7242020200
150574C00703975SIMPSON, MARK MLONG BEACHCA908083812THE AEROSPACE CORPORATIONENGINEER7292020200
191693C00703975YOUNG, KAROLYNREDONDO BEACHCA9.02771e+08THE AEROSPACE CORPORATIONENGINEER7252020250
201669C00703975STUTTERHEIM, KENNETH B.PASADENAMD2.11223e+08THE AEROSPACE CORPORATIONENGINEERING SPECIALIST7162020250
246960C00703975JAGER, AMYINDIAN HARBOUR BEACHFL329373526THE AEROSPACE CORPORATIONENGINEER727202015
404391C00193433SIMPSON, MARK M. MR.LONG BEACHCA90808THE AEROSPACE CORPORATIONENGINEER7292020200
493316C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER723202010
541906C00000935ALVAREZ, MANUELSAN PEDROCA9.07311e+08AEROSPACE CORPORATIONENGINEER723202035
547077C00000935GUNAY, DEVINLOS ANGELESCA9.00347e+08THE AEROSPACE CORPORATIONSOFTWARE ENGINEER713202040
585671C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER704202025
625961C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO8.09114e+08THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER7302020100
627176C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER726202010
920917C00010603MERRILL, ALBERT WVENICECA90291THE AEROSPACE CORPORATIONENGINEER726202025
922676C00010603ESLINGER, SUELLENREDONDO BEACHCA9.02782e+08THE AEROSPACE CORPORATIONENGINEER7262020300
934612C00010603MERRILL, ALBERT WVENICECA90291THE AEROSPACE CORPORATIONENGINEER731202025
936453C00010603MERRILL, ALBERT WVENICECA90291THE AEROSPACE CORPORATIONENGINEER731202025
938548C00010603MERRILL, ALBERT WVENICECA90291THE AEROSPACE CORPORATIONENGINEER726202012
946916C00010603BYERS, MARKSAN DIEGOCA9.21096e+08THE AEROSPACE CORPORATIONENGINEER712202065
947929C00010603FRICKS, KATHRYNGREENBELTMD2.07704e+08AEROSPACE CORPORATIONENGINEER7312020500
992812C00484642ALVAREZ, MANUELSAN PEDROCA907311416AEROSPACE CORPORATIONENGINEER724202050
1014099C00484642ALVAREZ, MANUELSAN PEDROCA907311416AEROSPACE CORPORATIONENGINEER715202075
1107728C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100
1109284C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100
1157904C00003418BAUER, SPENCER J. MR.EL SEGUNDOCA902453728AEROSPACE CORPORATIONDIRECTOR630202050
1213364C00696526HOLLANDER, SIDNEYGLENDALEAZ853180038AEROSPACE CORPORATIONENGINEER7092020250
1254043C00401224WHITE, RUSSELLFAIRFAXVA220305208THE AEROSPACE CORPORATIONSCIENTIST6302020100
1363622C00126847SMITH, DARLENECHARLESTOWNRI02813KAMAN AEROSPACE CORPORATIONVP GM AIR VEHICLES6302020100
1460070C00694323CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100
1507410C00694323CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100
\n", "
" ], "text/plain": [ " CMTE_ID NAME ... TRANSACTION_DT TRANSACTION_AMT\n", "136824 C00703975 DAVIS, LORRIE ... 7112020 20\n", "150573 C00703975 SIMPSON, MARK M ... 7242020 200\n", "150574 C00703975 SIMPSON, MARK M ... 7292020 200\n", "191693 C00703975 YOUNG, KAROLYN ... 7252020 250\n", "201669 C00703975 STUTTERHEIM, KENNETH B. ... 7162020 250\n", "246960 C00703975 JAGER, AMY ... 7272020 15\n", "404391 C00193433 SIMPSON, MARK M. MR. ... 7292020 200\n", "493316 C00075820 FARAGO, ZOLTAN L. MR. ... 7232020 10\n", "541906 C00000935 ALVAREZ, MANUEL ... 7232020 35\n", "547077 C00000935 GUNAY, DEVIN ... 7132020 40\n", "585671 C00075820 FARAGO, ZOLTAN L. MR. ... 7042020 25\n", "625961 C00075820 CINLEMIS, MICHELLE ... 7302020 100\n", "627176 C00075820 FARAGO, ZOLTAN L. MR. ... 7262020 10\n", "920917 C00010603 MERRILL, ALBERT W ... 7262020 25\n", "922676 C00010603 ESLINGER, SUELLEN ... 7262020 300\n", "934612 C00010603 MERRILL, ALBERT W ... 7312020 25\n", "936453 C00010603 MERRILL, ALBERT W ... 7312020 25\n", "938548 C00010603 MERRILL, ALBERT W ... 7262020 12\n", "946916 C00010603 BYERS, MARK ... 7122020 65\n", "947929 C00010603 FRICKS, KATHRYN ... 7312020 500\n", "992812 C00484642 ALVAREZ, MANUEL ... 7242020 50\n", "1014099 C00484642 ALVAREZ, MANUEL ... 7152020 75\n", "1107728 C00075820 CINLEMIS, MICHELLE ... 6302020 100\n", "1109284 C00075820 CINLEMIS, MICHELLE ... 6302020 100\n", "1157904 C00003418 BAUER, SPENCER J. MR. ... 6302020 50\n", "1213364 C00696526 HOLLANDER, SIDNEY ... 7092020 250\n", "1254043 C00401224 WHITE, RUSSELL ... 6302020 100\n", "1363622 C00126847 SMITH, DARLENE ... 6302020 100\n", "1460070 C00694323 CINLEMIS, MICHELLE ... 6302020 100\n", "1507410 C00694323 CINLEMIS, MICHELLE ... 
6302020 100\n", "\n", "[30 rows x 9 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 34 } ] }, { "cell_type": "code", "metadata": { "id": "n1Em1PItYDEH", "colab": { "base_uri": "https://localhost:8080/", "height": 758 }, "outputId": "6fc5d917-59ed-4b2b-8b0e-aebf68ebcfb9" }, "source": [ "# Join the Aerospace Corporation contributions (df_aero, built in the previous cell)\n", "# to df_merge on committee id. Inner join: contributions whose CMTE_ID is absent\n", "# from df_merge are dropped.\n", "df_aero_merge = pd.merge(df_aero, df_merge, on='CMTE_ID', how='inner')\n", "df_aero_merge" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTCAND_IDCAND_PTY_AFFILIATIONCAND_ELECTION_YRFEC_ELECTION_YRCMTE_TPCMTE_DSGNLINKAGE_ID
0C00703975DAVIS, LORRIELOS ANGELESCA900561529THE AEROSPACE CORPORATIONSENIOR PROJECT ENGINEER711202020P80000722DEM20202020PP227491
1C00703975SIMPSON, MARK MLONG BEACHCA908083812THE AEROSPACE CORPORATIONENGINEER7242020200P80000722DEM20202020PP227491
2C00703975SIMPSON, MARK MLONG BEACHCA908083812THE AEROSPACE CORPORATIONENGINEER7292020200P80000722DEM20202020PP227491
3C00703975YOUNG, KAROLYNREDONDO BEACHCA9.02771e+08THE AEROSPACE CORPORATIONENGINEER7252020250P80000722DEM20202020PP227491
4C00703975STUTTERHEIM, KENNETH B.PASADENAMD2.11223e+08THE AEROSPACE CORPORATIONENGINEERING SPECIALIST7162020250P80000722DEM20202020PP227491
5C00703975JAGER, AMYINDIAN HARBOUR BEACHFL329373526THE AEROSPACE CORPORATIONENGINEER727202015P80000722DEM20202020PP227491
6C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER723202010H0NY27090REP20202020YU232064
7C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER704202025H0NY27090REP20202020YU232064
8C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO8.09114e+08THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER7302020100H0NY27090REP20202020YU232064
9C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER726202010H0NY27090REP20202020YU232064
10C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100H0NY27090REP20202020YU232064
11C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100H0NY27090REP20202020YU232064
12C00696526HOLLANDER, SIDNEYGLENDALEAZ853180038AEROSPACE CORPORATIONENGINEER7092020250S0AZ00350DEM20202020SP225862
\n", "
" ], "text/plain": [ " CMTE_ID NAME ... CMTE_DSGN LINKAGE_ID\n", "0 C00703975 DAVIS, LORRIE ... P 227491\n", "1 C00703975 SIMPSON, MARK M ... P 227491\n", "2 C00703975 SIMPSON, MARK M ... P 227491\n", "3 C00703975 YOUNG, KAROLYN ... P 227491\n", "4 C00703975 STUTTERHEIM, KENNETH B. ... P 227491\n", "5 C00703975 JAGER, AMY ... P 227491\n", "6 C00075820 FARAGO, ZOLTAN L. MR. ... U 232064\n", "7 C00075820 FARAGO, ZOLTAN L. MR. ... U 232064\n", "8 C00075820 CINLEMIS, MICHELLE ... U 232064\n", "9 C00075820 FARAGO, ZOLTAN L. MR. ... U 232064\n", "10 C00075820 CINLEMIS, MICHELLE ... U 232064\n", "11 C00075820 CINLEMIS, MICHELLE ... U 232064\n", "12 C00696526 HOLLANDER, SIDNEY ... P 225862\n", "\n", "[13 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 35 } ] }, { "cell_type": "code", "metadata": { "id": "4qQlV6tnlega" }, "source": [ "CD = ['CA-37', 'CA-47', 'CA-47', 'CA-33', 'MD-03', 'FL-08', 'VA-05', 'VA-05', 'CO-05', 'VA-05', 'CO-05', 'CO-05', 'AZ-07']" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "PPqAAylhx0mj" }, "source": [ "df_aero_merge['CD'] = CD " ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Ls7gegLCyafO" }, "source": [ "df_aero_merge = df_aero_merge.drop(columns=['CAND_ELECTION_YR', 'FEC_ELECTION_YR', 'CMTE_TP', 'CMTE_DSGN'])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Sn-H_tkYzBcc", "colab": { "base_uri": "https://localhost:8080/", "height": 673 }, "outputId": "b9c7ea40-2dcf-4b44-a00b-f4b7caf32df4" }, "source": [ "df_aero_final = pd.merge(df_aero_merge, \n", " trends, \n", " on ='CD', \n", " how ='inner') \n", "df_aero_final" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTCAND_IDCAND_PTY_AFFILIATIONLINKAGE_IDCDParty
0C00703975DAVIS, LORRIELOS ANGELESCA900561529THE AEROSPACE CORPORATIONSENIOR PROJECT ENGINEER711202020P80000722DEM227491CA-37(D)
1C00703975SIMPSON, MARK MLONG BEACHCA908083812THE AEROSPACE CORPORATIONENGINEER7242020200P80000722DEM227491CA-47(D)
2C00703975SIMPSON, MARK MLONG BEACHCA908083812THE AEROSPACE CORPORATIONENGINEER7292020200P80000722DEM227491CA-47(D)
3C00703975YOUNG, KAROLYNREDONDO BEACHCA9.02771e+08THE AEROSPACE CORPORATIONENGINEER7252020250P80000722DEM227491CA-33(D)
4C00703975STUTTERHEIM, KENNETH B.PASADENAMD2.11223e+08THE AEROSPACE CORPORATIONENGINEERING SPECIALIST7162020250P80000722DEM227491MD-03(D)
5C00703975JAGER, AMYINDIAN HARBOUR BEACHFL329373526THE AEROSPACE CORPORATIONENGINEER727202015P80000722DEM227491FL-08(R)
6C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER723202010H0NY27090REP232064VA-05(R)
7C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER704202025H0NY27090REP232064VA-05(R)
8C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER726202010H0NY27090REP232064VA-05(R)
9C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO8.09114e+08THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER7302020100H0NY27090REP232064CO-05(R)
10C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100H0NY27090REP232064CO-05(R)
11C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100H0NY27090REP232064CO-05(R)
12C00696526HOLLANDER, SIDNEYGLENDALEAZ853180038AEROSPACE CORPORATIONENGINEER7092020250S0AZ00350DEM225862AZ-07(D)
\n", "
" ], "text/plain": [ " CMTE_ID NAME ... CD Party\n", "0 C00703975 DAVIS, LORRIE ... CA-37 (D)\n", "1 C00703975 SIMPSON, MARK M ... CA-47 (D)\n", "2 C00703975 SIMPSON, MARK M ... CA-47 (D)\n", "3 C00703975 YOUNG, KAROLYN ... CA-33 (D)\n", "4 C00703975 STUTTERHEIM, KENNETH B. ... MD-03 (D)\n", "5 C00703975 JAGER, AMY ... FL-08 (R)\n", "6 C00075820 FARAGO, ZOLTAN L. MR. ... VA-05 (R)\n", "7 C00075820 FARAGO, ZOLTAN L. MR. ... VA-05 (R)\n", "8 C00075820 FARAGO, ZOLTAN L. MR. ... VA-05 (R)\n", "9 C00075820 CINLEMIS, MICHELLE ... CO-05 (R)\n", "10 C00075820 CINLEMIS, MICHELLE ... CO-05 (R)\n", "11 C00075820 CINLEMIS, MICHELLE ... CO-05 (R)\n", "12 C00696526 HOLLANDER, SIDNEY ... AZ-07 (D)\n", "\n", "[13 rows x 14 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 42 } ] }, { "cell_type": "code", "metadata": { "id": "R1wEhe8Rzaa_" }, "source": [ "# Flag each contribution: 1 when CAND_PTY_AFFILIATION equals 'DEM', 0 for any other value.\n", "df_aero_final['INDEX'] = df_aero_final['CAND_PTY_AFFILIATION'].eq('DEM').astype('int64')" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Ix6G1cxzzuUA" }, "source": [ "# Same value on every row.\n", "# NOTE(review): what INDEX_BOSS is meant to encode is not stated in the notebook - confirm.\n", "df_aero_final['INDEX_BOSS'] = 1" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "dQqF_pwqldjH", "colab": { "base_uri": "https://localhost:8080/", "height": 673 }, "outputId": "4a60d88a-2393-487c-f12e-9f420683be30" }, "source": [ "# Show the frame with the two indicator columns appended.\n", "df_aero_final" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTCAND_IDCAND_PTY_AFFILIATIONLINKAGE_IDCDPartyINDEXINDEX_BOSS
0C00703975DAVIS, LORRIELOS ANGELESCA900561529THE AEROSPACE CORPORATIONSENIOR PROJECT ENGINEER711202020P80000722DEM227491CA-37(D)11
1C00703975SIMPSON, MARK MLONG BEACHCA908083812THE AEROSPACE CORPORATIONENGINEER7242020200P80000722DEM227491CA-47(D)11
2C00703975SIMPSON, MARK MLONG BEACHCA908083812THE AEROSPACE CORPORATIONENGINEER7292020200P80000722DEM227491CA-47(D)11
3C00703975YOUNG, KAROLYNREDONDO BEACHCA9.02771e+08THE AEROSPACE CORPORATIONENGINEER7252020250P80000722DEM227491CA-33(D)11
4C00703975STUTTERHEIM, KENNETH B.PASADENAMD2.11223e+08THE AEROSPACE CORPORATIONENGINEERING SPECIALIST7162020250P80000722DEM227491MD-03(D)11
5C00703975JAGER, AMYINDIAN HARBOUR BEACHFL329373526THE AEROSPACE CORPORATIONENGINEER727202015P80000722DEM227491FL-08(R)11
6C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER723202010H0NY27090REP232064VA-05(R)01
7C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER704202025H0NY27090REP232064VA-05(R)01
8C00075820FARAGO, ZOLTAN L. MR.BROAD RUNVA2.01372e+08THE AEROSPACE CORPORATIONPROJECT ENGINEER726202010H0NY27090REP232064VA-05(R)01
9C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO8.09114e+08THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER7302020100H0NY27090REP232064CO-05(R)01
10C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100H0NY27090REP232064CO-05(R)01
11C00075820CINLEMIS, MICHELLECOLORADO SPRINGSCO809113801THE AEROSPACE CORPORATIONSENIOR PROJECT LEADER6302020100H0NY27090REP232064CO-05(R)01
12C00696526HOLLANDER, SIDNEYGLENDALEAZ853180038AEROSPACE CORPORATIONENGINEER7092020250S0AZ00350DEM225862AZ-07(D)11
\n", "
" ], "text/plain": [ " CMTE_ID NAME ... INDEX INDEX_BOSS\n", "0 C00703975 DAVIS, LORRIE ... 1 1\n", "1 C00703975 SIMPSON, MARK M ... 1 1\n", "2 C00703975 SIMPSON, MARK M ... 1 1\n", "3 C00703975 YOUNG, KAROLYN ... 1 1\n", "4 C00703975 STUTTERHEIM, KENNETH B. ... 1 1\n", "5 C00703975 JAGER, AMY ... 1 1\n", "6 C00075820 FARAGO, ZOLTAN L. MR. ... 0 1\n", "7 C00075820 FARAGO, ZOLTAN L. MR. ... 0 1\n", "8 C00075820 FARAGO, ZOLTAN L. MR. ... 0 1\n", "9 C00075820 CINLEMIS, MICHELLE ... 0 1\n", "10 C00075820 CINLEMIS, MICHELLE ... 0 1\n", "11 C00075820 CINLEMIS, MICHELLE ... 0 1\n", "12 C00696526 HOLLANDER, SIDNEY ... 1 1\n", "\n", "[13 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 45 } ] }, { "cell_type": "code", "metadata": { "id": "JZdHyMWo0Pbl" }, "source": [ "# Keep only the two indicator columns used by the regression below.\n", "subset2 = df_aero_final[['INDEX','INDEX_BOSS']]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "4-m6o5ek0Pup" }, "source": [ "from sklearn.linear_model import LinearRegression" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "UQeN6gFQ0CoX", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "b5f639bc-9878-4fd2-bfec-445ecc3aba55" }, "source": [ "# Min-max scale INDEX and INDEX_BOSS, then regress the second column on the first.\n", "# NOTE(review): INDEX_BOSS was assigned the constant 1 on every row, so the target\n", "# has no variance - confirm this fit is what was intended.\n", "linear_regressor = LinearRegression()\n", "from sklearn.preprocessing import MinMaxScaler\n", "scaler1 = MinMaxScaler()\n", "scaler1.fit(subset2)\n", "inner_join_scaled=scaler1.transform(subset2)\n", "\n", "# reshape(-1, 1) turns each 1-D column into a single-column 2-D array.\n", "x = inner_join_scaled[:,0].reshape(-1,1)\n", "y = inner_join_scaled[:,1].reshape(-1,1)\n", "\n", "linear_regressor.fit(x, y)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" ] }, "metadata": { "tags": [] }, "execution_count": 48 } ] }, { "cell_type": "code", "metadata": { "id": "kdgNzW9Q0k7v" }, "source": [ "" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": 
"vqZW4AL43o0r", "colab": { "base_uri": "https://localhost:8080/", "height": 419 }, "outputId": "76681360-3984-4485-f175-2fa4f4713437" }, "source": [ "# Contributions whose EMPLOYER text contains the literal substring 'AT&T'.\n", "# regex=False: match the text as-is rather than as a regular expression.\n", "# na=False: a missing EMPLOYER counts as a non-match instead of putting NaN into\n", "# the boolean mask, which pandas refuses to index with.\n", "df4 = df_newdup[df_newdup['EMPLOYER'].str.contains('AT&T', regex=False, na=False)]\n", "df4" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMT
13062C00703975FAVARA, RICHARDFREEHOLDNJ7.72843e+07AT&TSALES708202025
13063C00703975FAVARA, RICHARDFREEHOLDNJ7.72843e+07AT&TSALES714202017
13064C00703975FAVARA, RICHARDFREEHOLDNJ7.72843e+07AT&TSALES719202017
13622C00703975EMERSON, TERRYDALLASTX7.52242e+08AT&TPROJECT MANAGER70120205
13623C00703975EMERSON, TERRYDALLASTX7.52242e+08AT&TPROJECT MANAGER716202021
..............................
1583204C00694323HERNANDEZ, JOESAN BRUNOCA940661112AT&TSPLICING TECHNICIAN630202010
1587874C00694323HERNANDEZ, JOESAN BRUNOCA940661112AT&TSPLICING TECHNICIAN630202020
1595694C00694323HERNANDEZ, JOESAN BRUNOCA940661112AT&TSPLICING TECHNICIAN630202010
1600117C00694323HERNANDEZ, JOESAN BRUNOCA940661112AT&TSPLICING TECHNICIAN630202025
1603559C00694323ORTIZ, LISARIVERSIDECA925035708AT&TPM630202035
\n", "

4226 rows × 9 columns

\n", "
" ], "text/plain": [ " CMTE_ID NAME ... TRANSACTION_DT TRANSACTION_AMT\n", "13062 C00703975 FAVARA, RICHARD ... 7082020 25\n", "13063 C00703975 FAVARA, RICHARD ... 7142020 17\n", "13064 C00703975 FAVARA, RICHARD ... 7192020 17\n", "13622 C00703975 EMERSON, TERRY ... 7012020 5\n", "13623 C00703975 EMERSON, TERRY ... 7162020 21\n", "... ... ... ... ... ...\n", "1583204 C00694323 HERNANDEZ, JOE ... 6302020 10\n", "1587874 C00694323 HERNANDEZ, JOE ... 6302020 20\n", "1595694 C00694323 HERNANDEZ, JOE ... 6302020 10\n", "1600117 C00694323 HERNANDEZ, JOE ... 6302020 25\n", "1603559 C00694323 ORTIZ, LISA ... 6302020 35\n", "\n", "[4226 rows x 9 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 16 } ] }, { "cell_type": "code", "metadata": { "id": "YvlPckei7uEA", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "757de2e5-e546-4bd7-d851-5a318d8008a0" }, "source": [ "# Inner join on CMTE_ID: only AT&T rows whose committee also appears in df_merge survive.\n", "df5 = df4.merge(df_merge, how='inner', on='CMTE_ID')\n", "df5.tail(5)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTCAND_IDCAND_PTY_AFFILIATIONCAND_ELECTION_YRFEC_ELECTION_YRCMTE_TPCMTE_DSGNLINKAGE_ID
243C00711549COLLINS, RICKLAKEWOODWA98498AT&TSALES CONSULTANT6302020100S0KY00339DEM20202020SP228669
244C00711549NURSE, CHRISROCKVILLEMD20850AT&TMANAGER6302020500S0KY00339DEM20202020SP228669
245C00666040HERNANDEZ, JOESAN BRUNOCA940661112AT&TSPLICING TECHNICIAN630202010S8AZ00221REP20202020SP224208
246C00736876BENTON, WANDETTADULUTHGA300978117AT&TNETWORK TECH630202025S0GA00559DEM20202020SP231982
247C00736876BENTON, WANDETTADULUTHGA300978117AT&TNETWORK TECH630202025S0GA00559DEM20202020SP231982
\n", "
" ], "text/plain": [ " CMTE_ID NAME CITY ... CMTE_TP CMTE_DSGN LINKAGE_ID\n", "243 C00711549 COLLINS, RICK LAKEWOOD ... S P 228669\n", "244 C00711549 NURSE, CHRIS ROCKVILLE ... S P 228669\n", "245 C00666040 HERNANDEZ, JOE SAN BRUNO ... S P 224208\n", "246 C00736876 BENTON, WANDETTA DULUTH ... S P 231982\n", "247 C00736876 BENTON, WANDETTA DULUTH ... S P 231982\n", "\n", "[5 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 19 } ] }, { "cell_type": "code", "metadata": { "id": "ptU7ClSQEgW3", "colab": { "base_uri": "https://localhost:8080/", "height": 402 }, "outputId": "add4cc5d-fe11-4bd7-ecab-c93ceb212d1a" }, "source": [ "# Contributions whose EMPLOYER text contains the literal substring 'BIOGEN'.\n", "# na=False: a missing EMPLOYER counts as a non-match instead of putting NaN into\n", "# the boolean mask, which pandas refuses to index with.\n", "# NOTE(review): this is a substring match, so other employers such as\n", "# 'PMC BIOGENIX INC.' are kept as well (they show up in the merged output).\n", "# The hand-entered CD list further down has one entry per merged row, so\n", "# tightening this filter means updating that list too.\n", "df_biogen = df_newdup[df_newdup['EMPLOYER'].str.contains('BIOGEN', regex=False, na=False)]\n", "df_biogen" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMT
71874C00703975MARX, ISAACARLINGTONMA2.47438e+07BIOGENCHEMIST7182020250
125267C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY729202050
125819C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY715202050
125820C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY722202050
128132C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY708202050
..............................
1576084C00694323CHECKAN, RICHARDFUQUAY VARINANC275267624BIOGENENGINEERING63020205
1582011C00694323CHECKAN, RICHARDFUQUAY VARINANC275267624BIOGENENGINEERING63020201
1591754C00694323CHECKAN, RICHARDFUQUAY VARINANC275267624BIOGENENGINEERING63020201
1600172C00694323CHECKAN, RICHARDFUQUAY VARINANC275267624BIOGENENGINEERING63020201
1603569C00694323CHECKAN, RICHARDFUQUAY VARINANC275267624BIOGENENGINEERING63020201
\n", "

98 rows × 9 columns

\n", "
" ], "text/plain": [ " CMTE_ID NAME ... TRANSACTION_DT TRANSACTION_AMT\n", "71874 C00703975 MARX, ISAAC ... 7182020 250\n", "125267 C00703975 EDMONDSON, FRAZOR ... 7292020 50\n", "125819 C00703975 EDMONDSON, FRAZOR ... 7152020 50\n", "125820 C00703975 EDMONDSON, FRAZOR ... 7222020 50\n", "128132 C00703975 EDMONDSON, FRAZOR ... 7082020 50\n", "... ... ... ... ... ...\n", "1576084 C00694323 CHECKAN, RICHARD ... 6302020 5\n", "1582011 C00694323 CHECKAN, RICHARD ... 6302020 1\n", "1591754 C00694323 CHECKAN, RICHARD ... 6302020 1\n", "1600172 C00694323 CHECKAN, RICHARD ... 6302020 1\n", "1603569 C00694323 CHECKAN, RICHARD ... 6302020 1\n", "\n", "[98 rows x 9 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 21 } ] }, { "cell_type": "code", "metadata": { "id": "gZz7kaXIEfwZ", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "18a45932-d5ca-4591-824b-41403a07cdf3" }, "source": [ "# Inner join on CMTE_ID: only BIOGEN rows whose committee also appears in df_merge survive.\n", "df6 = df_biogen.merge(df_merge, how='inner', on='CMTE_ID')\n", "df6" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTCAND_IDCAND_PTY_AFFILIATIONCAND_ELECTION_YRFEC_ELECTION_YRCMTE_TPCMTE_DSGNLINKAGE_ID
0C00703975MARX, ISAACARLINGTONMA2.47438e+07BIOGENCHEMIST7182020250P80000722DEM20202020PP227491
1C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY729202050P80000722DEM20202020PP227491
2C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY715202050P80000722DEM20202020PP227491
3C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY722202050P80000722DEM20202020PP227491
4C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY708202050P80000722DEM20202020PP227491
5C00703975DILLEY, ANNEARLINGTONMA2.47648e+07BIOGENEPIDEMIOLOGIST705202050P80000722DEM20202020PP227491
6C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY730202050P80000722DEM20202020PP227491
7C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY718202050P80000722DEM20202020PP227491
8C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY725202050P80000722DEM20202020PP227491
9C00703975VANDER STOEP, STEPHENBOSTONMA2.12925e+07BIOGENATTORNEY7172020100P80000722DEM20202020PP227491
10C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE717202020P80000722DEM20202020PP227491
11C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY704202050P80000722DEM20202020PP227491
12C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY711202050P80000722DEM20202020PP227491
13C00703975LYKINS, JIMDUPONTWA9.83277e+08BIOGENSALES728202050P80000722DEM20202020PP227491
14C00703975MALDONADO, REBECCASAN ANTONIOTX782491598BIOGENSALES7032020150P80000722DEM20202020PP227491
15C00703975HOWE, MICHAELCANTONMA2.02116e+07BIOGENATTORNEY7162020100P80000722DEM20202020PP227491
16C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY630202050P80000722DEM20202020PP227491
17C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE630202015P80000722DEM20202020PP227491
18C00727149GATES, CYNTHIAROSLINDALEMA2131BIOGENREGULATORY MEDICAL WRITER8052020100H0MA08045DEM20202020HP230605
19C00727149GATES, CYNTHIAROSLINDALEMA02131BIOGENREGULATORY MEDICAL WRITER6302020100H0MA08045DEM20202020HP230605
20C00745687GRIFFITH, LISACAMBRIDGEMA021394369BIOGENMARKETING7132020500H0MA04267DEM20202020HP233009
21C00196774LOVEDAY, KENNETH S.BROOKLINEMA024465827BIOGENBIOLOGIST8042020250S4MA00028DEM20202020SP222822
22C00666149LOVEDAY, KENNETH SBROOKLINEMA024465827BIOGEN INCDIRECTOR6302020500H8NM02248DEM20202020HP223821
23C00500843FLANNELLY-KING, SHANESOMERVILLEMA2.14421e+07BIOGEN IDECBUSINESS ANALYST6302020250S2MA00170DEM20242020SP222817
24C00649376NEWLAND, BART G.BELMONTMA2.4784e+07BIOGEN INCATTORNEY6302020100H8GA07201DEM20202020HP224868
25C00701599SEGAL, KATEBATTLE CREEKMI4.90159e+08BIOGENGOVERNMENT AFFAIRS6302020500H0MI06152DEM20202020HP227095
\n", "
" ], "text/plain": [ " CMTE_ID NAME ... CMTE_DSGN LINKAGE_ID\n", "0 C00703975 MARX, ISAAC ... P 227491\n", "1 C00703975 EDMONDSON, FRAZOR ... P 227491\n", "2 C00703975 EDMONDSON, FRAZOR ... P 227491\n", "3 C00703975 EDMONDSON, FRAZOR ... P 227491\n", "4 C00703975 EDMONDSON, FRAZOR ... P 227491\n", "5 C00703975 DILLEY, ANNE ... P 227491\n", "6 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "7 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "8 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "9 C00703975 VANDER STOEP, STEPHEN ... P 227491\n", "10 C00703975 THOMAS, DONNA ... P 227491\n", "11 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "12 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "13 C00703975 LYKINS, JIM ... P 227491\n", "14 C00703975 MALDONADO, REBECCA ... P 227491\n", "15 C00703975 HOWE, MICHAEL ... P 227491\n", "16 C00703975 SMIRNAKIS, KAREN ... P 227491\n", "17 C00703975 THOMAS, DONNA ... P 227491\n", "18 C00727149 GATES, CYNTHIA ... P 230605\n", "19 C00727149 GATES, CYNTHIA ... P 230605\n", "20 C00745687 GRIFFITH, LISA ... P 233009\n", "21 C00196774 LOVEDAY, KENNETH S. ... P 222822\n", "22 C00666149 LOVEDAY, KENNETH S ... P 223821\n", "23 C00500843 FLANNELLY-KING, SHANE ... P 222817\n", "24 C00649376 NEWLAND, BART G. ... P 224868\n", "25 C00701599 SEGAL, KATE ... 
P 227095\n", "\n", "[26 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 22 } ] }, { "cell_type": "code", "metadata": { "id": "tYTdUPrlFKJn", "colab": { "base_uri": "https://localhost:8080/", "height": 50 }, "outputId": "d5fbed25-b24a-452c-8a0f-caa683c07d94" }, "source": [ "# Transaction dates of rows whose OCCUPATION contains 'DIRECTOR'.\n", "# na=False keeps a missing occupation from putting NaN into the boolean mask;\n", "# .loc selects rows and column in one step instead of chained indexing.\n", "df6.loc[df6['OCCUPATION'].str.contains('DIRECTOR', regex=False, na=False), 'TRANSACTION_DT']" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "22 6302020\n", "Name: TRANSACTION_DT, dtype: int64" ] }, "metadata": { "tags": [] }, "execution_count": 31 } ] }, { "cell_type": "code", "metadata": { "id": "4nct8sKi6ZRo", "colab": { "base_uri": "https://localhost:8080/", "height": 134 }, "outputId": "531fd6c2-e159-4311-98b3-a6c5766583dc" }, "source": [ "# Transaction dates of rows whose OCCUPATION contains 'VP' (same handling as the DIRECTOR filter).\n", "df6.loc[df6['OCCUPATION'].str.contains('VP', regex=False, na=False), 'TRANSACTION_DT']" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "6 7302020\n", "7 7182020\n", "8 7252020\n", "11 7042020\n", "12 7112020\n", "16 6302020\n", "Name: TRANSACTION_DT, dtype: int64" ] }, "metadata": { "tags": [] }, "execution_count": 32 } ] }, { "cell_type": "code", "metadata": { "id": "TM9ctAKSHBGV" }, "source": [ "# Hand-entered CD code for each df6 row, listed in df6 row order (one entry per row).\n", "CD = ['MA-05', 'MA-03', 'MA-03', 'MA-03', 'MA-03', 'MA-05', 'MA-05', 'MA-05', 'MA-05', 'MA-07', 'TN-09', 'MA-05', 'MA-05', 'WA-10', 'TX-20', 'MA-08', 'MA-05', 'TN-09', 'MA-07', 'MA-07', 'MA-05', 'MA-04', 'MA-04', 'MA-07', 'MA-05', 'MI-03']" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "uqfKk_S8Hgr_" }, "source": [ "# Positional assignment: entry i of CD goes to row i of df6, and pandas raises\n", "# ValueError when the lengths differ. Any change to the rows or row order of df6\n", "# requires re-entering the CD list.\n", "df6['CD'] = CD" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "eJkDmYpJ6ZBr", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "deaf95cb-90c6-4a6e-86bd-6f9a89bd20e4" }, "source": [ "# Drop the election-year and committee type/designation columns; df6 itself is left unchanged.\n", "df7 = df6.drop(columns=['CAND_ELECTION_YR', 'FEC_ELECTION_YR', 'CMTE_TP', 'CMTE_DSGN'])\n", "df7" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", 
"data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTCAND_IDCAND_PTY_AFFILIATIONLINKAGE_IDCD
0C00703975MARX, ISAACARLINGTONMA2.47438e+07BIOGENCHEMIST7182020250P80000722DEM227491MA-05
1C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY729202050P80000722DEM227491MA-03
2C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY715202050P80000722DEM227491MA-03
3C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY722202050P80000722DEM227491MA-03
4C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY708202050P80000722DEM227491MA-03
5C00703975DILLEY, ANNEARLINGTONMA2.47648e+07BIOGENEPIDEMIOLOGIST705202050P80000722DEM227491MA-05
6C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY730202050P80000722DEM227491MA-05
7C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY718202050P80000722DEM227491MA-05
8C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY725202050P80000722DEM227491MA-05
9C00703975VANDER STOEP, STEPHENBOSTONMA2.12925e+07BIOGENATTORNEY7172020100P80000722DEM227491MA-07
10C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE717202020P80000722DEM227491TN-09
11C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY704202050P80000722DEM227491MA-05
12C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY711202050P80000722DEM227491MA-05
13C00703975LYKINS, JIMDUPONTWA9.83277e+08BIOGENSALES728202050P80000722DEM227491WA-10
14C00703975MALDONADO, REBECCASAN ANTONIOTX782491598BIOGENSALES7032020150P80000722DEM227491TX-20
15C00703975HOWE, MICHAELCANTONMA2.02116e+07BIOGENATTORNEY7162020100P80000722DEM227491MA-08
16C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY630202050P80000722DEM227491MA-05
17C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE630202015P80000722DEM227491TN-09
18C00727149GATES, CYNTHIAROSLINDALEMA2131BIOGENREGULATORY MEDICAL WRITER8052020100H0MA08045DEM230605MA-07
19C00727149GATES, CYNTHIAROSLINDALEMA02131BIOGENREGULATORY MEDICAL WRITER6302020100H0MA08045DEM230605MA-07
20C00745687GRIFFITH, LISACAMBRIDGEMA021394369BIOGENMARKETING7132020500H0MA04267DEM233009MA-05
21C00196774LOVEDAY, KENNETH S.BROOKLINEMA024465827BIOGENBIOLOGIST8042020250S4MA00028DEM222822MA-04
22C00666149LOVEDAY, KENNETH SBROOKLINEMA024465827BIOGEN INCDIRECTOR6302020500H8NM02248DEM223821MA-04
23C00500843FLANNELLY-KING, SHANESOMERVILLEMA2.14421e+07BIOGEN IDECBUSINESS ANALYST6302020250S2MA00170DEM222817MA-07
24C00649376NEWLAND, BART G.BELMONTMA2.4784e+07BIOGEN INCATTORNEY6302020100H8GA07201DEM224868MA-05
25C00701599SEGAL, KATEBATTLE CREEKMI4.90159e+08BIOGENGOVERNMENT AFFAIRS6302020500H0MI06152DEM227095MI-03
\n", "
" ], "text/plain": [ " CMTE_ID NAME ... LINKAGE_ID CD\n", "0 C00703975 MARX, ISAAC ... 227491 MA-05\n", "1 C00703975 EDMONDSON, FRAZOR ... 227491 MA-03\n", "2 C00703975 EDMONDSON, FRAZOR ... 227491 MA-03\n", "3 C00703975 EDMONDSON, FRAZOR ... 227491 MA-03\n", "4 C00703975 EDMONDSON, FRAZOR ... 227491 MA-03\n", "5 C00703975 DILLEY, ANNE ... 227491 MA-05\n", "6 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "7 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "8 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "9 C00703975 VANDER STOEP, STEPHEN ... 227491 MA-07\n", "10 C00703975 THOMAS, DONNA ... 227491 TN-09\n", "11 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "12 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "13 C00703975 LYKINS, JIM ... 227491 WA-10\n", "14 C00703975 MALDONADO, REBECCA ... 227491 TX-20\n", "15 C00703975 HOWE, MICHAEL ... 227491 MA-08\n", "16 C00703975 SMIRNAKIS, KAREN ... 227491 MA-05\n", "17 C00703975 THOMAS, DONNA ... 227491 TN-09\n", "18 C00727149 GATES, CYNTHIA ... 230605 MA-07\n", "19 C00727149 GATES, CYNTHIA ... 230605 MA-07\n", "20 C00745687 GRIFFITH, LISA ... 233009 MA-05\n", "21 C00196774 LOVEDAY, KENNETH S. ... 222822 MA-04\n", "22 C00666149 LOVEDAY, KENNETH S ... 223821 MA-04\n", "23 C00500843 FLANNELLY-KING, SHANE ... 222817 MA-07\n", "24 C00649376 NEWLAND, BART G. ... 224868 MA-05\n", "25 C00701599 SEGAL, KATE ... 227095 MI-03\n", "\n", "[26 rows x 13 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 42 } ] }, { "cell_type": "code", "metadata": { "id": "Mw4bIeWOOoaE" }, "source": [ "trends = pd.read_excel(data_dir+'/CD_trends.xlsx')" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "VMqR9ED6OoOW", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "6b45fe0d-9ae7-49d0-e8fc-0febc8350755" }, "source": [ "trends.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CDParty
0AK-AL(R)
1AL-01(R)
2AL-02(R)
3AL-03(R)
4AL-04(R)
\n", "
" ], "text/plain": [ " CD Party\n", "0 AK-AL (R)\n", "1 AL-01 (R)\n", "2 AL-02 (R)\n", "3 AL-03 (R)\n", "4 AL-04 (R)" ] }, "metadata": { "tags": [] }, "execution_count": 40 } ] }, { "cell_type": "code", "metadata": { "id": "qg5DNqhaGyzR", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "d37d3813-eaeb-4d10-b202-c12a21ec5f6e" }, "source": [ "inner_join = pd.merge(df7, \n", " trends, \n", " on ='CD', \n", " how ='inner') \n", "inner_join " ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTCAND_IDCAND_PTY_AFFILIATIONLINKAGE_IDCDParty
0C00703975MARX, ISAACARLINGTONMA2.47438e+07BIOGENCHEMIST7182020250P80000722DEM227491MA-05(D)
1C00703975DILLEY, ANNEARLINGTONMA2.47648e+07BIOGENEPIDEMIOLOGIST705202050P80000722DEM227491MA-05(D)
2C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY730202050P80000722DEM227491MA-05(D)
3C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY718202050P80000722DEM227491MA-05(D)
4C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY725202050P80000722DEM227491MA-05(D)
5C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY704202050P80000722DEM227491MA-05(D)
6C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY711202050P80000722DEM227491MA-05(D)
7C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY630202050P80000722DEM227491MA-05(D)
8C00745687GRIFFITH, LISACAMBRIDGEMA021394369BIOGENMARKETING7132020500H0MA04267DEM233009MA-05(D)
9C00649376NEWLAND, BART G.BELMONTMA2.4784e+07BIOGEN INCATTORNEY6302020100H8GA07201DEM224868MA-05(D)
10C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY729202050P80000722DEM227491MA-03(D)
11C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY715202050P80000722DEM227491MA-03(D)
12C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY722202050P80000722DEM227491MA-03(D)
13C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY708202050P80000722DEM227491MA-03(D)
14C00703975VANDER STOEP, STEPHENBOSTONMA2.12925e+07BIOGENATTORNEY7172020100P80000722DEM227491MA-07(D)
15C00727149GATES, CYNTHIAROSLINDALEMA2131BIOGENREGULATORY MEDICAL WRITER8052020100H0MA08045DEM230605MA-07(D)
16C00727149GATES, CYNTHIAROSLINDALEMA02131BIOGENREGULATORY MEDICAL WRITER6302020100H0MA08045DEM230605MA-07(D)
17C00500843FLANNELLY-KING, SHANESOMERVILLEMA2.14421e+07BIOGEN IDECBUSINESS ANALYST6302020250S2MA00170DEM222817MA-07(D)
18C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE717202020P80000722DEM227491TN-09(D)
19C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE630202015P80000722DEM227491TN-09(D)
20C00703975LYKINS, JIMDUPONTWA9.83277e+08BIOGENSALES728202050P80000722DEM227491WA-10(D)
21C00703975MALDONADO, REBECCASAN ANTONIOTX782491598BIOGENSALES7032020150P80000722DEM227491TX-20(D)
22C00703975HOWE, MICHAELCANTONMA2.02116e+07BIOGENATTORNEY7162020100P80000722DEM227491MA-08(D)
23C00196774LOVEDAY, KENNETH S.BROOKLINEMA024465827BIOGENBIOLOGIST8042020250S4MA00028DEM222822MA-04(D)
24C00666149LOVEDAY, KENNETH SBROOKLINEMA024465827BIOGEN INCDIRECTOR6302020500H8NM02248DEM223821MA-04(D)
25C00701599SEGAL, KATEBATTLE CREEKMI4.90159e+08BIOGENGOVERNMENT AFFAIRS6302020500H0MI06152DEM227095MI-03(L)
\n", "
" ], "text/plain": [ " CMTE_ID NAME CITY ... LINKAGE_ID CD Party\n", "0 C00703975 MARX, ISAAC ARLINGTON ... 227491 MA-05 (D)\n", "1 C00703975 DILLEY, ANNE ARLINGTON ... 227491 MA-05 (D)\n", "2 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "3 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "4 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "5 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "6 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "7 C00703975 SMIRNAKIS, KAREN WESTON ... 227491 MA-05 (D)\n", "8 C00745687 GRIFFITH, LISA CAMBRIDGE ... 233009 MA-05 (D)\n", "9 C00649376 NEWLAND, BART G. BELMONT ... 224868 MA-05 (D)\n", "10 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... 227491 MA-03 (D)\n", "11 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... 227491 MA-03 (D)\n", "12 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... 227491 MA-03 (D)\n", "13 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... 227491 MA-03 (D)\n", "14 C00703975 VANDER STOEP, STEPHEN BOSTON ... 227491 MA-07 (D)\n", "15 C00727149 GATES, CYNTHIA ROSLINDALE ... 230605 MA-07 (D)\n", "16 C00727149 GATES, CYNTHIA ROSLINDALE ... 230605 MA-07 (D)\n", "17 C00500843 FLANNELLY-KING, SHANE SOMERVILLE ... 222817 MA-07 (D)\n", "18 C00703975 THOMAS, DONNA MEMPHIS ... 227491 TN-09 (D)\n", "19 C00703975 THOMAS, DONNA MEMPHIS ... 227491 TN-09 (D)\n", "20 C00703975 LYKINS, JIM DUPONT ... 227491 WA-10 (D)\n", "21 C00703975 MALDONADO, REBECCA SAN ANTONIO ... 227491 TX-20 (D)\n", "22 C00703975 HOWE, MICHAEL CANTON ... 227491 MA-08 (D)\n", "23 C00196774 LOVEDAY, KENNETH S. BROOKLINE ... 222822 MA-04 (D)\n", "24 C00666149 LOVEDAY, KENNETH S BROOKLINE ... 223821 MA-04 (D)\n", "25 C00701599 SEGAL, KATE BATTLE CREEK ... 
227095 MI-03 (L)\n", "\n", "[26 rows x 14 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 44 } ] }, { "cell_type": "code", "metadata": { "id": "a2v3lRkYTjag" }, "source": [ "inner_join['INDEX']= [1 if x =='DEM' else 0 for x in inner_join['CAND_PTY_AFFILIATION']] \n", " " ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Objszz9NSHe1", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "a33bc488-4ac4-42af-c4a1-bfeeda4caad7" }, "source": [ "inner_join" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTCAND_IDCAND_PTY_AFFILIATIONLINKAGE_IDCDPartyINDEX
0C00703975MARX, ISAACARLINGTONMA2.47438e+07BIOGENCHEMIST7182020250P80000722DEM227491MA-05(D)1
1C00703975DILLEY, ANNEARLINGTONMA2.47648e+07BIOGENEPIDEMIOLOGIST705202050P80000722DEM227491MA-05(D)1
2C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY730202050P80000722DEM227491MA-05(D)1
3C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY718202050P80000722DEM227491MA-05(D)1
4C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY725202050P80000722DEM227491MA-05(D)1
5C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY704202050P80000722DEM227491MA-05(D)1
6C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY711202050P80000722DEM227491MA-05(D)1
7C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY630202050P80000722DEM227491MA-05(D)1
8C00745687GRIFFITH, LISACAMBRIDGEMA021394369BIOGENMARKETING7132020500H0MA04267DEM233009MA-05(D)1
9C00649376NEWLAND, BART G.BELMONTMA2.4784e+07BIOGEN INCATTORNEY6302020100H8GA07201DEM224868MA-05(D)1
10C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY729202050P80000722DEM227491MA-03(D)1
11C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY715202050P80000722DEM227491MA-03(D)1
12C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY722202050P80000722DEM227491MA-03(D)1
13C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY708202050P80000722DEM227491MA-03(D)1
14C00703975VANDER STOEP, STEPHENBOSTONMA2.12925e+07BIOGENATTORNEY7172020100P80000722DEM227491MA-07(D)1
15C00727149GATES, CYNTHIAROSLINDALEMA2131BIOGENREGULATORY MEDICAL WRITER8052020100H0MA08045DEM230605MA-07(D)1
16C00727149GATES, CYNTHIAROSLINDALEMA02131BIOGENREGULATORY MEDICAL WRITER6302020100H0MA08045DEM230605MA-07(D)1
17C00500843FLANNELLY-KING, SHANESOMERVILLEMA2.14421e+07BIOGEN IDECBUSINESS ANALYST6302020250S2MA00170DEM222817MA-07(D)1
18C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE717202020P80000722DEM227491TN-09(D)1
19C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE630202015P80000722DEM227491TN-09(D)1
20C00703975LYKINS, JIMDUPONTWA9.83277e+08BIOGENSALES728202050P80000722DEM227491WA-10(D)1
21C00703975MALDONADO, REBECCASAN ANTONIOTX782491598BIOGENSALES7032020150P80000722DEM227491TX-20(D)1
22C00703975HOWE, MICHAELCANTONMA2.02116e+07BIOGENATTORNEY7162020100P80000722DEM227491MA-08(D)1
23C00196774LOVEDAY, KENNETH S.BROOKLINEMA024465827BIOGENBIOLOGIST8042020250S4MA00028DEM222822MA-04(D)1
24C00666149LOVEDAY, KENNETH SBROOKLINEMA024465827BIOGEN INCDIRECTOR6302020500H8NM02248DEM223821MA-04(D)1
25C00701599SEGAL, KATEBATTLE CREEKMI4.90159e+08BIOGENGOVERNMENT AFFAIRS6302020500H0MI06152DEM227095MI-03(L)1
\n", "
" ], "text/plain": [ " CMTE_ID NAME CITY ... CD Party INDEX\n", "0 C00703975 MARX, ISAAC ARLINGTON ... MA-05 (D) 1\n", "1 C00703975 DILLEY, ANNE ARLINGTON ... MA-05 (D) 1\n", "2 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "3 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "4 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "5 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "6 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "7 C00703975 SMIRNAKIS, KAREN WESTON ... MA-05 (D) 1\n", "8 C00745687 GRIFFITH, LISA CAMBRIDGE ... MA-05 (D) 1\n", "9 C00649376 NEWLAND, BART G. BELMONT ... MA-05 (D) 1\n", "10 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... MA-03 (D) 1\n", "11 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... MA-03 (D) 1\n", "12 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... MA-03 (D) 1\n", "13 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... MA-03 (D) 1\n", "14 C00703975 VANDER STOEP, STEPHEN BOSTON ... MA-07 (D) 1\n", "15 C00727149 GATES, CYNTHIA ROSLINDALE ... MA-07 (D) 1\n", "16 C00727149 GATES, CYNTHIA ROSLINDALE ... MA-07 (D) 1\n", "17 C00500843 FLANNELLY-KING, SHANE SOMERVILLE ... MA-07 (D) 1\n", "18 C00703975 THOMAS, DONNA MEMPHIS ... TN-09 (D) 1\n", "19 C00703975 THOMAS, DONNA MEMPHIS ... TN-09 (D) 1\n", "20 C00703975 LYKINS, JIM DUPONT ... WA-10 (D) 1\n", "21 C00703975 MALDONADO, REBECCA SAN ANTONIO ... TX-20 (D) 1\n", "22 C00703975 HOWE, MICHAEL CANTON ... MA-08 (D) 1\n", "23 C00196774 LOVEDAY, KENNETH S. BROOKLINE ... MA-04 (D) 1\n", "24 C00666149 LOVEDAY, KENNETH S BROOKLINE ... MA-04 (D) 1\n", "25 C00701599 SEGAL, KATE BATTLE CREEK ... 
MI-03 (L) 1\n", "\n", "[26 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 47 } ] }, { "cell_type": "code", "metadata": { "id": "XL7glHAoSHM3" }, "source": [ "inner_join['INDEX_BOSS'] = 1" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "u2BIIZl3RYg1", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "a0f10c04-3eb4-4294-a054-bb8f4275471e" }, "source": [ "inner_join" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTCAND_IDCAND_PTY_AFFILIATIONLINKAGE_IDCDPartyINDEXINDEX_BOSS
0C00703975MARX, ISAACARLINGTONMA2.47438e+07BIOGENCHEMIST7182020250P80000722DEM227491MA-05(D)11
1C00703975DILLEY, ANNEARLINGTONMA2.47648e+07BIOGENEPIDEMIOLOGIST705202050P80000722DEM227491MA-05(D)11
2C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY730202050P80000722DEM227491MA-05(D)11
3C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY718202050P80000722DEM227491MA-05(D)11
4C00703975SMIRNAKIS, KARENWESTONMA24931439BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY725202050P80000722DEM227491MA-05(D)11
5C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY704202050P80000722DEM227491MA-05(D)11
6C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY711202050P80000722DEM227491MA-05(D)11
7C00703975SMIRNAKIS, KARENWESTONMA2.49314e+07BIOGENVP HEAD OF GLOBAL MEDICAL SAFETY630202050P80000722DEM227491MA-05(D)11
8C00745687GRIFFITH, LISACAMBRIDGEMA021394369BIOGENMARKETING7132020500H0MA04267DEM233009MA-05(D)11
9C00649376NEWLAND, BART G.BELMONTMA2.4784e+07BIOGEN INCATTORNEY6302020100H8GA07201DEM224868MA-05(D)11
10C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY729202050P80000722DEM227491MA-03(D)11
11C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY715202050P80000722DEM227491MA-03(D)11
12C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY722202050P80000722DEM227491MA-03(D)11
13C00703975EDMONDSON, FRAZORMARLBOROUGHMA1.75267e+07BIOGENATTORNEY708202050P80000722DEM227491MA-03(D)11
14C00703975VANDER STOEP, STEPHENBOSTONMA2.12925e+07BIOGENATTORNEY7172020100P80000722DEM227491MA-07(D)11
15C00727149GATES, CYNTHIAROSLINDALEMA2131BIOGENREGULATORY MEDICAL WRITER8052020100H0MA08045DEM230605MA-07(D)11
16C00727149GATES, CYNTHIAROSLINDALEMA02131BIOGENREGULATORY MEDICAL WRITER6302020100H0MA08045DEM230605MA-07(D)11
17C00500843FLANNELLY-KING, SHANESOMERVILLEMA2.14421e+07BIOGEN IDECBUSINESS ANALYST6302020250S2MA00170DEM222817MA-07(D)11
18C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE717202020P80000722DEM227491TN-09(D)11
19C00703975THOMAS, DONNAMEMPHISTN3.81155e+08PMC BIOGENIX INC.CUSTOMER SERVICE630202015P80000722DEM227491TN-09(D)11
20C00703975LYKINS, JIMDUPONTWA9.83277e+08BIOGENSALES728202050P80000722DEM227491WA-10(D)11
21C00703975MALDONADO, REBECCASAN ANTONIOTX782491598BIOGENSALES7032020150P80000722DEM227491TX-20(D)11
22C00703975HOWE, MICHAELCANTONMA2.02116e+07BIOGENATTORNEY7162020100P80000722DEM227491MA-08(D)11
23C00196774LOVEDAY, KENNETH S.BROOKLINEMA024465827BIOGENBIOLOGIST8042020250S4MA00028DEM222822MA-04(D)11
24C00666149LOVEDAY, KENNETH SBROOKLINEMA024465827BIOGEN INCDIRECTOR6302020500H8NM02248DEM223821MA-04(D)11
25C00701599SEGAL, KATEBATTLE CREEKMI4.90159e+08BIOGENGOVERNMENT AFFAIRS6302020500H0MI06152DEM227095MI-03(L)11
\n", "
" ], "text/plain": [ " CMTE_ID NAME CITY ... Party INDEX INDEX_BOSS\n", "0 C00703975 MARX, ISAAC ARLINGTON ... (D) 1 1\n", "1 C00703975 DILLEY, ANNE ARLINGTON ... (D) 1 1\n", "2 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "3 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "4 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "5 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "6 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "7 C00703975 SMIRNAKIS, KAREN WESTON ... (D) 1 1\n", "8 C00745687 GRIFFITH, LISA CAMBRIDGE ... (D) 1 1\n", "9 C00649376 NEWLAND, BART G. BELMONT ... (D) 1 1\n", "10 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... (D) 1 1\n", "11 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... (D) 1 1\n", "12 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... (D) 1 1\n", "13 C00703975 EDMONDSON, FRAZOR MARLBOROUGH ... (D) 1 1\n", "14 C00703975 VANDER STOEP, STEPHEN BOSTON ... (D) 1 1\n", "15 C00727149 GATES, CYNTHIA ROSLINDALE ... (D) 1 1\n", "16 C00727149 GATES, CYNTHIA ROSLINDALE ... (D) 1 1\n", "17 C00500843 FLANNELLY-KING, SHANE SOMERVILLE ... (D) 1 1\n", "18 C00703975 THOMAS, DONNA MEMPHIS ... (D) 1 1\n", "19 C00703975 THOMAS, DONNA MEMPHIS ... (D) 1 1\n", "20 C00703975 LYKINS, JIM DUPONT ... (D) 1 1\n", "21 C00703975 MALDONADO, REBECCA SAN ANTONIO ... (D) 1 1\n", "22 C00703975 HOWE, MICHAEL CANTON ... (D) 1 1\n", "23 C00196774 LOVEDAY, KENNETH S. BROOKLINE ... (D) 1 1\n", "24 C00666149 LOVEDAY, KENNETH S BROOKLINE ... (D) 1 1\n", "25 C00701599 SEGAL, KATE BATTLE CREEK ... 
(L) 1 1\n", "\n", "[26 rows x 16 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 49 } ] }, { "cell_type": "code", "metadata": { "id": "rW1ZZvZwYZ2s" }, "source": [ "subset2 = inner_join[['INDEX','INDEX_BOSS']]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "M7vLCa_-dmhg" }, "source": [ "from sklearn.linear_model import LinearRegression" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "apb76xJfYP-w", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "bd90bb55-85ee-4d15-b585-3c3b1406d16c" }, "source": [ "linear_regressor = LinearRegression()\n", "from sklearn.preprocessing import MinMaxScaler\n", "scaler1 = MinMaxScaler()\n", "scaler1.fit(subset2)\n", "inner_join_scaled=scaler1.transform(subset2)\n", "\n", "x = inner_join_scaled[:,0].reshape(-1,1)\n", "y = inner_join_scaled[:,1].reshape(-1,1)\n", "\n", "linear_regressor.fit(x, y)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" ] }, "metadata": { "tags": [] }, "execution_count": 55 } ] }, { "cell_type": "code", "metadata": { "id": "Q33krPq74eVs" }, "source": [ "" ], "execution_count": null, "outputs": [] } ] }