{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Week2_Assignment.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [
 { "cell_type": "markdown", "metadata": { "id": "qn8CLGQ1U_sw" }, "source": [ "\n", "# Assignment 1 Part 1: Graph Class" ] },
 { "cell_type": "code", "metadata": { "id": "00cUFtx6Sixf", "colab": { "base_uri": "https://localhost:8080/", "height": 50 }, "outputId": "be253fd7-1337-4755-e4f3-f66b042c2b79" }, "source": [
  "graph_elements = {\n",
  "    \"Apple\": [\"Banana\", \"Cherry\"],\n",
  "    \"Banana\": [\"Apple\", \"Durian\"],\n",
  "    \"Cherry\": [\"Apple\", \"Durian\"],\n",
  "    \"Durian\": [\"Orange\"],\n",
  "    \"Orange\": [\"Durian\"],\n",
  "}\n",
  "\n",
  "\n",
  "class graph:\n",
  "    \"\"\"Undirected graph stored as an adjacency mapping {vertex: [neighbours]}.\"\"\"\n",
  "\n",
  "    def __init__(self, value=None):\n",
  "        # An omitted mapping means an empty graph, so the accessors below never hit None.\n",
  "        self.value = {} if value is None else value\n",
  "\n",
  "    def getVertices(self):\n",
  "        \"\"\"Return the vertices (keys of the adjacency mapping) as a list.\"\"\"\n",
  "        return list(self.value)\n",
  "\n",
  "    def getEdges(self):\n",
  "        \"\"\"Return every edge once, as a list of sets (alias of edges()).\"\"\"\n",
  "        return self.edges()\n",
  "\n",
  "    def edges(self):\n",
  "        \"\"\"Collect each {vertex, neighbour} pair, skipping pairs already recorded.\"\"\"\n",
  "        edgelist = []\n",
  "        for vertex, neighbours in self.value.items():\n",
  "            for neighbour in neighbours:\n",
  "                edge = {vertex, neighbour}\n",
  "                # Sets compare unordered, so A-B and B-A count as the same edge.\n",
  "                if edge not in edgelist:\n",
  "                    edgelist.append(edge)\n",
  "        return edgelist\n",
  "\n",
  "\n",
  "g = graph(graph_elements)\n",
  "\n",
  "print(g.getVertices())\n",
  "print(g.getEdges())"
 ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "['Apple', 'Banana', 'Cherry', 'Durian', 'Orange']\n", "[{'Apple', 'Banana'}, {'Cherry', 'Apple'}, {'Durian', 'Banana'}, {'Cherry', 'Durian'}, {'Orange', 'Durian'}]\n" ], "name": "stdout" } ] },
 { "cell_type": "markdown", "metadata": { "id": "QzmgYXn5VVk1" }, "source": [ "## Depth-First Traversal of a Graph" ] },
 { "cell_type": "code", "metadata": { "id": "4wh5icuWSkt8", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "37d45899-4560-4535-807f-0f8713b8cec1" }, "source": [
  "def depth_first_search(graph, start, visited=None):\n",
  "    \"\"\"Return the set of vertices reachable from `start`.\n",
  "\n",
  "    `graph` maps each vertex to an iterable of its neighbours; `visited` is the\n",
  "    accumulator shared by the recursive calls (leave it unset when calling).\n",
  "    \"\"\"\n",
  "    if visited is None:\n",
  "        visited = set()\n",
  "    visited.add(start)\n",
  "\n",
  "    for neighbour in graph[start]:\n",
  "        # Check membership on every iteration: an earlier recursive call may\n",
  "        # already have reached this neighbour.\n",
  "        if neighbour not in visited:\n",
  "            depth_first_search(graph, neighbour, visited)\n",
  "    return visited\n",
  "\n",
  "\n",
  "# Named `fruit_graph` (not `graph`) so the `graph` class defined above is not overwritten.\n",
  "fruit_graph = {\n",
  "    'Apple': {'Banana', 'Cherry'},\n",
  "    'Banana': {'Apple', 'Durian', 'Orange'},\n",
  "    'Cherry': {'Apple'},\n",
  "    'Durian': {'Banana', 'Orange'},\n",
  "    'Orange': {'Cherry', 'Durian'},\n",
  "}\n",
  "\n",
  "depth_first_search(fruit_graph, 'Banana')"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'Apple', 'Banana', 'Cherry', 'Durian', 'Orange'}" ] }, "metadata": { "tags": [] }, "execution_count": 102 } ] },
 { "cell_type": "markdown", "metadata": { "id": "90iwo9W1Wanw" }, "source": [ "## Breadth-First Traversal of a Graph" ] },
 { "cell_type": "code", "metadata": { "id": "_QEoUEm-SkhT", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "c283fe39-4004-4770-a3bc-3b80a1102dbb" }, "source": [
  "import collections\n",
  "\n",
  "\n",
  "def breadth_first_search(graph, root):\n",
  "    \"\"\"Print the vertices reachable from `root` in breadth-first order.\"\"\"\n",
  "    visited = {root}\n",
  "    queue = collections.deque([root])\n",
  "\n",
  "    while queue:\n",
  "        vertex = queue.popleft()\n",
  "        print(str(vertex) + \" \", end=\"\")\n",
  "\n",
  "        for neighbour in graph[vertex]:\n",
  "            # Mark on enqueue so a vertex is never queued twice.\n",
  "            if neighbour not in visited:\n",
  "                visited.add(neighbour)\n",
  "                queue.append(neighbour)\n",
  "\n",
  "\n",
  "# Named `number_graph` (not `graph`) so the `graph` class defined above is not overwritten.\n",
  "number_graph = {\n",
  "    0: [1, 2],\n",
  "    1: [0, 2],\n",
  "    2: [3],\n",
  "    3: [1, 2],\n",
  "}\n",
  "\n",
  "breadth_first_search(number_graph, 1)"
 ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "1 0 2 3 " ], "name": "stdout" } ] },
 { "cell_type": "code", "metadata": { "id": "nHtTTpI5SkHs" }, "source": [ "" ], "execution_count": null, "outputs": [] },
 { "cell_type": "markdown", "metadata": { "id": "_DAwWMYGWwcW" }, "source": [ "# Assignment 1 Part 2: Election Data Search" ] },
 { "cell_type": "code", "metadata": { "id": "XytUX-nemkr0" }, "source": [
"import pandas as pd" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "k0V_IGLnl7P7", "colab": { "base_uri": "https://localhost:8080/", "height": 84 }, "outputId": "e6919695-6c1f-4631-c315-2967cd1b2d02" }, "source": [ "from google.colab import drive\n", "drive.mount('/data/')\n", "data_dir = '/data/My Drive/Colab Notebooks/FEC dataset'\n", "!ls '/data/My Drive/Colab Notebooks/FEC dataset'" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Drive already mounted at /data/; to attempt to forcibly remount, call drive.mount(\"/data/\", force_remount=True).\n", "ccl20.zip\t cm_header_file.csv pas220.zip\n", "ccl_header_file.csv cn20.zip\t\t pas2_header_file.csv\n", "cm20.zip\t cn_header_file.csv\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "pl-0E8cE-hGy" }, "source": [ "Search on 'CN20.zip' folder" ] }, { "cell_type": "code", "metadata": { "id": "zw1KZ0NmmWQw", "colab": { "base_uri": "https://localhost:8080/", "height": 400 }, "outputId": "cfb24d9f-b184-42d8-9b4d-5c5ad20b32b7" }, "source": [ "from zipfile import ZipFile\n", "\n", "header = pd.read_csv(data_dir+'/cn_header_file.csv')\n", "\n", "with ZipFile(data_dir+'/cn20.zip') as zip:\n", " candidates = pd.read_csv(zip.open('cn.txt'), sep='|', names=header.columns)\n", "candidates.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_NAMECAND_PTY_AFFILIATIONCAND_ELECTION_YRCAND_OFFICE_STCAND_OFFICECAND_OFFICE_DISTRICTCAND_ICICAND_STATUSCAND_PCCCAND_ST1CAND_ST2CAND_CITYCAND_STCAND_ZIP
0H0AK00105LAMB, THOMASNNE2020AKH0.0CNC006075151861 W LAKE LUCILLE DRNaNWASILLAAK99654.0
1H0AK00113TUGATUK, RAY SEANDEM2020AKH0.0CNNaNPO BOX 172NaNMANAKOTAKAK99628.0
2H0AK01046CATALANO, THOMASOTH2020AKH0.0NaNNNaN188 WEST NORTHERN LIGHTS BOULEVARDNaNANCHORAGEAK99503.0
3H0AL01055CARL, JERRY LEE, JRREP2020ALH1.0OCC00697789PO BOX 852138NaNMOBILEAL36685.0
4H0AL01063LAMBERT, DOUGLAS WESTLEY IIIREP2020ALH1.0OCC007015577194 STILLWATER BLVDNaNSPANISH FORTAL36527.0
\n", "
" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "0 H0AK00105 LAMB, THOMAS ... AK 99654.0\n", "1 H0AK00113 TUGATUK, RAY SEAN ... AK 99628.0\n", "2 H0AK01046 CATALANO, THOMAS ... AK 99503.0\n", "3 H0AL01055 CARL, JERRY LEE, JR ... AL 36685.0\n", "4 H0AL01063 LAMBERT, DOUGLAS WESTLEY III ... AL 36527.0\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 106 } ] },
 { "cell_type": "code", "metadata": { "id": "CZ2y9RTAnTzU", "colab": { "base_uri": "https://localhost:8080/", "height": 333 }, "outputId": "d698ba69-e8bf-4db5-b657-c79efd645bdf" }, "source": [
  "# na=False: a row with a missing CAND_NAME counts as 'no match' instead of putting NaN in the mask.\n",
  "candidates[candidates['CAND_NAME'].str.contains('WALKER', na=False)].head()"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_NAMECAND_PTY_AFFILIATIONCAND_ELECTION_YRCAND_OFFICE_STCAND_OFFICECAND_OFFICE_DISTRICTCAND_ICICAND_STATUSCAND_PCCCAND_ST1CAND_ST2CAND_CITYCAND_STCAND_ZIP
97H0AZ09054WALKER, JASON SREP2020AZH9.0CNNaN4815 E. THOMAS RD.W224PHOENIXAZ85018.0
848H0IL15103WALKER, ALEXREP2020ILH15.0ONC007179672606 CHARLESTON AVENaNMATTOONIL61938.0
1009H0MA04168GROSSMAN, REBECCA WALKERDEM2020MAH4.0CCC00720482PO BOX 590686NaNNEWTON CENTERMA2459.0
1391H0NC08247WALKER, RODNEY LDEM2020NCH8.0CNC00707877PO BOX 712NaNSOUTHERN PINESNC28388.0
1574H0NV03108WALKERLIEB, ZACHARYREP2020NVH3.0CCC007030255887 GLORY HEIGHTS DRNaNLAS VEGASNV89135.0
\n", "
" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "97 H0AZ09054 WALKER, JASON S ... AZ 85018.0\n", "848 H0IL15103 WALKER, ALEX ... IL 61938.0\n", "1009 H0MA04168 GROSSMAN, REBECCA WALKER ... MA 2459.0\n", "1391 H0NC08247 WALKER, RODNEY L ... NC 28388.0\n", "1574 H0NV03108 WALKERLIEB, ZACHARY ... NV 89135.0\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 107 } ] },
 { "cell_type": "code", "metadata": { "id": "tdUayozq-9Cb", "colab": { "base_uri": "https://localhost:8080/", "height": 417 }, "outputId": "a26fa3cc-48cc-48aa-aef1-3195d29110a7" }, "source": [
  "# Candidates whose election year is 2020 and whose office state is Florida.\n",
  "tg = candidates.loc[\n",
  "    candidates['CAND_ELECTION_YR'].eq(2020) & candidates['CAND_OFFICE_ST'].eq('FL')\n",
  "]\n",
  "tg.head()"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_NAMECAND_PTY_AFFILIATIONCAND_ELECTION_YRCAND_OFFICE_STCAND_OFFICECAND_OFFICE_DISTRICTCAND_ICICAND_STATUSCAND_PCCCAND_ST1CAND_ST2CAND_CITYCAND_STCAND_ZIP
437H0FL01112ROSAS, EMILYREP2020FLH1.0CNC006643006610 JENNIFER DRIVENaNTEMPLE TERRACEFL33617.0
438H0FL01120ROMAGNANO, CHASE ANDERSON ANDY REV.REP2020FLH1.0CNC00719351P.O. BOX 9328NaNPENSACOLAFL32513.0
439H0FL01138HOWARD, CHERYLDEM2020FLH1.0CNC00735977605 CROWN COVENaNPENSACOLAFL32502.0
440H0FL01146MERK, GREGORY CHARLESREP2020FLH1.0CCC007403404661 CALLE VENTOSONaNPENSACOLAFL32514.0
441H0FL01153ORAM, ALBERTNPA2020FLH1.0CCC00746636825 BAYSHORE DR APT 804NaNPENSACOLAFL32507.0
\n", "
" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "437 H0FL01112 ROSAS, EMILY ... FL 33617.0\n", "438 H0FL01120 ROMAGNANO, CHASE ANDERSON ANDY REV. ... FL 32513.0\n", "439 H0FL01138 HOWARD, CHERYL ... FL 32502.0\n", "440 H0FL01146 MERK, GREGORY CHARLES ... FL 32514.0\n", "441 H0FL01153 ORAM, ALBERT ... FL 32507.0\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 108 } ] },
 { "cell_type": "code", "metadata": { "id": "ytvXx2wHCvs1", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "f379bdaf-43ee-4446-a7e5-1c4302a9ec9c" }, "source": [
  "# Number of rows in the 2020 / FL selection.\n",
  "len(tg)"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "242" ] }, "metadata": { "tags": [] }, "execution_count": 109 } ] },
 { "cell_type": "code", "metadata": { "id": "mymZWrEypYla", "colab": { "base_uri": "https://localhost:8080/", "height": 130 }, "outputId": "6f97a11e-f339-4022-fd05-136583e1ab0f" }, "source": [
  "# na=False: a row with a missing CAND_NAME counts as 'no match' instead of putting NaN in the mask.\n",
  "candidates[candidates['CAND_NAME'].str.contains('TRUMP, DONALD', na=False)]"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_NAMECAND_PTY_AFFILIATIONCAND_ELECTION_YRCAND_OFFICE_STCAND_OFFICECAND_OFFICE_DISTRICTCAND_ICICAND_STATUSCAND_PCCCAND_ST1CAND_ST2CAND_CITYCAND_STCAND_ZIP
6403P80001571TRUMP, DONALD J.REP2020USP0.0ICC00580100725 FIFTH AVENUENaNNEW YORKNY10022.0
\n", "
" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "6403 P80001571 TRUMP, DONALD J. ... NY 10022.0\n", "\n", "[1 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 110 } ] },
 { "cell_type": "markdown", "metadata": { "id": "AoFXVna2--1J" }, "source": [ "### Search the 'pas220.zip' archive" ] },
 { "cell_type": "code", "metadata": { "id": "wIKz_EIYiHLj", "colab": { "base_uri": "https://localhost:8080/", "height": 400 }, "outputId": "1bb2e9a1-c58c-460c-e64d-2f73e34c291f" }, "source": [
  "# The data file has no header row; column names come from the separate header CSV.\n",
  "header = pd.read_csv(data_dir + '/pas2_header_file.csv')\n",
  "\n",
  "# `archive`/`member` rather than `zip`, which would shadow the built-in zip();\n",
  "# opening the member in the same `with` closes its handle as well.\n",
  "# low_memory=False makes pandas infer each column's type from the whole file, which\n",
  "# avoids the DtypeWarning previously raised for the mixed-type columns 10, 11 and 12.\n",
  "with ZipFile(data_dir + '/pas220.zip') as archive, archive.open('itpas2.txt') as member:\n",
  "    spending = pd.read_csv(member, sep='|', names=header.columns, low_memory=False)\n",
  "spending.head()"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDAMNDT_INDRPT_TPTRANSACTION_PGIIMAGE_NUMTRANSACTION_TPENTITY_TPNAMECITYSTATEZIP_CODEEMPLOYEROCCUPATIONTRANSACTION_DTTRANSACTION_AMTOTHER_IDCAND_IDTRAN_IDFILE_NUMMEMO_CDMEMO_TEXTSUB_ID
0C00567180TTERP202020190109914377419924KPACTED YOHO FOR CONGRESSGAINESVILLEFL32608NaNNaN1082019.01880C00494583H2FL06109SB23.44471303604NaNNaN4021920191640423011
1C00104885ATERG202020190128914403151124KCCMTEAM GRAHAM INCCOLUMBIASC29202NaNNaN12202018.03000C00458828H4SC03087SB23.177571307636NaNNaN4022220191643444985
2C00104885ATERP202220190128914403151224KCCMTIM SCOTT FOR SENATECHARLESTONSC29407NaNNaN12202018.01000C00540302H0SC01279SB23.177561307636NaNNaN4022220191643444987
3C00104885ATERP202020190128914403151124KCCMFRIENDS OF JIM CLYBURNCOLUMBIASC29211NaNNaN1072019.0470C00255562H2SC02042SB23.177551307636NaNNaN4022220191643444981
4C00688408TTERG201820190131914430586724EORGJACKSON ADVOCATEJACKSONMS39207NaNNaN12122018.01000S8MS00287S8MS00287SE.42331310906NaNNaN4022420191643632157
\n", "
" ], "text/plain": [ " CMTE_ID AMNDT_IND RPT_TP ... MEMO_CD MEMO_TEXT SUB_ID\n", "0 C00567180 T TER ... NaN NaN 4021920191640423011\n", "1 C00104885 A TER ... NaN NaN 4022220191643444985\n", "2 C00104885 A TER ... NaN NaN 4022220191643444987\n", "3 C00104885 A TER ... NaN NaN 4022220191643444981\n", "4 C00688408 T TER ... NaN NaN 4022420191643632157\n", "\n", "[5 rows x 22 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 111 } ] },
 { "cell_type": "code", "metadata": { "id": "cJReRZAi7RSR", "colab": { "base_uri": "https://localhost:8080/", "height": 195 }, "outputId": "8f41dac7-2e34-4db1-b661-2b46ab2debab" }, "source": [
  "# Rows whose CAND_ID starts with 'H' and whose STATE column is 'FL'.\n",
  "# na=False maps a missing CAND_ID to False, replacing the former `== True` comparison.\n",
  "house_spending_FL = spending[\n",
  "    spending['CAND_ID'].str.startswith('H', na=False) & (spending['STATE'] == 'FL')\n",
  "]\n",
  "df_house = house_spending_FL[['CMTE_ID', 'NAME', 'STATE', 'TRANSACTION_AMT', 'CAND_ID']]\n",
  "df_house.head()"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDNAMESTATETRANSACTION_AMTCAND_ID
0C00567180TED YOHO FOR CONGRESSFL1880H2FL06109
154C00027342HASTINGS FOR CONGRESSFL5000H2FL23021
195C00003855DONNA SHALALA FOR CONGRESSFL1000H8FL27193
256C00339655VERN BUCHANAN FOR CONGRESSFL5000H6FL13148
421C00035204BILIRAKIS FOR CONGRESSFL1000H6FL09070
\n", "
" ], "text/plain": [ " CMTE_ID NAME STATE TRANSACTION_AMT CAND_ID\n", "0 C00567180 TED YOHO FOR CONGRESS FL 1880 H2FL06109\n", "154 C00027342 HASTINGS FOR CONGRESS FL 5000 H2FL23021\n", "195 C00003855 DONNA SHALALA FOR CONGRESS FL 1000 H8FL27193\n", "256 C00339655 VERN BUCHANAN FOR CONGRESS FL 5000 H6FL13148\n", "421 C00035204 BILIRAKIS FOR CONGRESS FL 1000 H6FL09070" ] }, "metadata": { "tags": [] }, "execution_count": 112 } ] },
 { "cell_type": "markdown", "metadata": { "id": "L-O7kKyr_OkU" }, "source": [ "### Search the 'cm20.zip' archive" ] },
 { "cell_type": "code", "metadata": { "id": "UX-S1tm7nOo2", "colab": { "base_uri": "https://localhost:8080/", "height": 518 }, "outputId": "145959fb-bb59-4e7a-c6e1-31f3d64148c5" }, "source": [
  "# The data file has no header row; column names come from the separate header CSV.\n",
  "header = pd.read_csv(data_dir + '/cm_header_file.csv')\n",
  "\n",
  "# `archive`/`member` rather than `zip`, which would shadow the built-in zip();\n",
  "# opening the member in the same `with` closes its handle as well.\n",
  "with ZipFile(data_dir + '/cm20.zip') as archive, archive.open('cm.txt') as member:\n",
  "    df = pd.read_csv(member, sep='|', names=header.columns)\n",
  "df.head()"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDCMTE_NMTRES_NMCMTE_ST1CMTE_ST2CMTE_CITYCMTE_STCMTE_ZIPCMTE_DSGNCMTE_TPCMTE_PTY_AFFILIATIONCMTE_FILING_FREQORG_TPCONNECTED_ORG_NMCAND_ID
0C00000059HALLMARK CARDS PACSARAH MOE2501 MCGEEMD #500KANSAS CITYMO64108UQUNKMCNaNNaN
1C00000422AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ...WALKER, KEVIN MR.25 MASSACHUSETTS AVE, NWSUITE 600WASHINGTONDC200017400BQNaNMMDELAWARE MEDICAL PACNaN
2C00000489D R I V E POLITICAL FUND CHAPTER 886JERRY SIMS JR3528 W RENONaNOKLAHOMA CITYOK73107UNNaNQLNaNNaN
3C00000547KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEEJERRY SLAUGHTER623 SW 10TH AVENaNTOPEKAKS666121627UQUNKQMKANSAS MEDICAL SOCIETYNaN
4C00000638INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC...ACHENBACH, GRANT MR.322 CANAL WALK, CANAL LEVELNaNINDIANAPOLISIN46202UQNaNTMNaNNaN
\n", "
" ], "text/plain": [ " CMTE_ID ... CAND_ID\n", "0 C00000059 ... NaN\n", "1 C00000422 ... NaN\n", "2 C00000489 ... NaN\n", "3 C00000547 ... NaN\n", "4 C00000638 ... NaN\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 113 } ] },
 { "cell_type": "code", "metadata": { "id": "Gz5FKlS11ZGQ", "colab": { "base_uri": "https://localhost:8080/", "height": 195 }, "outputId": "caef84a1-84e2-4340-d070-4103622b12fa" }, "source": [
  "# Two-column lookup: committee id -> committee name.\n",
  "name_columns = ['CMTE_ID', 'CMTE_NM']\n",
  "df_name = df[name_columns]\n",
  "df_name.head()"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDCMTE_NM
0C00000059HALLMARK CARDS PAC
1C00000422AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ...
2C00000489D R I V E POLITICAL FUND CHAPTER 886
3C00000547KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE
4C00000638INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC...
\n", "
" ], "text/plain": [ " CMTE_ID CMTE_NM\n", "0 C00000059 HALLMARK CARDS PAC\n", "1 C00000422 AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ...\n", "2 C00000489 D R I V E POLITICAL FUND CHAPTER 886\n", "3 C00000547 KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE\n", "4 C00000638 INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC..." ] }, "metadata": { "tags": [] }, "execution_count": 114 } ] },
 { "cell_type": "code", "metadata": { "id": "MpE777-JE3Gt", "colab": { "base_uri": "https://localhost:8080/", "height": 618 }, "outputId": "4d62b2f1-8c4e-4678-df9b-b028eeda0ea3" }, "source": [
  "# Committees whose connected organisation name contains 'SOCIETY';\n",
  "# rows without a connected organisation are treated as non-matching (na=False).\n",
  "Society_Organization = df.loc[\n",
  "    df['CONNECTED_ORG_NM'].str.contains('SOCIETY', na=False)\n",
  "]\n",
  "Society_Organization.head()"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CMTE_IDCMTE_NMTRES_NMCMTE_ST1CMTE_ST2CMTE_CITYCMTE_STCMTE_ZIPCMTE_DSGNCMTE_TPCMTE_PTY_AFFILIATIONCMTE_FILING_FREQORG_TPCONNECTED_ORG_NMCAND_ID
3C00000547KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEEJERRY SLAUGHTER623 SW 10TH AVENaNTOPEKAKS666121627UQUNKQMKANSAS MEDICAL SOCIETYNaN
11C00001180MICHIGAN DOCTORS POLITICAL ACTION COMMITTEE - ...GHOSE, AMITP.O. BOX 769NaNEAST LANSINGMI48826UQNNEQTMICHIGAN STATE MEDICAL SOCIETYNaN
37C00003152NORTH CAROLINA MEDICAL SOCIETY FEDERAL POLITIC...HAYES, E. REBECCAPO BOX 25834222 N. PERSON STREETRALEIGHNC27611UQNaNQMNORTH CAROLINA MEDICAL SOCIETYNaN
51C00003970IOWA MEDICAL SOCIETY POLITICAL ACTION COMMITTEEDOOLEY, JOHN B515 E LOCUST STREETSUITE 400DES MOINESIA50309UQNaNQMIOWA MEDICAL SOCIETYNaN
136C00012211NORTH CAROLINA DENTAL SOCIETY PACBROWN, EVELYN M. DR.1600 EVANS ROADNaNCARYNC27513UQNaNQMNORTH CAROLINA DENTAL SOCIETYNaN
\n", "
" ], "text/plain": [ " CMTE_ID ... CAND_ID\n", "3 C00000547 ... NaN\n", "11 C00001180 ... NaN\n", "37 C00003152 ... NaN\n", "51 C00003970 ... NaN\n", "136 C00012211 ... NaN\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 115 } ] },
 { "cell_type": "code", "metadata": { "id": "GErXTSsJGOV-", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "f2899955-e0ec-47b2-9310-24ba8fa7def4" }, "source": [
  "# Number of committees matched by the 'SOCIETY' search.\n",
  "len(Society_Organization)"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "28" ] }, "metadata": { "tags": [] }, "execution_count": 116 } ] },
 { "cell_type": "markdown", "metadata": { "id": "drRP2x2Z_lZS" }, "source": [ "### Search the 'ccl20.zip' archive" ] },
 { "cell_type": "code", "metadata": { "id": "ombc-hjIn0KR", "colab": { "base_uri": "https://localhost:8080/", "height": 195 }, "outputId": "20885340-ea71-466c-e66d-ba3684884989" }, "source": [
  "# The data file has no header row; column names come from the separate header CSV.\n",
  "header = pd.read_csv(data_dir + '/ccl_header_file.csv')\n",
  "\n",
  "# `archive`/`member` rather than `zip`, which would shadow the built-in zip();\n",
  "# opening the member in the same `with` closes its handle as well.\n",
  "with ZipFile(data_dir + '/ccl20.zip') as archive, archive.open('ccl.txt') as member:\n",
  "    linkage = pd.read_csv(member, sep='|', names=header.columns)\n",
  "\n",
  "linkage.head()"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_ELECTION_YRFEC_ELECTION_YRCMTE_IDCMTE_TPCMTE_DSGNLINKAGE_ID
0C0071360220192020C00712851OU228963
1H0AK0010520202020C00607515HP229250
2H0AL0105520202020C00697789HP226125
3H0AL0106320202020C00701557HP227053
4H0AL0107120202020C00701409HP227054
\n", "
" ], "text/plain": [ " CAND_ID CAND_ELECTION_YR FEC_ELECTION_YR ... CMTE_TP CMTE_DSGN LINKAGE_ID\n", "0 C00713602 2019 2020 ... O U 228963\n", "1 H0AK00105 2020 2020 ... H P 229250\n", "2 H0AL01055 2020 2020 ... H P 226125\n", "3 H0AL01063 2020 2020 ... H P 227053\n", "4 H0AL01071 2020 2020 ... H P 227054\n", "\n", "[5 rows x 7 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 117 } ] },
 { "cell_type": "code", "metadata": { "id": "vb8dIlKKE_4m" }, "source": [
  "# NOTE(review): axis=1 places the three tables side by side by row position, not by\n",
  "# CAND_ID / CMTE_ID, so each row mixes unrelated records and column names repeat\n",
  "# (see the head() output below). The keyed pd.merge cells that follow are the\n",
  "# meaningful join -- confirm whether df1 is still needed.\n",
  "df1 = pd.concat(\n",
  "    [candidates, linkage, df],\n",
  "    axis=1,\n",
  "    sort=False,\n",
  ").reset_index()"
 ], "execution_count": null, "outputs": [] },
 { "cell_type": "code", "metadata": { "id": "yEgZmnNHT2xw", "colab": { "base_uri": "https://localhost:8080/", "height": 534 }, "outputId": "d25d1e93-80a7-4ebd-a382-97460212b7ae" }, "source": [ "df1.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexCAND_IDCAND_NAMECAND_PTY_AFFILIATIONCAND_ELECTION_YRCAND_OFFICE_STCAND_OFFICECAND_OFFICE_DISTRICTCAND_ICICAND_STATUSCAND_PCCCAND_ST1CAND_ST2CAND_CITYCAND_STCAND_ZIPCAND_IDCAND_ELECTION_YRFEC_ELECTION_YRCMTE_IDCMTE_TPCMTE_DSGNLINKAGE_IDCMTE_IDCMTE_NMTRES_NMCMTE_ST1CMTE_ST2CMTE_CITYCMTE_STCMTE_ZIPCMTE_DSGNCMTE_TPCMTE_PTY_AFFILIATIONCMTE_FILING_FREQORG_TPCONNECTED_ORG_NMCAND_ID
00H0AK00105LAMB, THOMASNNE2020.0AKH0.0CNC006075151861 W LAKE LUCILLE DRNaNWASILLAAK99654.0C007136022019.02020.0C00712851OU228963.0C00000059HALLMARK CARDS PACSARAH MOE2501 MCGEEMD #500KANSAS CITYMO64108UQUNKMCNaNNaN
11H0AK00113TUGATUK, RAY SEANDEM2020.0AKH0.0CNNaNPO BOX 172NaNMANAKOTAKAK99628.0H0AK001052020.02020.0C00607515HP229250.0C00000422AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ...WALKER, KEVIN MR.25 MASSACHUSETTS AVE, NWSUITE 600WASHINGTONDC200017400BQNaNMMDELAWARE MEDICAL PACNaN
22H0AK01046CATALANO, THOMASOTH2020.0AKH0.0NaNNNaN188 WEST NORTHERN LIGHTS BOULEVARDNaNANCHORAGEAK99503.0H0AL010552020.02020.0C00697789HP226125.0C00000489D R I V E POLITICAL FUND CHAPTER 886JERRY SIMS JR3528 W RENONaNOKLAHOMA CITYOK73107UNNaNQLNaNNaN
33H0AL01055CARL, JERRY LEE, JRREP2020.0ALH1.0OCC00697789PO BOX 852138NaNMOBILEAL36685.0H0AL010632020.02020.0C00701557HP227053.0C00000547KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEEJERRY SLAUGHTER623 SW 10TH AVENaNTOPEKAKS666121627UQUNKQMKANSAS MEDICAL SOCIETYNaN
44H0AL01063LAMBERT, DOUGLAS WESTLEY IIIREP2020.0ALH1.0OCC007015577194 STILLWATER BLVDNaNSPANISH FORTAL36527.0H0AL010712020.02020.0C00701409HP227054.0C00000638INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC...ACHENBACH, GRANT MR.322 CANAL WALK, CANAL LEVELNaNINDIANAPOLISIN46202UQNaNTMNaNNaN
\n", "
" ], "text/plain": [ " index CAND_ID ... CONNECTED_ORG_NM CAND_ID\n", "0 0 H0AK00105 ... NaN NaN\n", "1 1 H0AK00113 ... DELAWARE MEDICAL PAC NaN\n", "2 2 H0AK01046 ... NaN NaN\n", "3 3 H0AL01055 ... KANSAS MEDICAL SOCIETY NaN\n", "4 4 H0AL01063 ... NaN NaN\n", "\n", "[5 rows x 38 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 119 } ] },
 { "cell_type": "code", "metadata": { "id": "d_oy0b5jT8eY", "colab": { "base_uri": "https://localhost:8080/", "height": 333 }, "outputId": "6650d9ab-983e-4277-843d-157107978f50" }, "source": [
  "# Inner join on CAND_ID: keeps candidates that have at least one linkage row.\n",
  "# CAND_ELECTION_YR exists in both tables, so pandas suffixes it with _x / _y.\n",
  "df2 = candidates.merge(linkage, how='inner', on='CAND_ID')\n",
  "df2.head()"
 ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_NAMECAND_PTY_AFFILIATIONCAND_ELECTION_YR_xCAND_OFFICE_STCAND_OFFICECAND_OFFICE_DISTRICTCAND_ICICAND_STATUSCAND_PCCCAND_ST1CAND_ST2CAND_CITYCAND_STCAND_ZIPCAND_ELECTION_YR_yFEC_ELECTION_YRCMTE_IDCMTE_TPCMTE_DSGNLINKAGE_ID
0H0AK00105LAMB, THOMASNNE2020AKH0.0CNC006075151861 W LAKE LUCILLE DRNaNWASILLAAK99654.020202020C00607515HP229250
1H0AL01055CARL, JERRY LEE, JRREP2020ALH1.0OCC00697789PO BOX 852138NaNMOBILEAL36685.020202020C00697789HP226125
2H0AL01063LAMBERT, DOUGLAS WESTLEY IIIREP2020ALH1.0OCC007015577194 STILLWATER BLVDNaNSPANISH FORTAL36527.020202020C00701557HP227053
3H0AL01071PRINGLE, CHRISTOPHER PAULREP2020ALH1.0OCC00701409202 GOVERNMENT STREETSUITE 220MOBILEAL36602.020202020C00701409HP227054
4H0AL01089HIGHTOWER, BILLREP2020ALH1.0OCC00703066PO BOX 91038NaNMOBILEAL36691.020202020C00703066HP227266
\n", "
" ], "text/plain": [ " CAND_ID CAND_NAME ... CMTE_DSGN LINKAGE_ID\n", "0 H0AK00105 LAMB, THOMAS ... P 229250\n", "1 H0AL01055 CARL, JERRY LEE, JR ... P 226125\n", "2 H0AL01063 LAMBERT, DOUGLAS WESTLEY III ... P 227053\n", "3 H0AL01071 PRINGLE, CHRISTOPHER PAUL ... P 227054\n", "4 H0AL01089 HIGHTOWER, BILL ... P 227266\n", "\n", "[5 rows x 21 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 120 } ] }, { "cell_type": "code", "metadata": { "id": "5pJTEGPm0OXR" }, "source": [ "df_merge = pd.merge(df2, df_name, on='CMTE_ID')\n", "#df_merge.head()\n", "\n", "df_sort = df_merge[['CAND_ID', 'CAND_NAME', 'CAND_ST', 'CMTE_NM']]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "jAIGmy572dYD", "colab": { "base_uri": "https://localhost:8080/", "height": 343 }, "outputId": "bbeade85-9d03-4b76-88d7-ba6ecf5ec6f0" }, "source": [ "df_sort[df_sort['CAND_ST']=='FL'].head(10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_NAMECAND_STCMTE_NM
7H0AL01113ROMAGNANO, CHASE ANDERSON ANDY REVERENFLANDY FOR NORTHWEST FLORIDA
8H0FL01120ROMAGNANO, CHASE ANDERSON ANDY REV.FLANDY FOR NORTHWEST FLORIDA
14H0FL02110SOUTHERLAND, WILLIAM STEVE IIFLFRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...
381H0FL01120ROMAGNANO, CHASE ANDERSON ANDY REV.FLANDY ROMAGNANO FOR NORTHWEST FLORIDA
382H0FL01138HOWARD, CHERYLFLCHERYL HOWARD FOR U S HOUSE OF REPRESENTATIVES
383H0FL01146MERK, GREGORY CHARLESFLGREG MERK CAMPAIGN
384H0FL01153ORAM, ALBERTFLORAM FOR CONGRESS
386H0FL02193ANDERSON, WILLIE JEFFERSON JRFLANDERSON FOR US CONGRESS
387H0FL02219THRIPP, KRISTYFLKRISTY THRIPP FOR CONGRESS
388H0FL03118BLACK, DEAN AFLDEAN BLACK FOR CONGRESS
\n", "
" ], "text/plain": [ " CAND_ID ... CMTE_NM\n", "7 H0AL01113 ... ANDY FOR NORTHWEST FLORIDA\n", "8 H0FL01120 ... ANDY FOR NORTHWEST FLORIDA\n", "14 H0FL02110 ... FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...\n", "381 H0FL01120 ... ANDY ROMAGNANO FOR NORTHWEST FLORIDA\n", "382 H0FL01138 ... CHERYL HOWARD FOR U S HOUSE OF REPRESENTATIVES\n", "383 H0FL01146 ... GREG MERK CAMPAIGN\n", "384 H0FL01153 ... ORAM FOR CONGRESS\n", "386 H0FL02193 ... ANDERSON FOR US CONGRESS\n", "387 H0FL02219 ... KRISTY THRIPP FOR CONGRESS\n", "388 H0FL03118 ... DEAN BLACK FOR CONGRESS\n", "\n", "[10 rows x 4 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 122 } ] }, { "cell_type": "code", "metadata": { "id": "geYa-uW_34Aq", "colab": { "base_uri": "https://localhost:8080/", "height": 195 }, "outputId": "8ec71abb-0aa5-42cf-a6f4-41716b07891e" }, "source": [ "df_sort_house = pd.merge(df_sort, df_house, on='CAND_ID')\n", "df_sort_house.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CAND_IDCAND_NAMECAND_STCMTE_NMCMTE_IDNAMESTATETRANSACTION_AMT
0H0AL02087ROBY, MARTHANaNMARTHA ROBY FOR CONGRESSC00522458PINPOINT WEBSOLUTIONSFL750
1H0AL02087ROBY, MARTHANaNFRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...C00522458PINPOINT WEBSOLUTIONSFL750
2H0GA08099SCOTT, JAMES AUSTINGAFRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...C00522458PINPOINT WEBSOLUTIONSFL750
3H0GA08099SCOTT, JAMES AUSTINGAAUSTIN SCOTT FOR CONGRESS INCC00522458PINPOINT WEBSOLUTIONSFL750
4H0GA08099SCOTT, JAMES AUSTINGAAUSTIN SCOTT VICTORY FUNDC00522458PINPOINT WEBSOLUTIONSFL750
\n", "
" ], "text/plain": [ " CAND_ID CAND_NAME ... STATE TRANSACTION_AMT\n", "0 H0AL02087 ROBY, MARTHA ... FL 750\n", "1 H0AL02087 ROBY, MARTHA ... FL 750\n", "2 H0GA08099 SCOTT, JAMES AUSTIN ... FL 750\n", "3 H0GA08099 SCOTT, JAMES AUSTIN ... FL 750\n", "4 H0GA08099 SCOTT, JAMES AUSTIN ... FL 750\n", "\n", "[5 rows x 8 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 123 } ] }, { "cell_type": "code", "metadata": { "id": "hRa7Rywa-Rby", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "71f63e33-c35c-413b-c882-c3863e073ba0" }, "source": [ "df_sort_house['TRANSACTION_AMT'].sum()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "17474528" ] }, "metadata": { "tags": [] }, "execution_count": 124 } ] }, { "cell_type": "code", "metadata": { "id": "lJ51BCl5YHBM" }, "source": [ "" ], "execution_count": null, "outputs": [] } ] }