{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Week2_Assignment.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "qn8CLGQ1U_sw" }, "source": [ "\n", "Assignment 1 Part 1: Graph Class" ] }, { "cell_type": "code", "metadata": { "id": "00cUFtx6Sixf", "colab": { "base_uri": "https://localhost:8080/", "height": 50 }, "outputId": "be253fd7-1337-4755-e4f3-f66b042c2b79" }, "source": [ "graph_elements = { \"Apple\" : [\"Banana\",\"Cherry\"],\n", " \"Banana\" : [\"Apple\", \"Durian\"],\n", " \"Cherry\" : [\"Apple\", \"Durian\"],\n", " \"Durian\" : [\"Orange\"],\n", " \"Orange\" : [\"Durian\"]\n", " }\n", "\n", "class graph:\n", "\n", " def __init__(self, value=None):\n", " self.value = value\n", "\n", " def getVertices(self):\n", " return list(self.value.keys())\n", "\n", " def getEdges(self):\n", " return self.edges()\n", "\n", " def edges(self):\n", " edgelist = []\n", " for vertex in self.value:\n", " for neighbor in self.value[vertex]:\n", " if {neighbor, vertex} not in edgelist:\n", " edgelist.append({vertex, neighbor})\n", " return edgelist\n", "\n", "g = graph(graph_elements)\n", "\n", "\n", "print(g.getVertices())\n", "print(g.getEdges())" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "['Apple', 'Banana', 'Cherry', 'Durian', 'Orange']\n", "[{'Apple', 'Banana'}, {'Cherry', 'Apple'}, {'Durian', 'Banana'}, {'Cherry', 'Durian'}, {'Orange', 'Durian'}]\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "QzmgYXn5VVk1" }, "source": [ "Depth First Traversal of a graph" ] }, { "cell_type": "code", "metadata": { "id": "4wh5icuWSkt8", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "37d45899-4560-4535-807f-0f8713b8cec1" }, "source": [ "def depth_first_search(graph, start, visited=None):\n", " if visited is None:\n", " visited = set()\n", " visited.add(start)\n", "\n", " #print(start)\n", "\n", " for next in graph[start] - visited:\n", " depth_first_search(graph, next, visited)\n", " return visited\n", "\n", "graph = {'Apple': set(['Banana', 'Cherry']),\n", " 'Banana': set(['Apple', 'Durian', 'Orange']),\n", " 'Cherry': set(['Apple']),\n", " 'Durian': set(['Banana', 'Orange']),\n", " 'Orange': set(['Cherry', 'Durian'])\n", " }\n", "\n", "\n", "depth_first_search(graph, 'Banana')" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'Apple', 'Banana', 'Cherry', 'Durian', 'Orange'}" ] }, "metadata": { "tags": [] }, "execution_count": 102 } ] }, { "cell_type": "markdown", "metadata": { "id": "90iwo9W1Wanw" }, "source": [ "Breadth-First Traversal of a Graph" ] }, { "cell_type": "code", "metadata": { "id": "_QEoUEm-SkhT", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "c283fe39-4004-4770-a3bc-3b80a1102dbb" }, "source": [ "import collections\n", "\n", "def breadth_first_search(graph, root):\n", "\n", " visited, queue = set(), collections.deque([root])\n", " visited.add(root)\n", "\n", " while queue:\n", "\n", " vertex = queue.popleft()\n", " print(str(vertex) + \" \", end=\"\")\n", "\n", " for neighbour in graph[vertex]:\n", " if neighbour not in visited:\n", " visited.add(neighbour)\n", " queue.append(neighbour)\n", "\n", "\n", "if __name__ == '__main__':\n", "\n", " graph = {0: [1, 2], \n", " 1: [0, 2], \n", " 2: [3], \n", " 3: [1, 2]\n", " }\n", "\n", "\n", "breadth_first_search(graph, 1)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "1 0 2 3 " ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "nHtTTpI5SkHs" }, "source": [ "" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "_DAwWMYGWwcW" }, "source": [ "Assignment 1 Part 2: Election Data Search" ] }, { "cell_type": "code", "metadata": { "id": "XytUX-nemkr0" }, "source": [ "import pandas as pd" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "k0V_IGLnl7P7", "colab": { "base_uri": "https://localhost:8080/", "height": 84 }, "outputId": "e6919695-6c1f-4631-c315-2967cd1b2d02" }, "source": [ "from google.colab import drive\n", "drive.mount('/data/')\n", "data_dir = '/data/My Drive/Colab Notebooks/FEC dataset'\n", "!ls '/data/My Drive/Colab Notebooks/FEC dataset'" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Drive already mounted at /data/; to attempt to forcibly remount, call drive.mount(\"/data/\", force_remount=True).\n", "ccl20.zip\t cm_header_file.csv pas220.zip\n", "ccl_header_file.csv cn20.zip\t\t pas2_header_file.csv\n", "cm20.zip\t cn_header_file.csv\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "pl-0E8cE-hGy" }, "source": [ "Search on 'CN20.zip' folder" ] }, { "cell_type": "code", "metadata": { "id": "zw1KZ0NmmWQw", "colab": { "base_uri": "https://localhost:8080/", "height": 400 }, "outputId": "cfb24d9f-b184-42d8-9b4d-5c5ad20b32b7" }, "source": [ "from zipfile import ZipFile\n", "\n", "header = pd.read_csv(data_dir+'/cn_header_file.csv')\n", "\n", "with ZipFile(data_dir+'/cn20.zip') as zip:\n", " candidates = pd.read_csv(zip.open('cn.txt'), sep='|', names=header.columns)\n", "candidates.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_NAME</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>CAND_OFFICE_ST</th>\n", " <th>CAND_OFFICE</th>\n", " <th>CAND_OFFICE_DISTRICT</th>\n", " <th>CAND_ICI</th>\n", " <th>CAND_STATUS</th>\n", " <th>CAND_PCC</th>\n", " <th>CAND_ST1</th>\n", " <th>CAND_ST2</th>\n", " <th>CAND_CITY</th>\n", " <th>CAND_ST</th>\n", " <th>CAND_ZIP</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>H0AK00105</td>\n", " <td>LAMB, THOMAS</td>\n", " <td>NNE</td>\n", " <td>2020</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>C00607515</td>\n", " <td>1861 W LAKE LUCILLE DR</td>\n", " <td>NaN</td>\n", " <td>WASILLA</td>\n", " <td>AK</td>\n", " <td>99654.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>H0AK00113</td>\n", " <td>TUGATUK, RAY SEAN</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>PO BOX 172</td>\n", " <td>NaN</td>\n", " <td>MANAKOTAK</td>\n", " <td>AK</td>\n", " <td>99628.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>H0AK01046</td>\n", " <td>CATALANO, THOMAS</td>\n", " <td>OTH</td>\n", " <td>2020</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>188 WEST NORTHERN LIGHTS BOULEVARD</td>\n", " <td>NaN</td>\n", " <td>ANCHORAGE</td>\n", " <td>AK</td>\n", " <td>99503.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>H0AL01055</td>\n", " <td>CARL, JERRY LEE, JR</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00697789</td>\n", " <td>PO BOX 852138</td>\n", " <td>NaN</td>\n", " <td>MOBILE</td>\n", " <td>AL</td>\n", " <td>36685.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>H0AL01063</td>\n", " <td>LAMBERT, DOUGLAS WESTLEY III</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00701557</td>\n", " <td>7194 STILLWATER BLVD</td>\n", " <td>NaN</td>\n", " <td>SPANISH FORT</td>\n", " <td>AL</td>\n", " <td>36527.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "0 H0AK00105 LAMB, THOMAS ... AK 99654.0\n", "1 H0AK00113 TUGATUK, RAY SEAN ... AK 99628.0\n", "2 H0AK01046 CATALANO, THOMAS ... AK 99503.0\n", "3 H0AL01055 CARL, JERRY LEE, JR ... AL 36685.0\n", "4 H0AL01063 LAMBERT, DOUGLAS WESTLEY III ... AL 36527.0\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 106 } ] }, { "cell_type": "code", "metadata": { "id": "CZ2y9RTAnTzU", "colab": { "base_uri": "https://localhost:8080/", "height": 333 }, "outputId": "d698ba69-e8bf-4db5-b657-c79efd645bdf" }, "source": [ "candidates[candidates['CAND_NAME'].str.contains('WALKER')].head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_NAME</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>CAND_OFFICE_ST</th>\n", " <th>CAND_OFFICE</th>\n", " <th>CAND_OFFICE_DISTRICT</th>\n", " <th>CAND_ICI</th>\n", " <th>CAND_STATUS</th>\n", " <th>CAND_PCC</th>\n", " <th>CAND_ST1</th>\n", " <th>CAND_ST2</th>\n", " <th>CAND_CITY</th>\n", " <th>CAND_ST</th>\n", " <th>CAND_ZIP</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>97</th>\n", " <td>H0AZ09054</td>\n", " <td>WALKER, JASON S</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>AZ</td>\n", " <td>H</td>\n", " <td>9.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>4815 E. THOMAS RD.</td>\n", " <td>W224</td>\n", " <td>PHOENIX</td>\n", " <td>AZ</td>\n", " <td>85018.0</td>\n", " </tr>\n", " <tr>\n", " <th>848</th>\n", " <td>H0IL15103</td>\n", " <td>WALKER, ALEX</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>IL</td>\n", " <td>H</td>\n", " <td>15.0</td>\n", " <td>O</td>\n", " <td>N</td>\n", " <td>C00717967</td>\n", " <td>2606 CHARLESTON AVE</td>\n", " <td>NaN</td>\n", " <td>MATTOON</td>\n", " <td>IL</td>\n", " <td>61938.0</td>\n", " </tr>\n", " <tr>\n", " <th>1009</th>\n", " <td>H0MA04168</td>\n", " <td>GROSSMAN, REBECCA WALKER</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>MA</td>\n", " <td>H</td>\n", " <td>4.0</td>\n", " <td>C</td>\n", " <td>C</td>\n", " <td>C00720482</td>\n", " <td>PO BOX 590686</td>\n", " <td>NaN</td>\n", " <td>NEWTON CENTER</td>\n", " <td>MA</td>\n", " <td>2459.0</td>\n", " </tr>\n", " <tr>\n", " <th>1391</th>\n", " <td>H0NC08247</td>\n", " <td>WALKER, RODNEY L</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>NC</td>\n", " <td>H</td>\n", " <td>8.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>C00707877</td>\n", " <td>PO BOX 712</td>\n", " <td>NaN</td>\n", " <td>SOUTHERN PINES</td>\n", " <td>NC</td>\n", " <td>28388.0</td>\n", " </tr>\n", " <tr>\n", " <th>1574</th>\n", " <td>H0NV03108</td>\n", " <td>WALKERLIEB, ZACHARY</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>NV</td>\n", " <td>H</td>\n", " <td>3.0</td>\n", " <td>C</td>\n", " <td>C</td>\n", " <td>C00703025</td>\n", " <td>5887 GLORY HEIGHTS DR</td>\n", " <td>NaN</td>\n", " <td>LAS VEGAS</td>\n", " <td>NV</td>\n", " <td>89135.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "97 H0AZ09054 WALKER, JASON S ... AZ 85018.0\n", "848 H0IL15103 WALKER, ALEX ... IL 61938.0\n", "1009 H0MA04168 GROSSMAN, REBECCA WALKER ... MA 2459.0\n", "1391 H0NC08247 WALKER, RODNEY L ... NC 28388.0\n", "1574 H0NV03108 WALKERLIEB, ZACHARY ... NV 89135.0\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 107 } ] }, { "cell_type": "code", "metadata": { "id": "tdUayozq-9Cb", "colab": { "base_uri": "https://localhost:8080/", "height": 417 }, "outputId": "a26fa3cc-48cc-48aa-aef1-3195d29110a7" }, "source": [ "tg = candidates[(candidates['CAND_ELECTION_YR'] == 2020) & (candidates['CAND_OFFICE_ST'] == 'FL')]\n", "tg.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_NAME</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>CAND_OFFICE_ST</th>\n", " <th>CAND_OFFICE</th>\n", " <th>CAND_OFFICE_DISTRICT</th>\n", " <th>CAND_ICI</th>\n", " <th>CAND_STATUS</th>\n", " <th>CAND_PCC</th>\n", " <th>CAND_ST1</th>\n", " <th>CAND_ST2</th>\n", " <th>CAND_CITY</th>\n", " <th>CAND_ST</th>\n", " <th>CAND_ZIP</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>437</th>\n", " <td>H0FL01112</td>\n", " <td>ROSAS, EMILY</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>FL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>C00664300</td>\n", " <td>6610 JENNIFER DRIVE</td>\n", " <td>NaN</td>\n", " <td>TEMPLE TERRACE</td>\n", " <td>FL</td>\n", " <td>33617.0</td>\n", " </tr>\n", " <tr>\n", " <th>438</th>\n", " <td>H0FL01120</td>\n", " <td>ROMAGNANO, CHASE ANDERSON ANDY REV.</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>FL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>C00719351</td>\n", " <td>P.O. BOX 9328</td>\n", " <td>NaN</td>\n", " <td>PENSACOLA</td>\n", " <td>FL</td>\n", " <td>32513.0</td>\n", " </tr>\n", " <tr>\n", " <th>439</th>\n", " <td>H0FL01138</td>\n", " <td>HOWARD, CHERYL</td>\n", " <td>DEM</td>\n", " <td>2020</td>\n", " <td>FL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>C00735977</td>\n", " <td>605 CROWN COVE</td>\n", " <td>NaN</td>\n", " <td>PENSACOLA</td>\n", " <td>FL</td>\n", " <td>32502.0</td>\n", " </tr>\n", " <tr>\n", " <th>440</th>\n", " <td>H0FL01146</td>\n", " <td>MERK, GREGORY CHARLES</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>FL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>C</td>\n", " <td>C</td>\n", " <td>C00740340</td>\n", " <td>4661 CALLE VENTOSO</td>\n", " <td>NaN</td>\n", " <td>PENSACOLA</td>\n", " <td>FL</td>\n", " <td>32514.0</td>\n", " </tr>\n", " <tr>\n", " <th>441</th>\n", " <td>H0FL01153</td>\n", " <td>ORAM, ALBERT</td>\n", " <td>NPA</td>\n", " <td>2020</td>\n", " <td>FL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>C</td>\n", " <td>C</td>\n", " <td>C00746636</td>\n", " <td>825 BAYSHORE DR APT 804</td>\n", " <td>NaN</td>\n", " <td>PENSACOLA</td>\n", " <td>FL</td>\n", " <td>32507.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "437 H0FL01112 ROSAS, EMILY ... FL 33617.0\n", "438 H0FL01120 ROMAGNANO, CHASE ANDERSON ANDY REV. ... FL 32513.0\n", "439 H0FL01138 HOWARD, CHERYL ... FL 32502.0\n", "440 H0FL01146 MERK, GREGORY CHARLES ... FL 32514.0\n", "441 H0FL01153 ORAM, ALBERT ... FL 32507.0\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 108 } ] }, { "cell_type": "code", "metadata": { "id": "ytvXx2wHCvs1", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "f379bdaf-43ee-4446-a7e5-1c4302a9ec9c" }, "source": [ "len(tg.index)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "242" ] }, "metadata": { "tags": [] }, "execution_count": 109 } ] }, { "cell_type": "code", "metadata": { "id": "mymZWrEypYla", "colab": { "base_uri": "https://localhost:8080/", "height": 130 }, "outputId": "6f97a11e-f339-4022-fd05-136583e1ab0f" }, "source": [ "candidates[candidates['CAND_NAME'].str.contains('TRUMP, DONALD')]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_NAME</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>CAND_OFFICE_ST</th>\n", " <th>CAND_OFFICE</th>\n", " <th>CAND_OFFICE_DISTRICT</th>\n", " <th>CAND_ICI</th>\n", " <th>CAND_STATUS</th>\n", " <th>CAND_PCC</th>\n", " <th>CAND_ST1</th>\n", " <th>CAND_ST2</th>\n", " <th>CAND_CITY</th>\n", " <th>CAND_ST</th>\n", " <th>CAND_ZIP</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>6403</th>\n", " <td>P80001571</td>\n", " <td>TRUMP, DONALD J.</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>US</td>\n", " <td>P</td>\n", " <td>0.0</td>\n", " <td>I</td>\n", " <td>C</td>\n", " <td>C00580100</td>\n", " <td>725 FIFTH AVENUE</td>\n", " <td>NaN</td>\n", " <td>NEW YORK</td>\n", " <td>NY</td>\n", " <td>10022.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_NAME ... CAND_ST CAND_ZIP\n", "6403 P80001571 TRUMP, DONALD J. ... NY 10022.0\n", "\n", "[1 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 110 } ] }, { "cell_type": "markdown", "metadata": { "id": "AoFXVna2--1J" }, "source": [ "Search on 'Pas220.zip' folder" ] }, { "cell_type": "code", "metadata": { "id": "wIKz_EIYiHLj", "colab": { "base_uri": "https://localhost:8080/", "height": 400 }, "outputId": "1bb2e9a1-c58c-460c-e64d-2f73e34c291f" }, "source": [ "header = pd.read_csv(data_dir+'/pas2_header_file.csv')\n", "\n", "with ZipFile(data_dir+'/pas220.zip') as zip:\n", " #print(zip.namelist())\n", " spending = pd.read_csv(zip.open('itpas2.txt'), sep='|', names=header.columns)\n", "spending.head()" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (10,11,12) have mixed types.Specify dtype option on import or set low_memory=False.\n", " interactivity=interactivity, compiler=compiler, result=result)\n" ], "name": "stderr" }, { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>AMNDT_IND</th>\n", " <th>RPT_TP</th>\n", " <th>TRANSACTION_PGI</th>\n", " <th>IMAGE_NUM</th>\n", " <th>TRANSACTION_TP</th>\n", " <th>ENTITY_TP</th>\n", " <th>NAME</th>\n", " <th>CITY</th>\n", " <th>STATE</th>\n", " <th>ZIP_CODE</th>\n", " <th>EMPLOYER</th>\n", " <th>OCCUPATION</th>\n", " <th>TRANSACTION_DT</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>OTHER_ID</th>\n", " <th>CAND_ID</th>\n", " <th>TRAN_ID</th>\n", " <th>FILE_NUM</th>\n", " <th>MEMO_CD</th>\n", " <th>MEMO_TEXT</th>\n", " <th>SUB_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00567180</td>\n", " <td>T</td>\n", " <td>TER</td>\n", " <td>P2020</td>\n", " <td>201901099143774199</td>\n", " <td>24K</td>\n", " <td>PAC</td>\n", " <td>TED YOHO FOR CONGRESS</td>\n", " <td>GAINESVILLE</td>\n", " <td>FL</td>\n", " <td>32608</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>1082019.0</td>\n", " <td>1880</td>\n", " <td>C00494583</td>\n", " <td>H2FL06109</td>\n", " <td>SB23.4447</td>\n", " <td>1303604</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4021920191640423011</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00104885</td>\n", " <td>A</td>\n", " <td>TER</td>\n", " <td>G2020</td>\n", " <td>201901289144031511</td>\n", " <td>24K</td>\n", " <td>CCM</td>\n", " <td>TEAM GRAHAM INC</td>\n", " <td>COLUMBIA</td>\n", " <td>SC</td>\n", " <td>29202</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>12202018.0</td>\n", " <td>3000</td>\n", " <td>C00458828</td>\n", " <td>H4SC03087</td>\n", " <td>SB23.17757</td>\n", " <td>1307636</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4022220191643444985</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00104885</td>\n", " <td>A</td>\n", " <td>TER</td>\n", " <td>P2022</td>\n", " <td>201901289144031512</td>\n", " <td>24K</td>\n", " <td>CCM</td>\n", " <td>TIM SCOTT FOR SENATE</td>\n", " <td>CHARLESTON</td>\n", " <td>SC</td>\n", " <td>29407</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>12202018.0</td>\n", " <td>1000</td>\n", " <td>C00540302</td>\n", " <td>H0SC01279</td>\n", " <td>SB23.17756</td>\n", " <td>1307636</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4022220191643444987</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00104885</td>\n", " <td>A</td>\n", " <td>TER</td>\n", " <td>P2020</td>\n", " <td>201901289144031511</td>\n", " <td>24K</td>\n", " <td>CCM</td>\n", " <td>FRIENDS OF JIM CLYBURN</td>\n", " <td>COLUMBIA</td>\n", " <td>SC</td>\n", " <td>29211</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>1072019.0</td>\n", " <td>470</td>\n", " <td>C00255562</td>\n", " <td>H2SC02042</td>\n", " <td>SB23.17755</td>\n", " <td>1307636</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4022220191643444981</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00688408</td>\n", " <td>T</td>\n", " <td>TER</td>\n", " <td>G2018</td>\n", " <td>201901319144305867</td>\n", " <td>24E</td>\n", " <td>ORG</td>\n", " <td>JACKSON ADVOCATE</td>\n", " <td>JACKSON</td>\n", " <td>MS</td>\n", " <td>39207</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>12122018.0</td>\n", " <td>1000</td>\n", " <td>S8MS00287</td>\n", " <td>S8MS00287</td>\n", " <td>SE.4233</td>\n", " <td>1310906</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>4022420191643632157</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID AMNDT_IND RPT_TP ... MEMO_CD MEMO_TEXT SUB_ID\n", "0 C00567180 T TER ... NaN NaN 4021920191640423011\n", "1 C00104885 A TER ... NaN NaN 4022220191643444985\n", "2 C00104885 A TER ... NaN NaN 4022220191643444987\n", "3 C00104885 A TER ... NaN NaN 4022220191643444981\n", "4 C00688408 T TER ... NaN NaN 4022420191643632157\n", "\n", "[5 rows x 22 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 111 } ] }, { "cell_type": "code", "metadata": { "id": "cJReRZAi7RSR", "colab": { "base_uri": "https://localhost:8080/", "height": 195 }, "outputId": "8f41dac7-2e34-4db1-b661-2b46ab2debab" }, "source": [ "house_spending_FL = spending[(spending['CAND_ID'].str.startswith('H')==True) & (spending['STATE']=='FL')]\n", "df_house = house_spending_FL[['CMTE_ID', 'NAME', 'STATE', 'TRANSACTION_AMT', 'CAND_ID']]\n", "df_house.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>STATE</th>\n", " <th>TRANSACTION_AMT</th>\n", " <th>CAND_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00567180</td>\n", " <td>TED YOHO FOR CONGRESS</td>\n", " <td>FL</td>\n", " <td>1880</td>\n", " <td>H2FL06109</td>\n", " </tr>\n", " <tr>\n", " <th>154</th>\n", " <td>C00027342</td>\n", " <td>HASTINGS FOR CONGRESS</td>\n", " <td>FL</td>\n", " <td>5000</td>\n", " <td>H2FL23021</td>\n", " </tr>\n", " <tr>\n", " <th>195</th>\n", " <td>C00003855</td>\n", " <td>DONNA SHALALA FOR CONGRESS</td>\n", " <td>FL</td>\n", " <td>1000</td>\n", " <td>H8FL27193</td>\n", " </tr>\n", " <tr>\n", " <th>256</th>\n", " <td>C00339655</td>\n", " <td>VERN BUCHANAN FOR CONGRESS</td>\n", " <td>FL</td>\n", " <td>5000</td>\n", " <td>H6FL13148</td>\n", " </tr>\n", " <tr>\n", " <th>421</th>\n", " <td>C00035204</td>\n", " <td>BILIRAKIS FOR CONGRESS</td>\n", " <td>FL</td>\n", " <td>1000</td>\n", " <td>H6FL09070</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID NAME STATE TRANSACTION_AMT CAND_ID\n", "0 C00567180 TED YOHO FOR CONGRESS FL 1880 H2FL06109\n", "154 C00027342 HASTINGS FOR CONGRESS FL 5000 H2FL23021\n", "195 C00003855 DONNA SHALALA FOR CONGRESS FL 1000 H8FL27193\n", "256 C00339655 VERN BUCHANAN FOR CONGRESS FL 5000 H6FL13148\n", "421 C00035204 BILIRAKIS FOR CONGRESS FL 1000 H6FL09070" ] }, "metadata": { "tags": [] }, "execution_count": 112 } ] }, { "cell_type": "markdown", "metadata": { "id": "L-O7kKyr_OkU" }, "source": [ "Search on 'CM20.zip' folder" ] }, { "cell_type": "code", "metadata": { "id": "UX-S1tm7nOo2", "colab": { "base_uri": "https://localhost:8080/", "height": 518 }, "outputId": "145959fb-bb59-4e7a-c6e1-31f3d64148c5" }, "source": [ "header = pd.read_csv(data_dir+'/cm_header_file.csv')\n", "\n", "with ZipFile(data_dir+'/cm20.zip') as zip:\n", " #print(zip.namelist())\n", " df = pd.read_csv(zip.open('cm.txt'), sep='|', names=header.columns)\n", "df.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>CMTE_NM</th>\n", " <th>TRES_NM</th>\n", " <th>CMTE_ST1</th>\n", " <th>CMTE_ST2</th>\n", " <th>CMTE_CITY</th>\n", " <th>CMTE_ST</th>\n", " <th>CMTE_ZIP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_PTY_AFFILIATION</th>\n", " <th>CMTE_FILING_FREQ</th>\n", " <th>ORG_TP</th>\n", " <th>CONNECTED_ORG_NM</th>\n", " <th>CAND_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00000059</td>\n", " <td>HALLMARK CARDS PAC</td>\n", " <td>SARAH MOE</td>\n", " <td>2501 MCGEE</td>\n", " <td>MD #500</td>\n", " <td>KANSAS CITY</td>\n", " <td>MO</td>\n", " <td>64108</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>UNK</td>\n", " <td>M</td>\n", " <td>C</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00000422</td>\n", " <td>AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ...</td>\n", " <td>WALKER, KEVIN MR.</td>\n", " <td>25 MASSACHUSETTS AVE, NW</td>\n", " <td>SUITE 600</td>\n", " <td>WASHINGTON</td>\n", " <td>DC</td>\n", " <td>200017400</td>\n", " <td>B</td>\n", " <td>Q</td>\n", " <td>NaN</td>\n", " <td>M</td>\n", " <td>M</td>\n", " <td>DELAWARE MEDICAL PAC</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00000489</td>\n", " <td>D R I V E POLITICAL FUND CHAPTER 886</td>\n", " <td>JERRY SIMS JR</td>\n", " <td>3528 W RENO</td>\n", " <td>NaN</td>\n", " <td>OKLAHOMA CITY</td>\n", " <td>OK</td>\n", " <td>73107</td>\n", " <td>U</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>Q</td>\n", " <td>L</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00000547</td>\n", " <td>KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE</td>\n", " <td>JERRY SLAUGHTER</td>\n", " <td>623 SW 10TH AVE</td>\n", " <td>NaN</td>\n", " <td>TOPEKA</td>\n", " <td>KS</td>\n", " <td>666121627</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>UNK</td>\n", " <td>Q</td>\n", " <td>M</td>\n", " <td>KANSAS MEDICAL SOCIETY</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00000638</td>\n", " <td>INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC...</td>\n", " <td>ACHENBACH, GRANT MR.</td>\n", " <td>322 CANAL WALK, CANAL LEVEL</td>\n", " <td>NaN</td>\n", " <td>INDIANAPOLIS</td>\n", " <td>IN</td>\n", " <td>46202</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>NaN</td>\n", " <td>T</td>\n", " <td>M</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID ... CAND_ID\n", "0 C00000059 ... NaN\n", "1 C00000422 ... NaN\n", "2 C00000489 ... NaN\n", "3 C00000547 ... NaN\n", "4 C00000638 ... NaN\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 113 } ] }, { "cell_type": "code", "metadata": { "id": "Gz5FKlS11ZGQ", "colab": { "base_uri": "https://localhost:8080/", "height": 195 }, "outputId": "caef84a1-84e2-4340-d070-4103622b12fa" }, "source": [ "df_name = df[['CMTE_ID', 'CMTE_NM']]\n", "df_name.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>CMTE_NM</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00000059</td>\n", " <td>HALLMARK CARDS PAC</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>C00000422</td>\n", " <td>AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C00000489</td>\n", " <td>D R I V E POLITICAL FUND CHAPTER 886</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>C00000547</td>\n", " <td>KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>C00000638</td>\n", " <td>INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID CMTE_NM\n", "0 C00000059 HALLMARK CARDS PAC\n", "1 C00000422 AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ...\n", "2 C00000489 D R I V E POLITICAL FUND CHAPTER 886\n", "3 C00000547 KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE\n", "4 C00000638 INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC..." ] }, "metadata": { "tags": [] }, "execution_count": 114 } ] }, { "cell_type": "code", "metadata": { "id": "MpE777-JE3Gt", "colab": { "base_uri": "https://localhost:8080/", "height": 618 }, "outputId": "4d62b2f1-8c4e-4678-df9b-b028eeda0ea3" }, "source": [ "Society_Organization = df[df['CONNECTED_ORG_NM'].str.contains('SOCIETY', na=False)]\n", "Society_Organization.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CMTE_ID</th>\n", " <th>CMTE_NM</th>\n", " <th>TRES_NM</th>\n", " <th>CMTE_ST1</th>\n", " <th>CMTE_ST2</th>\n", " <th>CMTE_CITY</th>\n", " <th>CMTE_ST</th>\n", " <th>CMTE_ZIP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_PTY_AFFILIATION</th>\n", " <th>CMTE_FILING_FREQ</th>\n", " <th>ORG_TP</th>\n", " <th>CONNECTED_ORG_NM</th>\n", " <th>CAND_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>3</th>\n", " <td>C00000547</td>\n", " <td>KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE</td>\n", " <td>JERRY SLAUGHTER</td>\n", " <td>623 SW 10TH AVE</td>\n", " <td>NaN</td>\n", " <td>TOPEKA</td>\n", " <td>KS</td>\n", " <td>666121627</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>UNK</td>\n", " <td>Q</td>\n", " <td>M</td>\n", " <td>KANSAS MEDICAL SOCIETY</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>C00001180</td>\n", " <td>MICHIGAN DOCTORS POLITICAL ACTION COMMITTEE - ...</td>\n", " <td>GHOSE, AMIT</td>\n", " <td>P.O. BOX 769</td>\n", " <td>NaN</td>\n", " <td>EAST LANSING</td>\n", " <td>MI</td>\n", " <td>48826</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>NNE</td>\n", " <td>Q</td>\n", " <td>T</td>\n", " <td>MICHIGAN STATE MEDICAL SOCIETY</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>37</th>\n", " <td>C00003152</td>\n", " <td>NORTH CAROLINA MEDICAL SOCIETY FEDERAL POLITIC...</td>\n", " <td>HAYES, E. REBECCA</td>\n", " <td>PO BOX 25834</td>\n", " <td>222 N. PERSON STREET</td>\n", " <td>RALEIGH</td>\n", " <td>NC</td>\n", " <td>27611</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>NaN</td>\n", " <td>Q</td>\n", " <td>M</td>\n", " <td>NORTH CAROLINA MEDICAL SOCIETY</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>51</th>\n", " <td>C00003970</td>\n", " <td>IOWA MEDICAL SOCIETY POLITICAL ACTION COMMITTEE</td>\n", " <td>DOOLEY, JOHN B</td>\n", " <td>515 E LOCUST STREET</td>\n", " <td>SUITE 400</td>\n", " <td>DES MOINES</td>\n", " <td>IA</td>\n", " <td>50309</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>NaN</td>\n", " <td>Q</td>\n", " <td>M</td>\n", " <td>IOWA MEDICAL SOCIETY</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>136</th>\n", " <td>C00012211</td>\n", " <td>NORTH CAROLINA DENTAL SOCIETY PAC</td>\n", " <td>BROWN, EVELYN M. DR.</td>\n", " <td>1600 EVANS ROAD</td>\n", " <td>NaN</td>\n", " <td>CARY</td>\n", " <td>NC</td>\n", " <td>27513</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>NaN</td>\n", " <td>Q</td>\n", " <td>M</td>\n", " <td>NORTH CAROLINA DENTAL SOCIETY</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CMTE_ID ... CAND_ID\n", "3 C00000547 ... NaN\n", "11 C00001180 ... NaN\n", "37 C00003152 ... NaN\n", "51 C00003970 ... NaN\n", "136 C00012211 ... NaN\n", "\n", "[5 rows x 15 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 115 } ] }, { "cell_type": "code", "metadata": { "id": "GErXTSsJGOV-", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "f2899955-e0ec-47b2-9310-24ba8fa7def4" }, "source": [ "len(Society_Organization.index)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "28" ] }, "metadata": { "tags": [] }, "execution_count": 116 } ] }, { "cell_type": "markdown", "metadata": { "id": "drRP2x2Z_lZS" }, "source": [ "Search on 'CCL20.zip' folder" ] }, { "cell_type": "code", "metadata": { "id": "ombc-hjIn0KR", "colab": { "base_uri": "https://localhost:8080/", "height": 195 }, "outputId": "20885340-ea71-466c-e66d-ba3684884989" }, "source": [ "header = pd.read_csv(data_dir+'/ccl_header_file.csv')\n", "\n", "with ZipFile(data_dir+'/ccl20.zip') as zip:\n", " #print(zip.namelist())\n", " linkage = pd.read_csv(zip.open('ccl.txt'), sep='|', names=header.columns)\n", "\n", "linkage.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>FEC_ELECTION_YR</th>\n", " <th>CMTE_ID</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>LINKAGE_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>C00713602</td>\n", " <td>2019</td>\n", " <td>2020</td>\n", " <td>C00712851</td>\n", " <td>O</td>\n", " <td>U</td>\n", " <td>228963</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>H0AK00105</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00607515</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>229250</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>H0AL01055</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00697789</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>226125</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>H0AL01063</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00701557</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227053</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>H0AL01071</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00701409</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227054</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_ELECTION_YR FEC_ELECTION_YR ... CMTE_TP CMTE_DSGN LINKAGE_ID\n", "0 C00713602 2019 2020 ... O U 228963\n", "1 H0AK00105 2020 2020 ... H P 229250\n", "2 H0AL01055 2020 2020 ... H P 226125\n", "3 H0AL01063 2020 2020 ... H P 227053\n", "4 H0AL01071 2020 2020 ... H P 227054\n", "\n", "[5 rows x 7 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 117 } ] }, { "cell_type": "code", "metadata": { "id": "vb8dIlKKE_4m" }, "source": [ "df1 = pd.concat([candidates, linkage, df], axis=1, sort=False).reset_index()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "yEgZmnNHT2xw", "colab": { "base_uri": "https://localhost:8080/", "height": 534 }, "outputId": "d25d1e93-80a7-4ebd-a382-97460212b7ae" }, "source": [ "df1.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>index</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_NAME</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>CAND_OFFICE_ST</th>\n", " <th>CAND_OFFICE</th>\n", " <th>CAND_OFFICE_DISTRICT</th>\n", " <th>CAND_ICI</th>\n", " <th>CAND_STATUS</th>\n", " <th>CAND_PCC</th>\n", " <th>CAND_ST1</th>\n", " <th>CAND_ST2</th>\n", " <th>CAND_CITY</th>\n", " <th>CAND_ST</th>\n", " <th>CAND_ZIP</th>\n", " <th>CAND_ID</th>\n", " <th>CAND_ELECTION_YR</th>\n", " <th>FEC_ELECTION_YR</th>\n", " <th>CMTE_ID</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>LINKAGE_ID</th>\n", " <th>CMTE_ID</th>\n", " <th>CMTE_NM</th>\n", " <th>TRES_NM</th>\n", " <th>CMTE_ST1</th>\n", " <th>CMTE_ST2</th>\n", " <th>CMTE_CITY</th>\n", " <th>CMTE_ST</th>\n", " <th>CMTE_ZIP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_PTY_AFFILIATION</th>\n", " <th>CMTE_FILING_FREQ</th>\n", " <th>ORG_TP</th>\n", " <th>CONNECTED_ORG_NM</th>\n", " <th>CAND_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0</td>\n", " <td>H0AK00105</td>\n", " <td>LAMB, THOMAS</td>\n", " <td>NNE</td>\n", " <td>2020.0</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>C00607515</td>\n", " <td>1861 W LAKE LUCILLE DR</td>\n", " <td>NaN</td>\n", " <td>WASILLA</td>\n", " <td>AK</td>\n", " <td>99654.0</td>\n", " <td>C00713602</td>\n", " <td>2019.0</td>\n", " <td>2020.0</td>\n", " <td>C00712851</td>\n", " <td>O</td>\n", " <td>U</td>\n", " <td>228963.0</td>\n", " <td>C00000059</td>\n", " <td>HALLMARK CARDS PAC</td>\n", " <td>SARAH MOE</td>\n", " <td>2501 MCGEE</td>\n", " <td>MD #500</td>\n", " <td>KANSAS CITY</td>\n", " <td>MO</td>\n", " <td>64108</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>UNK</td>\n", " <td>M</td>\n", " <td>C</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>H0AK00113</td>\n", " <td>TUGATUK, RAY SEAN</td>\n", " <td>DEM</td>\n", " <td>2020.0</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>PO BOX 172</td>\n", " <td>NaN</td>\n", " <td>MANAKOTAK</td>\n", " <td>AK</td>\n", " <td>99628.0</td>\n", " <td>H0AK00105</td>\n", " <td>2020.0</td>\n", " <td>2020.0</td>\n", " <td>C00607515</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>229250.0</td>\n", " <td>C00000422</td>\n", " <td>AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ...</td>\n", " <td>WALKER, KEVIN MR.</td>\n", " <td>25 MASSACHUSETTS AVE, NW</td>\n", " <td>SUITE 600</td>\n", " <td>WASHINGTON</td>\n", " <td>DC</td>\n", " <td>200017400</td>\n", " <td>B</td>\n", " <td>Q</td>\n", " <td>NaN</td>\n", " <td>M</td>\n", " <td>M</td>\n", " <td>DELAWARE MEDICAL PAC</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2</td>\n", " <td>H0AK01046</td>\n", " <td>CATALANO, THOMAS</td>\n", " <td>OTH</td>\n", " <td>2020.0</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>188 WEST NORTHERN LIGHTS BOULEVARD</td>\n", " <td>NaN</td>\n", " <td>ANCHORAGE</td>\n", " <td>AK</td>\n", " <td>99503.0</td>\n", " <td>H0AL01055</td>\n", " <td>2020.0</td>\n", " <td>2020.0</td>\n", " <td>C00697789</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>226125.0</td>\n", " <td>C00000489</td>\n", " <td>D R I V E POLITICAL FUND CHAPTER 886</td>\n", " <td>JERRY SIMS JR</td>\n", " <td>3528 W RENO</td>\n", " <td>NaN</td>\n", " <td>OKLAHOMA CITY</td>\n", " <td>OK</td>\n", " <td>73107</td>\n", " <td>U</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>Q</td>\n", " <td>L</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>3</td>\n", " <td>H0AL01055</td>\n", " <td>CARL, JERRY LEE, JR</td>\n", " <td>REP</td>\n", " <td>2020.0</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00697789</td>\n", " <td>PO BOX 852138</td>\n", " <td>NaN</td>\n", " <td>MOBILE</td>\n", " <td>AL</td>\n", " <td>36685.0</td>\n", " <td>H0AL01063</td>\n", " <td>2020.0</td>\n", " <td>2020.0</td>\n", " <td>C00701557</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227053.0</td>\n", " <td>C00000547</td>\n", " <td>KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE</td>\n", " <td>JERRY SLAUGHTER</td>\n", " <td>623 SW 10TH AVE</td>\n", " <td>NaN</td>\n", " <td>TOPEKA</td>\n", " <td>KS</td>\n", " <td>666121627</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>UNK</td>\n", " <td>Q</td>\n", " <td>M</td>\n", " <td>KANSAS MEDICAL SOCIETY</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>4</td>\n", " <td>H0AL01063</td>\n", " <td>LAMBERT, DOUGLAS WESTLEY III</td>\n", " <td>REP</td>\n", " <td>2020.0</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00701557</td>\n", " <td>7194 STILLWATER BLVD</td>\n", " <td>NaN</td>\n", " <td>SPANISH FORT</td>\n", " <td>AL</td>\n", " <td>36527.0</td>\n", " <td>H0AL01071</td>\n", " <td>2020.0</td>\n", " <td>2020.0</td>\n", " <td>C00701409</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227054.0</td>\n", " <td>C00000638</td>\n", " <td>INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC...</td>\n", " <td>ACHENBACH, GRANT MR.</td>\n", " <td>322 CANAL WALK, CANAL LEVEL</td>\n", " <td>NaN</td>\n", " <td>INDIANAPOLIS</td>\n", " <td>IN</td>\n", " <td>46202</td>\n", " <td>U</td>\n", " <td>Q</td>\n", " <td>NaN</td>\n", " <td>T</td>\n", " <td>M</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " index CAND_ID ... CONNECTED_ORG_NM CAND_ID\n", "0 0 H0AK00105 ... NaN NaN\n", "1 1 H0AK00113 ... DELAWARE MEDICAL PAC NaN\n", "2 2 H0AK01046 ... NaN NaN\n", "3 3 H0AL01055 ... KANSAS MEDICAL SOCIETY NaN\n", "4 4 H0AL01063 ... NaN NaN\n", "\n", "[5 rows x 38 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 119 } ] }, { "cell_type": "code", "metadata": { "id": "d_oy0b5jT8eY", "colab": { "base_uri": "https://localhost:8080/", "height": 333 }, "outputId": "6650d9ab-983e-4277-843d-157107978f50" }, "source": [ "df2 = pd.merge(candidates, linkage, on='CAND_ID')\n", "df2.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_NAME</th>\n", " <th>CAND_PTY_AFFILIATION</th>\n", " <th>CAND_ELECTION_YR_x</th>\n", " <th>CAND_OFFICE_ST</th>\n", " <th>CAND_OFFICE</th>\n", " <th>CAND_OFFICE_DISTRICT</th>\n", " <th>CAND_ICI</th>\n", " <th>CAND_STATUS</th>\n", " <th>CAND_PCC</th>\n", " <th>CAND_ST1</th>\n", " <th>CAND_ST2</th>\n", " <th>CAND_CITY</th>\n", " <th>CAND_ST</th>\n", " <th>CAND_ZIP</th>\n", " <th>CAND_ELECTION_YR_y</th>\n", " <th>FEC_ELECTION_YR</th>\n", " <th>CMTE_ID</th>\n", " <th>CMTE_TP</th>\n", " <th>CMTE_DSGN</th>\n", " <th>LINKAGE_ID</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>H0AK00105</td>\n", " <td>LAMB, THOMAS</td>\n", " <td>NNE</td>\n", " <td>2020</td>\n", " <td>AK</td>\n", " <td>H</td>\n", " <td>0.0</td>\n", " <td>C</td>\n", " <td>N</td>\n", " <td>C00607515</td>\n", " <td>1861 W LAKE LUCILLE DR</td>\n", " <td>NaN</td>\n", " <td>WASILLA</td>\n", " <td>AK</td>\n", " <td>99654.0</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00607515</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>229250</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>H0AL01055</td>\n", " <td>CARL, JERRY LEE, JR</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00697789</td>\n", " <td>PO BOX 852138</td>\n", " <td>NaN</td>\n", " <td>MOBILE</td>\n", " <td>AL</td>\n", " <td>36685.0</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00697789</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>226125</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>H0AL01063</td>\n", " <td>LAMBERT, DOUGLAS WESTLEY III</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00701557</td>\n", " <td>7194 STILLWATER BLVD</td>\n", " <td>NaN</td>\n", " <td>SPANISH FORT</td>\n", " <td>AL</td>\n", " <td>36527.0</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00701557</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227053</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>H0AL01071</td>\n", " <td>PRINGLE, CHRISTOPHER PAUL</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00701409</td>\n", " <td>202 GOVERNMENT STREET</td>\n", " <td>SUITE 220</td>\n", " <td>MOBILE</td>\n", " <td>AL</td>\n", " <td>36602.0</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00701409</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227054</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>H0AL01089</td>\n", " <td>HIGHTOWER, BILL</td>\n", " <td>REP</td>\n", " <td>2020</td>\n", " <td>AL</td>\n", " <td>H</td>\n", " <td>1.0</td>\n", " <td>O</td>\n", " <td>C</td>\n", " <td>C00703066</td>\n", " <td>PO BOX 91038</td>\n", " <td>NaN</td>\n", " <td>MOBILE</td>\n", " <td>AL</td>\n", " <td>36691.0</td>\n", " <td>2020</td>\n", " <td>2020</td>\n", " <td>C00703066</td>\n", " <td>H</td>\n", " <td>P</td>\n", " <td>227266</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_NAME ... CMTE_DSGN LINKAGE_ID\n", "0 H0AK00105 LAMB, THOMAS ... P 229250\n", "1 H0AL01055 CARL, JERRY LEE, JR ... P 226125\n", "2 H0AL01063 LAMBERT, DOUGLAS WESTLEY III ... P 227053\n", "3 H0AL01071 PRINGLE, CHRISTOPHER PAUL ... P 227054\n", "4 H0AL01089 HIGHTOWER, BILL ... P 227266\n", "\n", "[5 rows x 21 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 120 } ] }, { "cell_type": "code", "metadata": { "id": "5pJTEGPm0OXR" }, "source": [ "df_merge = pd.merge(df2, df_name, on='CMTE_ID')\n", "#df_merge.head()\n", "\n", "df_sort = df_merge[['CAND_ID', 'CAND_NAME', 'CAND_ST', 'CMTE_NM']]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "jAIGmy572dYD", "colab": { "base_uri": "https://localhost:8080/", "height": 343 }, "outputId": "bbeade85-9d03-4b76-88d7-ba6ecf5ec6f0" }, "source": [ "df_sort[df_sort['CAND_ST']=='FL'].head(10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_NAME</th>\n", " <th>CAND_ST</th>\n", " <th>CMTE_NM</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>7</th>\n", " <td>H0AL01113</td>\n", " <td>ROMAGNANO, CHASE ANDERSON ANDY REVEREN</td>\n", " <td>FL</td>\n", " <td>ANDY FOR NORTHWEST FLORIDA</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>H0FL01120</td>\n", " <td>ROMAGNANO, CHASE ANDERSON ANDY REV.</td>\n", " <td>FL</td>\n", " <td>ANDY FOR NORTHWEST FLORIDA</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>H0FL02110</td>\n", " <td>SOUTHERLAND, WILLIAM STEVE II</td>\n", " <td>FL</td>\n", " <td>FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...</td>\n", " </tr>\n", " <tr>\n", " <th>381</th>\n", " <td>H0FL01120</td>\n", " <td>ROMAGNANO, CHASE ANDERSON ANDY REV.</td>\n", " <td>FL</td>\n", " <td>ANDY ROMAGNANO FOR NORTHWEST FLORIDA</td>\n", " </tr>\n", " <tr>\n", " <th>382</th>\n", " <td>H0FL01138</td>\n", " <td>HOWARD, CHERYL</td>\n", " <td>FL</td>\n", " <td>CHERYL HOWARD FOR U S HOUSE OF REPRESENTATIVES</td>\n", " </tr>\n", " <tr>\n", " <th>383</th>\n", " <td>H0FL01146</td>\n", " <td>MERK, GREGORY CHARLES</td>\n", " <td>FL</td>\n", " <td>GREG MERK CAMPAIGN</td>\n", " </tr>\n", " <tr>\n", " <th>384</th>\n", " <td>H0FL01153</td>\n", " <td>ORAM, ALBERT</td>\n", " <td>FL</td>\n", " <td>ORAM FOR CONGRESS</td>\n", " </tr>\n", " <tr>\n", " <th>386</th>\n", " <td>H0FL02193</td>\n", " <td>ANDERSON, WILLIE JEFFERSON JR</td>\n", " <td>FL</td>\n", " <td>ANDERSON FOR US CONGRESS</td>\n", " </tr>\n", " <tr>\n", " <th>387</th>\n", " <td>H0FL02219</td>\n", " <td>THRIPP, KRISTY</td>\n", " <td>FL</td>\n", " <td>KRISTY THRIPP FOR CONGRESS</td>\n", " </tr>\n", " <tr>\n", " <th>388</th>\n", " <td>H0FL03118</td>\n", " <td>BLACK, DEAN A</td>\n", " <td>FL</td>\n", " <td>DEAN BLACK FOR CONGRESS</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID ... CMTE_NM\n", "7 H0AL01113 ... ANDY FOR NORTHWEST FLORIDA\n", "8 H0FL01120 ... ANDY FOR NORTHWEST FLORIDA\n", "14 H0FL02110 ... FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...\n", "381 H0FL01120 ... ANDY ROMAGNANO FOR NORTHWEST FLORIDA\n", "382 H0FL01138 ... CHERYL HOWARD FOR U S HOUSE OF REPRESENTATIVES\n", "383 H0FL01146 ... GREG MERK CAMPAIGN\n", "384 H0FL01153 ... ORAM FOR CONGRESS\n", "386 H0FL02193 ... ANDERSON FOR US CONGRESS\n", "387 H0FL02219 ... KRISTY THRIPP FOR CONGRESS\n", "388 H0FL03118 ... DEAN BLACK FOR CONGRESS\n", "\n", "[10 rows x 4 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 122 } ] }, { "cell_type": "code", "metadata": { "id": "geYa-uW_34Aq", "colab": { "base_uri": "https://localhost:8080/", "height": 195 }, "outputId": "8ec71abb-0aa5-42cf-a6f4-41716b07891e" }, "source": [ "df_sort_house = pd.merge(df_sort, df_house, on='CAND_ID')\n", "df_sort_house.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CAND_ID</th>\n", " <th>CAND_NAME</th>\n", " <th>CAND_ST</th>\n", " <th>CMTE_NM</th>\n", " <th>CMTE_ID</th>\n", " <th>NAME</th>\n", " <th>STATE</th>\n", " <th>TRANSACTION_AMT</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>H0AL02087</td>\n", " <td>ROBY, MARTHA</td>\n", " <td>NaN</td>\n", " <td>MARTHA ROBY FOR CONGRESS</td>\n", " <td>C00522458</td>\n", " <td>PINPOINT WEBSOLUTIONS</td>\n", " <td>FL</td>\n", " <td>750</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>H0AL02087</td>\n", " <td>ROBY, MARTHA</td>\n", " <td>NaN</td>\n", " <td>FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...</td>\n", " <td>C00522458</td>\n", " <td>PINPOINT WEBSOLUTIONS</td>\n", " <td>FL</td>\n", " <td>750</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>H0GA08099</td>\n", " <td>SCOTT, JAMES AUSTIN</td>\n", " <td>GA</td>\n", " <td>FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...</td>\n", " <td>C00522458</td>\n", " <td>PINPOINT WEBSOLUTIONS</td>\n", " <td>FL</td>\n", " <td>750</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>H0GA08099</td>\n", " <td>SCOTT, JAMES AUSTIN</td>\n", " <td>GA</td>\n", " <td>AUSTIN SCOTT FOR CONGRESS INC</td>\n", " <td>C00522458</td>\n", " <td>PINPOINT WEBSOLUTIONS</td>\n", " <td>FL</td>\n", " <td>750</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>H0GA08099</td>\n", " <td>SCOTT, JAMES AUSTIN</td>\n", " <td>GA</td>\n", " <td>AUSTIN SCOTT VICTORY FUND</td>\n", " <td>C00522458</td>\n", " <td>PINPOINT WEBSOLUTIONS</td>\n", " <td>FL</td>\n", " <td>750</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CAND_ID CAND_NAME ... STATE TRANSACTION_AMT\n", "0 H0AL02087 ROBY, MARTHA ... FL 750\n", "1 H0AL02087 ROBY, MARTHA ... FL 750\n", "2 H0GA08099 SCOTT, JAMES AUSTIN ... FL 750\n", "3 H0GA08099 SCOTT, JAMES AUSTIN ... FL 750\n", "4 H0GA08099 SCOTT, JAMES AUSTIN ... FL 750\n", "\n", "[5 rows x 8 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 123 } ] }, { "cell_type": "code", "metadata": { "id": "hRa7Rywa-Rby", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "71f63e33-c35c-413b-c882-c3863e073ba0" }, "source": [ "df_sort_house['TRANSACTION_AMT'].sum()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "17474528" ] }, "metadata": { "tags": [] }, "execution_count": 124 } ] }, { "cell_type": "code", "metadata": { "id": "lJ51BCl5YHBM" }, "source": [ "" ], "execution_count": null, "outputs": [] } ] }