{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load Packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Keep the __future__ import first: it must precede every other statement in the cell,\n",
    "# and it has to run before any later cell that relies on true division under Python 2.\n",
    "from __future__ import division\n",
    "\n",
    "# Standard library\n",
    "import json\n",
    "import os\n",
    "import sqlite3 as lite\n",
    "import sys\n",
    "import time\n",
    "\n",
    "# Third-party\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas import DataFrame, Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#http://pandas.pydata.org/pandas-docs/stable/options.html\n",
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('max_colwidth', 250)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "#### Set working directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# The default is the original project folder; set the SOX_PROJECT_DIR environment\n",
    "# variable to run the notebook from a different machine or location.\n",
    "PROJECT_DIR = os.environ.get('SOX_PROJECT_DIR', '/Users/gregorysaxton/Google Drive/SOX')\n",
    "os.chdir(PROJECT_DIR)\n",
    "print(os.getcwd())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create List of EINs for '2016' charities -- I'll use this to create a list of EINs for matching with e-file database\n",
    "- Previous (Read in Saved List of EINs with 'current' (2016) donor advisories)\n",
    "- What I'm doing here is reading in the Stata 'Table 5' dataset so that I can create a list of the 8,304 relevant EINs."
] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#f = open('2016 donor advisory EINs.json', 'r')\n", "#advisories_2016 = json.load(f)\n", "#print len(advisories_2016)\n", "#print advisories_2016[:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Read in Test 4 data" ] }, { "cell_type": "code", "execution_count": 393, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of columns: 35\n", "Number of observations: 8304\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
donor_advisorydonor_advisory_2016donor_advisory_2011_to_2016org_idEINFYEForm 990 FYEratings_system2011_data2016_dataconflict_of_interest_policy_v2records_retention_policy_v2whistleblower_policy_v2SOX_policiesSOX_policies_binarySOX_policies_all_binaryprogram_efficiencycomplexitycomplexity_2011agetotal_revenue_loggedcategorystatetot_revcategory_Animalscategory_Arts, Culture, Humanitiescategory_Community Developmentcategory_Educationcategory_Environmentcategory_Healthcategory_Human Servicescategory_Human and Civil Rightscategory_Internationalcategory_Religioncategory_Research and Public Policy
507090.00.00.05954010202467FY20142014-12CN 2.10.01.01.01.01.03.01.01.00.7944576.0NaN62.016.377993Research and Public PolicyMENaN0.00.00.00.00.00.00.00.00.00.01.0
\n", "
" ], "text/plain": [ " donor_advisory donor_advisory_2016 donor_advisory_2011_to_2016 \\\n", "50709 0.0 0.0 0.0 \n", "\n", " org_id EIN FYE Form 990 FYE ratings_system 2011_data \\\n", "50709 5954 010202467 FY2014 2014-12 CN 2.1 0.0 \n", "\n", " 2016_data conflict_of_interest_policy_v2 records_retention_policy_v2 \\\n", "50709 1.0 1.0 1.0 \n", "\n", " whistleblower_policy_v2 SOX_policies SOX_policies_binary \\\n", "50709 1.0 3.0 1.0 \n", "\n", " SOX_policies_all_binary program_efficiency complexity \\\n", "50709 1.0 0.794457 6.0 \n", "\n", " complexity_2011 age total_revenue_logged \\\n", "50709 NaN 62.0 16.377993 \n", "\n", " category state tot_rev category_Animals \\\n", "50709 Research and Public Policy ME NaN 0.0 \n", "\n", " category_Arts, Culture, Humanities category_Community Development \\\n", "50709 0.0 0.0 \n", "\n", " category_Education category_Environment category_Health \\\n", "50709 0.0 0.0 0.0 \n", "\n", " category_Human Services category_Human and Civil Rights \\\n", "50709 0.0 0.0 \n", "\n", " category_International category_Religion \\\n", "50709 0.0 0.0 \n", "\n", " category_Research and Public Policy \n", "50709 1.0 " ] }, "execution_count": 393, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_pickle('2016 - Test 4 data.pkl')\n", "print \"Number of columns:\", len(df.columns)\n", "print \"Number of observations:\", len(df)\n", "df.head(1)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8304\n", "8304 8304\n", "['010202467', '010211478', '010211513', '010211530', '010211543']\n" ] } ], "source": [ "print len(set(df['EIN'].tolist()))\n", "eins_2016 = df['EIN'].tolist()\n", "print len(eins_2016), len(set(eins_2016))\n", "print eins_2016[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "print len(eins_2016)\n", "with open('eins_2016.json', 
'w') as outfile:\n", " json.dump(eins_2016, outfile)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#import json\n", "#with open('valid_next_v8.json', 'r') as fp:\n", "# valid_next = json.load(fp)\n", "#print len(valid_next)\n", "#print valid_next[:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Read DB into pandas DataFrame\n", "- I'm taking all 29,440 Form 990 e-filings for our 8,304 EINs and importing them from MongoDB into a Python pandas DataFrame for manipulation.\n", "- As you can see if you scroll horizontally through this sample row, there are 691 columns (the saved output below reports 695)." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# of columns: 695\n", "# of observations: 29440\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
@documentId@referenceDocumentId@referenceDocumentName@softwareId@softwareVersion@softwareVersionNumAccountantCompileOrReviewAccountantCompileOrReviewBasisAccountantCompileOrReviewIndAccountsPayableAccrExpnssGrpAccountsPayableAccruedExpensesAccountsReceivableAccountsReceivableGrpAcctCompileOrReviewBasisGrpActivitiesConductedPartnershipActivitiesConductedPrtshpIndActivity2Activity3ActivityCdActivityCodeActivityOrMissionDescActivityOrMissionDescriptionActivityOtherAddressChangeAddressChangeIndAddressPrincipalOfficerForeignAddressPrincipalOfficerUSAdvertisingAdvertisingGrpAllAffiliatesIncludedAllAffiliatesIncludedIndAllOtherContributionsAllOtherContributionsAmtAllOtherExpensesAllOtherExpensesGrpAmendedReturnAmendedReturnIndAnnualDisclosureCoveredPersonsAnnualDisclosureCoveredPrsnIndAuditCommitteeAuditCommitteeIndAuditedFinancialStmtAttIndBackupWthldComplianceIndBalanceSheetAmountsReportedBenefitsPaidToMembersCYBenefitsPaidToMembersPriorYearBenefitsToMembersBenefitsToMembersGrpBooksInCareOfDetailBsnssRltnshpThruFamilyMemberBsnssRltnshpWithOrganizationBusinessRlnWithFamMemIndBusinessRlnWithOfficerEntIndBusinessRlnWithOrgMemIndCYBenefitsPaidToMembersAmtCYContributionsGrantsAmtCYGrantsAndSimilarPaidAmtCYInvestmentIncomeAmtCYOtherExpensesAmtCYOtherRevenueAmtCYProgramServiceRevenueAmtCYRevenuesLessExpensesAmtCYSalariesCompEmpBnftPaidAmtCYTotalExpensesAmtCYTotalFundraisingExpenseAmtCYTotalProfFndrsngExpnsAmtCYTotalRevenueAmtCapStckTrstPrinCurrentFundsCapStkTrPrinCurrentFundsGrpCashNonInterestBearingCashNonInterestBearingGrpChangeToOrgDocumentsIndChangesToOrganizingDocsCntrbtnsRprtdFundraisingEventsCntrctRcvdGreaterThan100KCntCollectionsOfArtCollectionsOfArtIndCompCurrentOfcrDirectorsGrpCompCurrentOfficersDirectorsCompDisqualPersonsCompDisqualPersonsGrpCompensationFromOtherSourcesCompensationFromOtherSrcsIndCompensationProcessCEOCompensationProcessCEOIndCompensationProcessOtherCompensationProcessOtherIndComplianceWithBackupWitholdingConferencesMeetingsConferencesMeetingsGrpConf
lictOfInterestPolicyConflictOfInterestPolicyIndConservationEasementsConservationEasementsIndConsolidatedAuditFinancialStmtConsolidatedAuditFinclStmtIndContractTerminationIndContractorCompensationContractorCompensationGrpContriRptFundraisingEventAmtContributionsGrantsCurrentYearContributionsGrantsPriorYearCostOfGoodsSoldCostOfGoodsSoldAmtCountryLegalDomicileCreditCounselingCreditCounselingIndDAFExcessBusinessHoldingsIndDLNDecisionsSubjectToApprovaIndDecisionsSubjectToApprovalDeductibleArtContributionIndDeductibleContributionsOfArtDeductibleNonCashContriIndDeductibleNonCashContributionsDeferredRevenueDeferredRevenueGrpDelegationOfManagementDutiesDelegationOfMgmtDutiesIndDepreciationDepletionDepreciationDepletionGrpDescDescribedIn501C3DescribedInSection501c3IndDescriptionDisregardedEntityDisregardedEntityIndDistributionToDonorDistributionToDonorIndDoNotFollowSFAS117DocumentRetentionPolicyDocumentRetentionPolicyIndDoingBusinessAsDoingBusinessAsNameDonatedServicesAndUseFcltsAmtDonorAdvisedFundIndDonorAdvisedFundsEINElectionOfBoardMembersElectionOfBoardMembersIndEmployeeCntEmploymentTaxReturnsFiledEmploymentTaxReturnsFiledIndEngagedInExcessBenefitTransIndEscrowAccountEscrowAccountIndEscrowAccountLiabilityEscrowAccountLiabilityGrpExcessBenefitTransactionExcessBusinessHoldingsExcessBusinessHoldingsIndExpenseExpenseAmtFSAuditedFSAuditedBasisFSAuditedBasisGrpFSAuditedIndFamilyOrBusinessRelationshipFamilyOrBusinessRlnIndFederalGrantAuditPerformedFederalGrantAuditPerformedIndFederalGrantAuditRequiredFederalGrantAuditRequiredIndFederatedCampaignsFederatedCampaignsAmtFeesForServicesAccountingFeesForServicesAccountingGrpFeesForServicesInvstMgmntFeesFeesForServicesLegalFeesForServicesLegalGrpFeesForServicesLobbyingFeesForServicesLobbyingGrpFeesForServicesManagementFeesForServicesManagementGrpFeesForServicesOtherFeesForServicesOtherGrpFeesForServicesProfFundraisingFeesForSrvcInvstMgmntFeesGrpFiledLieu1041FinalReturnIndFinancialStatementBothFinancialStatementConsolidatedFinancialStat
ementSeparateFinancialStmtAttachedFollowSFAS117ForeignActivitiesForeignActivitiesIndForeignAddressForeignCountryCdForeignFinancialAccountForeignFinancialAccountIndForeignGrantsForeignGrantsGrpForeignOfficeForeignOfficeIndForm1098CFiledForm1098CFiledIndForm720FiledForm8282FiledCntForm8282PropertyDisposedOfForm8282PropertyDisposedOfIndForm8886TFiledForm8886TFiledIndForm8899FiledForm8899FiledindForm990-TFiledForm990PartVIISectionAForm990PartVIISectionAGrpForm990ProvidedToGoverningBodyForm990ProvidedToGvrnBodyIndForm990TFiledIndFormTypeFormationYrFormerOfcrEmployeesListedIndFormersListedFundraisingActivitiesFundraisingActivitiesIndFundraisingAmtFundraisingDirectExpensesFundraisingDirectExpensesAmtFundraisingEventsFundraisingGrossIncomeAmtFundsToPayPremiumsGainOrLossGainOrLossGrpGamingGamingActivitiesIndGamingDirectExpensesGamingDirectExpensesAmtGamingGrossIncomeAmtGoverningBodyVotingMembersCntGovernmentGrantsGovernmentGrantsAmtGrantAmtGrantToRelatedPersonGrantToRelatedPersonIndGrantsGrantsAndSimilarAmntsCYGrantsAndSimilarAmntsPriorYearGrantsPayableGrantsPayableGrpGrantsToDomesticIndividualsGrantsToDomesticIndividualsGrpGrantsToDomesticOrgsGrantsToDomesticOrgsGrpGrantsToIndividualsGrantsToIndividualsIndGrantsToOrganizationsGrantsToOrganizationsIndGrossAmountSalesAssetsGrossAmountSalesAssetsGrpGrossIncomeFundraisingEventsGrossIncomeGamingGrossReceiptsGrossReceiptsAmtGrossRentsGrossRentsGrpGrossSalesOfInventoryGrossSalesOfInventoryAmtGroupExemptionNumGroupExemptionNumberGroupReturnForAffiliatesGroupReturnForAffiliatesIndHospitalIRPDocumentCntIRPDocumentW2GCntIncludeFIN48FootnoteIndIncmFromInvestBondProceedsGrpIncomeFromInvestBondProceedsIndependentAuditFinancialStmtIndependentAuditFinclStmtIndIndependentVotingMemberCntIndivRcvdGreaterThan100KCntIndoorTanningServicesIndoorTanningServicesIndInfoInScheduleOPartIIIInfoInScheduleOPartIIIIndInfoInScheduleOPartIXInfoInScheduleOPartIXIndInfoInScheduleOPartVInfoInScheduleOPartVIInfoInScheduleOPartVIIInfoInScheduleOPartVIIIInfoInSch
eduleOPartVIIIIndInfoInScheduleOPartVIIIndInfoInScheduleOPartVIIndInfoInScheduleOPartVIndInfoInScheduleOPartXInfoInScheduleOPartXIInfoInScheduleOPartXIIInfoInScheduleOPartXIIIndInfoInScheduleOPartXIIndInfoInScheduleOPartXIndInformationTechnologyInformationTechnologyGrpInitialReturnInitialReturnIndInsuranceInsuranceGrpIntangibleAssetsIntangibleAssetsGrpInterestInterestGrpInventoriesForSaleOrUseInventoriesForSaleOrUseGrpInvestTaxExemptBondsInvestTaxExemptBondsIndInvestmentExpenseAmtInvestmentInJointVentureInvestmentInJointVentureIndInvestmentIncomeInvestmentIncomeCurrentYearInvestmentIncomeGrpInvestmentIncomePriorYearInvestmentsOtherSecuritiesInvestmentsOtherSecuritiesGrpInvestmentsProgramRelatedInvestmentsProgramRelatedGrpInvestmentsPubTradedSecGrpInvestmentsPubTradedSecuritiesIsAvailableIsElectronicJointCostsJointCostsIndLandBldgEquipAccumDeprecAmtLandBldgEquipBasisNetGrpLandBldgEquipCostOrOtherBssAmtLandBldgEquipmentAccumDeprecLandBuildingsEquipmentBasisLandBuildingsEquipmentBasisNetLastUpdatedLegalDomicileStateCdLessCostOthBasisSalesExpensesLessCostOthBasisSalesExpnssGrpLessRentalExpensesLessRentalExpensesGrpLicensedMoreThanOneStateLicensedMoreThanOneStateIndLoanOutstandingIndLoanToOfficerOrDQPLoansFromOfficersDirectorsLoansFromOfficersDirectorsGrpLobbyingActivitiesLobbyingActivitiesIndLocalChaptersLocalChaptersIndMaterialDiversionOrMisuseMaterialDiversionOrMisuseIndMembersOrStockholdersMembersOrStockholdersIndMembershipDuesMembershipDuesAmtMethodOfAccountingAccrualMethodOfAccountingAccrualIndMethodOfAccountingCashMethodOfAccountingCashIndMethodOfAccountingOtherMethodOfAccountingOtherIndMinutesOfCommitteesMinutesOfCommitteesIndMinutesOfGoverningBodyMinutesOfGoverningBodyIndMiscellaneousRevenueMiscellaneousRevenueGrpMissionDescMissionDescriptionMoreThan5000KToIndividualsMoreThan5000KToIndividualsIndMoreThan5000KToOrgIndMoreThan5000KToOrganizationsMortNotesPyblSecuredInvestPropMortgNotesPyblScrdInvstPropGrpNameOfForeignCountryNameOfPrincipalOfficerBusinessNameOfPrin
cipalOfficerPersonNbrIndependentVotingMembersNbrVotingGoverningBodyMembersNbrVotingMembersGoverningBodyNetAssetsOrFundBalancesBOYNetAssetsOrFundBalancesBOYAmtNetAssetsOrFundBalancesEOYNetAssetsOrFundBalancesEOYAmtNetGainOrLossInvestmentsNetGainOrLossInvestmentsGrpNetIncmFromFundraisingEvtGrpNetIncomeFromFundraisingEventsNetIncomeFromGamingNetIncomeFromGamingGrpNetIncomeOrLossNetIncomeOrLossGrpNetRentalIncomeOrLossNetRentalIncomeOrLossGrpNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeNetUnrlzdGainsLossesInvstAmtNoListedPersonsCompensatedNoListedPersonsCompensatedIndNonDeductibleContributionsNonDeductibleDisclosureNoncashContributionsNoncashContributionsAmtNondeductibleContriDisclIndNondeductibleContributionsIndNumberFormsTransmittedWith1096NumberIndependentVotingMembersNumberIndividualsGT100KNumberOf8282FiledNumberOfContractorsGT100KNumberOfEmployeesNumberW2GIncludedObjectIdOccupancyOccupancyGrpOfficeExpensesOfficeExpensesGrpOfficerEntityWithBsnssRltnshpOfficerMailingAddressOfficerMailingAddressIndOnBehalfOfIssuerOnBehalfOfIssuerIndOperateHospitalIndOrgDoesNotFollowSFAS117IndOrgFiledInLieuOfForm1041IndOrganization501cOrganization501c3Organization501c3IndOrganization501cIndOrganizationFollowsSFAS117IndOrganizationNameOthNotesLoansReceivableNetGrpOtherAssetsTotalOtherAssetsTotalGrpOtherChangesInNetAssetsAmtOtherEmployeeBenefitsOtherEmployeeBenefitsGrpOtherExpensePriorYearOtherExpensesOtherExpensesCurrentYearOtherExpensesGrpOtherExplainInSchOOtherIndOtherLiabilitiesOtherLiabilitiesGrpOtherNotesLoansReceivableNetOtherOrganizationDscOtherRevenueCurrentYearOtherRevenueMiscOtherRevenueMiscGrpOtherRevenuePriorYearOtherRevenueTotalAmtOtherSalariesAndWagesOtherSalariesAndWagesGrpOtherWebsiteOtherWebsiteIndOwnWebsiteOwnWebsiteIndPYBenefitsPaidToMembersAmtPYContributionsGrantsAmtPYExcessBenefitTransIndPYGrantsAndSimilarPaidAmtPYInvestmentIncomeAmtPYOtherExpensesAmtPYOtherRevenueAmtPYProgramServiceRevenueAmtPYRevenuesLessExpensesAmtPYSalariesCompEmpBnftPaidAmtPYTotalExpe
nsesAmtPYTotalProfFndrsngExpnsAmtPYTotalRevenueAmtPaidInCapSrplsLandBldgEqpFundPartialLiquidationPartialLiquidationIndPayPremiumsPrsnlBnftCntrctIndPaymentsToAffiliatesPaymentsToAffiliatesGrpPayrollTaxesPayrollTaxesGrpPdInCapSrplsLandBldgEqpFundGrpPensionPlanContributionsPensionPlanContributionsGrpPermanentlyRestrictedNetAssetsPermanentlyRstrNetAssetsGrpPledgesAndGrantsReceivablePledgesAndGrantsReceivableGrpPoliciesReferenceChaptersPoliciesReferenceChaptersIndPoliticalActivitiesPoliticalCampaignActyIndPremiumsPaidPrepaidExpensesDeferredChargesPrepaidExpensesDefrdChargesGrpPrincipalOfcrBusinessAddressPrincipalOfcrBusinessNamePrincipalOfficerNmPriorExcessBenefitTransactionPriorPeriodAdjustmentsAmtProfessionalFundraisingProfessionalFundraisingIndProgSrvcAccomActy2GrpProgSrvcAccomActy3GrpProgSrvcAccomActyOtherGrpProgramServiceRevenueProgramServiceRevenueCYProgramServiceRevenueGrpProgramServiceRevenuePriorYearProhibitedTaxShelterTransProhibitedTaxShelterTransIndPymtTravelEntrtnmntPubOfclGrpQuidProQuoContriDisclIndQuidProQuoContributionsQuidProQuoContributionsIndQuidProQuoDisclosureRcvFndsToPayPrsnlBnftCntrctIndRcvblFromDisqualifiedPrsnGrpReceivablesFromDisqualPersonsReceivablesFromOfficersEtcReceivablesFromOfficersEtcGrpReconcilationDonatedServicesReconcilationInvestExpensesReconcilationOtherChangesReconcilationPriorAdjustmentReconcilationRevenueExpensesReconcilationRevenueExpnssAmtReconciliationUnrealizedInvestRegularMonitoringEnforcementRegularMonitoringEnfrcIndRelatedEntityRelatedEntityIndRelatedOrgControlledEntityRelatedOrganizationCtrlEntIndRelatedOrganizationsRelatedOrganizationsAmtRentalIncomeOrLossRentalIncomeOrLossGrpReportFin48FootnoteReportInvestOthSecuritiesReportInvestmentsOtherSecIndReportLandBldgEquipReportLandBuildingEquipmentIndReportOtherAssetsReportOtherAssetsIndReportOtherLiabilitiesReportOtherLiabilitiesIndReportProgRelInvestReportProgramRelatedInvstIndReservesMaintainedAmtRetainedEarningsEndowmentEtcRevenueRevenueAmtRevenuesLessExpensesCYRevenuesLess
ExpensesPriorYearRoyaltiesRoyaltiesGrpRoyaltiesRevenueRoyaltiesRevenueGrpRtnEarnEndowmentIncmOthFndsGrpSalariesEtcCurrentYearSalariesEtcPriorYearSavingsAndTempCashInvestmentsSavingsAndTempCashInvstGrpScheduleBRequiredScheduleBRequiredIndScheduleJRequiredScheduleJRequiredIndScheduleORequiredScheduleORequiredIndSchoolSchoolOperatingIndSignificantChangeSignificantChangeIndSignificantNewProgramServicesSignificantNewProgramSrvcIndSpecialConditionDescriptionStateLegalDomicileStateRequiredReservesAmtStatesWhereCopyOfReturnIsFiledStatesWhereCopyOfReturnIsFldCdSubjectToProxyTaxSubjectToProxyTaxIndSubmittedOnTaxExemptBondLiabilitiesTaxExemptBondLiabilitiesGrpTaxExemptBondsTaxExemptBondsIndTaxPeriodTaxableDistributionsTaxableDistributionsIndTaxablePartyNotificationTaxablePartyNotificationIndTempOrPermanentEndowmentsIndTemporarilyRestrictedNetAssetsTemporarilyRstrNetAssetsGrpTermOrPermanentEndowmentsTerminateOperationsIndTerminatedTerminationOrContractionTheBooksAreInCareOfTotLiabNetAssetsFundBalanceGrpTotReportableCompRltdOrgAmtTotalAssetsTotalAssetsBOYTotalAssetsBOYAmtTotalAssetsEOYTotalAssetsEOYAmtTotalAssetsGrpTotalCompGT150KTotalCompGreaterThan150KIndTotalContributionsTotalContributionsAmtTotalEmployeeCntTotalExpensesCurrentYearTotalExpensesPriorYearTotalFunctionalExpensesTotalFunctionalExpensesGrpTotalFundrsngExpCurrentYearTotalGrossUBITotalGrossUBIAmtTotalJointCostsTotalJointCostsGrpTotalLiabNetAssetsFundBalancesTotalLiabilitiesTotalLiabilitiesBOYTotalLiabilitiesBOYAmtTotalLiabilitiesEOYTotalLiabilitiesEOYAmtTotalLiabilitiesGrpTotalNbrEmployeesTotalNbrVolunteersTotalNetAssetsFundBalanceGrpTotalNetAssetsFundBalancesTotalOfOtherProgramServiceExpTotalOfOtherProgramServiceGrntTotalOfOtherProgramServiceRevTotalOthProgramServiceRevGrpTotalOthProgramServiceRevenueTotalOtherCompensationTotalOtherCompensationAmtTotalOtherProgSrvcExpenseAmtTotalOtherProgSrvcGrantAmtTotalOtherProgSrvcRevenueAmtTotalOtherRevenueTotalProfFundrsngExpCYTotalProfFundrsngExpPriorYearTotalProgramServiceEx
penseTotalProgramServiceExpensesAmtTotalProgramServiceRevenueTotalProgramServiceRevenueAmtTotalReportableCompFrmRltdOrgsTotalReportableCompFromOrgTotalReportableCompFromOrgAmtTotalRevenueTotalRevenueCurrentYearTotalRevenueGrpTotalRevenuePriorYearTotalVolunteersCntTransactionRelatedEntityTransactionWithControlEntIndTransfersToExemptNonChrtblOrgTravelTravelEntrtnmntPublicOfficialsTravelGrpTrnsfrExmptNonChrtblRltdOrgIndTypeOfOrgOtherDescriptionTypeOfOrganizationAssocIndTypeOfOrganizationAssociationTypeOfOrganizationCorpIndTypeOfOrganizationCorporationTypeOfOrganizationOtherTypeOfOrganizationOtherIndTypeOfOrganizationTrustTypeOfOrganizationTrustIndURLUSAddressUnrelatedBusIncmOverLimitIndUnrelatedBusinessIncomeUnrestrictedNetAssetsUnrestrictedNetAssetsGrpUnsecuredNotesLoansPayableUnsecuredNotesLoansPayableGrpUponRequestUponRequestIndVotingMembersGoverningBodyCntVotingMembersIndependentCntWebSiteWebsiteAddressTxtWhistleblowerPolicyWhistleblowerPolicyIndWrittenPolicyOrProcedureWrittenPolicyOrProcedureIndYearFormation_idFeesForServicesAccountingGrp_v2FYEFeesForServicesAccounting_TotalAmtFeesForServicesAccounting_binary
18819RetDoc1038000001RetDoc1044400001NaNNaNNaNNaNNaNNaN0{u'BOYAmt': u'506341', u'EOYAmt': u'557069'}NaNNaN{u'BOYAmt': u'315681', u'EOYAmt': u'161137'}NaNNaN0NaNNaNNaNNaNTO DEVELOP SOLUTIONS TO COMPLEX HUMAN & ENVIRONMENTAL HEALTH PROBLEMS THROUGH RESEARCH & EDUCATION.NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2334773NaN{u'TotalAmt': u'660207', u'ProgramServicesAmt': u'515736', u'ManagementAndGeneralAmt': u'95934', u'FundraisingAmt': u'48537'}NaNNaNNaN1NaN1NaN1NaNNaNNaNNaNNaN{u'PersonNm': u'CLAUDINE D LURVEY', u'PhoneNum': u'2072889880', u'USAddress': {u'CityNm': u'SALISBURY COVE', u'StateAbbreviationCd': u'ME', u'ZIPCd': u'04672', u'AddressLine1Txt': u'OLD BAR HARBOR ROAD'}}NaNNaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042800001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042800001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042800001'}010272455098227266612821404171183721365474155139108314215546081500012967968NaNNaNNaN{u'BOYAmt': u'1121795', u'EOYAmt': u'2540456'}0NaNNaN1NaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}NaNNaNNaNNaNNaN0NaN1NaN1NaNNaNNaNNaN1NaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}NaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}NaNNaN{u'ContractorName': {u'BusinessName': {u'BusinessNameLine1Txt': u'GOODIE'S ELECTRICAL SERVICE'}}, u'ServicesDesc': u'ELECTRICIAN', u'ContractorAddress': {u'USAddress': {u'CityNm': u'BAR HARBOR', u'StateAbbreviationCd': u'ME', u'ZIPCd': u'04609', ...800NaNNaNNaN0NaNNaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}NaN934933200800350NaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1042900001'}NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042900001'}NaNNaN{u'BOYAmt': u'529444', u'EOYAmt': u'835605'}NaN1NaN{u'TotalAmt': u'1121059', u'ProgramServicesAmt': u'896847', u'ManagementAndGeneralAmt': u'168159', u'FundraisingAmt': u'56053'}THE MDI BIOLOGICAL LABORATORY IS RECOGNIZED BY THE NATIONAL INSTITUTES OF HEALTH AS A CENTER OF 
BIOMEDICAL RESEARCH EXCELLENCE (COBRE) FOR OUR INNOVATIVE RESEARCH PROGRAM IN REGENERATIVE AND AGING BIOLOGY AND MEDICINE.OUR APPROACH SPEEDS THE DISC...NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1039100001'}NaNNaN0NaNNaNNaNNaN1NaNNaNNaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}NaN010202467NaN196NaN1{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1042800001'}NaN0NaNNaNNaNNaNNaNNaN8605100NaNNaN{u'SeparateBasisFinclStmtInd': u'X'}1NaN0NaN1NaN1NaNNaNNaN{u'TotalAmt': u'23700', u'ManagementAndGeneralAmt': u'23700'}NaNNaN{u'TotalAmt': u'3792', u'ManagementAndGeneralAmt': u'3792'}NaN{u'TotalAmt': u'89167', u'ManagementAndGeneralAmt': u'3379', u'FundraisingAmt': u'85788'}NaNNaNNaN{u'TotalAmt': u'201518', u'ProgramServicesAmt': u'127950', u'ManagementAndGeneralAmt': u'64909', u'FundraisingAmt': u'8659'}{u'TotalAmt': u'15000', u'FundraisingAmt': u'15000'}{u'TotalAmt': u'18902', u'ManagementAndGeneralAmt': u'18902'}NaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaN0NaNNaNNaN0NaNNaNNaNNaNNaN0NaNNaNNaNNaNNaNNaN[{u'PersonNm': u'DR EDWARD J BENZ', u'IndividualTrusteeOrDirectorInd': u'X', u'AverageHoursPerWeekRt': u'2.00', u'ReportableCompFromRltdOrgAmt': u'0', u'OtherCompensationAmt': u'0', u'TitleTxt': u'VICE CHAIRMAN OF THE BOARD', u'OfficerInd': u'X',...NaN1NaN99018980NaNNaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1041300001'}800NaN37598NaN38964NaNNaN{u'OtherAmt': u'322752'}NaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1041300001'}NaNNaNNaN22NaN7932282NaNNaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1042800001'}NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaN0NaN{u'OtherAmt': u'340000'}NaNNaNNaN13022814NaNNaNNaN38NaNNaNNaN0NaN1580{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}NaNNaNNaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}213NaN0NaNXNaNNaNNaNNaNNaNNaNNaNNaNXNaNNaNNaNNaNXXNaNNaN{u'TotalAmt': u'49382', u'ProgramServicesAmt': u'8673', u'ManagementAndGeneralAmt': u'40709'}NaNNaNNaN{u'TotalAmt': u'92396', 
u'ProgramServicesAmt': u'10397', u'ManagementAndGeneralAmt': u'81746', u'FundraisingAmt': u'253'}NaN{u'BOYAmt': u'147691', u'EOYAmt': u'141271'}NaN{u'TotalAmt': u'255985', u'ProgramServicesAmt': u'191989', u'ManagementAndGeneralAmt': u'63996'}NaN{u'BOYAmt': u'8329'}NaN0NaNNaN0NaNNaN{u'TotalRevenueColumnAmt': u'659520', u'ExclusionAmt': u'659520'}NaNNaNNaNNaNNaN{u'BOYAmt': u'4031973', u'EOYAmt': u'4747115'}NaNTrueTrueNaNNaN11322245{u'BOYAmt': u'14025403', u'EOYAmt': u'14020382'}25342627NaNNaNNaN2016-04-29T13:40:20MENaN{u'OtherAmt': u'17248'}NaNNaNNaNNaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1042800001'}NaNNaNNaNNaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1039700001'}NaN0NaN0NaN1NaN4600NaNXNaNNaNNaNNaNNaN1NaN1NaNNaNTHE MDI BIOLOGICAL LABORATORY IS A RAPIDLY GROWING, INDEPENDENT NON-PROFIT BIOMEDICAL RESEARCH INSTITUTION. ITS MISSION IS TO IMPROVE HUMAN HEALTH AND WELL-BEING THROUGH RESEARCH, EDUCATION,AND DEVELOPMENT VENTURES THAT TRANSFORM DISCOVERIES INTO...NaNNaN00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN21657900NaN23690097NaN{u'TotalRevenueColumnAmt': u'322752', u'ExclusionAmt': u'322752'}{u'TotalRevenueColumnAmt': u'1366', u'ExclusionAmt': u'1366'}NaNNaNNaNNaN{u'TotalRevenueColumnAmt': u'38', u'ExclusionAmt': u'38'}NaNNaN0NaN-104353NaNXNaNNaNNaN57474NaN0NaNNaNNaNNaNNaNNaNNaN201533209349308003NaN{u'TotalAmt': u'587738', u'ProgramServicesAmt': u'494365', u'ManagementAndGeneralAmt': u'91234', u'FundraisingAmt': u'2139'}NaNNaNNaNNaN0NaN00NaNNaNNaNNaNXNaNXMOUNT DESERT ISLAND BIOLOGICAL LABORATORYNaNNaNNaN3NaN{u'TotalAmt': u'370740', u'ProgramServicesAmt': u'284086', u'ManagementAndGeneralAmt': u'70910', u'FundraisingAmt': u'15744'}NaNNaNNaN[{u'TotalAmt': u'2124748', u'ProgramServicesAmt': u'2124748', u'Desc': u'SUBAWARDS'}, {u'TotalAmt': u'682378', u'ProgramServicesAmt': u'627645', u'ManagementAndGeneralAmt': u'47547', u'FundraisingAmt': u'7186', u'Desc': u'SUPPLIES'}, {u'TotalAmt'...NaNNaNNaN{u'BOYAmt': u'4630000', u'EOYAmt': 
u'4525000'}NaNNaNNaNNaNNaNNaNNaNNaN{u'TotalAmt': u'3353971', u'ProgramServicesAmt': u'2478596', u'ManagementAndGeneralAmt': u'683266', u'FundraisingAmt': u'192109'}NaNXNaNX07802139{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1042800001'}07856336736413-133751591204-449665387885310615266010165601NaNNaN0NaNNaNNaNNaN{u'TotalAmt': u'257923', u'ProgramServicesAmt': u'189245', u'ManagementAndGeneralAmt': u'52592', u'FundraisingAmt': u'16086'}NaNNaN{u'TotalAmt': u'172505', u'ProgramServicesAmt': u'125037', u'ManagementAndGeneralAmt': u'37384', u'FundraisingAmt': u'10084'}NaN{u'BOYAmt': u'4193257', u'EOYAmt': u'4433997'}NaN{u'BOYAmt': u'797504', u'EOYAmt': u'986621'}NaNNaNNaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1039700001'}NaNNaN{u'BOYAmt': u'4791', u'EOYAmt': u'5301'}NaNNaNDR KEVIN STRANGE PHDNaNNaNNaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1041300001'}NaNNaNNaNNaNNaN[{u'TotalRevenueColumnAmt': u'469300', u'BusinessCd': u'611710', u'RelatedOrExemptFuncIncomeAmt': u'469300', u'Desc': u'CONFER & COURSE FEES'}, {u'TotalRevenueColumnAmt': u'353507', u'BusinessCd': u'541700', u'RelatedOrExemptFuncIncomeAmt': u'353...NaNNaN0NaNNaNNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2136547NaNNaN1NaN0NaN0NaNNaNNaNNaNNaNNaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}NaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}NaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}NaNNaNNaN1733437NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN{u'BOYAmt': u'6870518', u'EOYAmt': u'7005488'}NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1234500001'}NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042400001'}NaN1NaN0NaN0NaN0NaNNaNNaNNaNMENaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1039700001'}2016-02-16NaNNaNNaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}201412NaNNaNNaN0{u'#text': u'1', 
u'@referenceDocumentId': u'RetDoc1040000001'}NaN{u'BOYAmt': u'5880227', u'EOYAmt': u'7351259'}NaN0NaNNaNNaN{u'BOYAmt': u'27323685', u'EOYAmt': u'29607771'}0NaNNaN27323685NaN29607771{u'BOYAmt': u'27323685', u'EOYAmt': u'29607771'}NaN1NaN1027245596NaNNaNNaN{u'TotalAmt': u'10831421', u'ProgramServicesAmt': u'8605100', u'ManagementAndGeneralAmt': u'1671713', u'FundraisingAmt': u'554608'}NaNNaN0NaNNaNNaNNaNNaN5665785NaN5917674{u'BOYAmt': u'5665785', u'EOYAmt': u'5917674'}NaNNaN{u'BOYAmt': u'21657900', u'EOYAmt': u'23690097'}NaNNaNNaNNaN{u'TotalRevenueColumnAmt': u'38356', u'RelatedOrExemptFuncIncomeAmt': u'38356'}NaNNaN67100NaNNaNNaNNaNNaNNaNNaN8605100NaN1711837NaNNaN496334NaNNaN{u'UnrelatedBusinessRevenueAmt': u'0', u'TotalRevenueColumnAmt': u'12967968', u'RelatedOrExemptFuncIncomeAmt': u'1711837', u'ExclusionAmt': u'983676'}NaN125NaNNaNNaNNaNNaN{u'TotalAmt': u'128657', u'ProgramServicesAmt': u'84416', u'ManagementAndGeneralAmt': u'33781', u'FundraisingAmt': u'10460'}0NaNNaNNaNXNaNNaNNaNNaNNaNhttps://s3.amazonaws.com/irs-form-990/201533209349308003_public.xml{u'CityNm': u'SALISBURY COVE', u'StateAbbreviationCd': u'ME', u'ZIPCd': u'046720035', u'AddressLine1Txt': u'PO BOX 35'}0NaNNaN{u'BOYAmt': u'11584416', u'EOYAmt': u'11904841'}NaNNaNNaNX2221NaNWWW.MDIBL.ORGNaN1NaNNaNNaN58c33e003ffc5a1664e96b73{u'TotalAmt': u'23700', u'ManagementAndGeneralAmt': u'23700'}FY201423700.01
\n", "
" ], "text/plain": [ " @documentId @referenceDocumentId @referenceDocumentName \\\n", "18819 RetDoc1038000001 RetDoc1044400001 NaN \n", "\n", " @softwareId @softwareVersion @softwareVersionNum \\\n", "18819 NaN NaN NaN \n", "\n", " AccountantCompileOrReview AccountantCompileOrReviewBasis \\\n", "18819 NaN NaN \n", "\n", " AccountantCompileOrReviewInd \\\n", "18819 0 \n", "\n", " AccountsPayableAccrExpnssGrp \\\n", "18819 {u'BOYAmt': u'506341', u'EOYAmt': u'557069'} \n", "\n", " AccountsPayableAccruedExpenses AccountsReceivable \\\n", "18819 NaN NaN \n", "\n", " AccountsReceivableGrp \\\n", "18819 {u'BOYAmt': u'315681', u'EOYAmt': u'161137'} \n", "\n", " AcctCompileOrReviewBasisGrp ActivitiesConductedPartnership \\\n", "18819 NaN NaN \n", "\n", " ActivitiesConductedPrtshpInd Activity2 Activity3 ActivityCd \\\n", "18819 0 NaN NaN NaN \n", "\n", " ActivityCode \\\n", "18819 NaN \n", "\n", " ActivityOrMissionDesc \\\n", "18819 TO DEVELOP SOLUTIONS TO COMPLEX HUMAN & ENVIRONMENTAL HEALTH PROBLEMS THROUGH RESEARCH & EDUCATION. 
\n", "\n", " ActivityOrMissionDescription ActivityOther AddressChange \\\n", "18819 NaN NaN NaN \n", "\n", " AddressChangeInd AddressPrincipalOfficerForeign \\\n", "18819 NaN NaN \n", "\n", " AddressPrincipalOfficerUS Advertising AdvertisingGrp \\\n", "18819 NaN NaN NaN \n", "\n", " AllAffiliatesIncluded AllAffiliatesIncludedInd AllOtherContributions \\\n", "18819 NaN NaN NaN \n", "\n", " AllOtherContributionsAmt AllOtherExpenses \\\n", "18819 2334773 NaN \n", "\n", " AllOtherExpensesGrp \\\n", "18819 {u'TotalAmt': u'660207', u'ProgramServicesAmt': u'515736', u'ManagementAndGeneralAmt': u'95934', u'FundraisingAmt': u'48537'} \n", "\n", " AmendedReturn AmendedReturnInd AnnualDisclosureCoveredPersons \\\n", "18819 NaN NaN NaN \n", "\n", " AnnualDisclosureCoveredPrsnInd AuditCommittee AuditCommitteeInd \\\n", "18819 1 NaN 1 \n", "\n", " AuditedFinancialStmtAttInd BackupWthldComplianceInd \\\n", "18819 NaN 1 \n", "\n", " BalanceSheetAmountsReported BenefitsPaidToMembersCY \\\n", "18819 NaN NaN \n", "\n", " BenefitsPaidToMembersPriorYear BenefitsToMembers BenefitsToMembersGrp \\\n", "18819 NaN NaN NaN \n", "\n", " BooksInCareOfDetail \\\n", "18819 {u'PersonNm': u'CLAUDINE D LURVEY', u'PhoneNum': u'2072889880', u'USAddress': {u'CityNm': u'SALISBURY COVE', u'StateAbbreviationCd': u'ME', u'ZIPCd': u'04672', u'AddressLine1Txt': u'OLD BAR HARBOR ROAD'}} \n", "\n", " BsnssRltnshpThruFamilyMember BsnssRltnshpWithOrganization \\\n", "18819 NaN NaN \n", "\n", " BusinessRlnWithFamMemInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042800001'} \n", "\n", " BusinessRlnWithOfficerEntInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042800001'} \n", "\n", " BusinessRlnWithOrgMemInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042800001'} \n", "\n", " CYBenefitsPaidToMembersAmt CYContributionsGrantsAmt \\\n", "18819 0 10272455 \n", "\n", " CYGrantsAndSimilarPaidAmt CYInvestmentIncomeAmt CYOtherExpensesAmt \\\n", "18819 0 
982272 6661282 \n", "\n", " CYOtherRevenueAmt CYProgramServiceRevenueAmt CYRevenuesLessExpensesAmt \\\n", "18819 1404 1711837 2136547 \n", "\n", " CYSalariesCompEmpBnftPaidAmt CYTotalExpensesAmt \\\n", "18819 4155139 10831421 \n", "\n", " CYTotalFundraisingExpenseAmt CYTotalProfFndrsngExpnsAmt \\\n", "18819 554608 15000 \n", "\n", " CYTotalRevenueAmt CapStckTrstPrinCurrentFunds \\\n", "18819 12967968 NaN \n", "\n", " CapStkTrPrinCurrentFundsGrp CashNonInterestBearing \\\n", "18819 NaN NaN \n", "\n", " CashNonInterestBearingGrp ChangeToOrgDocumentsInd \\\n", "18819 {u'BOYAmt': u'1121795', u'EOYAmt': u'2540456'} 0 \n", "\n", " ChangesToOrganizingDocs CntrbtnsRprtdFundraisingEvents \\\n", "18819 NaN NaN \n", "\n", " CntrctRcvdGreaterThan100KCnt CollectionsOfArt \\\n", "18819 1 NaN \n", "\n", " CollectionsOfArtInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " CompCurrentOfcrDirectorsGrp CompCurrentOfficersDirectors \\\n", "18819 NaN NaN \n", "\n", " CompDisqualPersons CompDisqualPersonsGrp CompensationFromOtherSources \\\n", "18819 NaN NaN NaN \n", "\n", " CompensationFromOtherSrcsInd CompensationProcessCEO \\\n", "18819 0 NaN \n", "\n", " CompensationProcessCEOInd CompensationProcessOther \\\n", "18819 1 NaN \n", "\n", " CompensationProcessOtherInd ComplianceWithBackupWitholding \\\n", "18819 1 NaN \n", "\n", " ConferencesMeetings ConferencesMeetingsGrp ConflictOfInterestPolicy \\\n", "18819 NaN NaN NaN \n", "\n", " ConflictOfInterestPolicyInd ConservationEasements \\\n", "18819 1 NaN \n", "\n", " ConservationEasementsInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " ConsolidatedAuditFinancialStmt \\\n", "18819 NaN \n", "\n", " ConsolidatedAuditFinclStmtInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " ContractTerminationInd ContractorCompensation \\\n", "18819 NaN NaN \n", "\n", " ContractorCompensationGrp \\\n", "18819 
{u'ContractorName': {u'BusinessName': {u'BusinessNameLine1Txt': u'GOODIE'S ELECTRICAL SERVICE'}}, u'ServicesDesc': u'ELECTRICIAN', u'ContractorAddress': {u'USAddress': {u'CityNm': u'BAR HARBOR', u'StateAbbreviationCd': u'ME', u'ZIPCd': u'04609', ... \n", "\n", " ContriRptFundraisingEventAmt ContributionsGrantsCurrentYear \\\n", "18819 800 NaN \n", "\n", " ContributionsGrantsPriorYear CostOfGoodsSold CostOfGoodsSoldAmt \\\n", "18819 NaN NaN 0 \n", "\n", " CountryLegalDomicile CreditCounseling \\\n", "18819 NaN NaN \n", "\n", " CreditCounselingInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " DAFExcessBusinessHoldingsInd DLN \\\n", "18819 NaN 93493320080035 \n", "\n", " DecisionsSubjectToApprovaInd DecisionsSubjectToApproval \\\n", "18819 0 NaN \n", "\n", " DeductibleArtContributionInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1042900001'} \n", "\n", " DeductibleContributionsOfArt \\\n", "18819 NaN \n", "\n", " DeductibleNonCashContriInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042900001'} \n", "\n", " DeductibleNonCashContributions DeferredRevenue \\\n", "18819 NaN NaN \n", "\n", " DeferredRevenueGrp \\\n", "18819 {u'BOYAmt': u'529444', u'EOYAmt': u'835605'} \n", "\n", " DelegationOfManagementDuties DelegationOfMgmtDutiesInd \\\n", "18819 NaN 1 \n", "\n", " DepreciationDepletion \\\n", "18819 NaN \n", "\n", " DepreciationDepletionGrp \\\n", "18819 {u'TotalAmt': u'1121059', u'ProgramServicesAmt': u'896847', u'ManagementAndGeneralAmt': u'168159', u'FundraisingAmt': u'56053'} \n", "\n", " Desc \\\n", "18819 THE MDI BIOLOGICAL LABORATORY IS RECOGNIZED BY THE NATIONAL INSTITUTES OF HEALTH AS A CENTER OF BIOMEDICAL RESEARCH EXCELLENCE (COBRE) FOR OUR INNOVATIVE RESEARCH PROGRAM IN REGENERATIVE AND AGING BIOLOGY AND MEDICINE.OUR APPROACH SPEEDS THE DISC... 
\n", "\n", " DescribedIn501C3 \\\n", "18819 NaN \n", "\n", " DescribedInSection501c3Ind \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1039100001'} \n", "\n", " Description DisregardedEntity DisregardedEntityInd DistributionToDonor \\\n", "18819 NaN NaN 0 NaN \n", "\n", " DistributionToDonorInd DoNotFollowSFAS117 DocumentRetentionPolicy \\\n", "18819 NaN NaN NaN \n", "\n", " DocumentRetentionPolicyInd DoingBusinessAs DoingBusinessAsName \\\n", "18819 1 NaN NaN \n", "\n", " DonatedServicesAndUseFcltsAmt \\\n", "18819 NaN \n", "\n", " DonorAdvisedFundInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " DonorAdvisedFunds EIN ElectionOfBoardMembers \\\n", "18819 NaN 010202467 NaN \n", "\n", " ElectionOfBoardMembersInd EmployeeCnt EmploymentTaxReturnsFiled \\\n", "18819 1 96 NaN \n", "\n", " EmploymentTaxReturnsFiledInd \\\n", "18819 1 \n", "\n", " EngagedInExcessBenefitTransInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1042800001'} \n", "\n", " EscrowAccount EscrowAccountInd EscrowAccountLiability \\\n", "18819 NaN 0 NaN \n", "\n", " EscrowAccountLiabilityGrp ExcessBenefitTransaction \\\n", "18819 NaN NaN \n", "\n", " ExcessBusinessHoldings ExcessBusinessHoldingsInd Expense ExpenseAmt \\\n", "18819 NaN NaN NaN 8605100 \n", "\n", " FSAudited FSAuditedBasis FSAuditedBasisGrp \\\n", "18819 NaN NaN {u'SeparateBasisFinclStmtInd': u'X'} \n", "\n", " FSAuditedInd FamilyOrBusinessRelationship FamilyOrBusinessRlnInd \\\n", "18819 1 NaN 0 \n", "\n", " FederalGrantAuditPerformed FederalGrantAuditPerformedInd \\\n", "18819 NaN 1 \n", "\n", " FederalGrantAuditRequired FederalGrantAuditRequiredInd \\\n", "18819 NaN 1 \n", "\n", " FederatedCampaigns FederatedCampaignsAmt FeesForServicesAccounting \\\n", "18819 NaN NaN NaN \n", "\n", " FeesForServicesAccountingGrp \\\n", "18819 {u'TotalAmt': u'23700', u'ManagementAndGeneralAmt': u'23700'} \n", "\n", " FeesForServicesInvstMgmntFees FeesForServicesLegal 
\\\n", "18819 NaN NaN \n", "\n", " FeesForServicesLegalGrp \\\n", "18819 {u'TotalAmt': u'3792', u'ManagementAndGeneralAmt': u'3792'} \n", "\n", " FeesForServicesLobbying \\\n", "18819 NaN \n", "\n", " FeesForServicesLobbyingGrp \\\n", "18819 {u'TotalAmt': u'89167', u'ManagementAndGeneralAmt': u'3379', u'FundraisingAmt': u'85788'} \n", "\n", " FeesForServicesManagement FeesForServicesManagementGrp \\\n", "18819 NaN NaN \n", "\n", " FeesForServicesOther \\\n", "18819 NaN \n", "\n", " FeesForServicesOtherGrp \\\n", "18819 {u'TotalAmt': u'201518', u'ProgramServicesAmt': u'127950', u'ManagementAndGeneralAmt': u'64909', u'FundraisingAmt': u'8659'} \n", "\n", " FeesForServicesProfFundraising \\\n", "18819 {u'TotalAmt': u'15000', u'FundraisingAmt': u'15000'} \n", "\n", " FeesForSrvcInvstMgmntFeesGrp \\\n", "18819 {u'TotalAmt': u'18902', u'ManagementAndGeneralAmt': u'18902'} \n", "\n", " FiledLieu1041 FinalReturnInd FinancialStatementBoth \\\n", "18819 NaN NaN NaN \n", "\n", " FinancialStatementConsolidated FinancialStatementSeparate \\\n", "18819 NaN NaN \n", "\n", " FinancialStmtAttached FollowSFAS117 ForeignActivities \\\n", "18819 NaN NaN NaN \n", "\n", " ForeignActivitiesInd ForeignAddress ForeignCountryCd \\\n", "18819 0 NaN NaN \n", "\n", " ForeignFinancialAccount ForeignFinancialAccountInd ForeignGrants \\\n", "18819 NaN 0 NaN \n", "\n", " ForeignGrantsGrp ForeignOffice ForeignOfficeInd Form1098CFiled \\\n", "18819 NaN NaN 0 NaN \n", "\n", " Form1098CFiledInd Form720Filed Form8282FiledCnt \\\n", "18819 NaN NaN NaN \n", "\n", " Form8282PropertyDisposedOf Form8282PropertyDisposedOfInd Form8886TFiled \\\n", "18819 NaN 0 NaN \n", "\n", " Form8886TFiledInd Form8899Filed Form8899Filedind Form990-TFiled \\\n", "18819 NaN NaN NaN NaN \n", "\n", " Form990PartVIISectionA \\\n", "18819 NaN \n", "\n", " Form990PartVIISectionAGrp \\\n", "18819 [{u'PersonNm': u'DR EDWARD J BENZ', u'IndividualTrusteeOrDirectorInd': u'X', u'AverageHoursPerWeekRt': u'2.00', 
u'ReportableCompFromRltdOrgAmt': u'0', u'OtherCompensationAmt': u'0', u'TitleTxt': u'VICE CHAIRMAN OF THE BOARD', u'OfficerInd': u'X',... \n", "\n", " Form990ProvidedToGoverningBody Form990ProvidedToGvrnBodyInd \\\n", "18819 NaN 1 \n", "\n", " Form990TFiledInd FormType FormationYr FormerOfcrEmployeesListedInd \\\n", "18819 NaN 990 1898 0 \n", "\n", " FormersListed FundraisingActivities \\\n", "18819 NaN NaN \n", "\n", " FundraisingActivitiesInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1041300001'} \n", "\n", " FundraisingAmt FundraisingDirectExpenses FundraisingDirectExpensesAmt \\\n", "18819 800 NaN 37598 \n", "\n", " FundraisingEvents FundraisingGrossIncomeAmt FundsToPayPremiums \\\n", "18819 NaN 38964 NaN \n", "\n", " GainOrLoss GainOrLossGrp Gaming \\\n", "18819 NaN {u'OtherAmt': u'322752'} NaN \n", "\n", " GamingActivitiesInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1041300001'} \n", "\n", " GamingDirectExpenses GamingDirectExpensesAmt GamingGrossIncomeAmt \\\n", "18819 NaN NaN NaN \n", "\n", " GoverningBodyVotingMembersCnt GovernmentGrants GovernmentGrantsAmt \\\n", "18819 22 NaN 7932282 \n", "\n", " GrantAmt GrantToRelatedPerson \\\n", "18819 NaN NaN \n", "\n", " GrantToRelatedPersonInd Grants \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1042800001'} NaN \n", "\n", " GrantsAndSimilarAmntsCY GrantsAndSimilarAmntsPriorYear GrantsPayable \\\n", "18819 NaN NaN NaN \n", "\n", " GrantsPayableGrp GrantsToDomesticIndividuals \\\n", "18819 NaN NaN \n", "\n", " GrantsToDomesticIndividualsGrp GrantsToDomesticOrgs \\\n", "18819 NaN NaN \n", "\n", " GrantsToDomesticOrgsGrp GrantsToIndividuals GrantsToIndividualsInd \\\n", "18819 NaN NaN 0 \n", "\n", " GrantsToOrganizations GrantsToOrganizationsInd GrossAmountSalesAssets \\\n", "18819 NaN 0 NaN \n", "\n", " GrossAmountSalesAssetsGrp GrossIncomeFundraisingEvents \\\n", "18819 {u'OtherAmt': u'340000'} NaN \n", "\n", " GrossIncomeGaming GrossReceipts 
GrossReceiptsAmt GrossRents \\\n", "18819 NaN NaN 13022814 NaN \n", "\n", " GrossRentsGrp GrossSalesOfInventory GrossSalesOfInventoryAmt \\\n", "18819 NaN NaN 38 \n", "\n", " GroupExemptionNum GroupExemptionNumber GroupReturnForAffiliates \\\n", "18819 NaN NaN NaN \n", "\n", " GroupReturnForAffiliatesInd Hospital IRPDocumentCnt IRPDocumentW2GCnt \\\n", "18819 0 NaN 158 0 \n", "\n", " IncludeFIN48FootnoteInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " IncmFromInvestBondProceedsGrp IncomeFromInvestBondProceeds \\\n", "18819 NaN NaN \n", "\n", " IndependentAuditFinancialStmt \\\n", "18819 NaN \n", "\n", " IndependentAuditFinclStmtInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " IndependentVotingMemberCnt IndivRcvdGreaterThan100KCnt \\\n", "18819 21 3 \n", "\n", " IndoorTanningServices IndoorTanningServicesInd InfoInScheduleOPartIII \\\n", "18819 NaN 0 NaN \n", "\n", " InfoInScheduleOPartIIIInd InfoInScheduleOPartIX \\\n", "18819 X NaN \n", "\n", " InfoInScheduleOPartIXInd InfoInScheduleOPartV InfoInScheduleOPartVI \\\n", "18819 NaN NaN NaN \n", "\n", " InfoInScheduleOPartVII InfoInScheduleOPartVIII \\\n", "18819 NaN NaN \n", "\n", " InfoInScheduleOPartVIIIInd InfoInScheduleOPartVIIInd \\\n", "18819 NaN NaN \n", "\n", " InfoInScheduleOPartVIInd InfoInScheduleOPartVInd InfoInScheduleOPartX \\\n", "18819 X NaN NaN \n", "\n", " InfoInScheduleOPartXI InfoInScheduleOPartXII InfoInScheduleOPartXIIInd \\\n", "18819 NaN NaN X \n", "\n", " InfoInScheduleOPartXIInd InfoInScheduleOPartXInd InformationTechnology \\\n", "18819 X NaN NaN \n", "\n", " InformationTechnologyGrp \\\n", "18819 {u'TotalAmt': u'49382', u'ProgramServicesAmt': u'8673', u'ManagementAndGeneralAmt': u'40709'} \n", "\n", " InitialReturn InitialReturnInd Insurance \\\n", "18819 NaN NaN NaN \n", "\n", " InsuranceGrp \\\n", "18819 {u'TotalAmt': u'92396', u'ProgramServicesAmt': u'10397', u'ManagementAndGeneralAmt': u'81746', 
u'FundraisingAmt': u'253'} \n", "\n", " IntangibleAssets IntangibleAssetsGrp Interest \\\n", "18819 NaN {u'BOYAmt': u'147691', u'EOYAmt': u'141271'} NaN \n", "\n", " InterestGrp \\\n", "18819 {u'TotalAmt': u'255985', u'ProgramServicesAmt': u'191989', u'ManagementAndGeneralAmt': u'63996'} \n", "\n", " InventoriesForSaleOrUse InventoriesForSaleOrUseGrp InvestTaxExemptBonds \\\n", "18819 NaN {u'BOYAmt': u'8329'} NaN \n", "\n", " InvestTaxExemptBondsInd InvestmentExpenseAmt InvestmentInJointVenture \\\n", "18819 0 NaN NaN \n", "\n", " InvestmentInJointVentureInd InvestmentIncome \\\n", "18819 0 NaN \n", "\n", " InvestmentIncomeCurrentYear \\\n", "18819 NaN \n", "\n", " InvestmentIncomeGrp \\\n", "18819 {u'TotalRevenueColumnAmt': u'659520', u'ExclusionAmt': u'659520'} \n", "\n", " InvestmentIncomePriorYear InvestmentsOtherSecurities \\\n", "18819 NaN NaN \n", "\n", " InvestmentsOtherSecuritiesGrp InvestmentsProgramRelated \\\n", "18819 NaN NaN \n", "\n", " InvestmentsProgramRelatedGrp \\\n", "18819 NaN \n", "\n", " InvestmentsPubTradedSecGrp \\\n", "18819 {u'BOYAmt': u'4031973', u'EOYAmt': u'4747115'} \n", "\n", " InvestmentsPubTradedSecurities IsAvailable IsElectronic JointCosts \\\n", "18819 NaN True True NaN \n", "\n", " JointCostsInd LandBldgEquipAccumDeprecAmt \\\n", "18819 NaN 11322245 \n", "\n", " LandBldgEquipBasisNetGrp \\\n", "18819 {u'BOYAmt': u'14025403', u'EOYAmt': u'14020382'} \n", "\n", " LandBldgEquipCostOrOtherBssAmt LandBldgEquipmentAccumDeprec \\\n", "18819 25342627 NaN \n", "\n", " LandBuildingsEquipmentBasis LandBuildingsEquipmentBasisNet \\\n", "18819 NaN NaN \n", "\n", " LastUpdated LegalDomicileStateCd LessCostOthBasisSalesExpenses \\\n", "18819 2016-04-29T13:40:20 ME NaN \n", "\n", " LessCostOthBasisSalesExpnssGrp LessRentalExpenses LessRentalExpensesGrp \\\n", "18819 {u'OtherAmt': u'17248'} NaN NaN \n", "\n", " LicensedMoreThanOneState LicensedMoreThanOneStateInd \\\n", "18819 NaN NaN \n", "\n", " LoanOutstandingInd \\\n", "18819 {u'#text': 
u'0', u'@referenceDocumentId': u'RetDoc1042800001'} \n", "\n", " LoanToOfficerOrDQP LoansFromOfficersDirectors \\\n", "18819 NaN NaN \n", "\n", " LoansFromOfficersDirectorsGrp LobbyingActivities \\\n", "18819 NaN NaN \n", "\n", " LobbyingActivitiesInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1039700001'} \n", "\n", " LocalChapters LocalChaptersInd MaterialDiversionOrMisuse \\\n", "18819 NaN 0 NaN \n", "\n", " MaterialDiversionOrMisuseInd MembersOrStockholders \\\n", "18819 0 NaN \n", "\n", " MembersOrStockholdersInd MembershipDues MembershipDuesAmt \\\n", "18819 1 NaN 4600 \n", "\n", " MethodOfAccountingAccrual MethodOfAccountingAccrualInd \\\n", "18819 NaN X \n", "\n", " MethodOfAccountingCash MethodOfAccountingCashInd \\\n", "18819 NaN NaN \n", "\n", " MethodOfAccountingOther MethodOfAccountingOtherInd MinutesOfCommittees \\\n", "18819 NaN NaN NaN \n", "\n", " MinutesOfCommitteesInd MinutesOfGoverningBody MinutesOfGoverningBodyInd \\\n", "18819 1 NaN 1 \n", "\n", " MiscellaneousRevenue MiscellaneousRevenueGrp \\\n", "18819 NaN NaN \n", "\n", " MissionDesc \\\n", "18819 THE MDI BIOLOGICAL LABORATORY IS A RAPIDLY GROWING, INDEPENDENT NON-PROFIT BIOMEDICAL RESEARCH INSTITUTION. ITS MISSION IS TO IMPROVE HUMAN HEALTH AND WELL-BEING THROUGH RESEARCH, EDUCATION,AND DEVELOPMENT VENTURES THAT TRANSFORM DISCOVERIES INTO... 
\n", "\n", " MissionDescription MoreThan5000KToIndividuals \\\n", "18819 NaN NaN \n", "\n", " MoreThan5000KToIndividualsInd MoreThan5000KToOrgInd \\\n", "18819 0 0 \n", "\n", " MoreThan5000KToOrganizations MortNotesPyblSecuredInvestProp \\\n", "18819 NaN NaN \n", "\n", " MortgNotesPyblScrdInvstPropGrp NameOfForeignCountry \\\n", "18819 NaN NaN \n", "\n", " NameOfPrincipalOfficerBusiness NameOfPrincipalOfficerPerson \\\n", "18819 NaN NaN \n", "\n", " NbrIndependentVotingMembers NbrVotingGoverningBodyMembers \\\n", "18819 NaN NaN \n", "\n", " NbrVotingMembersGoverningBody NetAssetsOrFundBalancesBOY \\\n", "18819 NaN NaN \n", "\n", " NetAssetsOrFundBalancesBOYAmt NetAssetsOrFundBalancesEOY \\\n", "18819 21657900 NaN \n", "\n", " NetAssetsOrFundBalancesEOYAmt NetGainOrLossInvestments \\\n", "18819 23690097 NaN \n", "\n", " NetGainOrLossInvestmentsGrp \\\n", "18819 {u'TotalRevenueColumnAmt': u'322752', u'ExclusionAmt': u'322752'} \n", "\n", " NetIncmFromFundraisingEvtGrp \\\n", "18819 {u'TotalRevenueColumnAmt': u'1366', u'ExclusionAmt': u'1366'} \n", "\n", " NetIncomeFromFundraisingEvents NetIncomeFromGaming \\\n", "18819 NaN NaN \n", "\n", " NetIncomeFromGamingGrp NetIncomeOrLoss \\\n", "18819 NaN NaN \n", "\n", " NetIncomeOrLossGrp \\\n", "18819 {u'TotalRevenueColumnAmt': u'38', u'ExclusionAmt': u'38'} \n", "\n", " NetRentalIncomeOrLoss NetRentalIncomeOrLossGrp \\\n", "18819 NaN NaN \n", "\n", " NetUnrelatedBusTxblIncmAmt NetUnrelatedBusinessTxblIncome \\\n", "18819 0 NaN \n", "\n", " NetUnrlzdGainsLossesInvstAmt NoListedPersonsCompensated \\\n", "18819 -104353 NaN \n", "\n", " NoListedPersonsCompensatedInd NonDeductibleContributions \\\n", "18819 X NaN \n", "\n", " NonDeductibleDisclosure NoncashContributions NoncashContributionsAmt \\\n", "18819 NaN NaN 57474 \n", "\n", " NondeductibleContriDisclInd NondeductibleContributionsInd \\\n", "18819 NaN 0 \n", "\n", " NumberFormsTransmittedWith1096 NumberIndependentVotingMembers \\\n", "18819 NaN NaN \n", "\n", " 
NumberIndividualsGT100K NumberOf8282Filed NumberOfContractorsGT100K \\\n", "18819 NaN NaN NaN \n", "\n", " NumberOfEmployees NumberW2GIncluded ObjectId Occupancy \\\n", "18819 NaN NaN 201533209349308003 NaN \n", "\n", " OccupancyGrp \\\n", "18819 {u'TotalAmt': u'587738', u'ProgramServicesAmt': u'494365', u'ManagementAndGeneralAmt': u'91234', u'FundraisingAmt': u'2139'} \n", "\n", " OfficeExpenses OfficeExpensesGrp OfficerEntityWithBsnssRltnshp \\\n", "18819 NaN NaN NaN \n", "\n", " OfficerMailingAddress OfficerMailingAddressInd OnBehalfOfIssuer \\\n", "18819 NaN 0 NaN \n", "\n", " OnBehalfOfIssuerInd OperateHospitalInd OrgDoesNotFollowSFAS117Ind \\\n", "18819 0 0 NaN \n", "\n", " OrgFiledInLieuOfForm1041Ind Organization501c Organization501c3 \\\n", "18819 NaN NaN NaN \n", "\n", " Organization501c3Ind Organization501cInd OrganizationFollowsSFAS117Ind \\\n", "18819 X NaN X \n", "\n", " OrganizationName \\\n", "18819 MOUNT DESERT ISLAND BIOLOGICAL LABORATORY \n", "\n", " OthNotesLoansReceivableNetGrp OtherAssetsTotal OtherAssetsTotalGrp \\\n", "18819 NaN NaN NaN \n", "\n", " OtherChangesInNetAssetsAmt OtherEmployeeBenefits \\\n", "18819 3 NaN \n", "\n", " OtherEmployeeBenefitsGrp \\\n", "18819 {u'TotalAmt': u'370740', u'ProgramServicesAmt': u'284086', u'ManagementAndGeneralAmt': u'70910', u'FundraisingAmt': u'15744'} \n", "\n", " OtherExpensePriorYear OtherExpenses OtherExpensesCurrentYear \\\n", "18819 NaN NaN NaN \n", "\n", " OtherExpensesGrp \\\n", "18819 [{u'TotalAmt': u'2124748', u'ProgramServicesAmt': u'2124748', u'Desc': u'SUBAWARDS'}, {u'TotalAmt': u'682378', u'ProgramServicesAmt': u'627645', u'ManagementAndGeneralAmt': u'47547', u'FundraisingAmt': u'7186', u'Desc': u'SUPPLIES'}, {u'TotalAmt'... 
\n", "\n", " OtherExplainInSchO OtherInd OtherLiabilities \\\n", "18819 NaN NaN NaN \n", "\n", " OtherLiabilitiesGrp \\\n", "18819 {u'BOYAmt': u'4630000', u'EOYAmt': u'4525000'} \n", "\n", " OtherNotesLoansReceivableNet OtherOrganizationDsc \\\n", "18819 NaN NaN \n", "\n", " OtherRevenueCurrentYear OtherRevenueMisc OtherRevenueMiscGrp \\\n", "18819 NaN NaN NaN \n", "\n", " OtherRevenuePriorYear OtherRevenueTotalAmt OtherSalariesAndWages \\\n", "18819 NaN NaN NaN \n", "\n", " OtherSalariesAndWagesGrp \\\n", "18819 {u'TotalAmt': u'3353971', u'ProgramServicesAmt': u'2478596', u'ManagementAndGeneralAmt': u'683266', u'FundraisingAmt': u'192109'} \n", "\n", " OtherWebsite OtherWebsiteInd OwnWebsite OwnWebsiteInd \\\n", "18819 NaN X NaN X \n", "\n", " PYBenefitsPaidToMembersAmt PYContributionsGrantsAmt \\\n", "18819 0 7802139 \n", "\n", " PYExcessBenefitTransInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1042800001'} \n", "\n", " PYGrantsAndSimilarPaidAmt PYInvestmentIncomeAmt PYOtherExpensesAmt \\\n", "18819 0 785633 6736413 \n", "\n", " PYOtherRevenueAmt PYProgramServiceRevenueAmt PYRevenuesLessExpensesAmt \\\n", "18819 -13375 1591204 -449665 \n", "\n", " PYSalariesCompEmpBnftPaidAmt PYTotalExpensesAmt \\\n", "18819 3878853 10615266 \n", "\n", " PYTotalProfFndrsngExpnsAmt PYTotalRevenueAmt \\\n", "18819 0 10165601 \n", "\n", " PaidInCapSrplsLandBldgEqpFund PartialLiquidation PartialLiquidationInd \\\n", "18819 NaN NaN 0 \n", "\n", " PayPremiumsPrsnlBnftCntrctInd PaymentsToAffiliates \\\n", "18819 NaN NaN \n", "\n", " PaymentsToAffiliatesGrp PayrollTaxes \\\n", "18819 NaN NaN \n", "\n", " PayrollTaxesGrp \\\n", "18819 {u'TotalAmt': u'257923', u'ProgramServicesAmt': u'189245', u'ManagementAndGeneralAmt': u'52592', u'FundraisingAmt': u'16086'} \n", "\n", " PdInCapSrplsLandBldgEqpFundGrp PensionPlanContributions \\\n", "18819 NaN NaN \n", "\n", " PensionPlanContributionsGrp \\\n", "18819 {u'TotalAmt': u'172505', u'ProgramServicesAmt': u'125037', 
u'ManagementAndGeneralAmt': u'37384', u'FundraisingAmt': u'10084'} \n", "\n", " PermanentlyRestrictedNetAssets \\\n", "18819 NaN \n", "\n", " PermanentlyRstrNetAssetsGrp \\\n", "18819 {u'BOYAmt': u'4193257', u'EOYAmt': u'4433997'} \n", "\n", " PledgesAndGrantsReceivable \\\n", "18819 NaN \n", "\n", " PledgesAndGrantsReceivableGrp PoliciesReferenceChapters \\\n", "18819 {u'BOYAmt': u'797504', u'EOYAmt': u'986621'} NaN \n", "\n", " PoliciesReferenceChaptersInd PoliticalActivities \\\n", "18819 NaN NaN \n", "\n", " PoliticalCampaignActyInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1039700001'} \n", "\n", " PremiumsPaid PrepaidExpensesDeferredCharges \\\n", "18819 NaN NaN \n", "\n", " PrepaidExpensesDefrdChargesGrp PrincipalOfcrBusinessAddress \\\n", "18819 {u'BOYAmt': u'4791', u'EOYAmt': u'5301'} NaN \n", "\n", " PrincipalOfcrBusinessName PrincipalOfficerNm \\\n", "18819 NaN DR KEVIN STRANGE PHD \n", "\n", " PriorExcessBenefitTransaction PriorPeriodAdjustmentsAmt \\\n", "18819 NaN NaN \n", "\n", " ProfessionalFundraising \\\n", "18819 NaN \n", "\n", " ProfessionalFundraisingInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1041300001'} \n", "\n", " ProgSrvcAccomActy2Grp ProgSrvcAccomActy3Grp ProgSrvcAccomActyOtherGrp \\\n", "18819 NaN NaN NaN \n", "\n", " ProgramServiceRevenue ProgramServiceRevenueCY \\\n", "18819 NaN NaN \n", "\n", " ProgramServiceRevenueGrp \\\n", "18819 [{u'TotalRevenueColumnAmt': u'469300', u'BusinessCd': u'611710', u'RelatedOrExemptFuncIncomeAmt': u'469300', u'Desc': u'CONFER & COURSE FEES'}, {u'TotalRevenueColumnAmt': u'353507', u'BusinessCd': u'541700', u'RelatedOrExemptFuncIncomeAmt': u'353... 
\n", "\n", " ProgramServiceRevenuePriorYear ProhibitedTaxShelterTrans \\\n", "18819 NaN NaN \n", "\n", " ProhibitedTaxShelterTransInd PymtTravelEntrtnmntPubOfclGrp \\\n", "18819 0 NaN \n", "\n", " QuidProQuoContriDisclInd QuidProQuoContributions \\\n", "18819 NaN NaN \n", "\n", " QuidProQuoContributionsInd QuidProQuoDisclosure \\\n", "18819 0 NaN \n", "\n", " RcvFndsToPayPrsnlBnftCntrctInd RcvblFromDisqualifiedPrsnGrp \\\n", "18819 NaN NaN \n", "\n", " ReceivablesFromDisqualPersons ReceivablesFromOfficersEtc \\\n", "18819 NaN NaN \n", "\n", " ReceivablesFromOfficersEtcGrp ReconcilationDonatedServices \\\n", "18819 NaN NaN \n", "\n", " ReconcilationInvestExpenses ReconcilationOtherChanges \\\n", "18819 NaN NaN \n", "\n", " ReconcilationPriorAdjustment ReconcilationRevenueExpenses \\\n", "18819 NaN NaN \n", "\n", " ReconcilationRevenueExpnssAmt ReconciliationUnrealizedInvest \\\n", "18819 2136547 NaN \n", "\n", " RegularMonitoringEnforcement RegularMonitoringEnfrcInd RelatedEntity \\\n", "18819 NaN 1 NaN \n", "\n", " RelatedEntityInd RelatedOrgControlledEntity \\\n", "18819 0 NaN \n", "\n", " RelatedOrganizationCtrlEntInd RelatedOrganizations \\\n", "18819 0 NaN \n", "\n", " RelatedOrganizationsAmt RentalIncomeOrLoss RentalIncomeOrLossGrp \\\n", "18819 NaN NaN NaN \n", "\n", " ReportFin48Footnote ReportInvestOthSecurities \\\n", "18819 NaN NaN \n", "\n", " ReportInvestmentsOtherSecInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " ReportLandBldgEquip \\\n", "18819 NaN \n", "\n", " ReportLandBuildingEquipmentInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " ReportOtherAssets \\\n", "18819 NaN \n", "\n", " ReportOtherAssetsInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " ReportOtherLiabilities \\\n", "18819 NaN \n", "\n", " ReportOtherLiabilitiesInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " 
ReportProgRelInvest \\\n", "18819 NaN \n", "\n", " ReportProgramRelatedInvstInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " ReservesMaintainedAmt RetainedEarningsEndowmentEtc Revenue RevenueAmt \\\n", "18819 NaN NaN NaN 1733437 \n", "\n", " RevenuesLessExpensesCY RevenuesLessExpensesPriorYear Royalties \\\n", "18819 NaN NaN NaN \n", "\n", " RoyaltiesGrp RoyaltiesRevenue RoyaltiesRevenueGrp \\\n", "18819 NaN NaN NaN \n", "\n", " RtnEarnEndowmentIncmOthFndsGrp SalariesEtcCurrentYear \\\n", "18819 NaN NaN \n", "\n", " SalariesEtcPriorYear SavingsAndTempCashInvestments \\\n", "18819 NaN NaN \n", "\n", " SavingsAndTempCashInvstGrp ScheduleBRequired \\\n", "18819 {u'BOYAmt': u'6870518', u'EOYAmt': u'7005488'} NaN \n", "\n", " ScheduleBRequiredInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1234500001'} \n", "\n", " ScheduleJRequired \\\n", "18819 NaN \n", "\n", " ScheduleJRequiredInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1042400001'} \n", "\n", " ScheduleORequired ScheduleORequiredInd School SchoolOperatingInd \\\n", "18819 NaN 1 NaN 0 \n", "\n", " SignificantChange SignificantChangeInd SignificantNewProgramServices \\\n", "18819 NaN 0 NaN \n", "\n", " SignificantNewProgramSrvcInd SpecialConditionDescription \\\n", "18819 0 NaN \n", "\n", " StateLegalDomicile StateRequiredReservesAmt \\\n", "18819 NaN NaN \n", "\n", " StatesWhereCopyOfReturnIsFiled StatesWhereCopyOfReturnIsFldCd \\\n", "18819 NaN ME \n", "\n", " SubjectToProxyTax \\\n", "18819 NaN \n", "\n", " SubjectToProxyTaxInd \\\n", "18819 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1039700001'} \n", "\n", " SubmittedOn TaxExemptBondLiabilities TaxExemptBondLiabilitiesGrp \\\n", "18819 2016-02-16 NaN NaN \n", "\n", " TaxExemptBonds \\\n", "18819 NaN \n", "\n", " TaxExemptBondsInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "\n", " TaxPeriod TaxableDistributions 
TaxableDistributionsInd \\\n", "18819 201412 NaN NaN \n", "\n", " TaxablePartyNotification TaxablePartyNotificationInd \\\n", "18819 NaN 0 \n", "\n", " TempOrPermanentEndowmentsInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " TemporarilyRestrictedNetAssets \\\n", "18819 NaN \n", "\n", " TemporarilyRstrNetAssetsGrp \\\n", "18819 {u'BOYAmt': u'5880227', u'EOYAmt': u'7351259'} \n", "\n", " TermOrPermanentEndowments TerminateOperationsInd Terminated \\\n", "18819 NaN 0 NaN \n", "\n", " TerminationOrContraction TheBooksAreInCareOf \\\n", "18819 NaN NaN \n", "\n", " TotLiabNetAssetsFundBalanceGrp \\\n", "18819 {u'BOYAmt': u'27323685', u'EOYAmt': u'29607771'} \n", "\n", " TotReportableCompRltdOrgAmt TotalAssets TotalAssetsBOY \\\n", "18819 0 NaN NaN \n", "\n", " TotalAssetsBOYAmt TotalAssetsEOY TotalAssetsEOYAmt \\\n", "18819 27323685 NaN 29607771 \n", "\n", " TotalAssetsGrp TotalCompGT150K \\\n", "18819 {u'BOYAmt': u'27323685', u'EOYAmt': u'29607771'} NaN \n", "\n", " TotalCompGreaterThan150KInd TotalContributions TotalContributionsAmt \\\n", "18819 1 NaN 10272455 \n", "\n", " TotalEmployeeCnt TotalExpensesCurrentYear TotalExpensesPriorYear \\\n", "18819 96 NaN NaN \n", "\n", " TotalFunctionalExpenses \\\n", "18819 NaN \n", "\n", " TotalFunctionalExpensesGrp \\\n", "18819 {u'TotalAmt': u'10831421', u'ProgramServicesAmt': u'8605100', u'ManagementAndGeneralAmt': u'1671713', u'FundraisingAmt': u'554608'} \n", "\n", " TotalFundrsngExpCurrentYear TotalGrossUBI TotalGrossUBIAmt \\\n", "18819 NaN NaN 0 \n", "\n", " TotalJointCosts TotalJointCostsGrp TotalLiabNetAssetsFundBalances \\\n", "18819 NaN NaN NaN \n", "\n", " TotalLiabilities TotalLiabilitiesBOY TotalLiabilitiesBOYAmt \\\n", "18819 NaN NaN 5665785 \n", "\n", " TotalLiabilitiesEOY TotalLiabilitiesEOYAmt \\\n", "18819 NaN 5917674 \n", "\n", " TotalLiabilitiesGrp TotalNbrEmployees \\\n", "18819 {u'BOYAmt': u'5665785', u'EOYAmt': u'5917674'} NaN \n", "\n", " TotalNbrVolunteers 
TotalNetAssetsFundBalanceGrp \\\n", "18819 NaN {u'BOYAmt': u'21657900', u'EOYAmt': u'23690097'} \n", "\n", " TotalNetAssetsFundBalances TotalOfOtherProgramServiceExp \\\n", "18819 NaN NaN \n", "\n", " TotalOfOtherProgramServiceGrnt TotalOfOtherProgramServiceRev \\\n", "18819 NaN NaN \n", "\n", " TotalOthProgramServiceRevGrp \\\n", "18819 {u'TotalRevenueColumnAmt': u'38356', u'RelatedOrExemptFuncIncomeAmt': u'38356'} \n", "\n", " TotalOthProgramServiceRevenue TotalOtherCompensation \\\n", "18819 NaN NaN \n", "\n", " TotalOtherCompensationAmt TotalOtherProgSrvcExpenseAmt \\\n", "18819 67100 NaN \n", "\n", " TotalOtherProgSrvcGrantAmt TotalOtherProgSrvcRevenueAmt \\\n", "18819 NaN NaN \n", "\n", " TotalOtherRevenue TotalProfFundrsngExpCY TotalProfFundrsngExpPriorYear \\\n", "18819 NaN NaN NaN \n", "\n", " TotalProgramServiceExpense TotalProgramServiceExpensesAmt \\\n", "18819 NaN 8605100 \n", "\n", " TotalProgramServiceRevenue TotalProgramServiceRevenueAmt \\\n", "18819 NaN 1711837 \n", "\n", " TotalReportableCompFrmRltdOrgs TotalReportableCompFromOrg \\\n", "18819 NaN NaN \n", "\n", " TotalReportableCompFromOrgAmt TotalRevenue TotalRevenueCurrentYear \\\n", "18819 496334 NaN NaN \n", "\n", " TotalRevenueGrp \\\n", "18819 {u'UnrelatedBusinessRevenueAmt': u'0', u'TotalRevenueColumnAmt': u'12967968', u'RelatedOrExemptFuncIncomeAmt': u'1711837', u'ExclusionAmt': u'983676'} \n", "\n", " TotalRevenuePriorYear TotalVolunteersCnt TransactionRelatedEntity \\\n", "18819 NaN 125 NaN \n", "\n", " TransactionWithControlEntInd TransfersToExemptNonChrtblOrg Travel \\\n", "18819 NaN NaN NaN \n", "\n", " TravelEntrtnmntPublicOfficials \\\n", "18819 NaN \n", "\n", " TravelGrp \\\n", "18819 {u'TotalAmt': u'128657', u'ProgramServicesAmt': u'84416', u'ManagementAndGeneralAmt': u'33781', u'FundraisingAmt': u'10460'} \n", "\n", " TrnsfrExmptNonChrtblRltdOrgInd TypeOfOrgOtherDescription \\\n", "18819 0 NaN \n", "\n", " TypeOfOrganizationAssocInd TypeOfOrganizationAssociation \\\n", "18819 
NaN NaN \n", "\n", " TypeOfOrganizationCorpInd TypeOfOrganizationCorporation \\\n", "18819 X NaN \n", "\n", " TypeOfOrganizationOther TypeOfOrganizationOtherInd \\\n", "18819 NaN NaN \n", "\n", " TypeOfOrganizationTrust TypeOfOrganizationTrustInd \\\n", "18819 NaN NaN \n", "\n", " URL \\\n", "18819 https://s3.amazonaws.com/irs-form-990/201533209349308003_public.xml \n", "\n", " USAddress \\\n", "18819 {u'CityNm': u'SALISBURY COVE', u'StateAbbreviationCd': u'ME', u'ZIPCd': u'046720035', u'AddressLine1Txt': u'PO BOX 35'} \n", "\n", " UnrelatedBusIncmOverLimitInd UnrelatedBusinessIncome \\\n", "18819 0 NaN \n", "\n", " UnrestrictedNetAssets UnrestrictedNetAssetsGrp \\\n", "18819 NaN {u'BOYAmt': u'11584416', u'EOYAmt': u'11904841'} \n", "\n", " UnsecuredNotesLoansPayable UnsecuredNotesLoansPayableGrp UponRequest \\\n", "18819 NaN NaN NaN \n", "\n", " UponRequestInd VotingMembersGoverningBodyCnt \\\n", "18819 X 22 \n", "\n", " VotingMembersIndependentCnt WebSite WebsiteAddressTxt \\\n", "18819 21 NaN WWW.MDIBL.ORG \n", "\n", " WhistleblowerPolicy WhistleblowerPolicyInd WrittenPolicyOrProcedure \\\n", "18819 NaN 1 NaN \n", "\n", " WrittenPolicyOrProcedureInd YearFormation _id \\\n", "18819 NaN NaN 58c33e003ffc5a1664e96b73 \n", "\n", " FeesForServicesAccountingGrp_v2 FYE \\\n", "18819 {u'TotalAmt': u'23700', u'ManagementAndGeneralAmt': u'23700'} FY2014 \n", "\n", " FeesForServicesAccounting_TotalAmt FeesForServicesAccounting_binary \n", "18819 23700.0 1 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all = pd.read_pickle('e-file data for 7,133 of 8,304 2016 EINs.pkl')\n", "print '# of columns:', len(df_all.columns)\n", "print '# of observations:', len(df_all)\n", "df_all.head(1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Identify Governance Variables" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": 
[ " # of columns: 4\n", "# of observations: 652\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
variabledescriptionline_numberfirst year
0AccountantCompileOrReviewAccountant provide compilation or review?Part XII Line 2a2010
\n", "
" ], "text/plain": [ " variable description \\\n", "0 AccountantCompileOrReview Accountant provide compilation or review? \n", "\n", " line_number first year \n", "0 Part XII Line 2a 2010 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vars = pd.read_pickle('variable descriptions 2010-2015 990 e-file data (first year only).pkl')\n", "print '# of columns:', len(vars.columns)\n", "print '# of observations:', len(vars)\n", "vars.head(1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Independent and voting directors" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FYEEINNbrIndependentVotingMembersNbrVotingGoverningBodyMembersNbrVotingMembersGoverningBodyNumberIndependentVotingMembersGoverningBodyVotingMembersCntIndependentVotingMemberCntVotingMembersGoverningBodyCntVotingMembersIndependentCnt
18819FY2014010202467NaNNaNNaNNaN22212221
7965FY2013010202467NaNNaNNaNNaN21212121
8989FY201201020246720202020NaNNaNNaNNaN
22904FY201101020246723232323NaNNaNNaNNaN
19649FY201001020246724242424NaNNaNNaNNaN
28581FY2015010211478NaNNaNNaNNaN18181818
20683FY2014010211478NaNNaNNaNNaN18181818
4797FY201301021147818181818NaNNaNNaNNaN
15877FY201201021147818181818NaNNaNNaNNaN
18981FY201101021147819191919NaNNaNNaNNaN
589FY2014010211513NaNNaNNaNNaN23212321
21460FY2013010211513NaNNaNNaNNaN22202220
4502FY201201021151320222220NaNNaNNaNNaN
13619FY201101021151321212121NaNNaNNaNNaN
17018FY201001021151324252524NaNNaNNaNNaN
28147FY2014010211530NaNNaNNaNNaN28282828
6614FY201301021153029292929NaNNaNNaNNaN
27887FY201201021153027272727NaNNaNNaNNaN
13438FY201101021153029292929NaNNaNNaNNaN
17003FY201001021153029292929NaNNaNNaNNaN
\n", "
" ], "text/plain": [ " FYE EIN NbrIndependentVotingMembers \\\n", "18819 FY2014 010202467 NaN \n", "7965 FY2013 010202467 NaN \n", "8989 FY2012 010202467 20 \n", "22904 FY2011 010202467 23 \n", "19649 FY2010 010202467 24 \n", "28581 FY2015 010211478 NaN \n", "20683 FY2014 010211478 NaN \n", "4797 FY2013 010211478 18 \n", "15877 FY2012 010211478 18 \n", "18981 FY2011 010211478 19 \n", "589 FY2014 010211513 NaN \n", "21460 FY2013 010211513 NaN \n", "4502 FY2012 010211513 20 \n", "13619 FY2011 010211513 21 \n", "17018 FY2010 010211513 24 \n", "28147 FY2014 010211530 NaN \n", "6614 FY2013 010211530 29 \n", "27887 FY2012 010211530 27 \n", "13438 FY2011 010211530 29 \n", "17003 FY2010 010211530 29 \n", "\n", " NbrVotingGoverningBodyMembers NbrVotingMembersGoverningBody \\\n", "18819 NaN NaN \n", "7965 NaN NaN \n", "8989 20 20 \n", "22904 23 23 \n", "19649 24 24 \n", "28581 NaN NaN \n", "20683 NaN NaN \n", "4797 18 18 \n", "15877 18 18 \n", "18981 19 19 \n", "589 NaN NaN \n", "21460 NaN NaN \n", "4502 22 22 \n", "13619 21 21 \n", "17018 25 25 \n", "28147 NaN NaN \n", "6614 29 29 \n", "27887 27 27 \n", "13438 29 29 \n", "17003 29 29 \n", "\n", " NumberIndependentVotingMembers GoverningBodyVotingMembersCnt \\\n", "18819 NaN 22 \n", "7965 NaN 21 \n", "8989 20 NaN \n", "22904 23 NaN \n", "19649 24 NaN \n", "28581 NaN 18 \n", "20683 NaN 18 \n", "4797 18 NaN \n", "15877 18 NaN \n", "18981 19 NaN \n", "589 NaN 23 \n", "21460 NaN 22 \n", "4502 20 NaN \n", "13619 21 NaN \n", "17018 24 NaN \n", "28147 NaN 28 \n", "6614 29 NaN \n", "27887 27 NaN \n", "13438 29 NaN \n", "17003 29 NaN \n", "\n", " IndependentVotingMemberCnt VotingMembersGoverningBodyCnt \\\n", "18819 21 22 \n", "7965 21 21 \n", "8989 NaN NaN \n", "22904 NaN NaN \n", "19649 NaN NaN \n", "28581 18 18 \n", "20683 18 18 \n", "4797 NaN NaN \n", "15877 NaN NaN \n", "18981 NaN NaN \n", "589 21 23 \n", "21460 20 22 \n", "4502 NaN NaN \n", "13619 NaN NaN \n", "17018 NaN NaN \n", "28147 28 28 \n", "6614 NaN NaN \n", "27887 NaN 
NaN \n", "13438 NaN NaN \n", "17003 NaN NaN \n", "\n", " VotingMembersIndependentCnt \n", "18819 21 \n", "7965 21 \n", "8989 NaN \n", "22904 NaN \n", "19649 NaN \n", "28581 18 \n", "20683 18 \n", "4797 NaN \n", "15877 NaN \n", "18981 NaN \n", "589 21 \n", "21460 20 \n", "4502 NaN \n", "13619 NaN \n", "17018 NaN \n", "28147 28 \n", "6614 NaN \n", "27887 NaN \n", "13438 NaN \n", "17003 NaN " ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cols_2013 = ['GoverningBodyVotingMembersCnt', 'IndependentVotingMemberCnt',\n", " 'VotingMembersGoverningBodyCnt', 'VotingMembersIndependentCnt']\n", " \n", "cols_2010 = ['NbrIndependentVotingMembers', 'NbrVotingGoverningBodyMembers',\n", " 'NbrVotingMembersGoverningBody', 'NumberIndependentVotingMembers']\n", "df_all[['FYE', 'EIN'] + cols_2010+cols_2013][:20]" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "11143\n", "11143\n", "0\n", "0 \n", "\n", "0\n", "0\n" ] } ], "source": [ "print len(df_all[(df_all['IndependentVotingMemberCnt'].notnull())])\n", "print len(df_all[(df_all['VotingMembersIndependentCnt'].notnull())])\n", "print len(df_all[(df_all['IndependentVotingMemberCnt'].isnull()) & (df_all['VotingMembersIndependentCnt'].notnull())])\n", "print len(df_all[(df_all['VotingMembersIndependentCnt'].isnull()) & (df_all['IndependentVotingMemberCnt'].notnull())]), '\\n'\n", "\n", "print len(df_all[(df_all['GoverningBodyVotingMembersCnt'].isnull()) & (df_all['VotingMembersGoverningBodyCnt'].notnull())])\n", "print len(df_all[(df_all['VotingMembersGoverningBodyCnt'].isnull()) & (df_all['GoverningBodyVotingMembersCnt'].notnull())])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
Based on the above, each count is stored under two duplicate column names in each schema version, so we can drop one name from each pair: *GoverningBodyVotingMembersCnt* and *IndependentVotingMemberCnt* (newer-schema names) and *NbrVotingGoverningBodyMembers* and *NumberIndependentVotingMembers* (older-schema names). We'll go with the Section I, Line 3 and 4 versions (*VotingMembersGoverningBodyCnt* / *NbrVotingMembersGoverningBody* and *VotingMembersIndependentCnt* / *NbrIndependentVotingMembers*), which seem to be the same as the ones being dropped" ] }, { "cell_type": "code", "execution_count": 242, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers']\n" ] } ], "source": [ "cols = ['EIN', 'FYE']\n", "#cols_2013 = ['VotingMembersGoverningBodyCnt', 'VotingMembersIndependentCnt']\n", "#cols_2010 = ['NbrIndependentVotingMembers', 'NbrVotingMembersGoverningBody']\n", "cols = cols + ['VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody',\n", " 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers']\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Outsourced Management" ] }, { "cell_type": "code", "execution_count": 243, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties']\n" ] } ], "source": [ "cols = cols + ['DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties']\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### IRS 990 Review" ] }, { "cell_type": "code", "execution_count": 244, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody']\n" ] } ], "source": [ "cols = cols + ['Form990ProvidedToGvrnBodyInd', 
'Form990ProvidedToGoverningBody']\n", "print cols " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Audit Committee" ] }, { "cell_type": "code", "execution_count": 245, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee']\n" ] } ], "source": [ "cols = cols + ['AuditCommitteeInd', 'AuditCommittee']\n", "print cols " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "23690097" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "7351259+4433997+11904841" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
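What we see here is that, using the end-of-year (EOY) amounts, *TemporarilyRstrNetAssetsGrp* + *PermanentlyRstrNetAssetsGrp* + *UnrestrictedNetAssetsGrp* = *TotalNetAssetsFundBalanceGrp* = *NetAssetsOrFundBalancesEOYAmt* (for the first row: 7,351,259 + 4,433,997 + 11,904,841 = 23,690,097).\n", "\n", "- Given that *TotalNetAssetsFundBalanceGrp* is a 'group' (dict-valued) variable, I should gather the EOY values from the first two variables (temporarily and permanently restricted net assets), take their sum, and then divide by *NetAssetsOrFundBalancesEOYAmt* in order to get a *donor restrictions* variable similar to Yetman & Yetman.\n", "- Older filings (FY2010-FY2012 and some FY2013 rows in the table below) store the same information under the older names (*TemporarilyRestrictedNetAssets*, *PermanentlyRestrictedNetAssets*, *NetAssetsOrFundBalancesEOY*), with `BOY`/`EOY` keys instead of `BOYAmt`/`EOYAmt`." ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "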
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYETemporarilyRstrNetAssetsGrpTemporarilyRestrictedNetAssetsPermanentlyRstrNetAssetsGrpPermanentlyRestrictedNetAssetsUnrestrictedNetAssetsGrpUnrestrictedNetAssetsTotalNetAssetsFundBalanceGrpTotalNetAssetsFundBalancesNetAssetsOrFundBalancesBOYAmtNetAssetsOrFundBalancesBOYNetAssetsOrFundBalancesEOYAmtNetAssetsOrFundBalancesEOY
18819010202467FY2014{u'BOYAmt': u'5880227', u'EOYAmt': u'7351259'}NaN{u'BOYAmt': u'4193257', u'EOYAmt': u'4433997'}NaN{u'BOYAmt': u'11584416', u'EOYAmt': u'11904841'}NaN{u'BOYAmt': u'21657900', u'EOYAmt': u'23690097'}NaN21657900NaN23690097NaN
7965010202467FY2013{u'BOYAmt': u'5773597', u'EOYAmt': u'5880227'}NaN{u'BOYAmt': u'4153627', u'EOYAmt': u'4193257'}NaN{u'BOYAmt': u'12203695', u'EOYAmt': u'11584416'}NaN{u'BOYAmt': u'22130919', u'EOYAmt': u'21657900'}NaN22130919NaN21657900NaN
8989010202467FY2012NaN{u'BOY': u'6554891', u'EOY': u'5773597'}NaN{u'BOY': u'3096087', u'EOY': u'4153627'}NaN{u'BOY': u'10835720', u'EOY': u'12203695'}NaN{u'BOY': u'20486698', u'EOY': u'22130919'}NaN20486698NaN22130919
22904010202467FY2011NaN{u'BOY': u'7110362', u'EOY': u'6554891'}NaN{u'BOY': u'2873816', u'EOY': u'3096087'}NaN{u'BOY': u'7951722', u'EOY': u'10835720'}NaN{u'BOY': u'17935900', u'EOY': u'20486698'}NaN17935900NaN20486698
19649010202467FY2010NaN{u'BOY': u'7291747', u'EOY': u'7110362'}NaN{u'BOY': u'2824416', u'EOY': u'2873816'}NaN{u'BOY': u'7878008', u'EOY': u'7951722'}NaN{u'BOY': u'17994171', u'EOY': u'17935900'}NaN17994171NaN17935900
28581010211478FY2015{u'BOYAmt': u'889233', u'EOYAmt': u'908556'}NaN{u'BOYAmt': u'2717476', u'EOYAmt': u'2717000'}NaN{u'BOYAmt': u'1254972', u'EOYAmt': u'1179056'}NaN{u'BOYAmt': u'4861681', u'EOYAmt': u'4804612'}NaN4861681NaN4804612NaN
20683010211478FY2014{u'BOYAmt': u'614778', u'EOYAmt': u'889233'}NaN{u'BOYAmt': u'2669412', u'EOYAmt': u'2717476'}NaN{u'BOYAmt': u'1291239', u'EOYAmt': u'1254972'}NaN{u'BOYAmt': u'4575429', u'EOYAmt': u'4861681'}NaN4575429NaN4861681NaN
4797010211478FY2013NaN{u'BOY': u'563731', u'EOY': u'614778'}NaN{u'BOY': u'2652247', u'EOY': u'2669412'}NaN{u'BOY': u'1362377', u'EOY': u'1291239'}NaN{u'BOY': u'4578355', u'EOY': u'4575429'}NaN4578355NaN4575429
15877010211478FY2012NaN{u'BOY': u'511917', u'EOY': u'563731'}NaN{u'BOY': u'2662228', u'EOY': u'2652247'}NaN{u'BOY': u'1383839', u'EOY': u'1362377'}NaN{u'BOY': u'4557984', u'EOY': u'4578355'}NaN4557984NaN4578355
18981010211478FY2011NaN{u'BOY': u'586298', u'EOY': u'511917'}NaN{u'BOY': u'406957', u'EOY': u'2662228'}NaN{u'BOY': u'928373', u'EOY': u'1383839'}NaN{u'BOY': u'1921628', u'EOY': u'4557984'}NaN1921628NaN4557984
\n", "
" ], "text/plain": [ " EIN FYE TemporarilyRstrNetAssetsGrp \\\n", "18819 010202467 FY2014 {u'BOYAmt': u'5880227', u'EOYAmt': u'7351259'} \n", "7965 010202467 FY2013 {u'BOYAmt': u'5773597', u'EOYAmt': u'5880227'} \n", "8989 010202467 FY2012 NaN \n", "22904 010202467 FY2011 NaN \n", "19649 010202467 FY2010 NaN \n", "28581 010211478 FY2015 {u'BOYAmt': u'889233', u'EOYAmt': u'908556'} \n", "20683 010211478 FY2014 {u'BOYAmt': u'614778', u'EOYAmt': u'889233'} \n", "4797 010211478 FY2013 NaN \n", "15877 010211478 FY2012 NaN \n", "18981 010211478 FY2011 NaN \n", "\n", " TemporarilyRestrictedNetAssets \\\n", "18819 NaN \n", "7965 NaN \n", "8989 {u'BOY': u'6554891', u'EOY': u'5773597'} \n", "22904 {u'BOY': u'7110362', u'EOY': u'6554891'} \n", "19649 {u'BOY': u'7291747', u'EOY': u'7110362'} \n", "28581 NaN \n", "20683 NaN \n", "4797 {u'BOY': u'563731', u'EOY': u'614778'} \n", "15877 {u'BOY': u'511917', u'EOY': u'563731'} \n", "18981 {u'BOY': u'586298', u'EOY': u'511917'} \n", "\n", " PermanentlyRstrNetAssetsGrp \\\n", "18819 {u'BOYAmt': u'4193257', u'EOYAmt': u'4433997'} \n", "7965 {u'BOYAmt': u'4153627', u'EOYAmt': u'4193257'} \n", "8989 NaN \n", "22904 NaN \n", "19649 NaN \n", "28581 {u'BOYAmt': u'2717476', u'EOYAmt': u'2717000'} \n", "20683 {u'BOYAmt': u'2669412', u'EOYAmt': u'2717476'} \n", "4797 NaN \n", "15877 NaN \n", "18981 NaN \n", "\n", " PermanentlyRestrictedNetAssets \\\n", "18819 NaN \n", "7965 NaN \n", "8989 {u'BOY': u'3096087', u'EOY': u'4153627'} \n", "22904 {u'BOY': u'2873816', u'EOY': u'3096087'} \n", "19649 {u'BOY': u'2824416', u'EOY': u'2873816'} \n", "28581 NaN \n", "20683 NaN \n", "4797 {u'BOY': u'2652247', u'EOY': u'2669412'} \n", "15877 {u'BOY': u'2662228', u'EOY': u'2652247'} \n", "18981 {u'BOY': u'406957', u'EOY': u'2662228'} \n", "\n", " UnrestrictedNetAssetsGrp \\\n", "18819 {u'BOYAmt': u'11584416', u'EOYAmt': u'11904841'} \n", "7965 {u'BOYAmt': u'12203695', u'EOYAmt': u'11584416'} \n", "8989 NaN \n", "22904 NaN \n", "19649 NaN \n", "28581 
{u'BOYAmt': u'1254972', u'EOYAmt': u'1179056'} \n", "20683 {u'BOYAmt': u'1291239', u'EOYAmt': u'1254972'} \n", "4797 NaN \n", "15877 NaN \n", "18981 NaN \n", "\n", " UnrestrictedNetAssets \\\n", "18819 NaN \n", "7965 NaN \n", "8989 {u'BOY': u'10835720', u'EOY': u'12203695'} \n", "22904 {u'BOY': u'7951722', u'EOY': u'10835720'} \n", "19649 {u'BOY': u'7878008', u'EOY': u'7951722'} \n", "28581 NaN \n", "20683 NaN \n", "4797 {u'BOY': u'1362377', u'EOY': u'1291239'} \n", "15877 {u'BOY': u'1383839', u'EOY': u'1362377'} \n", "18981 {u'BOY': u'928373', u'EOY': u'1383839'} \n", "\n", " TotalNetAssetsFundBalanceGrp \\\n", "18819 {u'BOYAmt': u'21657900', u'EOYAmt': u'23690097'} \n", "7965 {u'BOYAmt': u'22130919', u'EOYAmt': u'21657900'} \n", "8989 NaN \n", "22904 NaN \n", "19649 NaN \n", "28581 {u'BOYAmt': u'4861681', u'EOYAmt': u'4804612'} \n", "20683 {u'BOYAmt': u'4575429', u'EOYAmt': u'4861681'} \n", "4797 NaN \n", "15877 NaN \n", "18981 NaN \n", "\n", " TotalNetAssetsFundBalances \\\n", "18819 NaN \n", "7965 NaN \n", "8989 {u'BOY': u'20486698', u'EOY': u'22130919'} \n", "22904 {u'BOY': u'17935900', u'EOY': u'20486698'} \n", "19649 {u'BOY': u'17994171', u'EOY': u'17935900'} \n", "28581 NaN \n", "20683 NaN \n", "4797 {u'BOY': u'4578355', u'EOY': u'4575429'} \n", "15877 {u'BOY': u'4557984', u'EOY': u'4578355'} \n", "18981 {u'BOY': u'1921628', u'EOY': u'4557984'} \n", "\n", " NetAssetsOrFundBalancesBOYAmt NetAssetsOrFundBalancesBOY \\\n", "18819 21657900 NaN \n", "7965 22130919 NaN \n", "8989 NaN 20486698 \n", "22904 NaN 17935900 \n", "19649 NaN 17994171 \n", "28581 4861681 NaN \n", "20683 4575429 NaN \n", "4797 NaN 4578355 \n", "15877 NaN 4557984 \n", "18981 NaN 1921628 \n", "\n", " NetAssetsOrFundBalancesEOYAmt NetAssetsOrFundBalancesEOY \n", "18819 23690097 NaN \n", "7965 21657900 NaN \n", "8989 NaN 22130919 \n", "22904 NaN 20486698 \n", "19649 NaN 17935900 \n", "28581 4804612 NaN \n", "20683 4861681 NaN \n", "4797 NaN 4575429 \n", "15877 NaN 4578355 \n", "18981 NaN 
4557984 " ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "asset_cols = ['TemporarilyRstrNetAssetsGrp', 'TemporarilyRestrictedNetAssets',\n", " 'PermanentlyRstrNetAssetsGrp', 'PermanentlyRestrictedNetAssets',\n", " 'UnrestrictedNetAssetsGrp', 'UnrestrictedNetAssets', \n", " 'TotalNetAssetsFundBalanceGrp', 'TotalNetAssetsFundBalances', \n", " 'NetAssetsOrFundBalancesBOYAmt', 'NetAssetsOrFundBalancesBOY', \n", " 'NetAssetsOrFundBalancesEOYAmt', 'NetAssetsOrFundBalancesEOY']\n", "df_all[['EIN', 'FYE']+asset_cols][:10]" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'TemporarilyRstrNetAssetsGrp', 'TemporarilyRestrictedNetAssets', 'PermanentlyRstrNetAssetsGrp', 'PermanentlyRestrictedNetAssets', 'NetAssetsOrFundBalancesEOYAmt', 'NetAssetsOrFundBalancesEOY']\n" ] } ], "source": [ "# Superseded: the raw restricted-asset columns are no longer added to cols here; the derived columns\n", "# ('perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions') are added further down instead.\n", "# Every line stays commented out so this cell is a no-op (a partly commented list raises IndentationError).\n", "#cols = cols + ['TemporarilyRstrNetAssetsGrp', 'TemporarilyRestrictedNetAssets',\n", " #'PermanentlyRstrNetAssetsGrp', 'PermanentlyRestrictedNetAssets',\n", " #'UnrestrictedNetAssetsGrp', 'UnrestrictedNetAssets', \n", " #'TotalNetAssetsFundBalanceGrp', 'TotalNetAssetsFundBalances', \n", " #'NetAssetsOrFundBalancesBOYAmt', 'NetAssetsOrFundBalancesBOY', \n", " #'NetAssetsOrFundBalancesEOYAmt', 'NetAssetsOrFundBalancesEOY']\n", "#print cols" ] }, { "cell_type": "code", "execution_count": 247, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[u'UponRequest', u'UponRequestInd', u'VotingMembersGoverningBodyCnt', u'VotingMembersIndependentCnt', u'WebSite', u'WebsiteAddressTxt', 
u'WhistleblowerPolicy', u'WhistleblowerPolicyInd', u'WrittenPolicyOrProcedure', u'WrittenPolicyOrProcedureInd', u'YearFormation', u'_id', 'FeesForServicesAccountingGrp_v2', 'FYE', 'FeesForServicesAccounting_TotalAmt', 'FeesForServicesAccounting_binary', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions']\n" ] } ], "source": [ "print df_all.columns.tolist()[-20:]" ] }, { "cell_type": "code", "execution_count": 249, "metadata": { "collapsed": true }, "outputs": [], "source": [ "cols = cols + ['perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Tax-Exempt Bonds (municipal bonds)" ] }, { "cell_type": "code", "execution_count": 250, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds']\n" ] } ], "source": [ "cols = cols + ['TaxExemptBondsInd', 'TaxExemptBonds']\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Taxable Revenues" ] }, { "cell_type": "code", "execution_count": 251, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 
'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds', 'NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome']\n" ] } ], "source": [ "cols = cols + ['NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome']\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Audited Financials and Federal Audits" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{u'#text': u'true', u'@referenceDocumentId': u'BinaryAttach BinaryAttachN1 BinaryAttachN2'} 2\n", "{u'#text': u'true', u'@referenceDocumentId': u'BinaryAttach BinaryAttachN1'} 2\n", "{u'#text': u'true', u'@referenceDocumentId': u'990:103'} 2\n", "{u'#text': u'true', u'@referenceDocumentId': u'RetDoc3'} 2\n", "1 1\n", "{u'#text': u'1', u'@referenceDocumentId': u'RetDoc2317200001'} 1\n", "Name: AuditedFinancialStmtAttInd, dtype: int64" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#df_all['AuditedFinancialStmtAttInd'].value_counts()" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmt
18819010202467FY20141NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}NaN
7965010202467FY20131NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}NaN
8989010202467FY2012NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}
22904010202467FY2011NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}
19649010202467FY2010NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1039900001'}
28581010211478FY2015trueNaNNaNNaNfalseNaN{u'#text': u'true', u'@referenceDocumentId': u'RetDoc3'}NaN
20683010211478FY2014trueNaNNaNNaNfalseNaN{u'#text': u'true', u'@referenceDocumentId': u'RetDoc4'}NaN
4797010211478FY2013NaNtrueNaNNaNNaNfalseNaN{u'#text': u'true', u'@referenceDocumentId': u'RetDoc3'}
\n", "
" ], "text/plain": [ " EIN FYE FSAuditedInd FSAudited FederalGrantAuditPerformedInd \\\n", "18819 010202467 FY2014 1 NaN 1 \n", "7965 010202467 FY2013 1 NaN 1 \n", "8989 010202467 FY2012 NaN 1 NaN \n", "22904 010202467 FY2011 NaN 1 NaN \n", "19649 010202467 FY2010 NaN 1 NaN \n", "28581 010211478 FY2015 true NaN NaN \n", "20683 010211478 FY2014 true NaN NaN \n", "4797 010211478 FY2013 NaN true NaN \n", "\n", " FederalGrantAuditPerformed FederalGrantAuditRequiredInd \\\n", "18819 NaN 1 \n", "7965 NaN 1 \n", "8989 1 NaN \n", "22904 1 NaN \n", "19649 1 NaN \n", "28581 NaN false \n", "20683 NaN false \n", "4797 NaN NaN \n", "\n", " FederalGrantAuditRequired \\\n", "18819 NaN \n", "7965 NaN \n", "8989 1 \n", "22904 1 \n", "19649 1 \n", "28581 NaN \n", "20683 NaN \n", "4797 false \n", "\n", " IndependentAuditFinclStmtInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "7965 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "8989 NaN \n", "22904 NaN \n", "19649 NaN \n", "28581 {u'#text': u'true', u'@referenceDocumentId': u'RetDoc3'} \n", "20683 {u'#text': u'true', u'@referenceDocumentId': u'RetDoc4'} \n", "4797 NaN \n", "\n", " IndependentAuditFinancialStmt \n", "18819 NaN \n", "7965 NaN \n", "8989 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "22904 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "19649 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1039900001'} \n", "28581 NaN \n", "20683 NaN \n", "4797 {u'#text': u'true', u'@referenceDocumentId': u'RetDoc3'} " ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "audit_cols = [#'AuditedFinancialStmtAttInd', \n", " #'ConsolidatedAuditFinclStmtInd', 'ConsolidatedAuditFinancialStmt', \n", " 'FSAuditedInd', 'FSAudited', #'FSAuditedBasisGrp', 'FSAuditedBasis',\n", " 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', \n", " 'FederalGrantAuditRequiredInd', 
'FederalGrantAuditRequired', \n", " 'IndependentAuditFinclStmtInd', 'IndependentAuditFinancialStmt', ]\n", "df_all[['EIN', 'FYE']+ audit_cols][:8]" ] }, { "cell_type": "code", "execution_count": 252, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds', 'NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome', 'FSAuditedInd', 'FSAudited', 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', 'FederalGrantAuditRequiredInd', 'FederalGrantAuditRequired', 'IndependentAuditFinclStmtInd', 'IndependentAuditFinancialStmt']\n" ] } ], "source": [ "cols = cols + ['FSAuditedInd', 'FSAudited', \n", " 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', \n", " 'FederalGrantAuditRequiredInd', 'FederalGrantAuditRequired', \n", " 'IndependentAuditFinclStmtInd', 'IndependentAuditFinancialStmt', ]\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Create *Donor Restrictions* variable as in Yetman & Yetman (2012)\n", "I NEED TO DO IT *BEFORE* COLLAPSING -- OTHERWISE IT WILL CAUSE PROBLEMS FOR AN EIN THAT HAS NO PERMANENTLY RESTRICTED ASSETS IN ONE YEAR, SAY, BUT IT DOES THE NEXT -- THE GROUPBY FUNCTION WILL BRING THOSE INTO THE SAME ROW." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions -- Part (a) - Permanently Restricted Assets" ] }, { "cell_type": "code", "execution_count": 199, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Index: ', 26647, 'Perm. 
Restr Assets:', 0) \n", "\n", "# of minutes: 1.63736418486 \n", "\n", "\n", "29440\n" ] } ], "source": [ "from IPython.display import display, clear_output ##### FOR USE WITH STDOUT (DYNAMIC, SINGLE-LINE PRINTING)\n", "\n", "import timeit\n", "start_time = timeit.default_timer()\n", "\n", "# Permanently restricted net assets: read the '...Grp' dict ('EOYAmt', else 'BOYAmt') when it is\n", "# present, otherwise the non-Grp dict ('EOY', else 'BOY'); rows with neither keep the default of 0.\n", "for index, row in df_all[:].iterrows():\n", "    perm_rest = 0\n", "    if pd.notnull(row['PermanentlyRstrNetAssetsGrp']):\n", "        if 'EOYAmt' in row['PermanentlyRstrNetAssetsGrp']:\n", "            perm_rest = row['PermanentlyRstrNetAssetsGrp']['EOYAmt']\n", "        elif 'BOYAmt' in row['PermanentlyRstrNetAssetsGrp']:\n", "            perm_rest = row['PermanentlyRstrNetAssetsGrp']['BOYAmt']\n", "    elif pd.notnull(row['PermanentlyRestrictedNetAssets']):\n", "        if 'EOY' in row['PermanentlyRestrictedNetAssets']:\n", "            perm_rest = row['PermanentlyRestrictedNetAssets']['EOY']\n", "        elif 'BOY' in row['PermanentlyRestrictedNetAssets']:\n", "            perm_rest = row['PermanentlyRestrictedNetAssets']['BOY']\n", "\n", "    # .loc is the label-based replacement for the deprecated .ix indexer\n", "    df_all.loc[index, 'perm_rest_assets'] = perm_rest\n", "\n", "    clear_output()\n", "    print ('Index: ', index, 'Perm. Restr Assets:', perm_rest), '\\n'\n", "    sys.stdout.flush()\n", "\n", "elapsed = timeit.default_timer() - start_time\n", "print '# of minutes: ', elapsed/60, '\\n', '\\n' \n", "print df_all['perm_rest_assets'].value_counts().sum()" ] },
{ "cell_type": "code", "execution_count": 200, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "29440\n", "0\n", "count 29440\n", "unique 10252\n", "top 0\n", "freq 13659\n", "Name: perm_rest_assets, dtype: int64 \n", "\n", "count 2.944000e+04\n", "mean 5.853230e+06\n", "std 3.626274e+07\n", "min 0.000000e+00\n", "25% 0.000000e+00\n", "50% 2.500000e+03\n", "75% 1.215208e+06\n", "max 1.097021e+09\n", "Name: perm_rest_assets, dtype: float64\n" ] } ], "source": [ "print len(df_all[df_all['perm_rest_assets'].notnull()])\n", "print len(df_all[df_all['perm_rest_assets'].isnull()])\n", "print df_all['perm_rest_assets'].describe(), '\\n'\n", "df_all['perm_rest_assets'] = df_all['perm_rest_assets'].astype('int')\n", "print df_all['perm_rest_assets'].describe()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions -- Part (b) - Temporarily Restricted Assets" ] },
{ "cell_type": "code", "execution_count": 201, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Index: ', 26647, 'Temp. 
Restr Assets:', u'96956') \n", "\n", "# of minutes: 1.76544401646 \n", "\n", "\n", "29440\n" ] } ], "source": [ "from IPython.display import display, clear_output ##### FOR USE WITH STDOUT (DYNAMIC, SINGLE-LINE PRINTING)\n", "\n", "import timeit\n", "start_time = timeit.default_timer()\n", "\n", "# Temporarily restricted net assets: read the '...Grp' dict ('EOYAmt', else 'BOYAmt') when it is\n", "# present, otherwise the non-Grp dict ('EOY', else 'BOY'); rows with neither keep the default of 0.\n", "for index, row in df_all[:].iterrows():\n", "    temp_rest = 0\n", "    if pd.notnull(row['TemporarilyRstrNetAssetsGrp']):\n", "        if 'EOYAmt' in row['TemporarilyRstrNetAssetsGrp']:\n", "            temp_rest = row['TemporarilyRstrNetAssetsGrp']['EOYAmt']\n", "        elif 'BOYAmt' in row['TemporarilyRstrNetAssetsGrp']:\n", "            temp_rest = row['TemporarilyRstrNetAssetsGrp']['BOYAmt']\n", "    elif pd.notnull(row['TemporarilyRestrictedNetAssets']):\n", "        if 'EOY' in row['TemporarilyRestrictedNetAssets']:\n", "            temp_rest = row['TemporarilyRestrictedNetAssets']['EOY']\n", "        elif 'BOY' in row['TemporarilyRestrictedNetAssets']:\n", "            temp_rest = row['TemporarilyRestrictedNetAssets']['BOY']\n", "\n", "    # .loc is the label-based replacement for the deprecated .ix indexer\n", "    df_all.loc[index, 'temp_rest_assets'] = temp_rest\n", "\n", "    clear_output()\n", "    print ('Index: ', index, 'Temp. Restr Assets:', temp_rest), '\\n'\n", "    sys.stdout.flush()\n", "\n", "elapsed = timeit.default_timer() - start_time\n", "print '# of minutes: ', elapsed/60, '\\n', '\\n' \n", "print df_all['temp_rest_assets'].value_counts().sum()" ] },
{ "cell_type": "code", "execution_count": 202, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "29440\n", "0\n", "count 29440\n", "unique 24978\n", "top 0\n", "freq 2849\n", "Name: temp_rest_assets, dtype: int64 \n", "\n", "count 2.944000e+04\n", "mean 7.365323e+06\n", "std 3.905838e+07\n", "min -2.158587e+06\n", "25% 1.274518e+05\n", "50% 6.781395e+05\n", "75% 2.880510e+06\n", "max 1.473751e+09\n", "Name: temp_rest_assets, dtype: float64\n" ] } ], "source": [ "print len(df_all[df_all['temp_rest_assets'].notnull()])\n", "print len(df_all[df_all['temp_rest_assets'].isnull()])\n", "print df_all['temp_rest_assets'].describe(), '\\n'\n", "df_all['temp_rest_assets'] = df_all['temp_rest_assets'].astype('int')\n", "print df_all['temp_rest_assets'].describe()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions -- Part (c) - Total Net Assets" ] },
{ "cell_type": "code", "execution_count": 203, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "count 29440\n", "unique 28932\n", "top 0\n", "freq 41\n", "Name: net_assets, dtype: object \n", "\n", "count 2.944000e+04\n", "mean 3.175411e+07\n", "std 1.712160e+08\n", "min -1.399548e+08\n", "25% 1.734802e+06\n", "50% 5.309920e+06\n", "75% 1.683359e+07\n", "max 8.116399e+09\n", "Name: net_assets, dtype: float64\n" ] } ], "source": [ "df_all['net_assets'] = df_all['NetAssetsOrFundBalancesEOYAmt']\n", "df_all['net_assets'] = np.where(df_all['net_assets'].isnull(),\n", " df_all['NetAssetsOrFundBalancesEOY'], df_all['net_assets'])\n", "print df_all['net_assets'].describe(), '\\n'\n", "df_all['net_assets'] = df_all['net_assets'].astype('int')\n", "print 
df_all['net_assets'].describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions -- Part (d) - Calculations" ] }, { "cell_type": "code", "execution_count": 204, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 2.940000e+04\n", "mean inf\n", "std NaN\n", "min -1.564426e+02\n", "25% 6.059707e-02\n", "50% 2.498250e-01\n", "75% 5.590346e-01\n", "max inf\n", "Name: donor_restrictions, dtype: float64" ] }, "execution_count": 204, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all['donor_restrictions'] = (df_all['temp_rest_assets']+df_all['perm_rest_assets'])/df_all['net_assets']\n", "df_all['donor_restrictions'].describe()" ] }, { "cell_type": "code", "execution_count": 205, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "FeesForServicesAccounting_binary int64\n", "perm_rest_assets int64\n", "temp_rest_assets int64\n", "net_assets int64\n", "donor_restrictions float64\n", "dtype: object" ] }, "execution_count": 205, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all.dtypes[-5:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
THERE ARE 40 MISSING CASES, PLUS INFINITE VALUES WHERE NET ASSETS ARE ZERO -- ASSIGN THEM ALL A VALUE OF ZERO" ] }, { "cell_type": "code", "execution_count": 207, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "40" ] }, "execution_count": 207, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print len(df_all[df_all['donor_restrictions'].isnull()])" ] }, { "cell_type": "code", "execution_count": 208, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 2.944000e+04\n", "mean inf\n", "std NaN\n", "min -1.564426e+02\n", "25% 5.999956e-02\n", "50% 2.490086e-01\n", "75% 5.585148e-01\n", "max inf\n", "Name: donor_restrictions, dtype: float64" ] }, "execution_count": 208, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A zero net_assets denominator yields NaN (0/0) or +/-inf (x/0). Both are undefined ratios,\n", "# so map the infinities to NaN first and then set every undefined value to 0; filling only\n", "# the NaNs leaves inf in the column, which makes the mean inf and the std NaN.\n", "df_all['donor_restrictions'] = df_all['donor_restrictions'].replace([np.inf, -np.inf], np.nan).fillna(0)\n", "df_all['donor_restrictions'].describe()" ] }, { "cell_type": "code", "execution_count": 209, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n" ] } ], "source": [ "print len(df_all[df_all['donor_restrictions'].isnull()])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
CHECK A FEW >1 ROWS" ] }, { "cell_type": "code", "execution_count": 221, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FYEdonor_restrictionstemp_rest_assetsTemporarilyRstrNetAssetsGrpTemporarilyRestrictedNetAssetsperm_rest_assetsPermanentlyRstrNetAssetsGrpPermanentlyRestrictedNetAssetsnet_assetsNetAssetsOrFundBalancesEOYAmtNetAssetsOrFundBalancesEOYTotalNetAssetsFundBalanceGrpTotalNetAssetsFundBalancesNetAssetsOrFundBalancesEOYAmtNetAssetsOrFundBalancesEOY
13277FY20110.01188645000NaN{u'BOY': u'73257', u'EOY': u'45000'}0NaNNaN3785879NaN3785879NaN{u'BOY': u'3228716', u'EOY': u'3785879'}NaN3785879
616FY20140.14315352310{u'BOYAmt': u'91351', u'EOYAmt': u'52310'}NaN284363{u'BOYAmt': u'207970', u'EOYAmt': u'284363'}NaN23518422351842NaN{u'BOYAmt': u'2321970', u'EOYAmt': u'2351842'}NaN2351842NaN
8573FY20130.12890891351{u'BOYAmt': u'467396', u'EOYAmt': u'91351'}NaN207970{u'BOYAmt': u'100000', u'EOYAmt': u'207970'}NaN23219702321970NaN{u'BOYAmt': u'2441674', u'EOYAmt': u'2321970'}NaN2321970NaN
23577FY20120.232380467396NaN{u'BOY': u'171880', u'EOY': u'467396'}100000NaN{u'BOY': u'0', u'EOY': u'100000'}2441674NaN2441674NaN{u'BOY': u'1953372', u'EOY': u'2441674'}NaN2441674
16294FY20110.087991171880NaN{u'BOY': u'96956', u'EOY': u'171880'}0NaNNaN1953372NaN1953372NaN{u'BOY': u'1720152', u'EOY': u'1953372'}NaN1953372
26647FY20100.05636596956NaN{u'BOY': u'79000', u'EOY': u'96956'}0NaNNaN1720152NaN1720152NaN{u'BOY': u'1413351', u'EOY': u'1720152'}NaN1720152
\n", "
" ], "text/plain": [ " FYE donor_restrictions temp_rest_assets \\\n", "13277 FY2011 0.011886 45000 \n", "616 FY2014 0.143153 52310 \n", "8573 FY2013 0.128908 91351 \n", "23577 FY2012 0.232380 467396 \n", "16294 FY2011 0.087991 171880 \n", "26647 FY2010 0.056365 96956 \n", "\n", " TemporarilyRstrNetAssetsGrp \\\n", "13277 NaN \n", "616 {u'BOYAmt': u'91351', u'EOYAmt': u'52310'} \n", "8573 {u'BOYAmt': u'467396', u'EOYAmt': u'91351'} \n", "23577 NaN \n", "16294 NaN \n", "26647 NaN \n", "\n", " TemporarilyRestrictedNetAssets perm_rest_assets \\\n", "13277 {u'BOY': u'73257', u'EOY': u'45000'} 0 \n", "616 NaN 284363 \n", "8573 NaN 207970 \n", "23577 {u'BOY': u'171880', u'EOY': u'467396'} 100000 \n", "16294 {u'BOY': u'96956', u'EOY': u'171880'} 0 \n", "26647 {u'BOY': u'79000', u'EOY': u'96956'} 0 \n", "\n", " PermanentlyRstrNetAssetsGrp \\\n", "13277 NaN \n", "616 {u'BOYAmt': u'207970', u'EOYAmt': u'284363'} \n", "8573 {u'BOYAmt': u'100000', u'EOYAmt': u'207970'} \n", "23577 NaN \n", "16294 NaN \n", "26647 NaN \n", "\n", " PermanentlyRestrictedNetAssets net_assets \\\n", "13277 NaN 3785879 \n", "616 NaN 2351842 \n", "8573 NaN 2321970 \n", "23577 {u'BOY': u'0', u'EOY': u'100000'} 2441674 \n", "16294 NaN 1953372 \n", "26647 NaN 1720152 \n", "\n", " NetAssetsOrFundBalancesEOYAmt NetAssetsOrFundBalancesEOY \\\n", "13277 NaN 3785879 \n", "616 2351842 NaN \n", "8573 2321970 NaN \n", "23577 NaN 2441674 \n", "16294 NaN 1953372 \n", "26647 NaN 1720152 \n", "\n", " TotalNetAssetsFundBalanceGrp \\\n", "13277 NaN \n", "616 {u'BOYAmt': u'2321970', u'EOYAmt': u'2351842'} \n", "8573 {u'BOYAmt': u'2441674', u'EOYAmt': u'2321970'} \n", "23577 NaN \n", "16294 NaN \n", "26647 NaN \n", "\n", " TotalNetAssetsFundBalances NetAssetsOrFundBalancesEOYAmt \\\n", "13277 {u'BOY': u'3228716', u'EOY': u'3785879'} NaN \n", "616 NaN 2351842 \n", "8573 NaN 2321970 \n", "23577 {u'BOY': u'1953372', u'EOY': u'2441674'} NaN \n", "16294 {u'BOY': u'1720152', u'EOY': u'1953372'} NaN \n", "26647 {u'BOY': 
u'1413351', u'EOY': u'1720152'} NaN \n", "\n", " NetAssetsOrFundBalancesEOY \n", "13277 3785879 \n", "616 NaN \n", "8573 NaN \n", "23577 2441674 \n", "16294 1953372 \n", "26647 1720152 " ] }, "execution_count": 221, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all[['FYE', 'donor_restrictions', 'temp_rest_assets', 'TemporarilyRstrNetAssetsGrp', 'TemporarilyRestrictedNetAssets',\n", " 'perm_rest_assets', 'PermanentlyRstrNetAssetsGrp', 'PermanentlyRestrictedNetAssets',\n", " 'net_assets', 'NetAssetsOrFundBalancesEOYAmt', 'NetAssetsOrFundBalancesEOY',\n", " #'UnrestrictedNetAssetsGrp', 'UnrestrictedNetAssets', \n", " 'TotalNetAssetsFundBalanceGrp', 'TotalNetAssetsFundBalances', \n", " #'NetAssetsOrFundBalancesBOYAmt', 'NetAssetsOrFundBalancesBOY', \n", " 'NetAssetsOrFundBalancesEOYAmt', 'NetAssetsOrFundBalancesEOY']][-6:]" ] }, { "cell_type": "code", "execution_count": 211, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FYEdonor_restrictionstemp_rest_assetsperm_rest_assetsnet_assets
13277FY20110.0118864500003785879
616FY20140.143153523102843632351842
8573FY20130.128908913512079702321970
23577FY20120.2323804673961000002441674
16294FY20110.08799117188001953372
26647FY20100.0563659695601720152
\n", "
" ], "text/plain": [ " FYE donor_restrictions temp_rest_assets perm_rest_assets \\\n", "13277 FY2011 0.011886 45000 0 \n", "616 FY2014 0.143153 52310 284363 \n", "8573 FY2013 0.128908 91351 207970 \n", "23577 FY2012 0.232380 467396 100000 \n", "16294 FY2011 0.087991 171880 0 \n", "26647 FY2010 0.056365 96956 0 \n", "\n", " net_assets \n", "13277 3785879 \n", "616 2351842 \n", "8573 2321970 \n", "23577 2441674 \n", "16294 1953372 \n", "26647 1720152 " ] }, "execution_count": 211, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all[['FYE', 'donor_restrictions', 'temp_rest_assets', 'perm_rest_assets', 'net_assets']][-6:]" ] }, { "cell_type": "code", "execution_count": 224, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1230\n", "1117\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FYEdonor_restrictionstemp_rest_assetsTemporarilyRstrNetAssetsGrpTemporarilyRestrictedNetAssetsperm_rest_assetsPermanentlyRstrNetAssetsGrpPermanentlyRestrictedNetAssetsnet_assetsNetAssetsOrFundBalancesEOYAmtNetAssetsOrFundBalancesEOYTotalNetAssetsFundBalanceGrpTotalNetAssetsFundBalancesNetAssetsOrFundBalancesEOYAmtNetAssetsOrFundBalancesEOY
4768FY20121.029663261414NaN{u'BOY': u'589365', u'EOY': u'261414'}0NaNNaN253883NaN253883NaN{u'BOY': u'610579', u'EOY': u'253883'}NaN253883
3425FY20141.162178124675{u'BOYAmt': u'271888', u'EOYAmt': u'124675'}NaN0NaNNaN107277107277NaN{u'BOYAmt': u'440094', u'EOYAmt': u'107277'}NaN107277NaN
25435FY20141.426897312031{u'BOYAmt': u'94893', u'EOYAmt': u'312031'}NaN0NaNNaN218678218678NaN{u'BOYAmt': u'608360', u'EOYAmt': u'218678'}NaN218678NaN
1853FY20141.013958685532{u'BOYAmt': u'499406', u'EOYAmt': u'685532'}NaN0NaNNaN676095676095NaN{u'BOYAmt': u'426629', u'EOYAmt': u'676095'}NaN676095NaN
1872FY20141.0014052144621{u'BOYAmt': u'1579409', u'EOYAmt': u'2144621'}NaN6644457{u'BOYAmt': u'6501148', u'EOYAmt': u'6644457'}NaN87767508776750NaN{u'BOYAmt': u'8480604', u'EOYAmt': u'8776750'}NaN8776750NaN
1938FY20141.0014052144621{u'BOYAmt': u'1579409', u'EOYAmt': u'2144621'}NaN6644457{u'BOYAmt': u'6501148', u'EOYAmt': u'6644457'}NaN87767508776750NaN{u'BOYAmt': u'8480604', u'EOYAmt': u'8776750'}NaN8776750NaN
\n", "
" ], "text/plain": [ " FYE donor_restrictions temp_rest_assets \\\n", "4768 FY2012 1.029663 261414 \n", "3425 FY2014 1.162178 124675 \n", "25435 FY2014 1.426897 312031 \n", "1853 FY2014 1.013958 685532 \n", "1872 FY2014 1.001405 2144621 \n", "1938 FY2014 1.001405 2144621 \n", "\n", " TemporarilyRstrNetAssetsGrp \\\n", "4768 NaN \n", "3425 {u'BOYAmt': u'271888', u'EOYAmt': u'124675'} \n", "25435 {u'BOYAmt': u'94893', u'EOYAmt': u'312031'} \n", "1853 {u'BOYAmt': u'499406', u'EOYAmt': u'685532'} \n", "1872 {u'BOYAmt': u'1579409', u'EOYAmt': u'2144621'} \n", "1938 {u'BOYAmt': u'1579409', u'EOYAmt': u'2144621'} \n", "\n", " TemporarilyRestrictedNetAssets perm_rest_assets \\\n", "4768 {u'BOY': u'589365', u'EOY': u'261414'} 0 \n", "3425 NaN 0 \n", "25435 NaN 0 \n", "1853 NaN 0 \n", "1872 NaN 6644457 \n", "1938 NaN 6644457 \n", "\n", " PermanentlyRstrNetAssetsGrp \\\n", "4768 NaN \n", "3425 NaN \n", "25435 NaN \n", "1853 NaN \n", "1872 {u'BOYAmt': u'6501148', u'EOYAmt': u'6644457'} \n", "1938 {u'BOYAmt': u'6501148', u'EOYAmt': u'6644457'} \n", "\n", " PermanentlyRestrictedNetAssets net_assets \\\n", "4768 NaN 253883 \n", "3425 NaN 107277 \n", "25435 NaN 218678 \n", "1853 NaN 676095 \n", "1872 NaN 8776750 \n", "1938 NaN 8776750 \n", "\n", " NetAssetsOrFundBalancesEOYAmt NetAssetsOrFundBalancesEOY \\\n", "4768 NaN 253883 \n", "3425 107277 NaN \n", "25435 218678 NaN \n", "1853 676095 NaN \n", "1872 8776750 NaN \n", "1938 8776750 NaN \n", "\n", " TotalNetAssetsFundBalanceGrp \\\n", "4768 NaN \n", "3425 {u'BOYAmt': u'440094', u'EOYAmt': u'107277'} \n", "25435 {u'BOYAmt': u'608360', u'EOYAmt': u'218678'} \n", "1853 {u'BOYAmt': u'426629', u'EOYAmt': u'676095'} \n", "1872 {u'BOYAmt': u'8480604', u'EOYAmt': u'8776750'} \n", "1938 {u'BOYAmt': u'8480604', u'EOYAmt': u'8776750'} \n", "\n", " TotalNetAssetsFundBalances NetAssetsOrFundBalancesEOYAmt \\\n", "4768 {u'BOY': u'610579', u'EOY': u'253883'} NaN \n", "3425 NaN 107277 \n", "25435 NaN 218678 \n", "1853 NaN 676095 \n", "1872 NaN 
8776750 \n", "1938 NaN 8776750 \n", "\n", " NetAssetsOrFundBalancesEOY \n", "4768 253883 \n", "3425 NaN \n", "25435 NaN \n", "1853 NaN \n", "1872 NaN \n", "1938 NaN " ] }, "execution_count": 224, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print len(df_all[df_all['donor_restrictions']>.99])\n", "print len(df_all[df_all['donor_restrictions']>1])\n", "df_all[df_all['donor_restrictions']>1][['FYE', 'donor_restrictions', 'temp_rest_assets', 'TemporarilyRstrNetAssetsGrp', 'TemporarilyRestrictedNetAssets',\n", " 'perm_rest_assets', 'PermanentlyRstrNetAssetsGrp', 'PermanentlyRestrictedNetAssets',\n", " 'net_assets', 'NetAssetsOrFundBalancesEOYAmt', 'NetAssetsOrFundBalancesEOY',\n", " #'UnrestrictedNetAssetsGrp', 'UnrestrictedNetAssets', \n", " 'TotalNetAssetsFundBalanceGrp', 'TotalNetAssetsFundBalances', \n", " #'NetAssetsOrFundBalancesBOYAmt', 'NetAssetsOrFundBalancesBOY', \n", " 'NetAssetsOrFundBalancesEOYAmt', 'NetAssetsOrFundBalancesEOY']][-6:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Fees for Services (proxy for audit)" ] }, { "cell_type": "code", "execution_count": 228, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3723 \n", "\n", "25717 \n", "\n" ] } ], "source": [ "df_all['FeesForServicesAccountingGrp_v2'] = np.where(df_all['FeesForServicesAccountingGrp'].notnull(),\n", " df_all['FeesForServicesAccountingGrp'],\n", " df_all['FeesForServicesAccounting']) \n", "print len(df_all[df_all['FeesForServicesAccountingGrp_v2'].isnull()]), '\\n'\n", "print len(df_all[df_all['FeesForServicesAccountingGrp_v2'].notnull()]), '\\n'" ] }, { "cell_type": "code", "execution_count": 229, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Index: ', 26647, 'Fees:', u'21614') \n", "\n", "# of minutes: 23.574959151 \n", "\n", "\n" ] }, { "data": { "text/plain": [ "25717" ] }, "execution_count": 229, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "# Restart the timer here: start_time would otherwise still hold the value set in an earlier cell,\n", "# so the elapsed minutes printed below would include everything run since then.\n", "start_time = timeit.default_timer()\n", "\n", "for index, row in df_all[df_all['FeesForServicesAccountingGrp_v2'].notnull()][:].iterrows():\n", "    fees_grp = row['FeesForServicesAccountingGrp_v2']\n", "    # The amount is stored under either 'Total' or 'TotalAmt'; 'TotalAmt' wins if both are present\n", "    if 'Total' in fees_grp:\n", "        df_all.loc[index, 'FeesForServicesAccounting_TotalAmt'] = fees_grp['Total']\n", "    if 'TotalAmt' in fees_grp:\n", "        df_all.loc[index, 'FeesForServicesAccounting_TotalAmt'] = fees_grp['TotalAmt']\n", "\n", "    clear_output()\n", "    # .loc is the label-based replacement for the deprecated .ix indexer\n", "    print ('Index: ', index, 'Fees:', df_all.loc[index, 'FeesForServicesAccounting_TotalAmt']), '\\n'\n", "    sys.stdout.flush()\n", "\n", "elapsed = timeit.default_timer() - start_time\n", "print '# of minutes: ', elapsed/60, '\\n', '\\n' \n", "df_all['FeesForServicesAccounting_TotalAmt'].value_counts().sum()" ] }, { "cell_type": "code", "execution_count": 230, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df_all['FeesForServicesAccounting_TotalAmt'] = df_all['FeesForServicesAccounting_TotalAmt'].astype('float')" ] }, { "cell_type": "code", "execution_count": 231, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 24439\n", "0 5001\n", "Name: FeesForServicesAccounting_binary, dtype: int64" ] }, "execution_count": 231, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all['FeesForServicesAccounting_binary'] = np.where(df_all['FeesForServicesAccounting_TotalAmt']>0, 1,0)\n", "df_all['FeesForServicesAccounting_binary'].value_counts()" ] }, { "cell_type": "code", "execution_count": 232, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# of rows where fees less than 0: 4 
\n", "\n", "# of rows where fees equal zero: 1274 \n", "\n", "# of rows where fees greater than zero: 24439 \n", "\n", "# of rows missing values: 3723 \n", "\n" ] } ], "source": [ "print '# of rows where fees less than 0:', len(df_all[df_all['FeesForServicesAccounting_TotalAmt']<0]), '\\n'\n", "print '# of rows where fees equal zero:', len(df_all[df_all['FeesForServicesAccounting_TotalAmt']==0]), '\\n'\n", "print '# of rows where fees greater than zero:', len(df_all[df_all['FeesForServicesAccounting_TotalAmt']>0]), '\\n'\n", "print '# of rows missing values:', len(df_all[df_all['FeesForServicesAccounting_TotalAmt'].isnull()]), '\\n'" ] }, { "cell_type": "code", "execution_count": 233, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# of rows that should have value of zero: 5001\n" ] } ], "source": [ "# Negative, zero and missing fees are all coded 0 in FeesForServicesAccounting_binary.\n", "# Count them from the data rather than hard-coding totals copied from an earlier output,\n", "# and check the result against the binary flag so the two cannot drift apart.\n", "fees_amt = df_all['FeesForServicesAccounting_TotalAmt']\n", "n_should_be_zero = ((fees_amt <= 0) | fees_amt.isnull()).sum()\n", "assert n_should_be_zero == (df_all['FeesForServicesAccounting_binary'] == 0).sum()\n", "print '# of rows that should have value of zero:', n_should_be_zero" ] }, { "cell_type": "code", "execution_count": 236, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FeesForServicesAccounting_TotalAmt
25894-2500.0
24536-67439.0
1466-48284.0
12588-44250.0
\n", "
" ], "text/plain": [ " FeesForServicesAccounting_TotalAmt\n", "25894 -2500.0\n", "24536 -67439.0\n", "1466 -48284.0\n", "12588 -44250.0" ] }, "execution_count": 236, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all[df_all['FeesForServicesAccounting_TotalAmt']<0][:][['FeesForServicesAccounting_TotalAmt']]" ] }, { "cell_type": "code", "execution_count": 237, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 24439\n", "0 5001\n", "Name: FeesForServicesAccounting_binary, dtype: int64" ] }, "execution_count": 237, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all['FeesForServicesAccounting_binary'].value_counts()" ] }, { "cell_type": "code", "execution_count": 254, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds', 'NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome', 'FSAuditedInd', 'FSAudited', 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', 'FederalGrantAuditRequiredInd', 'FederalGrantAuditRequired', 'IndependentAuditFinclStmtInd', 'IndependentAuditFinancialStmt', 'FeesForServicesAccounting_binary']\n" ] } ], "source": [ "cols = cols + ['FeesForServicesAccounting_binary']\n", "print cols" ] }, { "cell_type": "code", "execution_count": 287, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binary
27377840516736FY201410NaN9NaNfalseNaNtrueNaNtrueNaN26065400inffalseNaNNaNNaNtrueNaNNaNNaNfalseNaN{u'#text': u'true', u'@referenceDocumentId': u'00000004', u'@referenceDocumentName': u'IRS990ScheduleD'}NaN0
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "27377 840516736 FY2014 10 \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "27377 NaN 9 \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "27377 NaN false \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "27377 NaN true \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "27377 NaN true NaN \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "27377 260654 0 0 inf \n", "\n", " TaxExemptBondsInd TaxExemptBonds NetUnrelatedBusTxblIncmAmt \\\n", "27377 false NaN NaN \n", "\n", " NetUnrelatedBusinessTxblIncome FSAuditedInd FSAudited \\\n", "27377 NaN true NaN \n", "\n", " FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "27377 NaN NaN \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "27377 false NaN \n", "\n", " IndependentAuditFinclStmtInd \\\n", "27377 {u'#text': u'true', u'@referenceDocumentId': u'00000004', u'@referenceDocumentName': u'IRS990ScheduleD'} \n", "\n", " IndependentAuditFinancialStmt FeesForServicesAccounting_binary \n", "27377 NaN 0 " ] }, "execution_count": 287, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print len(df_all[df_all['donor_restrictions']>100])\n", "df_all[df_all['donor_restrictions']>100][cols]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Family Business Relations" ] }, { "cell_type": "code", "execution_count": 443, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 
'temp_rest_assets', 'net_assets', 'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds', 'NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome', 'FSAuditedInd', 'FSAudited', 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', 'FederalGrantAuditRequiredInd', 'FederalGrantAuditRequired', 'IndependentAuditFinclStmtInd', 'IndependentAuditFinancialStmt', 'FeesForServicesAccounting_binary', 'FamilyOrBusinessRlnInd', 'FamilyOrBusinessRelationship']\n" ] } ], "source": [ "# Append only names not already present so re-running this cell cannot duplicate columns.\n", "cols = cols + [name for name in ['FamilyOrBusinessRlnInd', 'FamilyOrBusinessRelationship'] if name not in cols]\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Tax Exempt Bonds, Take II" ] }, { "cell_type": "code", "execution_count": 444, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds', 'NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome', 'FSAuditedInd', 'FSAudited', 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', 'FederalGrantAuditRequiredInd', 'FederalGrantAuditRequired', 'IndependentAuditFinclStmtInd', 'IndependentAuditFinancialStmt', 'FeesForServicesAccounting_binary', 'FamilyOrBusinessRlnInd', 'FamilyOrBusinessRelationship', 'TaxExemptBondLiabilitiesGrp', 'TaxExemptBondLiabilities']\n" ] } ], "source": [ "# Append only names not already present so re-running this cell cannot duplicate columns.\n", "cols = cols + [name for name in ['TaxExemptBondLiabilitiesGrp', 'TaxExemptBondLiabilities'] if name not in cols]\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Government Grants" ] }, { "cell_type": "code", "execution_count": 445, "metadata": { "collapsed": false }, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds', 'NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome', 'FSAuditedInd', 'FSAudited', 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', 'FederalGrantAuditRequiredInd', 'FederalGrantAuditRequired', 'IndependentAuditFinclStmtInd', 'IndependentAuditFinancialStmt', 'FeesForServicesAccounting_binary', 'FamilyOrBusinessRlnInd', 'FamilyOrBusinessRelationship', 'TaxExemptBondLiabilitiesGrp', 'TaxExemptBondLiabilities', 'GovernmentGrantsAmt', 'GovernmentGrants']\n" ] } ], "source": [ "cols = cols + ['GovernmentGrantsAmt', 'GovernmentGrants']\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Election of Board Members" ] }, { "cell_type": "code", "execution_count": 446, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds', 'NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome', 'FSAuditedInd', 'FSAudited', 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', 'FederalGrantAuditRequiredInd', 'FederalGrantAuditRequired', 'IndependentAuditFinclStmtInd', 
'IndependentAuditFinancialStmt', 'FeesForServicesAccounting_binary', 'FamilyOrBusinessRlnInd', 'FamilyOrBusinessRelationship', 'TaxExemptBondLiabilitiesGrp', 'TaxExemptBondLiabilities', 'GovernmentGrantsAmt', 'GovernmentGrants', 'ElectionOfBoardMembersInd', 'ElectionOfBoardMembers']\n" ] } ], "source": [ "# Append only names not already present so re-running this cell cannot duplicate columns.\n", "cols = cols + [name for name in ['ElectionOfBoardMembersInd', 'ElectionOfBoardMembers'] if name not in cols]\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Zero Fundraising" ] }, { "cell_type": "code", "execution_count": 551, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds', 'NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome', 'FSAuditedInd', 'FSAudited', 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', 'FederalGrantAuditRequiredInd', 'FederalGrantAuditRequired', 'IndependentAuditFinclStmtInd', 'IndependentAuditFinancialStmt', 'FeesForServicesAccounting_binary', 'FamilyOrBusinessRlnInd', 'FamilyOrBusinessRelationship', 'TaxExemptBondLiabilitiesGrp', 'TaxExemptBondLiabilities', 'GovernmentGrantsAmt', 'GovernmentGrants', 'ElectionOfBoardMembersInd', 'ElectionOfBoardMembers', 'CYTotalFundraisingExpenseAmt', 'TotalFundrsngExpCurrentYear']\n" ] } ], "source": [ "# Append only names not already present so re-running this cell cannot duplicate columns.\n", "cols = cols + [name for name in ['CYTotalFundraisingExpenseAmt', 'TotalFundrsngExpCurrentYear'] if name not in cols]\n", "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Save DF" ] }, { "cell_type": "code", "execution_count": 255, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "29440" ] }, 
"execution_count": 255, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_all)" ] }, { "cell_type": "code", "execution_count": 256, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df_all.to_pickle('e-file data for 7,133 of 8,304 2016 EINs (n=24940).pkl')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Collapse Dataset -- take last" ] }, { "cell_type": "code", "execution_count": 447, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binaryFamilyOrBusinessRlnIndFamilyOrBusinessRelationshipTaxExemptBondLiabilitiesGrpTaxExemptBondLiabilitiesGovernmentGrantsAmtGovernmentGrantsElectionOfBoardMembersIndElectionOfBoardMembers
18819010202467FY201422NaN21NaN1NaN1NaN1NaN44339977351259236900970.50{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}NaN0NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}NaN10NaNNaNNaN7932282NaN1NaN
7965010202467FY201321NaN21NaN1NaN1NaN1NaN41932575880227216579000.47{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}NaN0NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}NaN11NaNNaNNaN6258147NaN1NaN
8989010202467FY2012NaN20NaN20NaN1NaN0NaN141536275773597221309190.45NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}NaN0NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}1NaN1NaNNaNNaN7295185NaN1
22904010202467FY2011NaN23NaN23NaN1NaN0NaN130960876554891204866980.47NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}NaN0NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}1NaN1NaNNaNNaN9819406NaN1
19649010202467FY2010NaN24NaN24NaN1NaN1NaN128738167110362179359000.56NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}NaN0NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1039900001'}1NaN1NaNNaNNaN7006788NaN1
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "18819 010202467 FY2014 22 \n", "7965 010202467 FY2013 21 \n", "8989 010202467 FY2012 NaN \n", "22904 010202467 FY2011 NaN \n", "19649 010202467 FY2010 NaN \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "18819 NaN 21 \n", "7965 NaN 21 \n", "8989 20 NaN \n", "22904 23 NaN \n", "19649 24 NaN \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "18819 NaN 1 \n", "7965 NaN 1 \n", "8989 20 NaN \n", "22904 23 NaN \n", "19649 24 NaN \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "18819 NaN 1 \n", "7965 NaN 1 \n", "8989 1 NaN \n", "22904 1 NaN \n", "19649 1 NaN \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "18819 NaN 1 NaN \n", "7965 NaN 1 NaN \n", "8989 0 NaN 1 \n", "22904 0 NaN 1 \n", "19649 1 NaN 1 \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "18819 4433997 7351259 23690097 0.50 \n", "7965 4193257 5880227 21657900 0.47 \n", "8989 4153627 5773597 22130919 0.45 \n", "22904 3096087 6554891 20486698 0.47 \n", "19649 2873816 7110362 17935900 0.56 \n", "\n", " TaxExemptBondsInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "7965 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "8989 NaN \n", "22904 NaN \n", "19649 NaN \n", "\n", " TaxExemptBonds \\\n", "18819 NaN \n", "7965 NaN \n", "8989 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "22904 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "19649 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "\n", " NetUnrelatedBusTxblIncmAmt NetUnrelatedBusinessTxblIncome FSAuditedInd \\\n", "18819 0 NaN 1 \n", "7965 0 NaN 1 \n", "8989 NaN 0 NaN \n", "22904 NaN 0 NaN \n", "19649 NaN 0 NaN \n", "\n", " FSAudited FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "18819 NaN 1 NaN \n", "7965 
NaN 1 NaN \n", "8989 1 NaN 1 \n", "22904 1 NaN 1 \n", "19649 1 NaN 1 \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "18819 1 NaN \n", "7965 1 NaN \n", "8989 NaN 1 \n", "22904 NaN 1 \n", "19649 NaN 1 \n", "\n", " IndependentAuditFinclStmtInd \\\n", "18819 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "7965 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "8989 NaN \n", "22904 NaN \n", "19649 NaN \n", "\n", " IndependentAuditFinancialStmt \\\n", "18819 NaN \n", "7965 NaN \n", "8989 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "22904 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "19649 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1039900001'} \n", "\n", " FeesForServicesAccounting_binary FamilyOrBusinessRlnInd \\\n", "18819 1 0 \n", "7965 1 1 \n", "8989 1 NaN \n", "22904 1 NaN \n", "19649 1 NaN \n", "\n", " FamilyOrBusinessRelationship TaxExemptBondLiabilitiesGrp \\\n", "18819 NaN NaN \n", "7965 NaN NaN \n", "8989 1 NaN \n", "22904 1 NaN \n", "19649 1 NaN \n", "\n", " TaxExemptBondLiabilities GovernmentGrantsAmt GovernmentGrants \\\n", "18819 NaN 7932282 NaN \n", "7965 NaN 6258147 NaN \n", "8989 NaN NaN 7295185 \n", "22904 NaN NaN 9819406 \n", "19649 NaN NaN 7006788 \n", "\n", " ElectionOfBoardMembersInd ElectionOfBoardMembers \n", "18819 1 NaN \n", "7965 1 NaN \n", "8989 NaN 1 \n", "22904 NaN 1 \n", "19649 NaN 1 " ] }, "execution_count": 447, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all = df_all.sort_values(by=['EIN', 'FYE'], ascending=[1,0])\n", "df_all[:5][cols]" ] }, { "cell_type": "code", "execution_count": 259, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binary
23577990350803FY2012NaN11NaN11NaN0NaN1NaN110000046739624416740.232380NaN0NaN0NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}1
16294990350803FY2011NaN11NaN11NaN0NaN1NaN1017188019533720.087991NaN0NaN0NaN1NaN1NaN1NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}1
26647990350803FY2010NaN7NaN7NaN0NaN1NaNNaN09695617201520.056365NaN0NaN0NaN0NaN0NaN1NaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}1
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "23577 990350803 FY2012 NaN \n", "16294 990350803 FY2011 NaN \n", "26647 990350803 FY2010 NaN \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "23577 11 NaN \n", "16294 11 NaN \n", "26647 7 NaN \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "23577 11 NaN \n", "16294 11 NaN \n", "26647 7 NaN \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "23577 0 NaN \n", "16294 0 NaN \n", "26647 0 NaN \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "23577 1 NaN 1 \n", "16294 1 NaN 1 \n", "26647 1 NaN NaN \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "23577 100000 467396 2441674 0.232380 \n", "16294 0 171880 1953372 0.087991 \n", "26647 0 96956 1720152 0.056365 \n", "\n", " TaxExemptBondsInd TaxExemptBonds NetUnrelatedBusTxblIncmAmt \\\n", "23577 NaN 0 NaN \n", "16294 NaN 0 NaN \n", "26647 NaN 0 NaN \n", "\n", " NetUnrelatedBusinessTxblIncome FSAuditedInd FSAudited \\\n", "23577 0 NaN 1 \n", "16294 0 NaN 1 \n", "26647 0 NaN 0 \n", "\n", " FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "23577 NaN 1 \n", "16294 NaN 1 \n", "26647 NaN 0 \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "23577 NaN 1 \n", "16294 NaN 1 \n", "26647 NaN 1 \n", "\n", " IndependentAuditFinclStmtInd \\\n", "23577 NaN \n", "16294 NaN \n", "26647 NaN \n", "\n", " IndependentAuditFinancialStmt \\\n", "23577 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "16294 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "26647 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " FeesForServicesAccounting_binary \n", "23577 1 \n", "16294 1 \n", "26647 1 " ] }, "execution_count": 259, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_all[-3:][cols]" ] }, { "cell_type": "code", 
"execution_count": 552, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7133\n", "7133\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binaryFamilyOrBusinessRlnIndFamilyOrBusinessRelationshipTaxExemptBondLiabilitiesGrpTaxExemptBondLiabilitiesGovernmentGrantsAmtGovernmentGrantsElectionOfBoardMembersIndElectionOfBoardMembersCYTotalFundraisingExpenseAmtTotalFundrsngExpCurrentYear
0010202467FY20142220212011101144339977351259236900970.50{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}00111111{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}101NaNNaN7932282729518511554608358835
1010211478FY201518181818falsefalsetruetruetruetrue271700090855648046120.75falsefalseNaNNaNtruetrueNaNNaNfalsefalse{u'#text': u'true', u'@referenceDocumentId': u'RetDoc3'}{u'#text': u'true', u'@referenceDocumentId': u'RetDoc3'}0falsefalseNaNNaN42442129755falsefalse415743363139
2010211513FY201423222120falsefalsetruetruetruetrue15329657385703184929859530.11{u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleK'}{u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleK'}-57641-12313truetruetruetruetruetrue{u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleD'}{u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleD'}1falsetrue{u'BOYAmt': u'106014868', u'EOYAmt': u'104647760'}{u'BOY': u'77905017', u'EOY': u'109371484'}7732373657810755falsefalse34745382402953
3010211530FY2014282928290000114778571161177127636020.13000011NaNNaN00{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}000NaNNaN23718331838311181290255685
4010211543FY2014353435340011117793274259694167026010.30000011NaNNaN00{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}100NaNNaN9466311354800150504103085
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "0 010202467 FY2014 22 \n", "1 010211478 FY2015 18 \n", "2 010211513 FY2014 23 \n", "3 010211530 FY2014 28 \n", "4 010211543 FY2014 35 \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "0 20 21 \n", "1 18 18 \n", "2 22 21 \n", "3 29 28 \n", "4 34 35 \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "0 20 1 \n", "1 18 false \n", "2 20 false \n", "3 29 0 \n", "4 34 0 \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "0 1 1 \n", "1 false true \n", "2 false true \n", "3 0 0 \n", "4 0 1 \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "0 0 1 1 \n", "1 true true true \n", "2 true true true \n", "3 0 1 1 \n", "4 1 1 1 \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "0 4433997 7351259 23690097 0.50 \n", "1 2717000 908556 4804612 0.75 \n", "2 15329657 38570318 492985953 0.11 \n", "3 477857 1161177 12763602 0.13 \n", "4 779327 4259694 16702601 0.30 \n", "\n", " TaxExemptBondsInd \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "1 false \n", "2 {u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleK'} \n", "3 0 \n", "4 0 \n", "\n", " TaxExemptBonds \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "1 false \n", "2 {u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleK'} \n", "3 0 \n", "4 0 \n", "\n", " NetUnrelatedBusTxblIncmAmt NetUnrelatedBusinessTxblIncome FSAuditedInd \\\n", "0 0 0 1 \n", "1 NaN NaN true \n", "2 -57641 -12313 true \n", "3 0 0 1 \n", "4 0 0 1 \n", "\n", " FSAudited FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "0 1 1 1 \n", "1 true NaN NaN \n", "2 true true true \n", "3 1 NaN NaN \n", "4 1 NaN NaN \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "0 1 1 \n", "1 false false \n", "2 true true \n", "3 0 0 \n", "4 0 0 \n", "\n", " 
IndependentAuditFinclStmtInd \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "1 {u'#text': u'true', u'@referenceDocumentId': u'RetDoc3'} \n", "2 {u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleD'} \n", "3 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "4 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " IndependentAuditFinancialStmt \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "1 {u'#text': u'true', u'@referenceDocumentId': u'RetDoc3'} \n", "2 {u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleD'} \n", "3 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "4 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " FeesForServicesAccounting_binary FamilyOrBusinessRlnInd \\\n", "0 1 0 \n", "1 0 false \n", "2 1 false \n", "3 0 0 \n", "4 1 0 \n", "\n", " FamilyOrBusinessRelationship \\\n", "0 1 \n", "1 false \n", "2 true \n", "3 0 \n", "4 0 \n", "\n", " TaxExemptBondLiabilitiesGrp \\\n", "0 NaN \n", "1 NaN \n", "2 {u'BOYAmt': u'106014868', u'EOYAmt': u'104647760'} \n", "3 NaN \n", "4 NaN \n", "\n", " TaxExemptBondLiabilities GovernmentGrantsAmt \\\n", "0 NaN 7932282 \n", "1 NaN 42442 \n", "2 {u'BOY': u'77905017', u'EOY': u'109371484'} 77323736 \n", "3 NaN 237183 \n", "4 NaN 94663 \n", "\n", " GovernmentGrants ElectionOfBoardMembersInd ElectionOfBoardMembers \\\n", "0 7295185 1 1 \n", "1 129755 false false \n", "2 57810755 false false \n", "3 318383 1 1 \n", "4 113548 0 0 \n", "\n", " CYTotalFundraisingExpenseAmt TotalFundrsngExpCurrentYear \n", "0 554608 358835 \n", "1 415743 363139 \n", "2 3474538 2402953 \n", "3 181290 255685 \n", "4 150504 103085 " ] }, "execution_count": 552, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Collapse df_all to one row per EIN; the groupby is computed once and reused.\n", "# NOTE(review): GroupBy 'first' takes the first non-null value of each column, not the first row,\n", "# so a collapsed row can combine values from different FYEs of the same EIN (this cell's output\n", "# pairs an FY2014 row with counts that only appear on that EIN's older filings). If exactly one\n", "# filing per EIN is intended, confirm and consider drop_duplicates('EIN') on the sorted frame.\n", "print df_all['EIN'].nunique()\n", "governance = df_all[cols].groupby('EIN').agg('first')\n", "print len(governance)\n", "governance = 
governance.reset_index()\n", "governance[:5]" ] }, { "cell_type": "code", "execution_count": 449, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "FY2014 5225\n", "FY2015 1578\n", "FY2013 197\n", "FY2012 55\n", "FY2011 41\n", "FY2010 37\n", "Name: FYE, dtype: int64" ] }, "execution_count": 449, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['FYE'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create combined variables" ] }, { "cell_type": "code", "execution_count": 262, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', 'VotingMembersGoverningBodyCnt', 'NbrVotingMembersGoverningBody', 'VotingMembersIndependentCnt', 'NbrIndependentVotingMembers', 'DelegationOfMgmtDutiesInd', 'DelegationOfManagementDuties', 'Form990ProvidedToGvrnBodyInd', 'Form990ProvidedToGoverningBody', 'AuditCommitteeInd', 'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', 'TaxExemptBondsInd', 'TaxExemptBonds', 'NetUnrelatedBusTxblIncmAmt', 'NetUnrelatedBusinessTxblIncome', 'FSAuditedInd', 'FSAudited', 'FederalGrantAuditPerformedInd', 'FederalGrantAuditPerformed', 'FederalGrantAuditRequiredInd', 'FederalGrantAuditRequired', 'IndependentAuditFinclStmtInd', 'IndependentAuditFinancialStmt', 'FeesForServicesAccounting_binary']\n" ] } ], "source": [ "print cols" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Independent Directors" ] }, { "cell_type": "code", "execution_count": 450, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n" ] }, { "data": { "text/plain": [ "count 7133.00\n", "mean 21.45\n", "std 34.58\n", "min 0.00\n", "25% 11.00\n", "50% 17.00\n", "75% 26.00\n", "max 2500.00\n", "Name: independent_directors_num, dtype: float64" ] }, "execution_count": 450, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "# Use VotingMembersIndependentCnt, falling back to NbrIndependentVotingMembers where it is missing.\n", "governance['independent_directors_num'] = governance['VotingMembersIndependentCnt'].fillna(\n", "    governance['NbrIndependentVotingMembers'])\n", "# Rows still missing a value after the fallback (printed as a sanity check before the int cast).\n", "print governance['independent_directors_num'].isnull().sum()\n", "governance['independent_directors_num'] = governance['independent_directors_num'].astype('int')\n", "governance['independent_directors_num'].describe()" ] }, { "cell_type": "code", "execution_count": 451, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binaryFamilyOrBusinessRlnIndFamilyOrBusinessRelationshipTaxExemptBondLiabilitiesGrpTaxExemptBondLiabilitiesGovernmentGrantsAmtGovernmentGrantsElectionOfBoardMembersIndElectionOfBoardMembersindependent_directors_num
35010679337FY2013NaN11NaN10NaN0NaN1NaN111197249558403894511372560590.61NaN0NaN30871NaN1NaNNaNNaN0NaN{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1039900001'}1NaN1NaNNaNNaN237833NaN010
65026015642FY2012NaN24NaN24NaN0NaN1NaN1024364810473040.23NaN0NaN-34607NaN1NaNNaNNaN0NaN{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}0NaN1NaNNaNNaNNaNNaN024
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "35 010679337 FY2013 NaN \n", "65 026015642 FY2012 NaN \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "35 11 NaN \n", "65 24 NaN \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "35 10 NaN \n", "65 24 NaN \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "35 0 NaN \n", "65 0 NaN \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "35 1 NaN 1 \n", "65 1 NaN 1 \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "35 111972495 584038945 1137256059 0.61 \n", "65 0 243648 1047304 0.23 \n", "\n", " TaxExemptBondsInd TaxExemptBonds NetUnrelatedBusTxblIncmAmt \\\n", "35 NaN 0 NaN \n", "65 NaN 0 NaN \n", "\n", " NetUnrelatedBusinessTxblIncome FSAuditedInd FSAudited \\\n", "35 30871 NaN 1 \n", "65 -34607 NaN 1 \n", "\n", " FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "35 NaN NaN \n", "65 NaN NaN \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "35 NaN 0 \n", "65 NaN 0 \n", "\n", " IndependentAuditFinclStmtInd \\\n", "35 NaN \n", "65 NaN \n", "\n", " IndependentAuditFinancialStmt \\\n", "35 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1039900001'} \n", "65 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " FeesForServicesAccounting_binary FamilyOrBusinessRlnInd \\\n", "35 1 NaN \n", "65 0 NaN \n", "\n", " FamilyOrBusinessRelationship TaxExemptBondLiabilitiesGrp \\\n", "35 1 NaN \n", "65 1 NaN \n", "\n", " TaxExemptBondLiabilities GovernmentGrantsAmt GovernmentGrants \\\n", "35 NaN NaN 237833 \n", "65 NaN NaN NaN \n", "\n", " ElectionOfBoardMembersInd ElectionOfBoardMembers independent_directors_num \n", "35 NaN 0 10 \n", "65 NaN 0 24 " ] }, "execution_count": 451, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"governance[governance['VotingMembersIndependentCnt'].isnull()].head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Voting Directors" ] }, { "cell_type": "code", "execution_count": 452, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n" ] } ], "source": [ "# Use VotingMembersGoverningBodyCnt, falling back to NbrVotingMembersGoverningBody where it is missing.\n", "governance['voting_directors'] = governance['VotingMembersGoverningBodyCnt'].fillna(\n", "    governance['NbrVotingMembersGoverningBody'])\n", "# Rows still missing a value after the fallback (printed as a sanity check before the int cast).\n", "print governance['voting_directors'].isnull().sum()\n", "governance['voting_directors'] = governance['voting_directors'].astype('int')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Percent Independent Directors" ] }, { "cell_type": "code", "execution_count": 457, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 7129.00\n", "mean 0.97\n", "std 1.09\n", "min 0.00\n", "25% 0.96\n", "50% 1.00\n", "75% 1.00\n", "max 92.59\n", "Name: independent_directors_pct, dtype: float64" ] }, "execution_count": 457, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Independent directors as a fraction of voting directors (floating-point division).\n", "governance['independent_directors_pct'] = governance['independent_directors_num'].div(\n", "    governance['voting_directors'])\n", "governance['independent_directors_pct'].describe()" ] }, { "cell_type": "code", "execution_count": 456, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binaryFamilyOrBusinessRlnIndFamilyOrBusinessRelationshipTaxExemptBondLiabilitiesGrpTaxExemptBondLiabilitiesGovernmentGrantsAmtGovernmentGrantsElectionOfBoardMembersIndElectionOfBoardMembersindependent_directors_numvoting_directorsindependent_directors_pct
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [EIN, FYE, VotingMembersGoverningBodyCnt, NbrVotingMembersGoverningBody, VotingMembersIndependentCnt, NbrIndependentVotingMembers, DelegationOfMgmtDutiesInd, DelegationOfManagementDuties, Form990ProvidedToGvrnBodyInd, Form990ProvidedToGoverningBody, AuditCommitteeInd, AuditCommittee, perm_rest_assets, temp_rest_assets, net_assets, donor_restrictions, TaxExemptBondsInd, TaxExemptBonds, NetUnrelatedBusTxblIncmAmt, NetUnrelatedBusinessTxblIncome, FSAuditedInd, FSAudited, FederalGrantAuditPerformedInd, FederalGrantAuditPerformed, FederalGrantAuditRequiredInd, FederalGrantAuditRequired, IndependentAuditFinclStmtInd, IndependentAuditFinancialStmt, FeesForServicesAccounting_binary, FamilyOrBusinessRlnInd, FamilyOrBusinessRelationship, TaxExemptBondLiabilitiesGrp, TaxExemptBondLiabilities, GovernmentGrantsAmt, GovernmentGrants, ElectionOfBoardMembersInd, ElectionOfBoardMembers, independent_directors_num, voting_directors, independent_directors_pct]\n", "Index: []" ] }, "execution_count": 456, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows where the ratio could not be computed.\n", "governance.loc[governance['independent_directors_pct'].isnull()]" ] }, { "cell_type": "code", "execution_count": 458, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 7133.00\n", "mean 0.97\n", "std 1.09\n", "min 0.00\n", "25% 0.96\n", "50% 1.00\n", "75% 1.00\n", "max 92.59\n", "Name: independent_directors_pct, dtype: float64" ] }, "execution_count": 458, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Treat an undefined ratio as 0.\n", "governance['independent_directors_pct'] = governance['independent_directors_pct'].fillna(0)\n", "governance['independent_directors_pct'].describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
Second version -- none over 100%" ] }, { "cell_type": "code", "execution_count": 461, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "11\n", "0\n", "count 7133.00\n", "mean 0.95\n", "std 0.13\n", "min 0.00\n", "25% 0.96\n", "50% 1.00\n", "75% 1.00\n", "max 1.00\n", "Name: independent_directors_pct_v2, dtype: float64\n" ] } ], "source": [ "# Number of ratios above 100% before capping.\n", "print (governance['independent_directors_pct'] > 1).sum()\n", "# Cap the ratio at 1 so that no value exceeds 100%.\n", "governance['independent_directors_pct_v2'] = governance['independent_directors_pct'].clip(upper=1)\n", "print (governance['independent_directors_pct_v2'] > 1).sum()\n", "print governance['independent_directors_pct_v2'].describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Outsourced Management" ] }, { "cell_type": "code", "execution_count": 463, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "0 4365\n", "false 2636\n", "1 92\n", "true 40\n", "Name: outsourced_mgt, dtype: int64\n" ] } ], "source": [ "# Use DelegationOfMgmtDutiesInd, falling back to DelegationOfManagementDuties where it is missing.\n", "governance['outsourced_mgt'] = governance['DelegationOfMgmtDutiesInd'].fillna(\n", "    governance['DelegationOfManagementDuties'])\n", "print governance['outsourced_mgt'].isnull().sum()\n", "print governance['outsourced_mgt'].value_counts()" ] }, { "cell_type": "code", "execution_count": 464, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 4365\n", "0 2636\n", "1 92\n", "1 40\n", "Name: outsourced_mgt, dtype: int64\n" ] } ], "source": [ "# Recode the string flags 'true'/'false' as 1/0; the '1'/'0' strings are converted by the int cast in the next cell.\n", "governance.loc[governance['outsourced_mgt'] == 'true', 'outsourced_mgt'] = 1\n", "governance.loc[governance['outsourced_mgt'] == 'false', 'outsourced_mgt'] = 0\n", "print governance['outsourced_mgt'].value_counts()" ] },
{ "cell_type": "code", "execution_count": 465, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 7001\n", "1 132\n", "Name: outsourced_mgt, dtype: int64\n" ] } ], "source": [ "governance['outsourced_mgt'] = governance['outsourced_mgt'].astype('int')\n", "print governance['outsourced_mgt'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### IRS 990 Review" ] }, { "cell_type": "code", "execution_count": 466, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "1 4088\n", "true 2352\n", "0 369\n", "false 324\n", "Name: 990_review, dtype: int64\n" ] } ], "source": [ "# Use Form990ProvidedToGvrnBodyInd, falling back to Form990ProvidedToGoverningBody where it is missing.\n", "governance['990_review'] = governance['Form990ProvidedToGvrnBodyInd'].fillna(\n", "    governance['Form990ProvidedToGoverningBody'])\n", "print governance['990_review'].isnull().sum()\n", "print governance['990_review'].value_counts()" ] }, { "cell_type": "code", "execution_count": 467, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 6440\n", "0 693\n", "Name: 990_review, dtype: int64\n" ] } ], "source": [ "# Recode the string flags 'true'/'false' as 1/0, then cast the whole column to int.\n", "governance.loc[governance['990_review'] == 'true', '990_review'] = 1\n", "governance.loc[governance['990_review'] == 'false', '990_review'] = 0\n", "governance['990_review'] = governance['990_review'].astype('int')\n", "print governance['990_review'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Audit Committee\n", "I'M ASSIGNING A VALUE OF '0' TO THE 138 CASES MISSING VALUES" ] },
{ "cell_type": "code", "execution_count": 468, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "138\n", "1 4205\n", "true 2454\n", "0 189\n", "false 147\n", "Name: audit_committee, dtype: int64\n" ] } ], "source": [ "# Use AuditCommitteeInd, falling back to AuditCommittee where it is missing.\n", "governance['audit_committee'] = governance['AuditCommitteeInd'].fillna(\n", "    governance['AuditCommittee'])\n", "print governance['audit_committee'].isnull().sum()\n", "print governance['audit_committee'].value_counts()" ] }, { "cell_type": "code", "execution_count": 469, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 6659\n", "0 474\n", "Name: audit_committee, dtype: int64\n" ] } ], "source": [ "# Recode the string flags 'true'/'false' as 1/0.\n", "governance.loc[governance['audit_committee'] == 'true', 'audit_committee'] = 1\n", "governance.loc[governance['audit_committee'] == 'false', 'audit_committee'] = 0\n", "# Cases missing in both source columns are coded 0 (see the note in the heading above).\n", "governance['audit_committee'] = governance['audit_committee'].fillna(0)\n", "governance['audit_committee'] = governance['audit_committee'].astype('int')\n", "print governance['audit_committee'].value_counts()" ] }, { "cell_type": "code", "execution_count": 470, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
perm_rest_assets7133.005398758.0834888704.490.000.000.001033359.001097020543.00
temp_rest_assets7133.007347199.3940974295.28-1417569.00119999.00660803.002879306.001473750677.00
net_assets7133.0031409548.61175347462.23-139954787.001766750.005157328.0015828642.007982613662.00
donor_restrictions7133.00infnan-156.440.060.240.55inf
FeesForServicesAccounting_binary7133.000.830.380.001.001.001.001.00
independent_directors_num7133.0021.4534.580.0011.0017.0026.002500.00
voting_directors7133.0021.7018.270.0012.0018.0026.00434.00
independent_directors_pct7133.000.971.090.000.961.001.0092.59
independent_directors_pct_v27133.000.950.130.000.961.001.001.00
outsourced_mgt7133.000.020.130.000.000.000.001.00
990_review7133.000.900.300.001.001.001.001.00
audit_committee7133.000.930.250.001.001.001.001.00
\n", "
" ], "text/plain": [ " count mean std \\\n", "perm_rest_assets 7133.00 5398758.08 34888704.49 \n", "temp_rest_assets 7133.00 7347199.39 40974295.28 \n", "net_assets 7133.00 31409548.61 175347462.23 \n", "donor_restrictions 7133.00 inf nan \n", "FeesForServicesAccounting_binary 7133.00 0.83 0.38 \n", "independent_directors_num 7133.00 21.45 34.58 \n", "voting_directors 7133.00 21.70 18.27 \n", "independent_directors_pct 7133.00 0.97 1.09 \n", "independent_directors_pct_v2 7133.00 0.95 0.13 \n", "outsourced_mgt 7133.00 0.02 0.13 \n", "990_review 7133.00 0.90 0.30 \n", "audit_committee 7133.00 0.93 0.25 \n", "\n", " min 25% 50% \\\n", "perm_rest_assets 0.00 0.00 0.00 \n", "temp_rest_assets -1417569.00 119999.00 660803.00 \n", "net_assets -139954787.00 1766750.00 5157328.00 \n", "donor_restrictions -156.44 0.06 0.24 \n", "FeesForServicesAccounting_binary 0.00 1.00 1.00 \n", "independent_directors_num 0.00 11.00 17.00 \n", "voting_directors 0.00 12.00 18.00 \n", "independent_directors_pct 0.00 0.96 1.00 \n", "independent_directors_pct_v2 0.00 0.96 1.00 \n", "outsourced_mgt 0.00 0.00 0.00 \n", "990_review 0.00 1.00 1.00 \n", "audit_committee 0.00 1.00 1.00 \n", "\n", " 75% max \n", "perm_rest_assets 1033359.00 1097020543.00 \n", "temp_rest_assets 2879306.00 1473750677.00 \n", "net_assets 15828642.00 7982613662.00 \n", "donor_restrictions 0.55 inf \n", "FeesForServicesAccounting_binary 1.00 1.00 \n", "independent_directors_num 26.00 2500.00 \n", "voting_directors 26.00 434.00 \n", "independent_directors_pct 1.00 92.59 \n", "independent_directors_pct_v2 1.00 1.00 \n", "outsourced_mgt 0.00 1.00 \n", "990_review 1.00 1.00 \n", "audit_committee 1.00 1.00 " ] }, "execution_count": 470, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance.describe().T" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Donor Restrictions\n", "- NOTE THAT I'M NOT DOING IT THIS WAY ANY MORE -- NOW IT IS DONE ABOVE IN *df_all* BEFORE COLLAPSING THE DATA\n", "- 
ALSO NOTE THAT THE 990 DATA MUST HAVE SOME ERRORS BECAUSE THERE ARE A FEW CASES WITH VERY HIGH VALUES ON *DONOR_RESTRICTIONS*" ] }, { "cell_type": "code", "execution_count": 471, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-156.442553191\n" ] }, { "data": { "text/plain": [ "inf" ] }, "execution_count": 471, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Range check on donor_restrictions (read-only): the saved output of this cell shows the minimum\n", "# on stdout and the maximum as the cell result, so the statements are kept live to match it.\n", "print governance['donor_restrictions'].min()\n", "governance['donor_restrictions'].max()" ] }, { "cell_type": "code", "execution_count": 473, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binaryFamilyOrBusinessRlnIndFamilyOrBusinessRelationshipTaxExemptBondLiabilitiesGrpTaxExemptBondLiabilitiesGovernmentGrantsAmtGovernmentGrantsElectionOfBoardMembersIndElectionOfBoardMembersindependent_directors_numvoting_directorsindependent_directors_pctindependent_directors_pct_v2outsourced_mgt990_reviewaudit_committee
5939840516736FY2014101099falsefalsetruetruetruetrue26065400inffalsefalseNaNNaNtruetrueNaNNaNfalsefalse{u'#text': u'true', u'@referenceDocumentId': u'00000004', u'@referenceDocumentName': u'IRS990ScheduleD'}{u'#text': u'true', u'@referenceDocumentId': u'00000004', u'@referenceDocumentName': u'IRS990ScheduleD'}0truefalseNaNNaNNaN5644falsefalse9100.900.90011
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "5939 840516736 FY2014 10 \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "5939 10 9 \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "5939 9 false \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "5939 false true \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "5939 true true true \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "5939 260654 0 0 inf \n", "\n", " TaxExemptBondsInd TaxExemptBonds NetUnrelatedBusTxblIncmAmt \\\n", "5939 false false NaN \n", "\n", " NetUnrelatedBusinessTxblIncome FSAuditedInd FSAudited \\\n", "5939 NaN true true \n", "\n", " FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "5939 NaN NaN \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "5939 false false \n", "\n", " IndependentAuditFinclStmtInd \\\n", "5939 {u'#text': u'true', u'@referenceDocumentId': u'00000004', u'@referenceDocumentName': u'IRS990ScheduleD'} \n", "\n", " IndependentAuditFinancialStmt \\\n", "5939 {u'#text': u'true', u'@referenceDocumentId': u'00000004', u'@referenceDocumentName': u'IRS990ScheduleD'} \n", "\n", " FeesForServicesAccounting_binary FamilyOrBusinessRlnInd \\\n", "5939 0 true \n", "\n", " FamilyOrBusinessRelationship TaxExemptBondLiabilitiesGrp \\\n", "5939 false NaN \n", "\n", " TaxExemptBondLiabilities GovernmentGrantsAmt GovernmentGrants \\\n", "5939 NaN NaN 5644 \n", "\n", " ElectionOfBoardMembersInd ElectionOfBoardMembers \\\n", "5939 false false \n", "\n", " independent_directors_num voting_directors independent_directors_pct \\\n", "5939 9 10 0.90 \n", "\n", " independent_directors_pct_v2 outsourced_mgt 990_review \\\n", "5939 0.90 0 1 \n", "\n", " audit_committee \n", "5939 1 " ] }, "execution_count": 473, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print 
(governance['donor_restrictions'] > 100).sum()\n", "governance.loc[governance['donor_restrictions'] > 100]" ] }, { "cell_type": "code", "execution_count": 472, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6\n" ] } ], "source": [ "# Number of rows with donor_restrictions above 10.\n", "print (governance['donor_restrictions'] > 10).sum()" ] }, { "cell_type": "code", "execution_count": 290, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binaryindependent_directors_numvoting_directorsindependent_directors_pctoutsourced_mgt990_reviewaudit_committee
435061343149FY20153024282200111103842047880.376569000011NaNNaN00{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}128300.933333011
3907521591381FY2014131412130011110189310613784813.73328600-100988011NaNNaN00{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}112130.923077011
4438570761297FY201524232123falsefalsefalsetruetruetrue2248091224362124116.347865falsefalse00truefalseNaNNaNfalsefalse{u'#text': u'true', u'@referenceDocumentId': u'RetDoc4'}false121240.875000001
5358730568096FY20141111111100111152182110821856320025.379842000011NaNfalse00{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}111111.000000011
7045954291515FY201469470true1true1false0584470850314011.6164650false001true1true1true{u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleD'}0460.666667011
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "435 061343149 FY2015 30 \n", "3907 521591381 FY2014 13 \n", "4438 570761297 FY2015 24 \n", "5358 730568096 FY2014 11 \n", "7045 954291515 FY2014 6 \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "435 24 28 \n", "3907 14 12 \n", "4438 23 21 \n", "5358 11 11 \n", "7045 9 4 \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "435 22 0 \n", "3907 13 0 \n", "4438 23 false \n", "5358 11 0 \n", "7045 7 0 \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "435 0 1 \n", "3907 0 1 \n", "4438 false false \n", "5358 0 1 \n", "7045 true 1 \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "435 1 1 1 \n", "3907 1 1 1 \n", "4438 true true true \n", "5358 1 1 1 \n", "7045 true 1 false \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "435 0 38420 478 80.376569 \n", "3907 0 1893106 137848 13.733286 \n", "4438 224809 122436 21241 16.347865 \n", "5358 521821 1082185 63200 25.379842 \n", "7045 0 5844708 503140 11.616465 \n", "\n", " TaxExemptBondsInd TaxExemptBonds NetUnrelatedBusTxblIncmAmt \\\n", "435 0 0 0 \n", "3907 0 0 -1009 \n", "4438 false false 0 \n", "5358 0 0 0 \n", "7045 0 false 0 \n", "\n", " NetUnrelatedBusinessTxblIncome FSAuditedInd FSAudited \\\n", "435 0 1 1 \n", "3907 880 1 1 \n", "4438 0 true false \n", "5358 0 1 1 \n", "7045 0 1 true \n", "\n", " FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "435 NaN NaN \n", "3907 NaN NaN \n", "4438 NaN NaN \n", "5358 NaN false \n", "7045 1 true \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "435 0 0 \n", "3907 0 0 \n", "4438 false false \n", "5358 0 0 \n", "7045 1 true \n", "\n", " IndependentAuditFinclStmtInd \\\n", "435 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "3907 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "4438 
{u'#text': u'true', u'@referenceDocumentId': u'RetDoc4'} \n", "5358 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "7045 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " IndependentAuditFinancialStmt \\\n", "435 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "3907 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "4438 false \n", "5358 {u'#text': u'0', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "7045 {u'#text': u'true', u'@referenceDocumentId': u'IRS990ScheduleD'} \n", "\n", " FeesForServicesAccounting_binary independent_directors_num \\\n", "435 1 28 \n", "3907 1 12 \n", "4438 1 21 \n", "5358 1 11 \n", "7045 0 4 \n", "\n", " voting_directors independent_directors_pct outsourced_mgt 990_review \\\n", "435 30 0.933333 0 1 \n", "3907 13 0.923077 0 1 \n", "4438 24 0.875000 0 0 \n", "5358 11 1.000000 0 1 \n", "7045 6 0.666667 0 1 \n", "\n", " audit_committee \n", "435 1 \n", "3907 1 \n", "4438 1 \n", "5358 1 \n", "7045 1 " ] }, "execution_count": 290, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance.loc[governance['donor_restrictions'] > 10]" ] }, { "cell_type": "code", "execution_count": 474, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 7133.00\n", "mean 0.32\n", "std 2.27\n", "min -156.44\n", "25% 0.06\n", "50% 0.24\n", "75% 0.55\n", "max 80.38\n", "Name: donor_restrictions, dtype: float64" ] }, "execution_count": 474, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Reset values above 100 to 0; the preview cell above shows a single such row, whose value is inf.\n", "governance.loc[governance['donor_restrictions'] > 100, 'donor_restrictions'] = 0\n", "governance['donor_restrictions'].describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions (binary) - as in Harris et al." ] }, { "cell_type": "code", "execution_count": 476, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binaryFamilyOrBusinessRlnIndFamilyOrBusinessRelationshipTaxExemptBondLiabilitiesGrpTaxExemptBondLiabilitiesGovernmentGrantsAmtGovernmentGrantsElectionOfBoardMembersIndElectionOfBoardMembersindependent_directors_numvoting_directorsindependent_directors_pctindependent_directors_pct_v2outsourced_mgt990_reviewaudit_committee
0010202467FY20142220212011101144339977351259236900970.50{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}00111111{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}101NaNNaN793228272951851121220.950.95111
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "0 010202467 FY2014 22 \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "0 20 21 \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "0 20 1 \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "0 1 1 \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "0 0 1 1 \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "0 4433997 7351259 23690097 0.50 \n", "\n", " TaxExemptBondsInd \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "\n", " TaxExemptBonds \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "\n", " NetUnrelatedBusTxblIncmAmt NetUnrelatedBusinessTxblIncome FSAuditedInd \\\n", "0 0 0 1 \n", "\n", " FSAudited FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "0 1 1 1 \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "0 1 1 \n", "\n", " IndependentAuditFinclStmtInd \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " IndependentAuditFinancialStmt \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " FeesForServicesAccounting_binary FamilyOrBusinessRlnInd \\\n", "0 1 0 \n", "\n", " FamilyOrBusinessRelationship TaxExemptBondLiabilitiesGrp \\\n", "0 1 NaN \n", "\n", " TaxExemptBondLiabilities GovernmentGrantsAmt GovernmentGrants \\\n", "0 NaN 7932282 7295185 \n", "\n", " ElectionOfBoardMembersInd ElectionOfBoardMembers independent_directors_num \\\n", "0 1 1 21 \n", "\n", " voting_directors independent_directors_pct independent_directors_pct_v2 \\\n", "0 22 0.95 0.95 \n", "\n", " outsourced_mgt 990_review audit_committee \n", "0 1 1 1 " ] }, "execution_count": 476, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance[:1]" ] }, { "cell_type": "code", "execution_count": 479, 
"metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 6414\n", "0 719\n", "Name: restricted_donations, dtype: int64" ] }, "execution_count": 479, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['restricted_donations'] = np.where(((governance['perm_rest_assets']>1)|\n", " (governance['temp_rest_assets']>1)), 1,0)\n", "governance['restricted_donations'].value_counts()" ] }, { "cell_type": "code", "execution_count": 491, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
perm_rest_assetstemp_rest_assetsrestricted_donations
202360830032406121
21081611091
2215303219811801
23000
2411057335142124321
\n", "
" ], "text/plain": [ " perm_rest_assets temp_rest_assets restricted_donations\n", "20 23608300 3240612 1\n", "21 0 8161109 1\n", "22 153032 1981180 1\n", "23 0 0 0\n", "24 11057335 14212432 1" ] }, "execution_count": 491, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance[['perm_rest_assets', 'temp_rest_assets', 'restricted_donations']][20:25]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions -- Part (a) - Temporarily Restricted Assets" ] }, { "cell_type": "code", "execution_count": 123, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "842\n" ] } ], "source": [ "'''\n", "print len(governance[governance['TemporarilyRstrNetAssetsGrp'].isnull()])\n", "#, 'TemporarilyRestrictedNetAssets', \n", "#'PermanentlyRstrNetAssetsGrp', 'PermanentlyRestrictedNetAssets', \n", "#'NetAssetsOrFundBalancesEOYAmt', 'NetAssetsOrFundBalancesEOY',\n", "'''" ] }, { "cell_type": "code", "execution_count": 291, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#governance[267:270]" ] }, { "cell_type": "code", "execution_count": 140, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Index: ', 7132, 'Temp. 
Restr Assets:', u'52310') \n", "\n", "# of minutes: 0.214020017783 \n", "\n", "\n" ] }, { "data": { "text/plain": [ "7133" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", "from IPython.display import display, clear_output ##### FOR USE WITH STDOUT (DYNAMIC, SINGLE-LINE PRINTING)\n", "\n", "import timeit\n", "start_time = timeit.default_timer()\n", "\n", "for index, row in governance[:].iterrows():\n", " if pd.notnull(row['TemporarilyRstrNetAssetsGrp']):\n", " #print 'not null!'\n", " if 'EOYAmt' in row['TemporarilyRstrNetAssetsGrp']:\n", " temp_rest = row['TemporarilyRstrNetAssetsGrp']['EOYAmt']\n", " elif 'BOYAmt' in row['TemporarilyRstrNetAssetsGrp']:\n", " temp_rest = row['TemporarilyRstrNetAssetsGrp']['BOYAmt'] \n", " elif pd.notnull(row['TemporarilyRestrictedNetAssets']):\n", " print 'going to second'\n", " if 'EOY' in row['TemporarilyRestrictedNetAssets']:\n", " temp_rest = row['TemporarilyRestrictedNetAssets']['EOY']\n", " elif 'BOY' in row['TemporarilyRestrictedNetAssets']:\n", " temp_rest = row['TemporarilyRestrictedNetAssets']['BOY']\n", " \n", " governance.ix[index, 'temp_rest_assets'] = temp_rest\n", " \n", " clear_output()\n", " print ('Index: ', index, 'Temp. 
Restr Assets:', temp_rest), '\\n'\n", " sys.stdout.flush() \n", " \n", " \n", "elapsed = timeit.default_timer() - start_time\n", "print '# of minutes: ', elapsed/60, '\\n', '\\n' \n", "print governance['temp_rest_assets'].value_counts().sum()\n", "'''" ] }, { "cell_type": "code", "execution_count": 146, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7133\n", "0\n", "count 7133\n", "unique 6282\n", "top 0\n", "freq 195\n", "Name: temp_rest_assets, dtype: object\n", "count 7.133000e+03\n", "mean 7.934227e+06\n", "std 4.203098e+07\n", "min -1.417569e+06\n", "25% 2.050260e+05\n", "50% 8.326920e+05\n", "75% 3.255075e+06\n", "max 1.473751e+09\n", "Name: temp_rest_assets, dtype: float64\n" ] } ], "source": [ "'''\n", "print len(governance[governance['temp_rest_assets'].notnull()])\n", "print len(governance[governance['temp_rest_assets'].isnull()])\n", "print governance['temp_rest_assets'].describe()\n", "governance['temp_rest_assets'] = governance['temp_rest_assets'].astype('int')\n", "print governance['temp_rest_assets'].describe()\n", "'''" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions -- Part (b) - Permanently Restricted Assets" ] }, { "cell_type": "code", "execution_count": 292, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#governance[:2]" ] }, { "cell_type": "code", "execution_count": 149, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3520\n", "3613\n", "3699\n", "3434\n" ] } ], "source": [ "'''\n", "print len(governance[governance['PermanentlyRestrictedNetAssets'].notnull()])\n", "print len(governance[governance['PermanentlyRestrictedNetAssets'].isnull()])\n", "print len(governance[governance['PermanentlyRstrNetAssetsGrp'].notnull()])\n", "print len(governance[governance['PermanentlyRstrNetAssetsGrp'].isnull()])\n", "'''" ] }, { "cell_type": "code", "execution_count": 151, "metadata": { 
"collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Index: ', 7132, 'Perm. Restr Assets:', u'284363') \n", "\n", "# of minutes: 0.194717868169 \n", "\n", "\n", "7133\n" ] } ], "source": [ "'''\n", "from IPython.display import display, clear_output ##### FOR USE WITH STDOUT (DYNAMIC, SINGLE-LINE PRINTING)\n", "\n", "import timeit\n", "start_time = timeit.default_timer()\n", "\n", "for index, row in governance[:].iterrows():\n", " if pd.notnull(row['PermanentlyRstrNetAssetsGrp']):\n", " #print 'not null!'\n", " if 'EOYAmt' in row['PermanentlyRstrNetAssetsGrp']:\n", " perm_rest = row['PermanentlyRstrNetAssetsGrp']['EOYAmt']\n", " elif 'BOYAmt' in row['PermanentlyRstrNetAssetsGrp']:\n", " perm_rest = row['PermanentlyRstrNetAssetsGrp']['BOYAmt'] \n", " elif pd.notnull(row['PermanentlyRestrictedNetAssets']):\n", " print 'going to second variable'\n", " if 'EOY' in row['PermanentlyRestrictedNetAssets']:\n", " perm_rest = row['PermanentlyRestrictedNetAssets']['EOY']\n", " elif 'BOY' in row['PermanentlyRestrictedNetAssets']:\n", " perm_rest = row['PermanentlyRestrictedNetAssets']['BOY']\n", " \n", " governance.ix[index, 'perm_rest_assets'] = perm_rest\n", " \n", " clear_output()\n", " print ('Index: ', index, 'Perm. 
Restr Assets:', perm_rest), '\\n'\n", " sys.stdout.flush() \n", " \n", " \n", "elapsed = timeit.default_timer() - start_time\n", "print '# of minutes: ', elapsed/60, '\\n', '\\n' \n", "print governance['perm_rest_assets'].value_counts().sum()\n", "'''" ] }, { "cell_type": "code", "execution_count": 153, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7133\n", "0\n", "count 7133\n", "unique 3215\n", "top 0\n", "freq 887\n", "Name: perm_rest_assets, dtype: object\n", "count 7.133000e+03\n", "mean 7.831133e+06\n", "std 3.922188e+07\n", "min 0.000000e+00\n", "25% 6.430000e+04\n", "50% 5.732270e+05\n", "75% 3.064642e+06\n", "max 1.097021e+09\n", "Name: perm_rest_assets, dtype: float64\n" ] } ], "source": [ "'''\n", "print len(governance[governance['perm_rest_assets'].notnull()])\n", "print len(governance[governance['perm_rest_assets'].isnull()])\n", "print governance['perm_rest_assets'].describe(), '\\n'\n", "governance['perm_rest_assets'] = governance['perm_rest_assets'].astype('int')\n", "print governance['perm_rest_assets'].describe()\n", "'''" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions -- Part (c) - Net Assets" ] }, { "cell_type": "code", "execution_count": 157, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "count 7133\n", "unique 7120\n", "top 0\n", "freq 14\n", "Name: net_assets, dtype: object \n", "\n", "count 7.133000e+03\n", "mean 3.140955e+07\n", "std 1.753475e+08\n", "min -1.399548e+08\n", "25% 1.766750e+06\n", "50% 5.157328e+06\n", "75% 1.582864e+07\n", "max 7.982614e+09\n", "Name: net_assets, dtype: float64\n" ] } ], "source": [ "'''\n", "governance['net_assets'] = governance['NetAssetsOrFundBalancesEOYAmt']\n", "governance['net_assets'] = np.where(governance['net_assets'].isnull(),\n", " governance['NetAssetsOrFundBalancesEOY'], governance['net_assets'])\n", "print 
governance['net_assets'].describe(), '\\n'\n", "governance['net_assets'] = governance['net_assets'].astype('int')\n", "print governance['net_assets'].describe()\n", "'''" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Donor Restrictions -- Part (d) - Calculations" ] }, { "cell_type": "code", "execution_count": 159, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 7.131000e+03\n", "mean inf\n", "std NaN\n", "min -4.987176e+05\n", "25% 1.567595e-01\n", "50% 4.216044e-01\n", "75% 8.305730e-01\n", "max inf\n", "Name: donor_restrictions, dtype: float64" ] }, "execution_count": 159, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", "governance['donor_restrictions'] = (governance['temp_rest_assets']+governance['perm_rest_assets'])/governance['net_assets']\n", "governance['donor_restrictions'].describe()\n", "'''" ] }, { "cell_type": "code", "execution_count": 162, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 7.133000e+03\n", "mean inf\n", "std NaN\n", "min -4.987176e+05\n", "25% 1.567123e-01\n", "50% 4.216012e-01\n", "75% 8.303938e-01\n", "max inf\n", "Name: donor_restrictions, dtype: float64" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", "governance['donor_restrictions'] = np.where(governance['donor_restrictions'].isnull(), 0, governance['donor_restrictions'])\n", "governance['donor_restrictions'].describe()\n", "'''" ] }, { "cell_type": "code", "execution_count": 169, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FYETemporarilyRstrNetAssetsGrpTemporarilyRestrictedNetAssetsPermanentlyRstrNetAssetsGrpPermanentlyRestrictedNetAssetsUnrestrictedNetAssetsGrpUnrestrictedNetAssetsTotalNetAssetsFundBalanceGrpTotalNetAssetsFundBalancesNetAssetsOrFundBalancesBOYAmtNetAssetsOrFundBalancesBOYNetAssetsOrFundBalancesEOYAmtNetAssetsOrFundBalancesEOY
19137FY2014{u'BOYAmt': u'763419', u'EOYAmt': u'718875'}NaNNaNNaN{u'BOYAmt': u'1114442', u'EOYAmt': u'1112268'}NaN{u'BOYAmt': u'1877861', u'EOYAmt': u'1831143'}NaN1877861NaN1831143NaN
4153FY2013{u'BOYAmt': u'568056', u'EOYAmt': u'763419'}NaNNaNNaN{u'BOYAmt': u'969792', u'EOYAmt': u'1114442'}NaN{u'BOYAmt': u'1537848', u'EOYAmt': u'1877861'}NaN1537848NaN1877861NaN
4696FY2012NaN{u'BOY': u'489517', u'EOY': u'568056'}NaNNaNNaN{u'BOY': u'1227268', u'EOY': u'969792'}NaN{u'BOY': u'1716785', u'EOY': u'1537848'}NaN1716785NaN1537848
22671FY2011NaN{u'BOY': u'472102', u'EOY': u'489517'}NaNNaNNaN{u'BOY': u'1451414', u'EOY': u'1227268'}NaN{u'BOY': u'1923516', u'EOY': u'1716785'}NaN1923516NaN1716785
\n", "
" ], "text/plain": [ " FYE TemporarilyRstrNetAssetsGrp \\\n", "19137 FY2014 {u'BOYAmt': u'763419', u'EOYAmt': u'718875'} \n", "4153 FY2013 {u'BOYAmt': u'568056', u'EOYAmt': u'763419'} \n", "4696 FY2012 NaN \n", "22671 FY2011 NaN \n", "\n", " TemporarilyRestrictedNetAssets PermanentlyRstrNetAssetsGrp \\\n", "19137 NaN NaN \n", "4153 NaN NaN \n", "4696 {u'BOY': u'489517', u'EOY': u'568056'} NaN \n", "22671 {u'BOY': u'472102', u'EOY': u'489517'} NaN \n", "\n", " PermanentlyRestrictedNetAssets \\\n", "19137 NaN \n", "4153 NaN \n", "4696 NaN \n", "22671 NaN \n", "\n", " UnrestrictedNetAssetsGrp \\\n", "19137 {u'BOYAmt': u'1114442', u'EOYAmt': u'1112268'} \n", "4153 {u'BOYAmt': u'969792', u'EOYAmt': u'1114442'} \n", "4696 NaN \n", "22671 NaN \n", "\n", " UnrestrictedNetAssets \\\n", "19137 NaN \n", "4153 NaN \n", "4696 {u'BOY': u'1227268', u'EOY': u'969792'} \n", "22671 {u'BOY': u'1451414', u'EOY': u'1227268'} \n", "\n", " TotalNetAssetsFundBalanceGrp \\\n", "19137 {u'BOYAmt': u'1877861', u'EOYAmt': u'1831143'} \n", "4153 {u'BOYAmt': u'1537848', u'EOYAmt': u'1877861'} \n", "4696 NaN \n", "22671 NaN \n", "\n", " TotalNetAssetsFundBalances NetAssetsOrFundBalancesBOYAmt \\\n", "19137 NaN 1877861 \n", "4153 NaN 1537848 \n", "4696 {u'BOY': u'1716785', u'EOY': u'1537848'} NaN \n", "22671 {u'BOY': u'1923516', u'EOY': u'1716785'} NaN \n", "\n", " NetAssetsOrFundBalancesBOY NetAssetsOrFundBalancesEOYAmt \\\n", "19137 NaN 1831143 \n", "4153 NaN 1877861 \n", "4696 1716785 NaN \n", "22671 1923516 NaN \n", "\n", " NetAssetsOrFundBalancesEOY \n", "19137 NaN \n", "4153 NaN \n", "4696 1537848 \n", "22671 1716785 " ] }, "execution_count": 169, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", "df_all[df_all['EIN']=='010287624'][['FYE', 'TemporarilyRstrNetAssetsGrp', 'TemporarilyRestrictedNetAssets',\n", " 'PermanentlyRstrNetAssetsGrp', 'PermanentlyRestrictedNetAssets',\n", " 'UnrestrictedNetAssetsGrp', 'UnrestrictedNetAssets', \n", " 
'TotalNetAssetsFundBalanceGrp', 'TotalNetAssetsFundBalances', \n", " 'NetAssetsOrFundBalancesBOYAmt', 'NetAssetsOrFundBalancesBOY', \n", " 'NetAssetsOrFundBalancesEOYAmt', 'NetAssetsOrFundBalancesEOY']]\n", "'''" ] }, { "cell_type": "code", "execution_count": 171, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.39258266558100596" ] }, "execution_count": 171, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#(718875+0)/(718875+1112268)" ] }, { "cell_type": "code", "execution_count": 174, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "-1\n" ] } ], "source": [ "#print (718875+1112268)-1831143\n", "#print 1831143 - (718875+1112268)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "##### Tax-Exempt Bond" ] }, { "cell_type": "code", "execution_count": 492, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Index: ', 7132, 'Tax-exempt bond:', u'0') \n", "\n", "# of minutes: 0.197681299845 \n", "\n", "\n", "7133\n" ] } ], "source": [ "from IPython.display import display, clear_output ##### FOR USE WITH STDOUT (DYNAMIC, SINGLE-LINE PRINTING)\n", "\n", "import timeit\n", "start_time = timeit.default_timer()\n", "\n", "for index, row in governance[:].iterrows():\n", " bond = 0\n", " if pd.notnull(row['TaxExemptBondsInd']):\n", " #print 'not null!'\n", " if '#text' in row['TaxExemptBondsInd']:\n", " bond = row['TaxExemptBondsInd']['#text']\n", " else:\n", " bond = row['TaxExemptBondsInd'] \n", " elif pd.notnull(row['TaxExemptBonds']):\n", " if '#text' in row['TaxExemptBonds']:\n", " bond = row['TaxExemptBonds']['#text']\n", " else:\n", " bond = row['TaxExemptBonds'] \n", " \n", " governance.ix[index, 'tax_exempt_bond'] = bond\n", " \n", " clear_output()\n", " print ('Index: ', index, 'Tax-exempt bond:', bond), '\\n'\n", " sys.stdout.flush() \n", " \n", " \n", "elapsed = 
timeit.default_timer() - start_time\n", "print '# of minutes: ', elapsed/60, '\\n', '\\n' \n", "print governance['tax_exempt_bond'].value_counts().sum()" ] }, { "cell_type": "code", "execution_count": 493, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 4251\n", "false 2580\n", "1 206\n", "true 96\n", "Name: tax_exempt_bond, dtype: int64 \n", "\n", "0 6831\n", "1 302\n", "Name: tax_exempt_bond, dtype: int64\n" ] } ], "source": [ "print governance['tax_exempt_bond'].value_counts(), '\\n'\n", "governance['tax_exempt_bond'] = np.where(governance['tax_exempt_bond']=='true', 1, governance['tax_exempt_bond'])\n", "governance['tax_exempt_bond'] = np.where(governance['tax_exempt_bond']=='false', 0, governance['tax_exempt_bond'])\n", "governance['tax_exempt_bond'] = np.where(governance['tax_exempt_bond'].isnull(), 0, governance['tax_exempt_bond'])\n", "governance['tax_exempt_bond'] = governance['tax_exempt_bond'].astype('int')\n", "print governance['tax_exempt_bond'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Tax-Exempt Bonds, v2 (Harris et al.)\n", "NO NEED TO DO THIS -- THE `tax_exempt_bond` VARIABLE CREATED ABOVE ALREADY CAPTURES IT" ] }, { "cell_type": "code", "execution_count": 501, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TaxExemptBondLiabilitiesGrpTaxExemptBondLiabilitiestax_exempt_bond
2{u'BOYAmt': u'106014868', u'EOYAmt': u'104647760'}{u'BOY': u'77905017', u'EOY': u'109371484'}1
3NaNNaN0
\n", "
" ], "text/plain": [ " TaxExemptBondLiabilitiesGrp \\\n", "2 {u'BOYAmt': u'106014868', u'EOYAmt': u'104647760'} \n", "3 NaN \n", "\n", " TaxExemptBondLiabilities tax_exempt_bond \n", "2 {u'BOY': u'77905017', u'EOY': u'109371484'} 1 \n", "3 NaN 0 " ] }, "execution_count": 501, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance[['TaxExemptBondLiabilitiesGrp', 'TaxExemptBondLiabilities', 'tax_exempt_bond']][2:4]" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "##### Taxable Revenues\n", "AS IN YETMAN AND YETMAN'S *TAXABLE REVENUE* VARIABLE -- A REGULATORY OVERSIGHT VARIABLE, GIVEN THAT IT IS RELATED TO AUDIT LIKELIHOOD." ] }, { "cell_type": "code", "execution_count": 502, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1743\n", "count 5390\n", "unique 971\n", "top 0\n", "freq 4406\n", "Name: taxable_revenue, dtype: object \n", "\n", "count 7133\n", "unique 972\n", "top 0\n", "freq 4406\n", "Name: taxable_revenue, dtype: object \n", "\n", "count 7133.00\n", "mean -7272.61\n", "std 156439.97\n", "min -8633000.00\n", "25% 0.00\n", "50% 0.00\n", "75% 0.00\n", "max 2376271.00\n", "Name: taxable_revenue, dtype: float64 \n", "\n" ] } ], "source": [ "governance['taxable_revenue'] = governance['NetUnrelatedBusTxblIncmAmt']\n", "governance['taxable_revenue'] = np.where(governance['taxable_revenue'].isnull(),\n", " governance['NetUnrelatedBusinessTxblIncome'], governance['taxable_revenue'])\n", "print len(governance[governance['taxable_revenue'].isnull()])\n", "print governance['taxable_revenue'].describe(), '\\n'\n", "governance['taxable_revenue'] = np.where(governance['taxable_revenue'].isnull(), 0, governance['taxable_revenue'])\n", "print governance['taxable_revenue'].describe(), '\\n'\n", "governance['taxable_revenue'] = governance['taxable_revenue'].astype('int')\n", "print governance['taxable_revenue'].describe(), '\\n'" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "
CREATE TWO BINARY VERSIONS -- ONE FOR TAXABLE REVENUE GREATER THAN ZERO AND ONE FOR TAXABLE REVENUE OTHER THAN ZERO" ] }, { "cell_type": "code", "execution_count": 503, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "361\n", "6149\n", "623\n", "0 6772\n", "1 361\n", "Name: taxable_revenue_binary, dtype: int64 \n", "\n", "0 6149\n", "1 984\n", "Name: taxable_revenue_neg_or_pos_rev_binary, dtype: int64\n" ] } ], "source": [ "print len(governance[governance['taxable_revenue']>0])\n", "print len(governance[governance['taxable_revenue']==0])\n", "print len(governance[governance['taxable_revenue']<0])\n", "governance['taxable_revenue_binary'] = np.where(governance['taxable_revenue']>0, 1,0)\n", "print governance['taxable_revenue_binary'].value_counts(), '\\n'\n", "governance['taxable_revenue_neg_or_pos_rev_binary'] = np.where( ((governance['taxable_revenue']>0) |\n", " (governance['taxable_revenue']<0)), 1,0)\n", "print governance['taxable_revenue_neg_or_pos_rev_binary'].value_counts() " ] }, { "cell_type": "code", "execution_count": 504, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binaryFamilyOrBusinessRlnIndFamilyOrBusinessRelationshipTaxExemptBondLiabilitiesGrpTaxExemptBondLiabilitiesGovernmentGrantsAmtGovernmentGrantsElectionOfBoardMembersIndElectionOfBoardMembersindependent_directors_numvoting_directorsindependent_directors_pctindependent_directors_pct_v2outsourced_mgt990_reviewaudit_committeerestricted_donationstax_exempt_bondtaxable_revenuetaxable_revenue_binarytaxable_revenue_neg_or_pos_rev_binary
0010202467FY20142220212011101144339977351259236900970.50{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}00111111{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}101NaNNaN793228272951851121220.950.9511111000
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "0 010202467 FY2014 22 \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "0 20 21 \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "0 20 1 \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "0 1 1 \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "0 0 1 1 \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "0 4433997 7351259 23690097 0.50 \n", "\n", " TaxExemptBondsInd \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "\n", " TaxExemptBonds \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "\n", " NetUnrelatedBusTxblIncmAmt NetUnrelatedBusinessTxblIncome FSAuditedInd \\\n", "0 0 0 1 \n", "\n", " FSAudited FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "0 1 1 1 \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "0 1 1 \n", "\n", " IndependentAuditFinclStmtInd \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " IndependentAuditFinancialStmt \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " FeesForServicesAccounting_binary FamilyOrBusinessRlnInd \\\n", "0 1 0 \n", "\n", " FamilyOrBusinessRelationship TaxExemptBondLiabilitiesGrp \\\n", "0 1 NaN \n", "\n", " TaxExemptBondLiabilities GovernmentGrantsAmt GovernmentGrants \\\n", "0 NaN 7932282 7295185 \n", "\n", " ElectionOfBoardMembersInd ElectionOfBoardMembers independent_directors_num \\\n", "0 1 1 21 \n", "\n", " voting_directors independent_directors_pct independent_directors_pct_v2 \\\n", "0 22 0.95 0.95 \n", "\n", " outsourced_mgt 990_review audit_committee restricted_donations \\\n", "0 1 1 1 1 \n", "\n", " tax_exempt_bond taxable_revenue taxable_revenue_binary \\\n", "0 1 0 0 \n", "\n", " taxable_revenue_neg_or_pos_rev_binary \n", "0 0 " ] 
}, "execution_count": 504, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance[:1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Audited Financials" ] }, { "cell_type": "code", "execution_count": 505, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "6914 \n", "\n", "7133 \n", "\n" ] }, { "data": { "text/plain": [ "1 4254\n", "true 2454\n", "false 222\n", "0 203\n", "Name: audited_financials, dtype: int64" ] }, "execution_count": 505, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['audited_financials'] = np.nan\n", "print len(governance[governance['audited_financials'].notnull()])\n", "governance['audited_financials'] = np.where(governance['FSAuditedInd'].notnull(), governance['FSAuditedInd'], \n", " governance['audited_financials'] )\n", "print len(governance[governance['audited_financials'].notnull()]), '\\n'\n", "governance['audited_financials'] = np.where( ((governance['audited_financials'].isnull()) & \n", " (governance['FSAudited'].notnull())), \n", " governance['FSAudited'], governance['audited_financials'] )\n", "print len(governance[governance['audited_financials'].notnull()]), '\\n'\n", "governance['audited_financials'].value_counts()" ] }, { "cell_type": "code", "execution_count": 506, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7133\n" ] }, { "data": { "text/plain": [ "1 6708\n", "0 425\n", "Name: audited_financials, dtype: int64" ] }, "execution_count": 506, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['audited_financials'] = np.where( governance['audited_financials']=='true', 1, governance['audited_financials'] )\n", "governance['audited_financials'] = np.where( governance['audited_financials']=='1', 1, governance['audited_financials'] )\n", "governance['audited_financials'] = np.where( 
governance['audited_financials']=='false', 0, governance['audited_financials'] )\n", "governance['audited_financials'] = np.where( governance['audited_financials']=='0', 0, governance['audited_financials'] )\n", "print len(governance[governance['audited_financials'].notnull()])\n", "governance['audited_financials'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Federal Grant Audit Performed" ] }, { "cell_type": "code", "execution_count": 507, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5974\n", "5061\n", "1 931\n", "false 646\n", "true 490\n", "0 5\n", "Name: federal_grant_audit_performed, dtype: int64\n" ] } ], "source": [ "governance['federal_grant_audit_performed'] = governance['FederalGrantAuditPerformedInd']\n", "print len(governance[governance['federal_grant_audit_performed'].isnull()])\n", "governance['federal_grant_audit_performed'] = np.where(governance['federal_grant_audit_performed'].isnull(),\n", " governance['FederalGrantAuditPerformed'], governance['federal_grant_audit_performed'])\n", "print len(governance[governance['federal_grant_audit_performed'].isnull()])\n", "print governance['federal_grant_audit_performed'].value_counts()" ] }, { "cell_type": "code", "execution_count": 508, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2072\n" ] }, { "data": { "text/plain": [ "1 1421\n", "0 651\n", "Name: federal_grant_audit_performed, dtype: int64" ] }, "execution_count": 508, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['federal_grant_audit_performed'] = np.where( governance['federal_grant_audit_performed']=='true', 1, governance['federal_grant_audit_performed'] )\n", "governance['federal_grant_audit_performed'] = np.where( governance['federal_grant_audit_performed']=='1', 1, governance['federal_grant_audit_performed'] )\n", "governance['federal_grant_audit_performed'] = 
np.where( governance['federal_grant_audit_performed']=='false', 0, governance['federal_grant_audit_performed'] )\n", "governance['federal_grant_audit_performed'] = np.where( governance['federal_grant_audit_performed']=='0', 0, governance['federal_grant_audit_performed'] )\n", "print len(governance[governance['federal_grant_audit_performed'].notnull()])\n", "governance['federal_grant_audit_performed'].value_counts()" ] }, { "cell_type": "code", "execution_count": 509, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 5712\n", "1 1421\n", "Name: federal_grant_audit_performed, dtype: int64" ] }, "execution_count": 509, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['federal_grant_audit_performed'] = np.where( governance['federal_grant_audit_performed'].isnull(), \n", " 0, governance['federal_grant_audit_performed'])\n", "governance['federal_grant_audit_performed'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Federal Grant Audit Required" ] }, { "cell_type": "code", "execution_count": 510, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "288\n", "45 \n", "\n", "0 3746\n", "false 2214\n", "1 719\n", "true 409\n", "Name: federal_grant_audit_required, dtype: int64 \n", "\n" ] }, { "data": { "text/plain": [ "0 3746\n", "false 2214\n", "1 719\n", "true 409\n", "0 45\n", "Name: federal_grant_audit_required, dtype: int64" ] }, "execution_count": 510, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['federal_grant_audit_required'] = governance['FederalGrantAuditRequiredInd']\n", "print len(governance[governance['federal_grant_audit_required'].isnull()])\n", "governance['federal_grant_audit_required'] = np.where(governance['federal_grant_audit_required'].isnull(),\n", " governance['FederalGrantAuditRequired'], governance['federal_grant_audit_required'])\n", "print 
len(governance[governance['federal_grant_audit_required'].isnull()]), '\\n'\n", "print governance['federal_grant_audit_required'].value_counts(), '\\n'\n", "governance['federal_grant_audit_required'] = np.where( governance['federal_grant_audit_required'].isnull(), \n", " 0, governance['federal_grant_audit_required'])\n", "governance['federal_grant_audit_required'].value_counts()" ] }, { "cell_type": "code", "execution_count": 511, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7133\n" ] }, { "data": { "text/plain": [ "0 6005\n", "1 1128\n", "Name: federal_grant_audit_required, dtype: int64" ] }, "execution_count": 511, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['federal_grant_audit_required'] = np.where( governance['federal_grant_audit_required']=='true', 1, governance['federal_grant_audit_required'] )\n", "governance['federal_grant_audit_required'] = np.where( governance['federal_grant_audit_required']=='1', 1, governance['federal_grant_audit_required'] )\n", "governance['federal_grant_audit_required'] = np.where( governance['federal_grant_audit_required']=='false', 0, governance['federal_grant_audit_required'] )\n", "governance['federal_grant_audit_required'] = np.where( governance['federal_grant_audit_required']=='0', 0, governance['federal_grant_audit_required'] )\n", "print len(governance[governance['federal_grant_audit_required'].notnull()])\n", "governance['federal_grant_audit_required'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Independent Audited Financial Statements" ] }, { "cell_type": "code", "execution_count": 512, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n" ] }, { "ename": "TypeError", "evalue": "unhashable type: 'dict'", "output_type": "error", "traceback": [ 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m governance['IndependentAuditFinancialStmt'], governance['independent_audited_fs'])\n\u001b[1;32m 4\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgovernance\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mgovernance\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'independent_audited_fs'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mprint\u001b[0m \u001b[0mgovernance\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'independent_audited_fs'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m//anaconda/lib/python2.7/site-packages/pandas/core/base.pyc\u001b[0m in \u001b[0;36mvalue_counts\u001b[0;34m(self, normalize, sort, ascending, bins, dropna)\u001b[0m\n\u001b[1;32m 949\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0malgorithms\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mvalue_counts\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 950\u001b[0m result = value_counts(self, sort=sort, ascending=ascending,\n\u001b[0;32m--> 951\u001b[0;31m normalize=normalize, bins=bins, dropna=dropna)\n\u001b[0m\u001b[1;32m 952\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 953\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m//anaconda/lib/python2.7/site-packages/pandas/core/algorithms.pyc\u001b[0m in 
\u001b[0;36mvalue_counts\u001b[0;34m(values, sort, ascending, normalize, bins, dropna)\u001b[0m\n\u001b[1;32m 376\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 377\u001b[0m \u001b[0;31m# ndarray path. pass original to handle DatetimeTzBlock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 378\u001b[0;31m \u001b[0mkeys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcounts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_value_counts_arraylike\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdropna\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 379\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m//anaconda/lib/python2.7/site-packages/pandas/core/algorithms.pyc\u001b[0m in \u001b[0;36m_value_counts_arraylike\u001b[0;34m(values, dropna)\u001b[0m\n\u001b[1;32m 444\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_ensure_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 445\u001b[0m \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 446\u001b[0;31m \u001b[0mkeys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcounts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhtable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_count_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 447\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m 
\u001b[0mdropna\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mkeys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minsert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNaN\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32mpandas/hashtable.pyx\u001b[0m in \u001b[0;36mpandas.hashtable.value_count_object (pandas/hashtable.c:21313)\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mpandas/hashtable.pyx\u001b[0m in \u001b[0;36mpandas.hashtable.value_count_object (pandas/hashtable.c:21048)\u001b[0;34m()\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: unhashable type: 'dict'" ] } ], "source": [ "governance['independent_audited_fs'] = governance['IndependentAuditFinclStmtInd']\n", "governance['independent_audited_fs'] = np.where(governance['independent_audited_fs'].isnull(),\n", " governance['IndependentAuditFinancialStmt'], governance['independent_audited_fs'])\n", "print len(governance[governance['independent_audited_fs'].isnull()])\n", "print governance['independent_audited_fs'].value_counts()" ] }, { "cell_type": "code", "execution_count": 517, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 517, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(governance[governance['independent_audited_fs'].isnull()])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
GIVEN ABOVE ERROR, I WILL HAVE TO EXTRACT THE RIGHT SUB-VARIABLE FROM THE COLUMN\n", "- NOTE: WHEN CHECKING VARIABLE TYPE DO NOT PUT 'DICT' IN QUOTES" ] }, { "cell_type": "code", "execution_count": 518, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Index: ', 9, 'Independent audited F/S:', u'1') \n", "\n", "# of minutes: 0.000384934743245 \n", "\n", "\n" ] } ], "source": [ "from IPython.display import display, clear_output ##### FOR USE WITH STDOUT (DYNAMIC, SINGLE-LINE PRINTING)\n", "\n", "import timeit\n", "start_time = timeit.default_timer()\n", "\n", "# Some cells hold a dict such as {'#text': '1', '@referenceDocumentId': ...} instead of a plain\n", "# value; value_counts() cannot hash those, so keep only the '#text' entry.\n", "def extract_text(value):\n", "    if isinstance(value, dict) and '#text' in value:\n", "        return value['#text']\n", "    return value\n", "\n", "# Apply to every row (not just the first 10) so the column is fully cleaned on a fresh run.\n", "governance['independent_audited_fs'] = governance['independent_audited_fs'].map(extract_text)\n", "\n", "# Fail here, with a clear message, if any dict without a '#text' key is left behind.\n", "assert not governance['independent_audited_fs'].map(lambda v: isinstance(v, dict)).any(), 'unparsed dict values remain'\n", "\n", "elapsed = timeit.default_timer() - start_time\n", "print '# of minutes: ', elapsed/60, '\\n', '\\n' " ] }, { "cell_type": "code", "execution_count": 519, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEVotingMembersGoverningBodyCntNbrVotingMembersGoverningBodyVotingMembersIndependentCntNbrIndependentVotingMembersDelegationOfMgmtDutiesIndDelegationOfManagementDutiesForm990ProvidedToGvrnBodyIndForm990ProvidedToGoverningBodyAuditCommitteeIndAuditCommitteeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsTaxExemptBondsIndTaxExemptBondsNetUnrelatedBusTxblIncmAmtNetUnrelatedBusinessTxblIncomeFSAuditedIndFSAuditedFederalGrantAuditPerformedIndFederalGrantAuditPerformedFederalGrantAuditRequiredIndFederalGrantAuditRequiredIndependentAuditFinclStmtIndIndependentAuditFinancialStmtFeesForServicesAccounting_binaryFamilyOrBusinessRlnIndFamilyOrBusinessRelationshipTaxExemptBondLiabilitiesGrpTaxExemptBondLiabilitiesGovernmentGrantsAmtGovernmentGrantsElectionOfBoardMembersIndElectionOfBoardMembersindependent_directors_numvoting_directorsindependent_directors_pctindependent_directors_pct_v2outsourced_mgt990_reviewaudit_committeerestricted_donationstax_exempt_bondtaxable_revenuetaxable_revenue_binarytaxable_revenue_neg_or_pos_rev_binaryaudited_financialsfederal_grant_audit_performedfederal_grant_audit_requiredindependent_audited_fs
0010202467FY20142220212011101144339977351259236900970.50{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'}00111111{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}{u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'}101NaNNaN793228272951851121220.950.95111110001111
\n", "
" ], "text/plain": [ " EIN FYE VotingMembersGoverningBodyCnt \\\n", "0 010202467 FY2014 22 \n", "\n", " NbrVotingMembersGoverningBody VotingMembersIndependentCnt \\\n", "0 20 21 \n", "\n", " NbrIndependentVotingMembers DelegationOfMgmtDutiesInd \\\n", "0 20 1 \n", "\n", " DelegationOfManagementDuties Form990ProvidedToGvrnBodyInd \\\n", "0 1 1 \n", "\n", " Form990ProvidedToGoverningBody AuditCommitteeInd AuditCommittee \\\n", "0 0 1 1 \n", "\n", " perm_rest_assets temp_rest_assets net_assets donor_restrictions \\\n", "0 4433997 7351259 23690097 0.50 \n", "\n", " TaxExemptBondsInd \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "\n", " TaxExemptBonds \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1053100001'} \n", "\n", " NetUnrelatedBusTxblIncmAmt NetUnrelatedBusinessTxblIncome FSAuditedInd \\\n", "0 0 0 1 \n", "\n", " FSAudited FederalGrantAuditPerformedInd FederalGrantAuditPerformed \\\n", "0 1 1 1 \n", "\n", " FederalGrantAuditRequiredInd FederalGrantAuditRequired \\\n", "0 1 1 \n", "\n", " IndependentAuditFinclStmtInd \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " IndependentAuditFinancialStmt \\\n", "0 {u'#text': u'1', u'@referenceDocumentId': u'RetDoc1040000001'} \n", "\n", " FeesForServicesAccounting_binary FamilyOrBusinessRlnInd \\\n", "0 1 0 \n", "\n", " FamilyOrBusinessRelationship TaxExemptBondLiabilitiesGrp \\\n", "0 1 NaN \n", "\n", " TaxExemptBondLiabilities GovernmentGrantsAmt GovernmentGrants \\\n", "0 NaN 7932282 7295185 \n", "\n", " ElectionOfBoardMembersInd ElectionOfBoardMembers independent_directors_num \\\n", "0 1 1 21 \n", "\n", " voting_directors independent_directors_pct independent_directors_pct_v2 \\\n", "0 22 0.95 0.95 \n", "\n", " outsourced_mgt 990_review audit_committee restricted_donations \\\n", "0 1 1 1 1 \n", "\n", " tax_exempt_bond taxable_revenue taxable_revenue_binary \\\n", "0 1 0 0 \n", "\n", " taxable_revenue_neg_or_pos_rev_binary 
audited_financials \\\n", "0 0 1 \n", "\n", " federal_grant_audit_performed federal_grant_audit_required \\\n", "0 1 1 \n", "\n", " independent_audited_fs \n", "0 1 " ] }, "execution_count": 519, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance[:1]" ] }, { "cell_type": "code", "execution_count": 520, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 3503\n", "true 2050\n", "0 954\n", "false 626\n", "Name: independent_audited_fs, dtype: int64" ] }, "execution_count": 520, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['independent_audited_fs'].value_counts()" ] }, { "cell_type": "code", "execution_count": 521, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7133\n" ] }, { "data": { "text/plain": [ "1 5553\n", "0 1580\n", "Name: independent_audited_fs, dtype: int64" ] }, "execution_count": 521, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['independent_audited_fs'] = np.where( governance['independent_audited_fs']=='true', 1, governance['independent_audited_fs'] )\n", "governance['independent_audited_fs'] = np.where( governance['independent_audited_fs']=='1', 1, governance['independent_audited_fs'] )\n", "governance['independent_audited_fs'] = np.where( governance['independent_audited_fs']=='false', 0, governance['independent_audited_fs'] )\n", "governance['independent_audited_fs'] = np.where( governance['independent_audited_fs']=='0', 0, governance['independent_audited_fs'] )\n", "print len(governance[governance['independent_audited_fs'].notnull()])\n", "governance['independent_audited_fs'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Family Business Relations" ] }, { "cell_type": "code", "execution_count": 522, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "0 3461\n", "false 2092\n", "1 
996\n", "true 584\n", "Name: no_relations, dtype: int64\n" ] } ], "source": [ "governance['no_relations'] = governance['FamilyOrBusinessRlnInd']\n", "governance['no_relations'] = np.where(governance['no_relations'].isnull(),\n", " governance['FamilyOrBusinessRelationship'], governance['no_relations'])\n", "print len(governance[governance['no_relations'].isnull()])\n", "print governance['no_relations'].value_counts()" ] }, { "cell_type": "code", "execution_count": 523, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7133\n" ] }, { "data": { "text/plain": [ "0 5553\n", "1 1580\n", "Name: no_relations, dtype: int64" ] }, "execution_count": 523, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['no_relations'] = np.where( governance['no_relations']=='true', 1, governance['no_relations'] )\n", "governance['no_relations'] = np.where( governance['no_relations']=='1', 1, governance['no_relations'] )\n", "governance['no_relations'] = np.where( governance['no_relations']=='false', 0, governance['no_relations'] )\n", "governance['no_relations'] = np.where( governance['no_relations']=='0', 0, governance['no_relations'] )\n", "print len(governance[governance['no_relations'].notnull()])\n", "governance['no_relations'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Election of Board Members" ] }, { "cell_type": "code", "execution_count": 525, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "0 3843\n", "false 2342\n", "1 614\n", "true 334\n", "Name: elected_board, dtype: int64\n" ] } ], "source": [ "governance['elected_board'] = governance['ElectionOfBoardMembersInd']\n", "governance['elected_board'] = np.where(governance['elected_board'].isnull(),\n", " governance['ElectionOfBoardMembers'], governance['elected_board'])\n", "print len(governance[governance['elected_board'].isnull()])\n", "print 
governance['elected_board'].value_counts() " ] }, { "cell_type": "code", "execution_count": 526, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7133\n" ] }, { "data": { "text/plain": [ "0 6185\n", "1 948\n", "Name: elected_board, dtype: int64" ] }, "execution_count": 526, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance['elected_board'] = np.where( governance['elected_board']=='true', 1, governance['elected_board'] )\n", "governance['elected_board'] = np.where( governance['elected_board']=='1', 1, governance['elected_board'] )\n", "governance['elected_board'] = np.where( governance['elected_board']=='false', 0, governance['elected_board'] )\n", "governance['elected_board'] = np.where( governance['elected_board']=='0', 0, governance['elected_board'] )\n", "print len(governance[governance['elected_board'].notnull()])\n", "governance['elected_board'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Government Grant" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "governance[['GovernmentGrantsAmt', ]]" ] }, { "cell_type": "code", "execution_count": 537, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3327\n", "count 3806\n", "unique 3559\n", "top 0\n", "freq 86\n", "Name: government_grant, dtype: object \n", "\n", "0\n", "count 7133\n", "unique 3560\n", "top 0\n", "freq 3327\n", "Name: government_grant, dtype: int64 \n", "\n", "0\n", "1 3806\n", "0 3327\n", "Name: government_grant, dtype: int64 \n", "\n" ] } ], "source": [ "governance['government_grant'] = governance['GovernmentGrantsAmt']\n", "governance['government_grant'] = np.where(governance['government_grant'].isnull(),\n", " governance['GovernmentGrants'], governance['government_grant'])\n", "print len(governance[governance['government_grant'].isnull()])\n", "print 
governance['government_grant'].describe(), '\\n'\n", "governance['government_grant'] = np.where(governance['government_grant'].isnull(), 0,governance['government_grant'] )\n", "print len(governance[governance['government_grant'].isnull()])\n", "print governance['government_grant'].describe(), '\\n'\n", "# The amounts are stored as strings; in Python 2 any str compares greater than 0, which would\n", "# code a reported '0' as 1. Convert to numbers first (unparseable values become NaN -> 0).\n", "governance['government_grant'] = np.where(pd.to_numeric(governance['government_grant'], errors='coerce')>0, 1,0)\n", "print len(governance[governance['government_grant'].isnull()])\n", "print governance['government_grant'].value_counts(), '\\n'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Zero FR Expenses" ] }, { "cell_type": "code", "execution_count": 555, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 \n", "\n", "6434 \n", "\n", "7133\n" ] }, { "data": { "text/plain": [ "0 86\n", "140183 2\n", "221765 2\n", "135672 2\n", "121804 2\n", "Name: zero_FR, dtype: int64" ] }, "execution_count": 555, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance2['zero_FR'] = np.nan\n", "print len(governance2[governance2['zero_FR'].notnull()]), '\\n'\n", "governance2['zero_FR'] = np.where(governance2['TotalFundrsngExpCurrentYear'].notnull(), \n", " governance2['TotalFundrsngExpCurrentYear'], \n", " governance2['zero_FR'] )\n", "print len(governance2[governance2['zero_FR'].notnull()]), '\\n'\n", "governance2['zero_FR'] = np.where( ((governance2['zero_FR'].isnull()) & \n", " (governance2['CYTotalFundraisingExpenseAmt'].notnull())), \n", " governance2['CYTotalFundraisingExpenseAmt'], governance2['zero_FR'] )\n", "print len(governance2[governance2['zero_FR'].notnull()])\n", "governance2['zero_FR'].value_counts()[:5]" ] }, { "cell_type": "code", "execution_count": 558, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 7047\n", "1 86\n", "Name: zero_FR, dtype: int64" ] }, "execution_count": 558, "metadata": {}, "output_type": "execute_result" } ], "source": [ "governance2['zero_FR'] = 
np.where(governance2['zero_FR']=='0', 1,0)\n", "governance2['zero_FR'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Save DF" ] }, { "cell_type": "code", "execution_count": 539, "metadata": { "collapsed": true }, "outputs": [], "source": [ "governance.to_pickle('governance variables including original columns (n=7,133).pkl')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Create subset of columns in preparation for merge" ] }, { "cell_type": "code", "execution_count": 540, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['EIN', 'FYE', u'VotingMembersGoverningBodyCnt', u'NbrVotingMembersGoverningBody', u'VotingMembersIndependentCnt', u'NbrIndependentVotingMembers', u'DelegationOfMgmtDutiesInd', u'DelegationOfManagementDuties', u'Form990ProvidedToGvrnBodyInd', u'Form990ProvidedToGoverningBody', u'AuditCommitteeInd', u'AuditCommittee', 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', u'TaxExemptBondsInd', u'TaxExemptBonds', u'NetUnrelatedBusTxblIncmAmt', u'NetUnrelatedBusinessTxblIncome', u'FSAuditedInd', u'FSAudited', u'FederalGrantAuditPerformedInd', u'FederalGrantAuditPerformed', u'FederalGrantAuditRequiredInd', u'FederalGrantAuditRequired', u'IndependentAuditFinclStmtInd', u'IndependentAuditFinancialStmt', 'FeesForServicesAccounting_binary', u'FamilyOrBusinessRlnInd', u'FamilyOrBusinessRelationship', u'TaxExemptBondLiabilitiesGrp', u'TaxExemptBondLiabilities', u'GovernmentGrantsAmt', u'GovernmentGrants', u'ElectionOfBoardMembersInd', u'ElectionOfBoardMembers', 'independent_directors_num', 'voting_directors', 'independent_directors_pct', 'independent_directors_pct_v2', 'outsourced_mgt', '990_review', 'audit_committee', 'restricted_donations', 'tax_exempt_bond', 'taxable_revenue', 'taxable_revenue_binary', 'taxable_revenue_neg_or_pos_rev_binary', 'audited_financials', 'federal_grant_audit_performed', 
'federal_grant_audit_required', 'independent_audited_fs', 'no_relations', 'elected_board', 'government_grant']\n" ] } ], "source": [ "print governance.columns.tolist()" ] }, { "cell_type": "code", "execution_count": 542, "metadata": { "collapsed": true }, "outputs": [], "source": [ "governance_cols = ['EIN', 'FYE', \n", " 'independent_directors_num', 'independent_directors_pct', 'independent_directors_pct_v2',\n", " 'voting_directors',\n", " 'outsourced_mgt', '990_review', 'audit_committee', \n", " 'perm_rest_assets', 'temp_rest_assets', 'net_assets', 'donor_restrictions', \n", " 'restricted_donations',\n", " 'tax_exempt_bond', 'taxable_revenue', 'taxable_revenue_binary', \n", " 'taxable_revenue_neg_or_pos_rev_binary', \n", " 'audited_financials', 'independent_audited_fs', \n", " 'federal_grant_audit_performed', 'federal_grant_audit_required', \n", " 'FeesForServicesAccounting_binary',\n", " 'no_relations', \n", " 'elected_board', \n", " 'government_grant',\n", " ]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merge with Model 4 Data" ] }, { "cell_type": "code", "execution_count": 394, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of columns: 35\n", "Number of observations: 8304\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
donor_advisorydonor_advisory_2016donor_advisory_2011_to_2016org_idEINFYEForm 990 FYEratings_system2011_data2016_dataconflict_of_interest_policy_v2records_retention_policy_v2whistleblower_policy_v2SOX_policiesSOX_policies_binarySOX_policies_all_binaryprogram_efficiencycomplexitycomplexity_2011agetotal_revenue_loggedcategorystatetot_revcategory_Animalscategory_Arts, Culture, Humanitiescategory_Community Developmentcategory_Educationcategory_Environmentcategory_Healthcategory_Human Servicescategory_Human and Civil Rightscategory_Internationalcategory_Religioncategory_Research and Public Policy
507090.00.00.05954010202467FY20142014-12CN 2.10.01.01.01.01.03.01.01.00.7944576.0NaN62.016.377993Research and Public PolicyMENaN0.00.00.00.00.00.00.00.00.00.01.0
\n", "
" ], "text/plain": [ " donor_advisory donor_advisory_2016 donor_advisory_2011_to_2016 \\\n", "50709 0.0 0.0 0.0 \n", "\n", " org_id EIN FYE Form 990 FYE ratings_system 2011_data \\\n", "50709 5954 010202467 FY2014 2014-12 CN 2.1 0.0 \n", "\n", " 2016_data conflict_of_interest_policy_v2 records_retention_policy_v2 \\\n", "50709 1.0 1.0 1.0 \n", "\n", " whistleblower_policy_v2 SOX_policies SOX_policies_binary \\\n", "50709 1.0 3.0 1.0 \n", "\n", " SOX_policies_all_binary program_efficiency complexity \\\n", "50709 1.0 0.794457 6.0 \n", "\n", " complexity_2011 age total_revenue_logged \\\n", "50709 NaN 62.0 16.377993 \n", "\n", " category state tot_rev category_Animals \\\n", "50709 Research and Public Policy ME NaN 0.0 \n", "\n", " category_Arts, Culture, Humanities category_Community Development \\\n", "50709 0.0 0.0 \n", "\n", " category_Education category_Environment category_Health \\\n", "50709 0.0 0.0 0.0 \n", "\n", " category_Human Services category_Human and Civil Rights \\\n", "50709 0.0 0.0 \n", "\n", " category_International category_Religion \\\n", "50709 0.0 0.0 \n", "\n", " category_Research and Public Policy \n", "50709 1.0 " ] }, "execution_count": 394, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_pickle('2016 - Test 4 data.pkl')\n", "print \"Number of columns:\", len(df.columns)\n", "print \"Number of observations:\", len(df)\n", "df.head(1)" ] }, { "cell_type": "code", "execution_count": 543, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of columns: 39\n", "Number of observations: 8238\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Col1A2011_dataA2016_dataeinfyeForm_990_FYESOX_policiesSOX_policies_all_binarySOX_policies_binaryagecategorycategory_Animalscategory_Arts__Culture__Humaniticategory_Community_Developmentcategory_Educationcategory_Environmentcategory_Healthcategory_Human_Servicescategory_Human_and_Civil_Rightscategory_Internationalcategory_Religioncategory_Research_and_Public_Polcomplexitycomplexity_2011conflict_of_interest_policy_v2donor_advisorydonor_advisory_2011_to_2016donor_advisory_2016org_idprogram_efficiencyratings_systemrecords_retention_policy_v2statetot_revtotal_revenue_loggedwhistleblower_policy_v2ncategoryrevssamused_
05070901010202467FY20142014-123.001.001.0062.00Research and Public Policy000000000016.00nan1.0000059540.79CN 2.11.00MEnan16.381.00Research and Public Policy12967968.001.00
\n", "
" ], "text/plain": [ " Col1 A2011_data A2016_data ein fye Form_990_FYE \\\n", "0 50709 0 1 010202467 FY2014 2014-12 \n", "\n", " SOX_policies SOX_policies_all_binary SOX_policies_binary age \\\n", "0 3.00 1.00 1.00 62.00 \n", "\n", " category category_Animals \\\n", "0 Research and Public Policy 0 \n", "\n", " category_Arts__Culture__Humaniti category_Community_Development \\\n", "0 0 0 \n", "\n", " category_Education category_Environment category_Health \\\n", "0 0 0 0 \n", "\n", " category_Human_Services category_Human_and_Civil_Rights \\\n", "0 0 0 \n", "\n", " category_International category_Religion \\\n", "0 0 0 \n", "\n", " category_Research_and_Public_Pol complexity complexity_2011 \\\n", "0 1 6.00 nan \n", "\n", " conflict_of_interest_policy_v2 donor_advisory \\\n", "0 1.00 0 \n", "\n", " donor_advisory_2011_to_2016 donor_advisory_2016 org_id \\\n", "0 0 0 5954 \n", "\n", " program_efficiency ratings_system records_retention_policy_v2 state \\\n", "0 0.79 CN 2.1 1.00 ME \n", "\n", " tot_rev total_revenue_logged whistleblower_policy_v2 \\\n", "0 nan 16.38 1.00 \n", "\n", " ncategory revs samused_ \n", "0 Research and Public Policy 12967968.00 1.00 " ] }, "execution_count": 543, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_test4 = pd.read_stata('Test 4 data.dta')\n", "print \"Number of columns:\", len(df_test4.columns)\n", "print \"Number of observations:\", len(df_test4)\n", "df_test4.head(1)" ] }, { "cell_type": "code", "execution_count": 544, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "set(['Form 990 FYE', 'category_Human Services', 'category_Community Development', 'FYE', '2016_data', 'category_Arts, Culture, Humanities', 'category_Research and Public Policy', 'category_Human and Civil Rights', '2011_data', 'EIN']) \n", "\n", "set(['Form_990_FYE', 'samused_', 'category_Research_and_Public_Pol', 'category_Human_Services', 'A2011_data', 'fye', 
'category_Human_and_Civil_Rights', 'Col1', 'category_Community_Development', 'A2016_data', 'revs', 'ein', 'category_Arts__Culture__Humaniti', 'ncategory'])\n" ] } ], "source": [ "print set(df.columns.tolist()) - set(df_test4.columns.tolist()), '\\n'\n", "print set(df_test4.columns.tolist()) - set(df.columns.tolist())" ] }, { "cell_type": "code", "execution_count": 405, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "35\n", "8304 \n", "\n", "56\n", "8304 \n", "\n", "both 7133\n", "left_only 1171\n", "right_only 0\n", "Name: _merge, dtype: int64\n" ] } ], "source": [ "#print len(df.columns)\n", "#print len(pd.merge(df, governance[governance_cols], left_on='EIN', right_on='EIN', how='left', indicator=True)), '\\n'\n", "#merged = pd.merge(df, governance[governance_cols], left_on='EIN', right_on='EIN', how='left', indicator=True)\n", "#print len(merged.columns)\n", "#print len(merged), '\\n'\n", "#print merged['_merge'].value_counts()" ] }, { "cell_type": "code", "execution_count": 545, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "35\n", "8238 \n", "\n", "66\n", "8238 \n", "\n", "both 7133\n", "left_only 1105\n", "right_only 0\n", "Name: _merge, dtype: int64\n" ] } ], "source": [ "# Left-merge the e-file governance variables onto the Test 4 sample, keyed on EIN\n", "# ('ein' in df_test4, 'EIN' in governance). indicator=True adds the '_merge' column\n", "# that is tabulated at the end of this cell.\n", "\n", "# Column count of the frame that is actually merged (df_test4).\n", "print len(df_test4.columns)\n", "\n", "# Merge once and reuse the result for every diagnostic below.\n", "merged = pd.merge(df_test4, governance[governance_cols], left_on='ein', right_on='EIN', how='left', indicator=True)\n", "print len(merged), '\\n'\n", "print len(merged.columns)\n", "print len(merged), '\\n'\n", "print merged['_merge'].value_counts()" ] }, { "cell_type": "code", "execution_count": 560, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#merged = merged.drop('_merge', 1)" ] }, { "cell_type": "code", "execution_count": 562, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"65\n", "8238 \n", "\n", "68\n", "8238 \n", "\n", "both 7133\n", "left_only 1105\n", "right_only 0\n", "Name: _merge, dtype: int64\n" ] } ], "source": [ "'''\n", "print len(merged.columns)\n", "print len(pd.merge(merged, governance2[['EIN', 'zero_FR']], left_on='ein', right_on='EIN', how='left', indicator=True)), '\\n'\n", "merged = pd.merge(merged, governance2[['EIN', 'zero_FR']], left_on='ein', right_on='EIN', how='left', indicator=True)\n", "print len(merged.columns)\n", "print len(merged), '\\n'\n", "print merged['_merge'].value_counts()\n", "'''" ] }, { "cell_type": "code", "execution_count": 563, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Col1A2011_dataA2016_dataeinfyeForm_990_FYESOX_policiesSOX_policies_all_binarySOX_policies_binaryagecategorycategory_Animalscategory_Arts__Culture__Humaniticategory_Community_Developmentcategory_Educationcategory_Environmentcategory_Healthcategory_Human_Servicescategory_Human_and_Civil_Rightscategory_Internationalcategory_Religioncategory_Research_and_Public_Polcomplexitycomplexity_2011conflict_of_interest_policy_v2donor_advisorydonor_advisory_2011_to_2016donor_advisory_2016org_idprogram_efficiencyratings_systemrecords_retention_policy_v2statetot_revtotal_revenue_loggedwhistleblower_policy_v2ncategoryrevssamused_EIN_xFYEindependent_directors_numindependent_directors_pctindependent_directors_pct_v2voting_directorsoutsourced_mgt990_reviewaudit_committeeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionsrestricted_donationstax_exempt_bondtaxable_revenuetaxable_revenue_binarytaxable_revenue_neg_or_pos_rev_binaryaudited_financialsindependent_audited_fsfederal_grant_audit_performedfederal_grant_audit_requiredFeesForServicesAccounting_binaryno_relationselected_boardgovernment_grantEIN_yzero_FR_merge
05070901010202467FY20142014-123.001.001.0062.00Research and Public Policy000000000016.00nan1.0000059540.79CN 2.11.00MEnan16.381.00Research and Public Policy12967968.001.00010202467FY201421.000.950.9522.001.001.001.004433997.007351259.0023690097.000.501.001.000.000.000.0011111.00011.000102024670.00both
\n", "
" ], "text/plain": [ " Col1 A2011_data A2016_data ein fye Form_990_FYE \\\n", "0 50709 0 1 010202467 FY2014 2014-12 \n", "\n", " SOX_policies SOX_policies_all_binary SOX_policies_binary age \\\n", "0 3.00 1.00 1.00 62.00 \n", "\n", " category category_Animals \\\n", "0 Research and Public Policy 0 \n", "\n", " category_Arts__Culture__Humaniti category_Community_Development \\\n", "0 0 0 \n", "\n", " category_Education category_Environment category_Health \\\n", "0 0 0 0 \n", "\n", " category_Human_Services category_Human_and_Civil_Rights \\\n", "0 0 0 \n", "\n", " category_International category_Religion \\\n", "0 0 0 \n", "\n", " category_Research_and_Public_Pol complexity complexity_2011 \\\n", "0 1 6.00 nan \n", "\n", " conflict_of_interest_policy_v2 donor_advisory \\\n", "0 1.00 0 \n", "\n", " donor_advisory_2011_to_2016 donor_advisory_2016 org_id \\\n", "0 0 0 5954 \n", "\n", " program_efficiency ratings_system records_retention_policy_v2 state \\\n", "0 0.79 CN 2.1 1.00 ME \n", "\n", " tot_rev total_revenue_logged whistleblower_policy_v2 \\\n", "0 nan 16.38 1.00 \n", "\n", " ncategory revs samused_ EIN_x FYE \\\n", "0 Research and Public Policy 12967968.00 1.00 010202467 FY2014 \n", "\n", " independent_directors_num independent_directors_pct \\\n", "0 21.00 0.95 \n", "\n", " independent_directors_pct_v2 voting_directors outsourced_mgt 990_review \\\n", "0 0.95 22.00 1.00 1.00 \n", "\n", " audit_committee perm_rest_assets temp_rest_assets net_assets \\\n", "0 1.00 4433997.00 7351259.00 23690097.00 \n", "\n", " donor_restrictions restricted_donations tax_exempt_bond taxable_revenue \\\n", "0 0.50 1.00 1.00 0.00 \n", "\n", " taxable_revenue_binary taxable_revenue_neg_or_pos_rev_binary \\\n", "0 0.00 0.00 \n", "\n", " audited_financials independent_audited_fs federal_grant_audit_performed \\\n", "0 1 1 1 \n", "\n", " federal_grant_audit_required FeesForServicesAccounting_binary no_relations \\\n", "0 1 1.00 0 \n", "\n", " elected_board government_grant EIN_y 
zero_FR _merge \n", "0 1 1.00 010202467 0.00 both " ] }, "execution_count": 563, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged[:1]" ] }, { "cell_type": "code", "execution_count": 416, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#merged.rename(columns={'FYE_x':'FYE'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 418, "metadata": { "collapsed": true }, "outputs": [], "source": [ "pd.set_option('display.float_format', lambda x: '%.2f' % x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Descriptives for our governance variables" ] }, { "cell_type": "code", "execution_count": 439, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Col1A2011_dataA2016_dataeinfyeForm_990_FYESOX_policiesSOX_policies_all_binarySOX_policies_binaryagecategorycategory_Animalscategory_Arts__Culture__Humaniticategory_Community_Developmentcategory_Educationcategory_Environmentcategory_Healthcategory_Human_Servicescategory_Human_and_Civil_Rightscategory_Internationalcategory_Religioncategory_Research_and_Public_Polcomplexitycomplexity_2011conflict_of_interest_policy_v2donor_advisorydonor_advisory_2011_to_2016donor_advisory_2016org_idprogram_efficiencyratings_systemrecords_retention_policy_v2statetot_revtotal_revenue_loggedwhistleblower_policy_v2ncategoryrevsEINFYEindependent_directors_numindependent_directors_pctvoting_directorsoutsourced_mgt990_reviewaudit_committeeperm_rest_assetstemp_rest_assetsnet_assetsdonor_restrictionstax_exempt_bondtaxable_revenuetaxable_revenue_binarytaxable_revenue_neg_or_pos_rev_binaryaudited_financialsindependent_audited_fsfederal_grant_audit_performedfederal_grant_audit_requiredFeesForServicesAccounting_binary_mergeindependent_directors_pct_v2
05070901010202467FY20142014-123.001.001.0062.00Research and Public Policy000000000016.00nan1.0000059540.79CN 2.11.00MEnan16.381.00Research and Public Policy12967968.00010202467FY201421.000.9522.001.001.001.004433997.007351259.0023690097.000.501.000.000.000.0011111.00both0.95
\n", "
" ], "text/plain": [ " Col1 A2011_data A2016_data ein fye Form_990_FYE \\\n", "0 50709 0 1 010202467 FY2014 2014-12 \n", "\n", " SOX_policies SOX_policies_all_binary SOX_policies_binary age \\\n", "0 3.00 1.00 1.00 62.00 \n", "\n", " category category_Animals \\\n", "0 Research and Public Policy 0 \n", "\n", " category_Arts__Culture__Humaniti category_Community_Development \\\n", "0 0 0 \n", "\n", " category_Education category_Environment category_Health \\\n", "0 0 0 0 \n", "\n", " category_Human_Services category_Human_and_Civil_Rights \\\n", "0 0 0 \n", "\n", " category_International category_Religion \\\n", "0 0 0 \n", "\n", " category_Research_and_Public_Pol complexity complexity_2011 \\\n", "0 1 6.00 nan \n", "\n", " conflict_of_interest_policy_v2 donor_advisory \\\n", "0 1.00 0 \n", "\n", " donor_advisory_2011_to_2016 donor_advisory_2016 org_id \\\n", "0 0 0 5954 \n", "\n", " program_efficiency ratings_system records_retention_policy_v2 state \\\n", "0 0.79 CN 2.1 1.00 ME \n", "\n", " tot_rev total_revenue_logged whistleblower_policy_v2 \\\n", "0 nan 16.38 1.00 \n", "\n", " ncategory revs EIN FYE \\\n", "0 Research and Public Policy 12967968.00 010202467 FY2014 \n", "\n", " independent_directors_num independent_directors_pct voting_directors \\\n", "0 21.00 0.95 22.00 \n", "\n", " outsourced_mgt 990_review audit_committee perm_rest_assets \\\n", "0 1.00 1.00 1.00 4433997.00 \n", "\n", " temp_rest_assets net_assets donor_restrictions tax_exempt_bond \\\n", "0 7351259.00 23690097.00 0.50 1.00 \n", "\n", " taxable_revenue taxable_revenue_binary \\\n", "0 0.00 0.00 \n", "\n", " taxable_revenue_neg_or_pos_rev_binary audited_financials \\\n", "0 0.00 1 \n", "\n", " independent_audited_fs federal_grant_audit_performed \\\n", "0 1 1 \n", "\n", " federal_grant_audit_required FeesForServicesAccounting_binary _merge \\\n", "0 1 1.00 both \n", "\n", " independent_directors_pct_v2 \n", "0 0.95 " ] }, "execution_count": 439, "metadata": {}, "output_type": "execute_result" 
} ], "source": [ "# Show the first row of the merged frame.\n", "merged.head(1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Save DF" ] }, { "cell_type": "code", "execution_count": 565, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Write the merged frame to a pickle file in the working directory.\n", "pd.to_pickle(merged, 'model 4 data with e-file governance variables (n=8,238).pkl')" ] }, { "cell_type": "code", "execution_count": 566, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Write the same frame to an Excel workbook in the working directory.\n", "merged.to_excel(excel_writer='model 4 data with e-file governance variables (n=8,238).xls')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 0 }