{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/gregorysaxton/Google Drive/SOX\n" ] } ], "source": [ "cd '/Users/gregorysaxton/Google Drive/SOX'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from pandas import DataFrame\n", "from pandas import Series" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Set PANDAS to show all columns in DataFrame\n", "pd.set_option('display.max_columns', None)\n", "pd.set_option('max_colwidth',200)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from __future__ import division" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "304\n", "85401\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
org_idEINorg_urlnamecategorycategory-fullDate PublishedForm 990 FYEForm 990 FYE, v2FYEEarliest Rating Publication Dateratings_systemOverall ScoreOverall Ratingadvisory text - current advisoryadvisory text - past advisorycurrent_or_past_donor_advisorycurrent_donor_advisorypast_donor_advisorylatest_entrycurrent_ratings_urlein_2016Publication_date_and_FY_2016Publication Date_2016FYE_2016donor_alert_2016overall_rating_2016efficiency_rating_rating_2016AT_rating_2016overall_rating_star_2016financial_rating_star_2016AT_rating_star_2016program_expense_percent_2016admin_expense_percent_2016fund_expense_percent_2016fund_efficiency_2016working_capital_ratio_2016program_expense_growth_2016liabilities_to_assets_2016independent_board_2016no_material_division_2016audited_financials_2016no_loans_related_2016documents_minutes_2016form_990_2016conflict_of_interest_policy_2016whistleblower_policy_2016records_retention_policy_2016CEO_listed_2016process_CEO_compensation_2016no_board_compensation_2016donor_privacy_policy_2016board_listed_2016audited_financials_web_2016form_990_web_2016staff_listed_2016contributions_gifts_grants_2016federated_campaigns_2016membership_dues_2016fundraising_events_2016related_organizations_2016government_grants_2016total_contributions_2016program_service_revenue_2016total_primary_revenue_2016other_revenue_2016total_revenue_2016program_expenses_2016administrative_expenses_2016fundraising_expenses_2016total_functional_expenses_2016payments_to_affiliates_2016excess_or_deficit_2016net_assets_2016comp_2016cp_2016mission_20162011_datacharity_name_2011category_2011city_2011state_2011cause_2011tag_line_2011url_2011ein_2011fye_2011overall_rating_2011overall_rating_2011_plus_30overall_rating_2011_plus_30_v2overall_rating_star_2011overall_rating_star_2011_textefficiency_rating_2011AT_rating_2011financial_rating_star_2011AT_rating_star_2011program_expense_percent_2011admin_expense_percent_2011fund_expense_percent_2011fund_efficiency_2011primary_revenue_growth_2011program_expense_growth_2011working_capital_ratio_2011independent_board_2011no_material_division_2011audited_financials_2011no_loans_related_2011documents_minutes_2011form_990_2011conflict_of_interest_policy_2011whistleblower_policy_2011records_retention_policy_2011CEO_listed_2011process_CEO_compensation_2011no_board_compensation_2011donor_privacy_policy_2011board_listed_2011audited_financials_web_2011form_990_web_2011staff_listed_2011primary_revenue_2011other_revenue_2011total_revenue_2011govt_revenue_2011program_expense_2011admin_expense_2011fund_expense_2011total_functional_expense_2011affiliate_payments_2011budget_surplus_2011net_assets_2011leader_comp_2011leader_comp_percent_2011email_2011website_20112016 Advisory - Date Posted2016 Advisory - Charity Name2016 Advisory - advisory_url2016 Advisory - advisory_merge_v1to_be_mergedNEW ROWNAME_2015_BMFSTREET_2015_BMFCITY_2015_BMFSTATE_2015_BMFZIP_2015_BMFRULING_2015_BMFACTIVITY_2015_BMFTAX_PERIOD_2015_BMFASSET_AMT_2015_BMFINCOME_AMT_2015_BMFREVENUE_AMT_2015_BMFNTEE_CD_2015_BMF2015 BMFruledate_2004_BMFname_MSTRALLstate_MSTRALLNTEE1_MSTRALLnteecc_MSTRALLzip_MSTRALLfips_MSTRALLtaxper_MSTRALLincome_MSTRALLF990REV_MSTRALLassets_MSTRALLruledate_MSTRALLdeductcd_MSTRALLaccper_MSTRALLrule_date_v1taxpdNAME_SOIyr_frmtnpt1_num_vtng_gvrn_bdy_memspt1_num_ind_vtng_memsnum_vtng_gvrn_bdy_memsnum_ind_vtng_memstot_num_emplstot_num_vlntrscontri_grnts_cyprog_srvc_rev_cyinvst_incm_cyoth_rev_cygrnts_and_smlr_amts_cytot_prof_fndrsng_exp_cytot_fndrsng_exp_cypt1_tot_asts_eoyaud_fincl_stmtsmtrl_divrsn_or_misusecnflct_int_plcywhistleblower_plcydoc_retention_plcyfederated_campaignsmemshp_duesrltd_orgsgovt_grntsall_oth_contrinncsh_contritot_contripsr_totinv_incm_tot_revbonds_tot_revroylrev_tot_revnet_rent_tot_revgain_or_loss_secgain_or_loss_othoth_rev_tottot_revmgmt_srvc_fee_totfee_for_srvc_leg_totfee_for_srvc_acct_totfee_for_srvc_lbby_totfee_for_srvc_prof_totfee_for_srvc_invst_totfee_for_srvc_oth_totfs_auditedaudit_committeevlntr_hrs_merge_v2rule_dateruledate_2004_BMF_v2ruledate_MSTRALL_v2yr_frmtn_v2agecategory_Animalscategory_Arts, Culture, Humanitiescategory_Community Developmentcategory_Educationcategory_Environmentcategory_Healthcategory_Human Servicescategory_Human and Civil Rightscategory_Internationalcategory_Religioncategory_Research and Public Policygovt_revenue_2011_binaryother_revenue_2011_binarycomplexity_2011advisorySOX_policies_2011total_revenue_2011_loggedtotal_revenuetotal_revenue_loggedprogram_efficiency_2016statetot_func_expns_prg_srvcstot_func_expns_tot_merge_v3program_expensestotal_expensesprogram_efficiencyfndrsng_events_merge_v4other_revenue_SOIcomplexity_2016complexity_SOIcomplexityconflict_of_interest_policywhistleblower_policyrecords_retention_policyconflict_of_interest_policy_v2records_retention_policy_v2whistleblower_policy_v2SOX_policiesSOX_policies_binary2016_dataAdvisory Textdonor_advisorydonor_advisory_2016donor_advisory_2011_to_2016SOX_policies_all_binarytotal_revenue_no_negEIN_47conflict_of_interest_policy_47records_retention_policy_47whistleblower_policy_47SOX_policies_47SOX_policies_all_binary_47SOX_policies_binary_47tot_rev_47total_revenue_logged_47program_expenses_47total_expenses_47program_efficiency_47complexity_47_merge_47OrganizationName_efileURL_efileSubmittedOn_efileTaxPeriod_efilewhistleblower_policy_efileconflict_of_interest_policy_efilerecords_retention_policy_efileSOX_policies_efileSOX_policies_binary_efileSOX_policies_all_binary_efiletot_rev_efiletot_rev_no_neg_efiletotal_revenue_logged_efileprogram_expenses_efiletotal_expenses_efileprogram_efficiency_efilecomplexity_efile_merge_efile
016722020503776http://www.charitynavigator.org/index.cfm?bay=search.summary&orgid=16722Portsmouth Girls Softball AssociationHuman ServicesHuman Services : Multipurpose Human Service Organizations2016-08-12 00:00:00current2015-01-01currentNaNcurrentNaNcurrent (2016) donor advisoryOn August 1, 2016, the New Hampshire Union Leader published an article titled, \"Former Portsmouth youth softball president accused of stealing thousands from nonprofit.\"NaN1.01.00.0Truehttp://www.charitynavigator.org/index.cfm?bay=search.summary&orgid=16722NaNNaNNaNcurrentcurrent donor advisory 2016NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only0.0NaNPORTSMOUTH GIRLS SOFTBALL ASSOCIATIONPO BOX 8092PORTSMOUTHNH03802-8092201104.00.0201309.00.00.00.0N631.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2011NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only2011NaNNaNnan5.00.00.00.00.00.00.01.00.00.00.00.0NaNNaNNaN1.0NaNNaNNaNNaNNaNNHNaNNaNleft_onlyNaNNaNNaNNaNleft_onlyNaN0.00.00.0NaNNaNNaNNaNNaNNaNNaNNaN1.0On August 1, 2016, the New Hampshire Union Leader published an article titled, \"Former Portsmouth youth softball president accused of stealing thousands from nonprofit.\"1.01.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_onlyNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
\n", "
" ], "text/plain": [ " org_id EIN \\\n", "0 16722 020503776 \n", "\n", " org_url \\\n", "0 http://www.charitynavigator.org/index.cfm?bay=search.summary&orgid=16722 \n", "\n", " name category \\\n", "0 Portsmouth Girls Softball Association Human Services \n", "\n", " category-full \\\n", "0 Human Services : Multipurpose Human Service Organizations \n", "\n", " Date Published Form 990 FYE Form 990 FYE, v2 FYE \\\n", "0 2016-08-12 00:00:00 current 2015-01-01 current \n", "\n", " Earliest Rating Publication Date ratings_system Overall Score \\\n", "0 NaN current NaN \n", "\n", " Overall Rating \\\n", "0 current (2016) donor advisory \n", "\n", " advisory text - current advisory \\\n", "0 On August 1, 2016, the New Hampshire Union Leader published an article titled, \"Former Portsmouth youth softball president accused of stealing thousands from nonprofit.\" \n", "\n", " advisory text - past advisory current_or_past_donor_advisory \\\n", "0 NaN 1.0 \n", "\n", " current_donor_advisory past_donor_advisory latest_entry \\\n", "0 1.0 0.0 True \n", "\n", " current_ratings_url \\\n", "0 http://www.charitynavigator.org/index.cfm?bay=search.summary&orgid=16722 \n", "\n", " ein_2016 Publication_date_and_FY_2016 Publication Date_2016 FYE_2016 \\\n", "0 NaN NaN NaN current \n", "\n", " donor_alert_2016 overall_rating_2016 \\\n", "0 current donor advisory 2016 NaN \n", "\n", " efficiency_rating_rating_2016 AT_rating_2016 overall_rating_star_2016 \\\n", "0 NaN NaN NaN \n", "\n", " financial_rating_star_2016 AT_rating_star_2016 program_expense_percent_2016 \\\n", "0 NaN NaN NaN \n", "\n", " admin_expense_percent_2016 fund_expense_percent_2016 fund_efficiency_2016 \\\n", "0 NaN NaN NaN \n", "\n", " working_capital_ratio_2016 program_expense_growth_2016 \\\n", "0 NaN NaN \n", "\n", " liabilities_to_assets_2016 independent_board_2016 no_material_division_2016 \\\n", "0 NaN NaN NaN \n", "\n", " audited_financials_2016 no_loans_related_2016 documents_minutes_2016 \\\n", "0 NaN NaN NaN \n", "\n", " form_990_2016 conflict_of_interest_policy_2016 whistleblower_policy_2016 \\\n", "0 NaN NaN NaN \n", "\n", " records_retention_policy_2016 CEO_listed_2016 process_CEO_compensation_2016 \\\n", "0 NaN NaN NaN \n", "\n", " no_board_compensation_2016 donor_privacy_policy_2016 board_listed_2016 \\\n", "0 NaN NaN NaN \n", "\n", " audited_financials_web_2016 form_990_web_2016 staff_listed_2016 \\\n", "0 NaN NaN NaN \n", "\n", " contributions_gifts_grants_2016 federated_campaigns_2016 \\\n", "0 NaN NaN \n", "\n", " membership_dues_2016 fundraising_events_2016 related_organizations_2016 \\\n", "0 NaN NaN NaN \n", "\n", " government_grants_2016 total_contributions_2016 \\\n", "0 NaN NaN \n", "\n", " program_service_revenue_2016 total_primary_revenue_2016 \\\n", "0 NaN NaN \n", "\n", " other_revenue_2016 total_revenue_2016 program_expenses_2016 \\\n", "0 NaN NaN NaN \n", "\n", " administrative_expenses_2016 fundraising_expenses_2016 \\\n", "0 NaN NaN \n", "\n", " total_functional_expenses_2016 payments_to_affiliates_2016 \\\n", "0 NaN NaN \n", "\n", " excess_or_deficit_2016 net_assets_2016 comp_2016 cp_2016 mission_2016 \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " 2011_data charity_name_2011 category_2011 city_2011 state_2011 cause_2011 \\\n", "0 0.0 NaN NaN NaN NaN NaN \n", "\n", " tag_line_2011 url_2011 ein_2011 fye_2011 overall_rating_2011 \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " overall_rating_2011_plus_30 overall_rating_2011_plus_30_v2 \\\n", "0 NaN NaN \n", "\n", " overall_rating_star_2011 overall_rating_star_2011_text \\\n", "0 NaN NaN \n", "\n", " efficiency_rating_2011 AT_rating_2011 financial_rating_star_2011 \\\n", "0 NaN NaN NaN \n", "\n", " AT_rating_star_2011 program_expense_percent_2011 \\\n", "0 NaN NaN \n", "\n", " admin_expense_percent_2011 fund_expense_percent_2011 \\\n", "0 NaN NaN \n", "\n", " fund_efficiency_2011 primary_revenue_growth_2011 \\\n", "0 NaN NaN \n", "\n", " program_expense_growth_2011 working_capital_ratio_2011 \\\n", "0 NaN NaN \n", "\n", " independent_board_2011 no_material_division_2011 audited_financials_2011 \\\n", "0 NaN NaN NaN \n", "\n", " no_loans_related_2011 documents_minutes_2011 form_990_2011 \\\n", "0 NaN NaN NaN \n", "\n", " conflict_of_interest_policy_2011 whistleblower_policy_2011 \\\n", "0 NaN NaN \n", "\n", " records_retention_policy_2011 CEO_listed_2011 process_CEO_compensation_2011 \\\n", "0 NaN NaN NaN \n", "\n", " no_board_compensation_2011 donor_privacy_policy_2011 board_listed_2011 \\\n", "0 NaN NaN NaN \n", "\n", " audited_financials_web_2011 form_990_web_2011 staff_listed_2011 \\\n", "0 NaN NaN NaN \n", "\n", " primary_revenue_2011 other_revenue_2011 total_revenue_2011 \\\n", "0 NaN NaN NaN \n", "\n", " govt_revenue_2011 program_expense_2011 admin_expense_2011 \\\n", "0 NaN NaN NaN \n", "\n", " fund_expense_2011 total_functional_expense_2011 affiliate_payments_2011 \\\n", "0 NaN NaN NaN \n", "\n", " budget_surplus_2011 net_assets_2011 leader_comp_2011 \\\n", "0 NaN NaN NaN \n", "\n", " leader_comp_percent_2011 email_2011 website_2011 \\\n", "0 NaN NaN NaN \n", "\n", " 2016 Advisory - Date Posted 2016 Advisory - Charity Name \\\n", "0 NaN NaN \n", "\n", " 2016 Advisory - advisory_url 2016 Advisory - advisory _merge_v1 \\\n", "0 NaN NaN left_only \n", "\n", " to_be_merged NEW ROW NAME_2015_BMF \\\n", "0 0.0 NaN PORTSMOUTH GIRLS SOFTBALL ASSOCIATION \n", "\n", " STREET_2015_BMF CITY_2015_BMF STATE_2015_BMF ZIP_2015_BMF RULING_2015_BMF \\\n", "0 PO BOX 8092 PORTSMOUTH NH 03802-8092 201104.0 \n", "\n", " ACTIVITY_2015_BMF TAX_PERIOD_2015_BMF ASSET_AMT_2015_BMF \\\n", "0 0.0 201309.0 0.0 \n", "\n", " INCOME_AMT_2015_BMF REVENUE_AMT_2015_BMF NTEE_CD_2015_BMF 2015 BMF \\\n", "0 0.0 0.0 N63 1.0 \n", "\n", " ruledate_2004_BMF name_MSTRALL state_MSTRALL NTEE1_MSTRALL nteecc_MSTRALL \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " zip_MSTRALL fips_MSTRALL taxper_MSTRALL income_MSTRALL F990REV_MSTRALL \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " assets_MSTRALL ruledate_MSTRALL deductcd_MSTRALL accper_MSTRALL \\\n", "0 NaN NaN NaN NaN \n", "\n", " rule_date_v1 taxpd NAME_SOI yr_frmtn pt1_num_vtng_gvrn_bdy_mems \\\n", "0 2011 NaN NaN NaN NaN \n", "\n", " pt1_num_ind_vtng_mems num_vtng_gvrn_bdy_mems num_ind_vtng_mems \\\n", "0 NaN NaN NaN \n", "\n", " tot_num_empls tot_num_vlntrs contri_grnts_cy prog_srvc_rev_cy \\\n", "0 NaN NaN NaN NaN \n", "\n", " invst_incm_cy oth_rev_cy grnts_and_smlr_amts_cy tot_prof_fndrsng_exp_cy \\\n", "0 NaN NaN NaN NaN \n", "\n", " tot_fndrsng_exp_cy pt1_tot_asts_eoy aud_fincl_stmts mtrl_divrsn_or_misuse \\\n", "0 NaN NaN NaN NaN \n", "\n", " cnflct_int_plcy whistleblower_plcy doc_retention_plcy federated_campaigns \\\n", "0 NaN NaN NaN NaN \n", "\n", " memshp_dues rltd_orgs govt_grnts all_oth_contri nncsh_contri \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " tot_contri psr_tot inv_incm_tot_rev bonds_tot_rev roylrev_tot_rev \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " net_rent_tot_rev gain_or_loss_sec gain_or_loss_oth oth_rev_tot tot_rev \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " mgmt_srvc_fee_tot fee_for_srvc_leg_tot fee_for_srvc_acct_tot \\\n", "0 NaN NaN NaN \n", "\n", " fee_for_srvc_lbby_tot fee_for_srvc_prof_tot fee_for_srvc_invst_tot \\\n", "0 NaN NaN NaN \n", "\n", " fee_for_srvc_oth_tot fs_audited audit_committee vlntr_hrs _merge_v2 \\\n", "0 NaN NaN NaN NaN left_only \n", "\n", " rule_date ruledate_2004_BMF_v2 ruledate_MSTRALL_v2 yr_frmtn_v2 age \\\n", "0 2011 NaN NaN nan 5.0 \n", "\n", " category_Animals category_Arts, Culture, Humanities \\\n", "0 0.0 0.0 \n", "\n", " category_Community Development category_Education category_Environment \\\n", "0 0.0 0.0 0.0 \n", "\n", " category_Health category_Human Services category_Human and Civil Rights \\\n", "0 0.0 1.0 0.0 \n", "\n", " category_International category_Religion \\\n", "0 0.0 0.0 \n", "\n", " category_Research and Public Policy govt_revenue_2011_binary \\\n", "0 0.0 NaN \n", "\n", " other_revenue_2011_binary complexity_2011 advisory SOX_policies_2011 \\\n", "0 NaN NaN 1.0 NaN \n", "\n", " total_revenue_2011_logged total_revenue total_revenue_logged \\\n", "0 NaN NaN NaN \n", "\n", " program_efficiency_2016 state tot_func_expns_prg_srvcs \\\n", "0 NaN NH NaN \n", "\n", " tot_func_expns_tot _merge_v3 program_expenses total_expenses \\\n", "0 NaN left_only NaN NaN \n", "\n", " program_efficiency fndrsng_events _merge_v4 other_revenue_SOI \\\n", "0 NaN NaN left_only NaN \n", "\n", " complexity_2016 complexity_SOI complexity conflict_of_interest_policy \\\n", "0 0.0 0.0 0.0 NaN \n", "\n", " whistleblower_policy records_retention_policy \\\n", "0 NaN NaN \n", "\n", " conflict_of_interest_policy_v2 records_retention_policy_v2 \\\n", "0 NaN NaN \n", "\n", " whistleblower_policy_v2 SOX_policies SOX_policies_binary 2016_data \\\n", "0 NaN NaN NaN 1.0 \n", "\n", " Advisory Text \\\n", "0 On August 1, 2016, the New Hampshire Union Leader published an article titled, \"Former Portsmouth youth softball president accused of stealing thousands from nonprofit.\" \n", "\n", " donor_advisory donor_advisory_2016 donor_advisory_2011_to_2016 \\\n", "0 1.0 1.0 1.0 \n", "\n", " SOX_policies_all_binary total_revenue_no_neg EIN_47 \\\n", "0 NaN NaN NaN \n", "\n", " conflict_of_interest_policy_47 records_retention_policy_47 \\\n", "0 NaN NaN \n", "\n", " whistleblower_policy_47 SOX_policies_47 SOX_policies_all_binary_47 \\\n", "0 NaN NaN NaN \n", "\n", " SOX_policies_binary_47 tot_rev_47 total_revenue_logged_47 \\\n", "0 NaN NaN NaN \n", "\n", " program_expenses_47 total_expenses_47 program_efficiency_47 \\\n", "0 NaN NaN NaN \n", "\n", " complexity_47 _merge_47 OrganizationName_efile URL_efile \\\n", "0 NaN left_only NaN NaN \n", "\n", " SubmittedOn_efile TaxPeriod_efile whistleblower_policy_efile \\\n", "0 NaN NaN NaN \n", "\n", " conflict_of_interest_policy_efile records_retention_policy_efile \\\n", "0 NaN NaN \n", "\n", " SOX_policies_efile SOX_policies_binary_efile \\\n", "0 NaN NaN \n", "\n", " SOX_policies_all_binary_efile tot_rev_efile tot_rev_no_neg_efile \\\n", "0 NaN NaN NaN \n", "\n", " total_revenue_logged_efile program_expenses_efile total_expenses_efile \\\n", "0 NaN NaN NaN \n", "\n", " program_efficiency_efile complexity_efile _merge_efile \n", "0 NaN NaN left_only " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_pickle('Merged CN dataset with Age, State, Category, Total Revenues, Efficiency, Complexity, SOX, Donor Advisory (with added 990 data).pkl')\n", "print len(df.columns)\n", "print len(df)\n", "df.head(1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "'fund_expense_percent_2016', 'program_expense_growth_2016', 'independent_board_2016', 'audited_financials_2016',\n", "'no_loans_related_2016', 'contributions_gifts_grants_2016', 'total_contributions_2016',\n", "'fund_expense_percent_2011', 'fund_expense_2011', 'primary_revenue_growth_2011', 'program_expense_growth_2011',\n", "'independent_board_2011', 'audited_financials_2011', 'no_loans_related_2011',\n", "'pt1_num_vtng_gvrn_bdy_mems', 'pt1_num_ind_vtng_mems', 'num_vtng_gvrn_bdy_mems', 'num_ind_vtng_mems',\n", "'contri_grnts_cy', 'tot_fndrsng_exp_cy', 'aud_fincl_stmts', 'fs_audited', 'audit_committee'," ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "85401\n", "85401 \n", "\n", "FY2014 15155\n", "FY2013 8671\n", "FY2009 7865\n", "FY2012 7792\n", "FY2010 7145\n", "FY2011 6879\n", "FY2008 4770\n", "FY2007 4441\n", "FY2006 4315\n", "FY2005 4117\n", "FY2004 3887\n", "FY2003 3178\n", "FY2015 2386\n", "FY2002 2190\n", "FY2001 1698\n", "FY2000 591\n", "current 321\n", "Name: FYE, dtype: int64\n" ] } ], "source": [ "print len(df)\n", "print df['FYE'].value_counts().sum(), '\\n'\n", "print df['FYE'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Limit DF to 2016" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "False 75593\n", "True 8304\n", "False 48\n", "Name: latest_entry, dtype: int64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['latest_entry'].value_counts()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "8304" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df[df['latest_entry']=='True']\n", "len(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merge in data for missing orgs" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "152\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EINFYEaudit_committeeboard_sizeindependent_membersaudited_financialsfundraising_explow_quality_reporting
0030498214FY20120.06601401830
1042129889FY20141.02319101
\n", "
" ], "text/plain": [ " EIN FYE audit_committee board_size independent_members \\\n", "0 030498214 FY2012 0.0 6 6 \n", "1 042129889 FY2014 1.0 23 19 \n", "\n", " audited_financials fundraising_exp low_quality_reporting \n", "0 0 140183 0 \n", "1 1 0 1 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_advisories = pd.read_pickle('Additional Variables for Robustness Tests - 152 advisory orgs.pkl')\n", "print len(df_advisories)\n", "df_advisories[:2]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8304\n", "8304\n", "left_only 8152\n", "both 152\n", "right_only 0\n", "Name: _merge, dtype: int64\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
org_idEINorg_urlnamecategorycategory-fullDate PublishedForm 990 FYEForm 990 FYE, v2FYE_xEarliest Rating Publication Dateratings_systemOverall ScoreOverall Ratingadvisory text - current advisoryadvisory text - past advisorycurrent_or_past_donor_advisorycurrent_donor_advisorypast_donor_advisorylatest_entrycurrent_ratings_urlein_2016Publication_date_and_FY_2016Publication Date_2016FYE_2016donor_alert_2016overall_rating_2016efficiency_rating_rating_2016AT_rating_2016overall_rating_star_2016financial_rating_star_2016AT_rating_star_2016program_expense_percent_2016admin_expense_percent_2016fund_expense_percent_2016fund_efficiency_2016working_capital_ratio_2016program_expense_growth_2016liabilities_to_assets_2016independent_board_2016no_material_division_2016audited_financials_2016no_loans_related_2016documents_minutes_2016form_990_2016conflict_of_interest_policy_2016whistleblower_policy_2016records_retention_policy_2016CEO_listed_2016process_CEO_compensation_2016no_board_compensation_2016donor_privacy_policy_2016board_listed_2016audited_financials_web_2016form_990_web_2016staff_listed_2016contributions_gifts_grants_2016federated_campaigns_2016membership_dues_2016fundraising_events_2016related_organizations_2016government_grants_2016total_contributions_2016program_service_revenue_2016total_primary_revenue_2016other_revenue_2016total_revenue_2016program_expenses_2016administrative_expenses_2016fundraising_expenses_2016total_functional_expenses_2016payments_to_affiliates_2016excess_or_deficit_2016net_assets_2016comp_2016cp_2016mission_20162011_datacharity_name_2011category_2011city_2011state_2011cause_2011tag_line_2011url_2011ein_2011fye_2011overall_rating_2011overall_rating_2011_plus_30overall_rating_2011_plus_30_v2overall_rating_star_2011overall_rating_star_2011_textefficiency_rating_2011AT_rating_2011financial_rating_star_2011AT_rating_star_2011program_expense_percent_2011admin_expense_percent_2011fund_expense_percent_2011fund_efficiency_2011primary_revenue_growth_2011program_expense_growth_2011working_capital_ratio_2011independent_board_2011no_material_division_2011audited_financials_2011no_loans_related_2011documents_minutes_2011form_990_2011conflict_of_interest_policy_2011whistleblower_policy_2011records_retention_policy_2011CEO_listed_2011process_CEO_compensation_2011no_board_compensation_2011donor_privacy_policy_2011board_listed_2011audited_financials_web_2011form_990_web_2011staff_listed_2011primary_revenue_2011other_revenue_2011total_revenue_2011govt_revenue_2011program_expense_2011admin_expense_2011fund_expense_2011total_functional_expense_2011affiliate_payments_2011budget_surplus_2011net_assets_2011leader_comp_2011leader_comp_percent_2011email_2011website_20112016 Advisory - Date Posted2016 Advisory - Charity Name2016 Advisory - advisory_url2016 Advisory - advisory_merge_v1to_be_mergedNEW ROWNAME_2015_BMFSTREET_2015_BMFCITY_2015_BMFSTATE_2015_BMFZIP_2015_BMFRULING_2015_BMFACTIVITY_2015_BMFTAX_PERIOD_2015_BMFASSET_AMT_2015_BMFINCOME_AMT_2015_BMFREVENUE_AMT_2015_BMFNTEE_CD_2015_BMF2015 BMFruledate_2004_BMFname_MSTRALLstate_MSTRALLNTEE1_MSTRALLnteecc_MSTRALLzip_MSTRALLfips_MSTRALLtaxper_MSTRALLincome_MSTRALLF990REV_MSTRALLassets_MSTRALLruledate_MSTRALLdeductcd_MSTRALLaccper_MSTRALLrule_date_v1taxpdNAME_SOIyr_frmtnpt1_num_vtng_gvrn_bdy_memspt1_num_ind_vtng_memsnum_vtng_gvrn_bdy_memsnum_ind_vtng_memstot_num_emplstot_num_vlntrscontri_grnts_cyprog_srvc_rev_cyinvst_incm_cyoth_rev_cygrnts_and_smlr_amts_cytot_prof_fndrsng_exp_cytot_fndrsng_exp_cypt1_tot_asts_eoyaud_fincl_stmtsmtrl_divrsn_or_misusecnflct_int_plcywhistleblower_plcydoc_retention_plcyfederated_campaignsmemshp_duesrltd_orgsgovt_grntsall_oth_contrinncsh_contritot_contripsr_totinv_incm_tot_revbonds_tot_revroylrev_tot_revnet_rent_tot_revgain_or_loss_secgain_or_loss_othoth_rev_tottot_revmgmt_srvc_fee_totfee_for_srvc_leg_totfee_for_srvc_acct_totfee_for_srvc_lbby_totfee_for_srvc_prof_totfee_for_srvc_invst_totfee_for_srvc_oth_totfs_auditedaudit_committee_xvlntr_hrs_merge_v2rule_dateruledate_2004_BMF_v2ruledate_MSTRALL_v2yr_frmtn_v2agecategory_Animalscategory_Arts, Culture, Humanitiescategory_Community Developmentcategory_Educationcategory_Environmentcategory_Healthcategory_Human Servicescategory_Human and Civil Rightscategory_Internationalcategory_Religioncategory_Research and Public Policygovt_revenue_2011_binaryother_revenue_2011_binarycomplexity_2011advisorySOX_policies_2011total_revenue_2011_loggedtotal_revenuetotal_revenue_loggedprogram_efficiency_2016statetot_func_expns_prg_srvcstot_func_expns_tot_merge_v3program_expensestotal_expensesprogram_efficiencyfndrsng_events_merge_v4other_revenue_SOIcomplexity_2016complexity_SOIcomplexityconflict_of_interest_policywhistleblower_policyrecords_retention_policyconflict_of_interest_policy_v2records_retention_policy_v2whistleblower_policy_v2SOX_policiesSOX_policies_binary2016_dataAdvisory Textdonor_advisorydonor_advisory_2016donor_advisory_2011_to_2016SOX_policies_all_binarytotal_revenue_no_negEIN_47conflict_of_interest_policy_47records_retention_policy_47whistleblower_policy_47SOX_policies_47SOX_policies_all_binary_47SOX_policies_binary_47tot_rev_47total_revenue_logged_47program_expenses_47total_expenses_47program_efficiency_47complexity_47_merge_47OrganizationName_efileURL_efileSubmittedOn_efileTaxPeriod_efilewhistleblower_policy_efileconflict_of_interest_policy_efilerecords_retention_policy_efileSOX_policies_efileSOX_policies_binary_efileSOX_policies_all_binary_efiletot_rev_efiletot_rev_no_neg_efiletotal_revenue_logged_efileprogram_expenses_efiletotal_expenses_efileprogram_efficiency_efilecomplexity_efile_merge_efileFYE_yaudit_committee_yboard_sizeindependent_membersaudited_financialsfundraising_explow_quality_reporting_merge
016722020503776http://www.charitynavigator.org/index.cfm?bay=search.summary&orgid=16722Portsmouth Girls Softball AssociationHuman ServicesHuman Services : Multipurpose Human Service Organizations2016-08-12 00:00:00current2015-01-01currentNaNcurrentNaNcurrent (2016) donor advisoryOn August 1, 2016, the New Hampshire Union Leader published an article titled, \"Former Portsmouth youth softball president accused of stealing thousands from nonprofit.\"NaN1.01.00.0Truehttp://www.charitynavigator.org/index.cfm?bay=search.summary&orgid=16722NaNNaNNaNcurrentcurrent donor advisory 2016NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only0.0NaNPORTSMOUTH GIRLS SOFTBALL ASSOCIATIONPO BOX 8092PORTSMOUTHNH03802-8092201104.00.0201309.00.00.00.0N631.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2011NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only2011NaNNaNnan5.00.00.00.00.00.00.01.00.00.00.00.0NaNNaNNaN1.0NaNNaNNaNNaNNaNNHNaNNaNleft_onlyNaNNaNNaNNaNleft_onlyNaN0.00.00.0NaNNaNNaNNaNNaNNaNNaNNaN1.0On August 1, 2016, the New Hampshire Union Leader published an article titled, \"Former Portsmouth youth softball president accused of stealing thousands from nonprofit.\"1.01.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_onlyNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_onlyNaNNaNNaNNaNNaNNaNNaNleft_only
\n", "
" ], "text/plain": [ " org_id EIN \\\n", "0 16722 020503776 \n", "\n", " org_url \\\n", "0 http://www.charitynavigator.org/index.cfm?bay=search.summary&orgid=16722 \n", "\n", " name category \\\n", "0 Portsmouth Girls Softball Association Human Services \n", "\n", " category-full \\\n", "0 Human Services : Multipurpose Human Service Organizations \n", "\n", " Date Published Form 990 FYE Form 990 FYE, v2 FYE_x \\\n", "0 2016-08-12 00:00:00 current 2015-01-01 current \n", "\n", " Earliest Rating Publication Date ratings_system Overall Score \\\n", "0 NaN current NaN \n", "\n", " Overall Rating \\\n", "0 current (2016) donor advisory \n", "\n", " advisory text - current advisory \\\n", "0 On August 1, 2016, the New Hampshire Union Leader published an article titled, \"Former Portsmouth youth softball president accused of stealing thousands from nonprofit.\" \n", "\n", " advisory text - past advisory current_or_past_donor_advisory \\\n", "0 NaN 1.0 \n", "\n", " current_donor_advisory past_donor_advisory latest_entry \\\n", "0 1.0 0.0 True \n", "\n", " current_ratings_url \\\n", "0 http://www.charitynavigator.org/index.cfm?bay=search.summary&orgid=16722 \n", "\n", " ein_2016 Publication_date_and_FY_2016 Publication Date_2016 FYE_2016 \\\n", "0 NaN NaN NaN current \n", "\n", " donor_alert_2016 overall_rating_2016 \\\n", "0 current donor advisory 2016 NaN \n", "\n", " efficiency_rating_rating_2016 AT_rating_2016 overall_rating_star_2016 \\\n", "0 NaN NaN NaN \n", "\n", " financial_rating_star_2016 AT_rating_star_2016 program_expense_percent_2016 \\\n", "0 NaN NaN NaN \n", "\n", " admin_expense_percent_2016 fund_expense_percent_2016 fund_efficiency_2016 \\\n", "0 NaN NaN NaN \n", "\n", " working_capital_ratio_2016 program_expense_growth_2016 \\\n", "0 NaN NaN \n", "\n", " liabilities_to_assets_2016 independent_board_2016 no_material_division_2016 \\\n", "0 NaN NaN NaN \n", "\n", " audited_financials_2016 no_loans_related_2016 documents_minutes_2016 \\\n", "0 NaN NaN NaN \n", "\n", " form_990_2016 conflict_of_interest_policy_2016 whistleblower_policy_2016 \\\n", "0 NaN NaN NaN \n", "\n", " records_retention_policy_2016 CEO_listed_2016 process_CEO_compensation_2016 \\\n", "0 NaN NaN NaN \n", "\n", " no_board_compensation_2016 donor_privacy_policy_2016 board_listed_2016 \\\n", "0 NaN NaN NaN \n", "\n", " audited_financials_web_2016 form_990_web_2016 staff_listed_2016 \\\n", "0 NaN NaN NaN \n", "\n", " contributions_gifts_grants_2016 federated_campaigns_2016 \\\n", "0 NaN NaN \n", "\n", " membership_dues_2016 fundraising_events_2016 related_organizations_2016 \\\n", "0 NaN NaN NaN \n", "\n", " government_grants_2016 total_contributions_2016 \\\n", "0 NaN NaN \n", "\n", " program_service_revenue_2016 total_primary_revenue_2016 \\\n", "0 NaN NaN \n", "\n", " other_revenue_2016 total_revenue_2016 program_expenses_2016 \\\n", "0 NaN NaN NaN \n", "\n", " administrative_expenses_2016 fundraising_expenses_2016 \\\n", "0 NaN NaN \n", "\n", " total_functional_expenses_2016 payments_to_affiliates_2016 \\\n", "0 NaN NaN \n", "\n", " excess_or_deficit_2016 net_assets_2016 comp_2016 cp_2016 mission_2016 \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " 2011_data charity_name_2011 category_2011 city_2011 state_2011 cause_2011 \\\n", "0 0.0 NaN NaN NaN NaN NaN \n", "\n", " tag_line_2011 url_2011 ein_2011 fye_2011 overall_rating_2011 \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " overall_rating_2011_plus_30 overall_rating_2011_plus_30_v2 \\\n", "0 NaN NaN \n", "\n", " overall_rating_star_2011 overall_rating_star_2011_text \\\n", "0 NaN NaN \n", "\n", " efficiency_rating_2011 AT_rating_2011 financial_rating_star_2011 \\\n", "0 NaN NaN NaN \n", "\n", " AT_rating_star_2011 program_expense_percent_2011 \\\n", "0 NaN NaN \n", "\n", " admin_expense_percent_2011 fund_expense_percent_2011 \\\n", "0 NaN NaN \n", "\n", " fund_efficiency_2011 primary_revenue_growth_2011 \\\n", "0 NaN NaN \n", "\n", " program_expense_growth_2011 working_capital_ratio_2011 \\\n", "0 NaN NaN \n", "\n", " independent_board_2011 no_material_division_2011 audited_financials_2011 \\\n", "0 NaN NaN NaN \n", "\n", " no_loans_related_2011 documents_minutes_2011 form_990_2011 \\\n", "0 NaN NaN NaN \n", "\n", " conflict_of_interest_policy_2011 whistleblower_policy_2011 \\\n", "0 NaN NaN \n", "\n", " records_retention_policy_2011 CEO_listed_2011 process_CEO_compensation_2011 \\\n", "0 NaN NaN NaN \n", "\n", " no_board_compensation_2011 donor_privacy_policy_2011 board_listed_2011 \\\n", "0 NaN NaN NaN \n", "\n", " audited_financials_web_2011 form_990_web_2011 staff_listed_2011 \\\n", "0 NaN NaN NaN \n", "\n", " primary_revenue_2011 other_revenue_2011 total_revenue_2011 \\\n", "0 NaN NaN NaN \n", "\n", " govt_revenue_2011 program_expense_2011 admin_expense_2011 \\\n", "0 NaN NaN NaN \n", "\n", " fund_expense_2011 total_functional_expense_2011 affiliate_payments_2011 \\\n", "0 NaN NaN NaN \n", "\n", " budget_surplus_2011 net_assets_2011 leader_comp_2011 \\\n", "0 NaN NaN NaN \n", "\n", " leader_comp_percent_2011 email_2011 website_2011 \\\n", "0 NaN NaN NaN \n", "\n", " 2016 Advisory - Date Posted 2016 Advisory - Charity Name \\\n", "0 NaN NaN \n", "\n", " 2016 Advisory - advisory_url 2016 Advisory - advisory _merge_v1 \\\n", "0 NaN NaN left_only \n", "\n", " to_be_merged NEW ROW NAME_2015_BMF \\\n", "0 0.0 NaN PORTSMOUTH GIRLS SOFTBALL ASSOCIATION \n", "\n", " STREET_2015_BMF CITY_2015_BMF STATE_2015_BMF ZIP_2015_BMF RULING_2015_BMF \\\n", "0 PO BOX 8092 PORTSMOUTH NH 03802-8092 201104.0 \n", "\n", " ACTIVITY_2015_BMF TAX_PERIOD_2015_BMF ASSET_AMT_2015_BMF \\\n", "0 0.0 201309.0 0.0 \n", "\n", " INCOME_AMT_2015_BMF REVENUE_AMT_2015_BMF NTEE_CD_2015_BMF 2015 BMF \\\n", "0 0.0 0.0 N63 1.0 \n", "\n", " ruledate_2004_BMF name_MSTRALL state_MSTRALL NTEE1_MSTRALL nteecc_MSTRALL \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " zip_MSTRALL fips_MSTRALL taxper_MSTRALL income_MSTRALL F990REV_MSTRALL \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " assets_MSTRALL ruledate_MSTRALL deductcd_MSTRALL accper_MSTRALL \\\n", "0 NaN NaN NaN NaN \n", "\n", " rule_date_v1 taxpd NAME_SOI yr_frmtn pt1_num_vtng_gvrn_bdy_mems \\\n", "0 2011 NaN NaN NaN NaN \n", "\n", " pt1_num_ind_vtng_mems num_vtng_gvrn_bdy_mems num_ind_vtng_mems \\\n", "0 NaN NaN NaN \n", "\n", " tot_num_empls tot_num_vlntrs contri_grnts_cy prog_srvc_rev_cy \\\n", "0 NaN NaN NaN NaN \n", "\n", " invst_incm_cy oth_rev_cy grnts_and_smlr_amts_cy tot_prof_fndrsng_exp_cy \\\n", "0 NaN NaN NaN NaN \n", "\n", " tot_fndrsng_exp_cy pt1_tot_asts_eoy aud_fincl_stmts mtrl_divrsn_or_misuse \\\n", "0 NaN NaN NaN NaN \n", "\n", " cnflct_int_plcy whistleblower_plcy doc_retention_plcy federated_campaigns \\\n", "0 NaN NaN NaN NaN \n", "\n", " memshp_dues rltd_orgs govt_grnts all_oth_contri nncsh_contri \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " tot_contri psr_tot inv_incm_tot_rev bonds_tot_rev roylrev_tot_rev \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " net_rent_tot_rev gain_or_loss_sec gain_or_loss_oth oth_rev_tot tot_rev \\\n", "0 NaN NaN NaN NaN NaN \n", "\n", " mgmt_srvc_fee_tot fee_for_srvc_leg_tot fee_for_srvc_acct_tot \\\n", "0 NaN NaN NaN \n", "\n", " fee_for_srvc_lbby_tot fee_for_srvc_prof_tot fee_for_srvc_invst_tot \\\n", "0 NaN NaN NaN \n", "\n", " fee_for_srvc_oth_tot fs_audited audit_committee_x vlntr_hrs _merge_v2 \\\n", "0 NaN NaN NaN NaN left_only \n", "\n", " rule_date ruledate_2004_BMF_v2 ruledate_MSTRALL_v2 yr_frmtn_v2 age \\\n", "0 2011 NaN NaN nan 5.0 \n", "\n", " category_Animals category_Arts, Culture, Humanities \\\n", "0 0.0 0.0 \n", "\n", " category_Community Development category_Education category_Environment \\\n", "0 0.0 0.0 0.0 \n", "\n", " category_Health category_Human Services category_Human and Civil Rights \\\n", "0 0.0 1.0 0.0 \n", "\n", " category_International category_Religion \\\n", "0 0.0 0.0 \n", "\n", " category_Research and Public Policy govt_revenue_2011_binary \\\n", "0 0.0 NaN \n", "\n", " other_revenue_2011_binary complexity_2011 advisory SOX_policies_2011 \\\n", "0 NaN NaN 1.0 NaN \n", "\n", " total_revenue_2011_logged total_revenue total_revenue_logged \\\n", "0 NaN NaN NaN \n", "\n", " program_efficiency_2016 state tot_func_expns_prg_srvcs \\\n", "0 NaN NH NaN \n", "\n", " tot_func_expns_tot _merge_v3 program_expenses total_expenses \\\n", "0 NaN left_only NaN NaN \n", "\n", " program_efficiency fndrsng_events _merge_v4 other_revenue_SOI \\\n", "0 NaN NaN left_only NaN \n", "\n", " complexity_2016 complexity_SOI complexity conflict_of_interest_policy \\\n", "0 0.0 0.0 0.0 NaN \n", "\n", " whistleblower_policy records_retention_policy \\\n", "0 NaN NaN \n", "\n", " conflict_of_interest_policy_v2 records_retention_policy_v2 \\\n", "0 NaN NaN \n", "\n", " whistleblower_policy_v2 SOX_policies SOX_policies_binary 2016_data \\\n", "0 NaN NaN NaN 1.0 \n", "\n", " Advisory Text \\\n", "0 On August 1, 2016, the New Hampshire Union Leader published an article titled, \"Former Portsmouth youth softball president accused of stealing thousands from nonprofit.\" \n", "\n", " donor_advisory donor_advisory_2016 donor_advisory_2011_to_2016 \\\n", "0 1.0 1.0 1.0 \n", "\n", " SOX_policies_all_binary total_revenue_no_neg EIN_47 \\\n", "0 NaN NaN NaN \n", "\n", " conflict_of_interest_policy_47 records_retention_policy_47 \\\n", "0 NaN NaN \n", "\n", " whistleblower_policy_47 SOX_policies_47 SOX_policies_all_binary_47 \\\n", "0 NaN NaN NaN \n", "\n", " SOX_policies_binary_47 tot_rev_47 total_revenue_logged_47 \\\n", "0 NaN NaN NaN \n", "\n", " program_expenses_47 total_expenses_47 program_efficiency_47 \\\n", "0 NaN NaN NaN \n", "\n", " complexity_47 _merge_47 OrganizationName_efile URL_efile \\\n", "0 NaN left_only NaN NaN \n", "\n", " SubmittedOn_efile TaxPeriod_efile whistleblower_policy_efile \\\n", "0 NaN NaN NaN \n", "\n", " conflict_of_interest_policy_efile records_retention_policy_efile \\\n", "0 NaN NaN \n", "\n", " SOX_policies_efile SOX_policies_binary_efile \\\n", "0 NaN NaN \n", "\n", " SOX_policies_all_binary_efile tot_rev_efile tot_rev_no_neg_efile \\\n", "0 NaN NaN NaN \n", "\n", " total_revenue_logged_efile program_expenses_efile total_expenses_efile \\\n", "0 NaN NaN NaN \n", "\n", " program_efficiency_efile complexity_efile _merge_efile FYE_y \\\n", "0 NaN NaN left_only NaN \n", "\n", " audit_committee_y board_size independent_members audited_financials \\\n", "0 NaN NaN NaN NaN \n", "\n", " fundraising_exp low_quality_reporting _merge \n", "0 NaN NaN left_only " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print len(pd.merge(df, df_advisories, left_on='EIN', right_on='EIN', how='left'))\n", "merged = pd.merge(df, df_advisories, left_on='EIN', right_on='EIN', how='left', indicator=True)\n", "print len(merged)\n", "print merged['_merge'].value_counts()\n", "merged[:1]" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
donor_advisory_20160.01.0
_merge
left_only7983169
right_only00
both0152
\n", "
" ], "text/plain": [ "donor_advisory_2016 0.0 1.0\n", "_merge \n", "left_only 7983 169\n", "right_only 0 0\n", "both 0 152" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(merged['_merge'], merged['donor_advisory_2016'])" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [], "source": [ "columns = ['org_id', 'EIN', 'FYE_x', \n", " 'fund_expense_percent_2016', 'independent_board_2016', #'program_expense_growth_2016', \n", " 'audited_financials_2016', #'no_loans_related_2016', 'contributions_gifts_grants_2016', \n", " #'total_contributions_2016', #'fund_expense_percent_2011', 'fund_expense_2011', \n", " #'primary_revenue_growth_2011', 'program_expense_growth_2011', 'independent_board_2011', \n", " #'audited_financials_2011', 'no_loans_related_2011', \n", " 'pt1_num_vtng_gvrn_bdy_mems', 'pt1_num_ind_vtng_mems', \n", " 'num_vtng_gvrn_bdy_mems', 'num_ind_vtng_mems', 'contri_grnts_cy', 'tot_fndrsng_exp_cy', 'aud_fincl_stmts', \n", " 'fs_audited', 'audit_committee_x', 'FYE_y', 'audit_committee_y', 'board_size', \n", " 'independent_members', 'audited_financials', 'fundraising_exp', 'low_quality_reporting']" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "ename": "NameError", "evalue": "name 'merged' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmerged\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m115\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m125\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'merged' is not defined" ] } ], "source": [ "merged[columns][115:125]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Independent Board" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": true }, "outputs": [], "source": [ "merged['independent_members'] = merged['independent_members'].astype('float')\n", "merged['board_size'] = merged['board_size'].astype('float')" ] }, { "cell_type": "code", "execution_count": 158, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8152\n", "152\n" ] } ], "source": [ "print len(merged[merged['independent_members'].isnull()])\n", "print len(merged[merged['independent_members'].notnull()])" ] }, { "cell_type": "code", "execution_count": 159, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1.0 83\n", "0.0 69\n", "Name: independent_board, dtype: int64" ] }, "execution_count": 159, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged['independent_board'] = np.nan\n", "merged['independent_board'] = np.where(( (merged['independent_members'].notnull()) \n", " & (merged['independent_members']>4)\n", " & (merged['independent_members']>(merged['board_size']/2))), \n", " 1, 0 ) #merged['independent_board'])\n", "merged['independent_board'] = np.where(merged['independent_members'].isnull(), np.nan, merged['independent_board']) \n", "merged['independent_board'].value_counts()" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": false }, "outputs": [], "source": [ "columns = ['org_id', 'EIN', 'FYE_x', 'fund_expense_percent_2016', 'independent_board_2016', \n", " 'board_size', 'independent_members', 'independent_board',\n", " 'audited_financials_2016', 'pt1_num_vtng_gvrn_bdy_mems', 'pt1_num_ind_vtng_mems', \n", " 'num_vtng_gvrn_bdy_mems', 'num_ind_vtng_mems', 'contri_grnts_cy', 'tot_fndrsng_exp_cy', \n", " 'aud_fincl_stmts', 'fs_audited', 'audit_committee_x', 'FYE_y', 'audit_committee_y', 'audited_financials', \n", " 'fundraising_exp', 'low_quality_reporting']" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
org_idEINFYE_xfund_expense_percent_2016independent_board_2016board_sizeindependent_membersindependent_boardaudited_financials_2016pt1_num_vtng_gvrn_bdy_memspt1_num_ind_vtng_memsnum_vtng_gvrn_bdy_memsnum_ind_vtng_memscontri_grnts_cytot_fndrsng_exp_cyaud_fincl_stmtsfs_auditedaudit_committee_xFYE_yaudit_committee_yaudited_financialsfundraising_explow_quality_reporting
57984292941347046currentNaNNaN22.019.01NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY20141.00.031461370.0
588716234451258469currentNaNNaN4.02.00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY2013NaN0.001.0
591716254710987457currentNaNNaN1.00.00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY2014NaN0.001.0
595415512450579664currentNaNNaN2.02.00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY2011NaN0.0268270.0
605916633330807324currentNaNNaN16.016.01NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY2012NaN0.001.0
610411327720760857currentNaNNaN30.030.01NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY20141.01.0635300.0
631013735141368361currentNaNNaN15.012.01NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY20151.01.001.0
656113601742463670currentNaNNaN6.04.00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY20141.01.0285600.0
663616139510138358currentNaNNaN23.023.01NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY20140.00.0330860.0
66484441042129889currentNaNNaN23.019.01NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNFY20141.01.001.0
\n", "
" ], "text/plain": [ " org_id EIN FYE_x fund_expense_percent_2016 \\\n", "5798 4292 941347046 current NaN \n", "5887 16234 451258469 current NaN \n", "5917 16254 710987457 current NaN \n", "5954 15512 450579664 current NaN \n", "6059 16633 330807324 current NaN \n", "6104 11327 720760857 current NaN \n", "6310 13735 141368361 current NaN \n", "6561 13601 742463670 current NaN \n", "6636 16139 510138358 current NaN \n", "6648 4441 042129889 current NaN \n", "\n", " independent_board_2016 board_size independent_members \\\n", "5798 NaN 22.0 19.0 \n", "5887 NaN 4.0 2.0 \n", "5917 NaN 1.0 0.0 \n", "5954 NaN 2.0 2.0 \n", "6059 NaN 16.0 16.0 \n", "6104 NaN 30.0 30.0 \n", "6310 NaN 15.0 12.0 \n", "6561 NaN 6.0 4.0 \n", "6636 NaN 23.0 23.0 \n", "6648 NaN 23.0 19.0 \n", "\n", " independent_board audited_financials_2016 pt1_num_vtng_gvrn_bdy_mems \\\n", "5798 1 NaN NaN \n", "5887 0 NaN NaN \n", "5917 0 NaN NaN \n", "5954 0 NaN NaN \n", "6059 1 NaN NaN \n", "6104 1 NaN NaN \n", "6310 1 NaN NaN \n", "6561 0 NaN NaN \n", "6636 1 NaN NaN \n", "6648 1 NaN NaN \n", "\n", " pt1_num_ind_vtng_mems num_vtng_gvrn_bdy_mems num_ind_vtng_mems \\\n", "5798 NaN NaN NaN \n", "5887 NaN NaN NaN \n", "5917 NaN NaN NaN \n", "5954 NaN NaN NaN \n", "6059 NaN NaN NaN \n", "6104 NaN NaN NaN \n", "6310 NaN NaN NaN \n", "6561 NaN NaN NaN \n", "6636 NaN NaN NaN \n", "6648 NaN NaN NaN \n", "\n", " contri_grnts_cy tot_fndrsng_exp_cy aud_fincl_stmts fs_audited \\\n", "5798 NaN NaN NaN NaN \n", "5887 NaN NaN NaN NaN \n", "5917 NaN NaN NaN NaN \n", "5954 NaN NaN NaN NaN \n", "6059 NaN NaN NaN NaN \n", "6104 NaN NaN NaN NaN \n", "6310 NaN NaN NaN NaN \n", "6561 NaN NaN NaN NaN \n", "6636 NaN NaN NaN NaN \n", "6648 NaN NaN NaN NaN \n", "\n", " audit_committee_x FYE_y audit_committee_y audited_financials \\\n", "5798 NaN FY2014 1.0 0.0 \n", "5887 NaN FY2013 NaN 0.0 \n", "5917 NaN FY2014 NaN 0.0 \n", "5954 NaN FY2011 NaN 0.0 \n", "6059 NaN FY2012 NaN 0.0 \n", "6104 NaN FY2014 1.0 1.0 \n", "6310 NaN FY2015 1.0 1.0 \n", "6561 NaN FY2014 1.0 1.0 \n", "6636 NaN FY2014 0.0 0.0 \n", "6648 NaN FY2014 1.0 1.0 \n", "\n", " fundraising_exp low_quality_reporting \n", "5798 3146137 0.0 \n", "5887 0 1.0 \n", "5917 0 1.0 \n", "5954 26827 0.0 \n", "6059 0 1.0 \n", "6104 63530 0.0 \n", "6310 0 1.0 \n", "6561 28560 0.0 \n", "6636 33086 0.0 \n", "6648 0 1.0 " ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged[merged['independent_members'].notnull()][columns][110:120]" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "66\n", "0\n", "321\n", "321\n", "321\n", "7667\n", "7667\n", "7667\n", "7667\n", "7667\n", "7667\n", "7667\n", "7667\n", "7667\n", "8152\n", "8181\n", "8152\n", "8152\n", "8152\n", "8152\n", "8152\n" ] } ], "source": [ "print len(merged[merged[columns[0]].isnull()])\n", "print len(merged[merged[columns[1]].isnull()])\n", "print len(merged[merged[columns[2]].isnull()])\n", "print len(merged[merged[columns[3]].isnull()])\n", "print len(merged[merged[columns[4]].isnull()])\n", "print len(merged[merged[columns[5]].isnull()])\n", "print len(merged[merged[columns[6]].isnull()])\n", "print len(merged[merged[columns[7]].isnull()])\n", "print len(merged[merged[columns[8]].isnull()])\n", "print len(merged[merged[columns[9]].isnull()])\n", "print len(merged[merged[columns[10]].isnull()])\n", "print len(merged[merged[columns[11]].isnull()])\n", "print len(merged[merged[columns[12]].isnull()])\n", "print len(merged[merged[columns[13]].isnull()])\n", "print len(merged[merged[columns[14]].isnull()])\n", "print len(merged[merged[columns[15]].isnull()])\n", "print len(merged[merged[columns[16]].isnull()])\n", "print len(merged[merged[columns[17]].isnull()])\n", "print len(merged[merged[columns[18]].isnull()])\n", "print len(merged[merged[columns[19]].isnull()])\n", "print len(merged[merged[columns[20]].isnull()])\n", "print len(merged[merged[columns[21]].isnull()])" ] }, { "cell_type": "code", "execution_count": 96, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for index, row in merged[10:].iterrows():\n", " if type(row['independent_board_2016'])==list:\n", " #print 'yup'\n", " merged.ix[index, 'independent_board_2016'] = row['independent_board_2016'][0]" ] }, { "cell_type": "code", "execution_count": 93, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.0 nan\n", " 0.0 _gfx_/icons/checkboxX.gif\n", " 0.0 _gfx_/icons/checked.gif\n", " 0.0 _gfx_/icons/checked.gif\n", " 0.0 _gfx_/icons/checked.gif\n", " 0.0 _gfx_/icons/checked.gif\n", " 0.0 _gfx_/icons/checked.gif\n", " 0.0 _gfx_/icons/checked.gif\n", " 0.0 nan\n", " 0.0 nan\n" ] } ], "source": [ "for index, row in merged[:10].iterrows():\n", " print type(row['independent_board_v2']), row['independent_board_v2'], row['independent_board_2016']" ] }, { "cell_type": "code", "execution_count": 95, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7983 \n", "\n", "_gfx_/icons/checked.gif 7658\n", "_gfx_/icons/checkboxX.gif 325\n", "Name: independent_board_2016, dtype: int64\n" ] } ], "source": [ "print merged['independent_board_2016'].value_counts().sum(),'\\n'\n", "print merged['independent_board_2016'].value_counts()" ] }, { "cell_type": "code", "execution_count": 160, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "7983\n", "8135\n" ] }, { "data": { "text/plain": [ "_gfx_/icons/checked.gif 7658\n", "_gfx_/icons/checkboxX.gif 325\n", "1.0 83\n", "0.0 69\n", "Name: independent_board_v2, dtype: int64" ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged['independent_board_v2'] = np.nan\n", "print len(merged[merged['independent_board_v2'].notnull()])\n", "merged['independent_board_v2'] = np.where(merged['independent_board_2016'].notnull(), merged['independent_board_2016'], \n", " merged['independent_board_v2'] )\n", "print len(merged[merged['independent_board_v2'].notnull()])\n", "merged['independent_board_v2'] = np.where( ((merged['independent_board_v2'].isnull()) & \n", " (merged['independent_board'].notnull())), \n", " merged['independent_board'], merged['independent_board_v2'] )\n", "print len(merged[merged['independent_board_v2'].notnull()])\n", "merged['independent_board_v2'].value_counts()" ] }, { "cell_type": "code", "execution_count": 161, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 7741\n", "0 394\n", "Name: independent_board_v2, dtype: int64" ] }, "execution_count": 161, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged['independent_board_v2'] = np.where(merged['independent_board_v2']== '_gfx_/icons/checked.gif', \n", " 1, merged['independent_board_v2']) \n", "merged['independent_board_v2'] = np.where(merged['independent_board_v2']== '_gfx_/icons/checkboxX.gif', \n", " 0, merged['independent_board_v2']) \n", "merged['independent_board_v2'].value_counts()" ] }, { "cell_type": "code", "execution_count": 162, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "8135" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged['independent_board_v2'].value_counts().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Audited Financials" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "merged.rename(columns={'FYE_x':'FYE'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 137, "metadata": { "collapsed": false }, "outputs": [], "source": [ "columns = ['org_id', 'EIN', 'FYE', #'independent_board_2016', \n", " #'board_size', 'independent_members', 'independent_board', \n", " #'pt1_num_vtng_gvrn_bdy_mems', 'pt1_num_ind_vtng_mems', \n", " 'independent_board_v2',\n", " 'audited_financials_2016', \n", " #'FYE_y',\n", " 'audited_financials', 'aud_fincl_stmts', 'fs_audited', 'audit_committee_x', 'audit_committee_y', \n", " #'num_vtng_gvrn_bdy_mems', 'num_ind_vtng_mems', 'contri_grnts_cy', 'tot_fndrsng_exp_cy', \n", " #'fund_expense_percent_2016', \n", " #'fundraising_exp', 'low_quality_reporting'\n", " ]" ] }, { "cell_type": "code", "execution_count": 138, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
org_idEINFYEindependent_board_v2audited_financials_2016audited_financialsaud_fincl_stmtsfs_auditedaudit_committee_xaudit_committee_y
016722020503776current0NaNNaNNaNNaNNaNNaN
110166043314346FY201301NaNNaNNaNNaNNaN
26466953667812FY201411NaNNaNNaNNaNNaN
312098581974429FY201411NaNNaNNaNNaNNaN
412123237172077FY201411NaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " org_id EIN FYE independent_board_v2 audited_financials_2016 \\\n", "0 16722 020503776 current 0 NaN \n", "1 10166 043314346 FY2013 0 1 \n", "2 6466 953667812 FY2014 1 1 \n", "3 12098 581974429 FY2014 1 1 \n", "4 12123 237172077 FY2014 1 1 \n", "\n", " audited_financials aud_fincl_stmts fs_audited audit_committee_x \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " audit_committee_y \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN " ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged[columns][:5]" ] }, { "cell_type": "code", "execution_count": 139, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "66\n", "0\n", "0\n", "321\n", "8152\n", "7667\n", "7667\n", "7667\n", "8181\n" ] } ], "source": [ "print len(merged[merged[columns[0]].isnull()])\n", "print len(merged[merged[columns[1]].isnull()])\n", "print len(merged[merged[columns[2]].isnull()])\n", "print len(merged[merged[columns[3]].isnull()])\n", "print len(merged[merged[columns[4]].isnull()])\n", "print len(merged[merged[columns[5]].isnull()])\n", "print len(merged[merged[columns[6]].isnull()])\n", "print len(merged[merged[columns[7]].isnull()])\n", "print len(merged[merged[columns[8]].isnull()])\n", "print len(merged[merged[columns[9]].isnull()])" ] }, { "cell_type": "code", "execution_count": 124, "metadata": { "collapsed": true }, "outputs": [], "source": [ "for index, row in merged.iterrows():\n", " if type(row['audited_financials_2016'])==list:\n", " merged.ix[index, 'audited_financials_2016'] = row['audited_financials_2016'][0]" ] }, { "cell_type": "code", "execution_count": 125, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
org_idEINFYEindependent_board_v2audited_financials_2016aud_fincl_stmtsfs_auditedaudit_committee_xaudit_committee_yaudited_financials
016722020503776current0NaNNaNNaNNaNNaNNaN
110166043314346FY20130_gfx_/icons/checked.gifNaNNaNNaNNaNNaN
26466953667812FY20141_gfx_/icons/checked.gifNaNNaNNaNNaNNaN
312098581974429FY20141_gfx_/icons/checked.gifNaNNaNNaNNaNNaN
412123237172077FY20141_gfx_/icons/checked.gifNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " org_id EIN FYE independent_board_v2 audited_financials_2016 \\\n", "0 16722 020503776 current 0 NaN \n", "1 10166 043314346 FY2013 0 _gfx_/icons/checked.gif \n", "2 6466 953667812 FY2014 1 _gfx_/icons/checked.gif \n", "3 12098 581974429 FY2014 1 _gfx_/icons/checked.gif \n", "4 12123 237172077 FY2014 1 _gfx_/icons/checked.gif \n", "\n", " aud_fincl_stmts fs_audited audit_committee_x audit_committee_y \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " audited_financials \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN " ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged[columns][:5]" ] }, { "cell_type": "code", "execution_count": 126, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "_gfx_/icons/checked.gif 7247\n", "_gfx_/icons/checkboxX.gif 372\n", "_gfx_/icons/checkboxOptOut.png 364\n", "Name: audited_financials_2016, dtype: int64\n", "1 7247\n", "0 372\n", "_gfx_/icons/checkboxOptOut.png 364\n", "Name: audited_financials_2016, dtype: int64\n" ] } ], "source": [ "print merged['audited_financials_2016'].value_counts()\n", "merged['audited_financials_2016'] = np.where(merged['audited_financials_2016']== '_gfx_/icons/checked.gif', \n", " 1, merged['audited_financials_2016']) \n", "merged['audited_financials_2016'] = np.where(merged['audited_financials_2016']== '_gfx_/icons/checkboxX.gif', \n", " 0, merged['audited_financials_2016']) \n", "print merged['audited_financials_2016'].value_counts()" ] }, { "cell_type": "code", "execution_count": 127, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 7611\n", "0 372\n", "Name: audited_financials_2016_v2, dtype: int64\n" ] } ], "source": [ "merged['audited_financials_2016_v2'] = np.where(merged['audited_financials_2016']== '_gfx_/icons/checkboxOptOut.png', \n", " 1, merged['audited_financials_2016']) \n", "print merged['audited_financials_2016_v2'].value_counts()" ] }, { "cell_type": "code", "execution_count": 132, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7667\n", "7667\n", "Y 433\n", "N 204\n", "Name: aud_fincl_stmts, dtype: int64 \n", "\n", "Y 629\n", "N 8\n", "Name: fs_audited, dtype: int64 \n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fs_auditedNY
aud_fincl_stmts
N7197
Y1432
\n", "
" ], "text/plain": [ "fs_audited N Y\n", "aud_fincl_stmts \n", "N 7 197\n", "Y 1 432" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print len(merged[merged[columns[5]].isnull()])\n", "print len(merged[merged[columns[6]].isnull()])\n", "print merged[columns[5]].value_counts(), '\\n'\n", "print merged[columns[6]].value_counts(), '\\n'\n", "pd.crosstab(merged[columns[5]],merged[columns[6]])" ] }, { "cell_type": "code", "execution_count": 133, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "637" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "432+197+1+7" ] }, { "cell_type": "code", "execution_count": 135, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7983\n" ] } ], "source": [ "print len(merged[merged['audited_financials_2016_v2'].notnull()])" ] }, { "cell_type": "code", "execution_count": 140, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "7983\n", "8135\n", "8135\n", "8135\n" ] }, { "data": { "text/plain": [ "1 7703\n", "0 432\n", "Name: audited_financials_v2, dtype: int64" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged['audited_financials_v2'] = np.nan\n", "print len(merged[merged['audited_financials_v2'].notnull()])\n", "merged['audited_financials_v2'] = np.where(merged['audited_financials_2016_v2'].notnull(), \n", " merged['audited_financials_2016_v2'], \n", " merged['audited_financials_v2'] )\n", "print len(merged[merged['audited_financials_v2'].notnull()])\n", "merged['audited_financials_v2'] = np.where( ((merged['audited_financials_v2'].isnull()) & \n", " (merged['audited_financials'].notnull())), \n", " merged['audited_financials'], merged['audited_financials_v2'] )\n", "print len(merged[merged['audited_financials_v2'].notnull()])\n", "merged['audited_financials_v2'] = np.where( ((merged['audited_financials_v2'].isnull()) & \n", " (merged['aud_fincl_stmts'].notnull())), \n", " merged['aud_fincl_stmts'], merged['audited_financials_v2'] )\n", "print len(merged[merged['audited_financials_v2'].notnull()])\n", "merged['audited_financials_v2'].value_counts()\n", "merged['audited_financials_v2'] = np.where( ((merged['audited_financials_v2'].isnull()) & \n", " (merged['fs_audited'].notnull())), \n", " merged['fs_audited'], merged['audited_financials_v2'] )\n", "print len(merged[merged['audited_financials_v2'].notnull()])\n", "merged['audited_financials_v2'].value_counts()" ] }, { "cell_type": "code", "execution_count": 136, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1.0 92\n", "0.0 60\n", "Name: audited_financials, dtype: int64" ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged['audited_financials'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Save DF" ] }, { "cell_type": "code", "execution_count": 163, "metadata": { "collapsed": true }, "outputs": [], "source": [ "columns_v2 = ['org_id', 'EIN', 'FYE', #'independent_board_2016', \n", " #'board_size', 'independent_members', 'independent_board', \n", " #'pt1_num_vtng_gvrn_bdy_mems', 'pt1_num_ind_vtng_mems', \n", " 'independent_board_v2',\n", " #'audited_financials_2016', \n", " 'audited_financials_v2',\n", " #'FYE_y',\n", " #'audited_financials', 'aud_fincl_stmts', 'fs_audited', 'audit_committee_x', 'audit_committee_y', \n", " #'num_vtng_gvrn_bdy_mems', 'num_ind_vtng_mems', 'contri_grnts_cy', 'tot_fndrsng_exp_cy', \n", " #'fund_expense_percent_2016', \n", " #'fundraising_exp', 'low_quality_reporting'\n", " ]" ] }, { "cell_type": "code", "execution_count": 165, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "org_id object\n", "EIN object\n", "FYE object\n", "independent_board_v2 object\n", "audited_financials_v2 object\n", "dtype: object" ] }, "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged[columns_v2].dtypes" ] }, { "cell_type": "code", "execution_count": 166, "metadata": { "collapsed": true }, "outputs": [], "source": [ "merged['independent_board_v2'] = merged['independent_board_v2'].astype('float')\n", "merged['audited_financials_v2'] = merged['audited_financials_v2'].astype('float')" ] }, { "cell_type": "code", "execution_count": 168, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
independent_board_v28135.00.9515670.2146920.0NaNNaNNaN1.0
audited_financials_v28135.00.9468960.2242540.0NaNNaNNaN1.0
\n", "
" ], "text/plain": [ " count mean std min 25% 50% 75% max\n", "independent_board_v2 8135.0 0.951567 0.214692 0.0 NaN NaN NaN 1.0\n", "audited_financials_v2 8135.0 0.946896 0.224254 0.0 NaN NaN NaN 1.0" ] }, "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged[columns_v2].describe().T" ] }, { "cell_type": "code", "execution_count": 175, "metadata": { "collapsed": true }, "outputs": [], "source": [ "merged[columns_v2].to_pickle('additional variables for testing.pkl')" ] }, { "cell_type": "code", "execution_count": 176, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8304\n" ] } ], "source": [ "print len(merged)" ] }, { "cell_type": "code", "execution_count": 169, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8238\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Col1A2011_dataA2016_dataeinfyeForm_990_FYESOX_policiesSOX_policies_all_binarySOX_policies_binaryagecategorycategory_Animalscategory_Arts__Culture__Humaniticategory_Community_Developmentcategory_Educationcategory_Environmentcategory_Healthcategory_Human_Servicescategory_Human_and_Civil_Rightscategory_Internationalcategory_Religioncategory_Research_and_Public_Polcomplexitycomplexity_2011conflict_of_interest_policy_v2donor_advisorydonor_advisory_2011_to_2016donor_advisory_2016org_idprogram_efficiencyratings_systemrecords_retention_policy_v2statetot_revtotal_revenue_loggedwhistleblower_policy_v2ncategoryrevs
05070901010202467FY20142014-123.01.01.062.0Research and Public Policy000000000016.0NaN1.000059540.794457CN 2.11.0MENaN16.3779931.0Research and Public Policy12967968.0
17632001010211478FY20142014-063.01.01.057.0Community Development001000000003.0NaN1.0000125170.808606CN 2.11.0ME3054612.014.9321631.0Community Development3054612.0
\n", "
" ], "text/plain": [ " Col1 A2011_data A2016_data ein fye Form_990_FYE \\\n", "0 50709 0 1 010202467 FY2014 2014-12 \n", "1 76320 0 1 010211478 FY2014 2014-06 \n", "\n", " SOX_policies SOX_policies_all_binary SOX_policies_binary age \\\n", "0 3.0 1.0 1.0 62.0 \n", "1 3.0 1.0 1.0 57.0 \n", "\n", " category category_Animals \\\n", "0 Research and Public Policy 0 \n", "1 Community Development 0 \n", "\n", " category_Arts__Culture__Humaniti category_Community_Development \\\n", "0 0 0 \n", "1 0 1 \n", "\n", " category_Education category_Environment category_Health \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "\n", " category_Human_Services category_Human_and_Civil_Rights \\\n", "0 0 0 \n", "1 0 0 \n", "\n", " category_International category_Religion \\\n", "0 0 0 \n", "1 0 0 \n", "\n", " category_Research_and_Public_Pol complexity complexity_2011 \\\n", "0 1 6.0 NaN \n", "1 0 3.0 NaN \n", "\n", " conflict_of_interest_policy_v2 donor_advisory \\\n", "0 1.0 0 \n", "1 1.0 0 \n", "\n", " donor_advisory_2011_to_2016 donor_advisory_2016 org_id \\\n", "0 0 0 5954 \n", "1 0 0 12517 \n", "\n", " program_efficiency ratings_system records_retention_policy_v2 state \\\n", "0 0.794457 CN 2.1 1.0 ME \n", "1 0.808606 CN 2.1 1.0 ME \n", "\n", " tot_rev total_revenue_logged whistleblower_policy_v2 \\\n", "0 NaN 16.377993 1.0 \n", "1 3054612.0 14.932163 1.0 \n", "\n", " ncategory revs \n", "0 Research and Public Policy 12967968.0 \n", "1 Community Development 3054612.0 " ] }, "execution_count": 169, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df4 = pd.read_stata('Test 4 data.dta')\n", "print len(df4)\n", "df4[:2]" ] }, { "cell_type": "code", "execution_count": 172, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8238\n", "both 8238\n", "right_only 0\n", "left_only 0\n", "Name: _merge, dtype: int64\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Col1A2011_dataA2016_dataeinfyeForm_990_FYESOX_policiesSOX_policies_all_binarySOX_policies_binaryagecategorycategory_Animalscategory_Arts__Culture__Humaniticategory_Community_Developmentcategory_Educationcategory_Environmentcategory_Healthcategory_Human_Servicescategory_Human_and_Civil_Rightscategory_Internationalcategory_Religioncategory_Research_and_Public_Polcomplexitycomplexity_2011conflict_of_interest_policy_v2donor_advisorydonor_advisory_2011_to_2016donor_advisory_2016org_id_xprogram_efficiencyratings_systemrecords_retention_policy_v2statetot_revtotal_revenue_loggedwhistleblower_policy_v2ncategoryrevsorg_id_yEINFYEindependent_board_v2audited_financials_v2_merge
05070901010202467FY20142014-123.01.01.062.0Research and Public Policy000000000016.0NaN1.000059540.794457CN 2.11.0MENaN16.3779931.0Research and Public Policy12967968.05954010202467FY20141.01.0both
17632001010211478FY20142014-063.01.01.057.0Community Development001000000003.0NaN1.0000125170.808606CN 2.11.0ME3054612.014.9321631.0Community Development3054612.012517010211478FY20141.01.0both
\n", "
" ], "text/plain": [ " Col1 A2011_data A2016_data ein fye Form_990_FYE \\\n", "0 50709 0 1 010202467 FY2014 2014-12 \n", "1 76320 0 1 010211478 FY2014 2014-06 \n", "\n", " SOX_policies SOX_policies_all_binary SOX_policies_binary age \\\n", "0 3.0 1.0 1.0 62.0 \n", "1 3.0 1.0 1.0 57.0 \n", "\n", " category category_Animals \\\n", "0 Research and Public Policy 0 \n", "1 Community Development 0 \n", "\n", " category_Arts__Culture__Humaniti category_Community_Development \\\n", "0 0 0 \n", "1 0 1 \n", "\n", " category_Education category_Environment category_Health \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "\n", " category_Human_Services category_Human_and_Civil_Rights \\\n", "0 0 0 \n", "1 0 0 \n", "\n", " category_International category_Religion \\\n", "0 0 0 \n", "1 0 0 \n", "\n", " category_Research_and_Public_Pol complexity complexity_2011 \\\n", "0 1 6.0 NaN \n", "1 0 3.0 NaN \n", "\n", " conflict_of_interest_policy_v2 donor_advisory \\\n", "0 1.0 0 \n", "1 1.0 0 \n", "\n", " donor_advisory_2011_to_2016 donor_advisory_2016 org_id_x \\\n", "0 0 0 5954 \n", "1 0 0 12517 \n", "\n", " program_efficiency ratings_system records_retention_policy_v2 state \\\n", "0 0.794457 CN 2.1 1.0 ME \n", "1 0.808606 CN 2.1 1.0 ME \n", "\n", " tot_rev total_revenue_logged whistleblower_policy_v2 \\\n", "0 NaN 16.377993 1.0 \n", "1 3054612.0 14.932163 1.0 \n", "\n", " ncategory revs org_id_y EIN FYE \\\n", "0 Research and Public Policy 12967968.0 5954 010202467 FY2014 \n", "1 Community Development 3054612.0 12517 010211478 FY2014 \n", "\n", " independent_board_v2 audited_financials_v2 _merge \n", "0 1.0 1.0 both \n", "1 1.0 1.0 both " ] }, "execution_count": 172, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged2 = pd.merge(df4, merged[columns_v2], left_on='ein', right_on='EIN', how='left', indicator=True)\n", "print len(merged2)\n", "print merged2['_merge'].value_counts()\n", "merged2[:2]" ] }, { "cell_type": "code", "execution_count": 173, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 7983\n", "1 255\n", "Name: donor_advisory_2016, dtype: int64" ] }, "execution_count": 173, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged2['donor_advisory_2016'].value_counts()" ] }, { "cell_type": "code", "execution_count": 174, "metadata": { "collapsed": true }, "outputs": [], "source": [ "merged2.to_pickle('Test 4 with independent_board and audited_financials.pkl')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "columns = ['screen_name', 'followers_first', 'followers_end', 'followers_last', \n", " 'orgid', 'retweeted_count', 'favorited_count']\n", "aggregations = {'orgid': 'first',\n", " 'screen_name': 'first',\n", " 'retweeted_count': 'sum',\n", " 'favorited_count': 'sum',\n", " 'followers_first': 'first',\n", " 'followers_end': 'last',\n", " }" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "columns = ['screen_name', 'followers_first', 'followers_end', 'followers_last', \n", " 'orgid', 'retweeted_count', 'favorited_count']\n", "aggregations = {'orgid': 'first',\n", " 'screen_name': 'first',\n", " 'retweeted_count': 'sum',\n", " 'favorited_count': 'sum',\n", " 'followers_first': 'first',\n", " 'followers_end': 'last',\n", " }\n", "org_df = df[columns].groupby('orgid').agg(aggregations)\n", "print len(org_df)\n", "org_df = org_df.dropna()\n", "org_df['followers_first'] = org_df['followers_first'].astype('int')\n", "org_df['followers_end'] = org_df['followers_end'].astype('int')\n", "org_df['favorited_count'] = org_df['favorited_count'].astype('int')\n", "org_df['followers_change'] = org_df['followers_end'] - org_df['followers_first']\n", "org_df['followers_pct_change'] = (org_df['followers_end'] - org_df['followers_first'])/org_df['followers_first']\n", "print len(org_df)\n", "org_df[:5]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 0 }