{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 113th Congress as News Commentators on Twitter\n", "\n", "### [Blogged](http://talhaoz.com/?p=634) about this study.\n", "\n", "In this project I am answering the following questions:\n", "\n", "* Who are the most active news commentators among members of Congress ?\n", "* Which events (news) got the most attention by these politicians ?\n", "* How many news (of 7376) are commentated by democrats and/or republicans...\n", "* How many comments made on these news by each group ?\n", "* Which news in particular describe each group?\n", "\n", "See [here](http://talhaoz.com/news/) for other iPython notebooks on this project.\n", "\n", "Project (datasets and the source code) is available on [GitHub](https://github.com/oztalha/News-Commentary-Tweets-of-Elites)\n", "\n", "The news and the curated tweets used in this study are scraped from theplazz.com approximately matching the duration of [113th US Congress](https://en.wikipedia.org/wiki/113th_United_States_Congress), i.e. between Jan 2013 - Jan 2015. Here is an annotated screenshot of one of the news published on this news media site:\n", "\n", "![](ThePlazz-Screenshot.png)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/toz/Documents/workspace/News-Commentary-Tweets-of-Elites\n" ] } ], "source": [ "cd .." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import twitter\n", "import pandas as pd\n", "import numpy as np\n", "import plotly.plotly as py\n", "from plotly.graph_objs import *\n", "from mykeys import tw\n", "import networkx as nx\n", "import itertools\n", "from collections import Counter\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def oauth_login():\n", " \"\"\"Twitter authorization \"\"\"\n", " #tw is a dictionary, the only variable in mykeys.py\n", " auth = twitter.oauth.OAuth(tw['OAUTH_TOKEN'], tw['OAUTH_TOKEN_SECRET'],\n", " tw['CONSUMER_KEY'], tw['CONSUMER_SECRET'])\n", " twitter_api = twitter.Twitter(auth=auth)\n", " return twitter_api\n", "\n", "\n", "def get_members(members):\n", " \"\"\"Scrape only the interesting info from twitter json response \"\"\"\n", " return [(m['id'],m['screen_name'],m['name'],m['location'],m['description'],\n", " m['created_at'], m['friends_count'],m['followers_count'],\n", " m['statuses_count'],m['favourites_count']) for m in members['users']]\n", "\n", "\n", "def tw_to_pol(twitter_api,slug,owner_screen_name,group):\n", " \"\"\"Get members of a twitter list with known political group into a dataframe \"\"\"\n", " resp = twitter_api.lists.members(slug=slug,owner_screen_name=owner_screen_name,cursor=-1,count=5000)\n", " members = get_members(resp)\n", " df = pd.DataFrame(members,columns=header)\n", " df['party'] = group\n", " return df\n", "\n", "\n", "def get_politicians():\n", " \"\"\"Download 113th congress tweeps using public Twitter lists\"\"\"\n", "\n", " header = ['id','screen_name','name','location','description','created_at',\n", " 'friends','followers','statuses','favorites']\n", "\n", " polists = [{'slug':'senaterepublicans', 'owner_screen_name':'Senate_GOPs', 'group':'gop'}, #62\n", " {'slug':'house-republicans', 'owner_screen_name':'HouseGOP', 'group':'gop'}, #260\n", " {'slug':'housegop', 'owner_screen_name':'GOPLeader', 'group':'gop'}, #237\n", " {'slug':'elected-democrats', 'owner_screen_name':'TheDemocrats', 'group':'dem'}, #259\n", " {'slug':'house-democrats', 'owner_screen_name':'DannyMariachi', 'group':'dem'}, #188\n", " {'slug':'senatedemocrats', 'owner_screen_name':'SenateDems', 'group':'dem'} #52\n", " ]\n", " \n", " twitter_api = oauth_login()\n", "\n", " df = pd.DataFrame(columns=header)\n", " for polist in polists:\n", " df = df.append(tw_to_pol(twitter_api,polist['slug'],polist['owner_screen_name'],polist['group']))\n", " df = df.drop_duplicates()\n", " df.to_csv('data/US-politicians.csv',encoding='utf-8',index=False)\n", " return df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# get twitter IDs of congressmen and senators\n", "df = pd.read_csv('data/US-politicians.csv',encoding='utf-8')\n", "gop = df[df['party']=='gop']\n", "dem = df[df['party']=='dem']\n", "dem_tweeps = set(dem.screen_name.values)\n", "gop_tweeps = set(gop.screen_name.values)\n", "# Principal Accounts of Members of the U.S. Senate (a mix of campaign and government accounts)\n", "senate = pd.read_csv('data/US-senate.csv',encoding='utf-8')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# get commentary tweets of US newsmakers and opinion-shapers\n", "tweets = pd.read_csv('data/US-tweets.csv',encoding='utf-8',parse_dates=['dt'])\n", "tweets.twhandle = tweets.twhandle.str[1:]\n", "#tweets.dt = pd.to_datetime(tweets.dt,unit='D')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7376 news commentated between 14-Jan-2013 and 09-Jan-2015 by\n" ] }, { "data": { "text/html": [ "
\n", " | # of tweeps | \n", "
---|---|
senator | \n", "44 | \n", "
democrat | \n", "36 | \n", "
republican | \n", "30 | \n", "
total | \n", "1442 | \n", "