{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"toc": "true"
},
"source": [
"# Table of Contents\n",
"
"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Gender dynamics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tweet data prep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load the tweets"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Loading from tweets/642bf140607547cb9d4c6b1fc49772aa_001.json.gz\n",
"DEBUG:root:Loaded 50000\n",
"DEBUG:root:Loaded 100000\n",
"DEBUG:root:Loaded 150000\n",
"DEBUG:root:Loaded 200000\n",
"DEBUG:root:Loaded 250000\n",
"INFO:root:Loading from tweets/9f7ed17c16a1494c8690b4053609539d_001.json.gz\n",
"DEBUG:root:Loaded 300000\n",
"DEBUG:root:Loaded 350000\n",
"DEBUG:root:Loaded 400000\n",
"DEBUG:root:Loaded 450000\n",
"DEBUG:root:Loaded 500000\n",
"INFO:root:Loading from tweets/41feff28312c433ab004cd822212f4c2_001.json.gz\n",
"DEBUG:root:Loaded 550000\n",
"DEBUG:root:Loaded 600000\n",
"DEBUG:root:Loaded 650000\n",
"DEBUG:root:Loaded 700000\n",
"DEBUG:root:Loaded 750000\n",
"DEBUG:root:Loaded 800000\n"
]
},
{
"data": {
"text/plain": [
"tweet_id 817136\n",
"user_id 817136\n",
"screen_name 817136\n",
"tweet_created_at 817136\n",
"tweet_type 817136\n",
"dtype: int64"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
"import logging\n",
"from dateutil.parser import parse as date_parse\n",
"from utils import load_tweet_df, tweet_type\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.DEBUG)\n",
"\n",
"# Set float format so doesn't display scientific notation\n",
"pd.options.display.float_format = '{:20,.2f}'.format\n",
"\n",
"def tweet_transform(tweet):\n",
" return {\n",
" 'tweet_id': tweet['id_str'], \n",
" 'tweet_created_at': date_parse(tweet['created_at']),\n",
" 'user_id': tweet['user']['id_str'],\n",
" 'screen_name': tweet['user']['screen_name'],\n",
" 'tweet_type': tweet_type(tweet)\n",
" }\n",
"\n",
"tweet_df = load_tweet_df(tweet_transform, ['tweet_id', 'user_id', 'screen_name', 'tweet_created_at', 'tweet_type'], dedupe_columns=['tweet_id'])\n",
"tweet_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tweet_id | \n",
" user_id | \n",
" screen_name | \n",
" tweet_created_at | \n",
" tweet_type | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 872631046088601600 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 2017-06-08 01:47:08+00:00 | \n",
" retweet | \n",
"
\n",
" \n",
" 1 | \n",
" 872610483647516673 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 2017-06-08 00:25:26+00:00 | \n",
" retweet | \n",
"
\n",
" \n",
" 2 | \n",
" 872609618626826240 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 2017-06-08 00:22:00+00:00 | \n",
" retweet | \n",
"
\n",
" \n",
" 3 | \n",
" 872605974699311104 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 2017-06-08 00:07:31+00:00 | \n",
" retweet | \n",
"
\n",
" \n",
" 4 | \n",
" 872603191518646276 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 2017-06-07 23:56:27+00:00 | \n",
" retweet | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" tweet_id user_id screen_name tweet_created_at \\\n",
"0 872631046088601600 327862439 jonathanvswan 2017-06-08 01:47:08+00:00 \n",
"1 872610483647516673 327862439 jonathanvswan 2017-06-08 00:25:26+00:00 \n",
"2 872609618626826240 327862439 jonathanvswan 2017-06-08 00:22:00+00:00 \n",
"3 872605974699311104 327862439 jonathanvswan 2017-06-08 00:07:31+00:00 \n",
"4 872603191518646276 327862439 jonathanvswan 2017-06-07 23:56:27+00:00 \n",
"\n",
" tweet_type \n",
"0 retweet \n",
"1 retweet \n",
"2 retweet \n",
"3 retweet \n",
"4 retweet "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tweet_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tweet analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### What are the first and last tweets in the dataset?"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2017-06-01 04:00:01+0000', tz='UTC')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tweet_df.tweet_created_at.min()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2017-08-01 03:59:58+0000', tz='UTC')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tweet_df.tweet_created_at.max()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How many retweets, original tweets, replies, and quotes are in dataset?"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" retweet | \n",
" 345266 | \n",
" 42.3% | \n",
"
\n",
" \n",
" original | \n",
" 233926 | \n",
" 28.6% | \n",
"
\n",
" \n",
" reply | \n",
" 126254 | \n",
" 15.5% | \n",
"
\n",
" \n",
" quote | \n",
" 111690 | \n",
" 13.7% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"retweet 345266 42.3%\n",
"original 233926 28.6%\n",
"reply 126254 15.5%\n",
"quote 111690 13.7%"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame({'count':tweet_df.tweet_type.value_counts(), \n",
" 'percentage':tweet_df.tweet_type.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tweeter data prep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare the tweeter data\n",
"This comes from the following sources:\n",
"1. User lookup: These are lists of users exported from SFM. These are the final set of beltway journalists. Accounts that were suspended or deleted have been removed from this list. Also, this list will include users that did not tweet (i.e., have no tweets in dataset).\n",
"2. Tweets in the dataset: Used to generate tweet counts per tweeter. However, since some beltway journalists may not have tweeted, this may be a subset of the user lookup. Also, it may include the tweets of some users that were later excluded because their accounts were suspended or deleted or determined to not be beltway journalists.\n",
"3. User info lookup: Information on users that was manually coded in the beltway journalist spreadsheet or looked up from Twitter's API. This includes some accounts that were excluded from data collection for various reasons such as working for a foreign news organization or no longer working as a beltway journalist. Thus, these are a superset of the user lookup.\n",
"\n",
"Thus, the tweeter data should include tweet and user info data only from users in the user lookup."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load user lookup"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"screen_name 2487\n",
"dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_lookup_filepaths = ('lookups/senate_press_lookup.csv',\n",
" 'lookups/periodical_press_lookup.csv',\n",
" 'lookups/radio_and_television_lookup.csv')\n",
"user_lookup_df = pd.concat((pd.read_csv(user_lookup_filepath, usecols=['Uid', 'Token'], dtype={'Uid': str}) for user_lookup_filepath in user_lookup_filepaths))\n",
"user_lookup_df.set_index('Uid', inplace=True)\n",
"user_lookup_df.rename(columns={'Token': 'screen_name'}, inplace=True)\n",
"user_lookup_df.index.names = ['user_id']\n",
"# Some users may be in multiple lists, so need to drop duplicates\n",
"user_lookup_df = user_lookup_df[~user_lookup_df.index.duplicated()]\n",
"\n",
"user_lookup_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 23455653 | \n",
" abettel | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
"
\n",
" \n",
" 18580432 | \n",
" b_fung | \n",
"
\n",
" \n",
" 399225358 | \n",
" b_muzz | \n",
"
\n",
" \n",
" 18834692 | \n",
" becca_milfeld | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name\n",
"user_id \n",
"23455653 abettel\n",
"33919343 AshleyRParker\n",
"18580432 b_fung\n",
"399225358 b_muzz\n",
"18834692 becca_milfeld"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_lookup_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Tweets in dataset per tweeter"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tweet_type\n",
"original 2292\n",
"quote 2292\n",
"reply 2292\n",
"retweet 2292\n",
"tweets_in_dataset 2292\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_tweet_count_df = tweet_df[['user_id', 'tweet_type']].groupby(['user_id', 'tweet_type']).size().unstack()\n",
"user_tweet_count_df.fillna(0, inplace=True)\n",
"user_tweet_count_df['tweets_in_dataset'] = user_tweet_count_df.original + user_tweet_count_df.quote + user_tweet_count_df.reply + user_tweet_count_df.retweet\n",
"user_tweet_count_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" tweet_type | \n",
" original | \n",
" quote | \n",
" reply | \n",
" retweet | \n",
" tweets_in_dataset | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1001991865 | \n",
" 13.00 | \n",
" 3.00 | \n",
" 1.00 | \n",
" 31.00 | \n",
" 48.00 | \n",
"
\n",
" \n",
" 1002229862 | \n",
" 48.00 | \n",
" 20.00 | \n",
" 3.00 | \n",
" 118.00 | \n",
" 189.00 | \n",
"
\n",
" \n",
" 100270054 | \n",
" 1.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 100802089 | \n",
" 4.00 | \n",
" 7.00 | \n",
" 12.00 | \n",
" 17.00 | \n",
" 40.00 | \n",
"
\n",
" \n",
" 100860790 | \n",
" 102.00 | \n",
" 26.00 | \n",
" 4.00 | \n",
" 166.00 | \n",
" 298.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"tweet_type original quote reply \\\n",
"user_id \n",
"1001991865 13.00 3.00 1.00 \n",
"1002229862 48.00 20.00 3.00 \n",
"100270054 1.00 0.00 0.00 \n",
"100802089 4.00 7.00 12.00 \n",
"100860790 102.00 26.00 4.00 \n",
"\n",
"tweet_type retweet tweets_in_dataset \n",
"user_id \n",
"1001991865 31.00 48.00 \n",
"1002229862 118.00 189.00 \n",
"100270054 0.00 1.00 \n",
"100802089 17.00 40.00 \n",
"100860790 166.00 298.00 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_tweet_count_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load user info"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"name 2506\n",
"organization 2477\n",
"position 2503\n",
"gender 2505\n",
"followers_count 2506\n",
"following_count 2506\n",
"tweet_count 2506\n",
"user_created_at 2506\n",
"verified 2506\n",
"protected 2506\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_info_df = pd.read_csv('source_data/user_info_lookup.csv', names=['user_id', 'name', 'organization', 'position',\n",
" 'gender', 'followers_count', 'following_count', 'tweet_count',\n",
" 'user_created_at', 'verified', 'protected'],\n",
" dtype={'user_id': str}).set_index(['user_id'])\n",
"user_info_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" organization | \n",
" position | \n",
" gender | \n",
" followers_count | \n",
" following_count | \n",
" tweet_count | \n",
" user_created_at | \n",
" verified | \n",
" protected | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 20711445 | \n",
" Glinski, Nina | \n",
" NaN | \n",
" Freelance Reporter | \n",
" F | \n",
" 963 | \n",
" 507 | \n",
" 909 | \n",
" Thu Feb 12 20:00:53 +0000 2009 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 258917371 | \n",
" Enders, David | \n",
" NaN | \n",
" Journalist | \n",
" M | \n",
" 1444 | \n",
" 484 | \n",
" 6296 | \n",
" Mon Feb 28 19:52:03 +0000 2011 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 297046834 | \n",
" Barakat, Matthew | \n",
" Associated Press | \n",
" Northern Virginia Correspondent | \n",
" M | \n",
" 759 | \n",
" 352 | \n",
" 631 | \n",
" Wed May 11 20:55:24 +0000 2011 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 455585786 | \n",
" Atkins, Kimberly | \n",
" Boston Herald | \n",
" Chief Washington Reporter/Columnist | \n",
" F | \n",
" 2944 | \n",
" 2691 | \n",
" 6277 | \n",
" Thu Jan 05 08:26:46 +0000 2012 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 42584840 | \n",
" Vlahou, Toula | \n",
" CQ Roll Call | \n",
" Editor & Podcast Producer | \n",
" F | \n",
" 2703 | \n",
" 201 | \n",
" 6366 | \n",
" Tue May 26 07:41:38 +0000 2009 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name organization \\\n",
"user_id \n",
"20711445 Glinski, Nina NaN \n",
"258917371 Enders, David NaN \n",
"297046834 Barakat, Matthew Associated Press \n",
"455585786 Atkins, Kimberly Boston Herald \n",
"42584840 Vlahou, Toula CQ Roll Call \n",
"\n",
" position gender followers_count \\\n",
"user_id \n",
"20711445 Freelance Reporter F 963 \n",
"258917371 Journalist M 1444 \n",
"297046834 Northern Virginia Correspondent M 759 \n",
"455585786 Chief Washington Reporter/Columnist F 2944 \n",
"42584840 Editor & Podcast Producer F 2703 \n",
"\n",
" following_count tweet_count user_created_at \\\n",
"user_id \n",
"20711445 507 909 Thu Feb 12 20:00:53 +0000 2009 \n",
"258917371 484 6296 Mon Feb 28 19:52:03 +0000 2011 \n",
"297046834 352 631 Wed May 11 20:55:24 +0000 2011 \n",
"455585786 2691 6277 Thu Jan 05 08:26:46 +0000 2012 \n",
"42584840 201 6366 Tue May 26 07:41:38 +0000 2009 \n",
"\n",
" verified protected \n",
"user_id \n",
"20711445 False False \n",
"258917371 True False \n",
"297046834 True False \n",
"455585786 True False \n",
"42584840 False False "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_info_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"screen_name 2487\n",
"name 2487\n",
"organization 2487\n",
"position 2484\n",
"gender 2486\n",
"followers_count 2487\n",
"following_count 2487\n",
"tweet_count 2487\n",
"user_created_at 2487\n",
"verified 2487\n",
"protected 2487\n",
"original 2487\n",
"quote 2487\n",
"reply 2487\n",
"retweet 2487\n",
"tweets_in_dataset 2487\n",
"dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_summary_df = user_lookup_df.join((user_info_df, user_tweet_count_df), how='left')\n",
"# Fill Nans\n",
"user_summary_df['organization'].fillna('', inplace=True)\n",
"user_summary_df['original'].fillna(0, inplace=True)\n",
"user_summary_df['quote'].fillna(0, inplace=True)\n",
"user_summary_df['reply'].fillna(0, inplace=True)\n",
"user_summary_df['retweet'].fillna(0, inplace=True)\n",
"user_summary_df['tweets_in_dataset'].fillna(0, inplace=True)\n",
"user_summary_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" position | \n",
" gender | \n",
" followers_count | \n",
" following_count | \n",
" tweet_count | \n",
" user_created_at | \n",
" verified | \n",
" protected | \n",
" original | \n",
" quote | \n",
" reply | \n",
" retweet | \n",
" tweets_in_dataset | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 23455653 | \n",
" abettel | \n",
" Bettelheim, Adriel | \n",
" Politico | \n",
" Health Care Editor | \n",
" F | \n",
" 2664 | \n",
" 1055 | \n",
" 15990 | \n",
" Mon Mar 09 16:32:20 +0000 2009 | \n",
" True | \n",
" False | \n",
" 289.00 | \n",
" 12.00 | \n",
" 6.00 | \n",
" 52.00 | \n",
" 359.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" White House Reporter | \n",
" F | \n",
" 122382 | \n",
" 2342 | \n",
" 12433 | \n",
" Tue Apr 21 14:28:57 +0000 2009 | \n",
" True | \n",
" False | \n",
" 172.00 | \n",
" 67.00 | \n",
" 11.00 | \n",
" 120.00 | \n",
" 370.00 | \n",
"
\n",
" \n",
" 18580432 | \n",
" b_fung | \n",
" Fung, Brian | \n",
" Washington Post | \n",
" Tech Reporter | \n",
" M | \n",
" 16558 | \n",
" 2062 | \n",
" 44799 | \n",
" Sat Jan 03 15:15:57 +0000 2009 | \n",
" True | \n",
" False | \n",
" 257.00 | \n",
" 85.00 | \n",
" 205.00 | \n",
" 82.00 | \n",
" 629.00 | \n",
"
\n",
" \n",
" 399225358 | \n",
" b_muzz | \n",
" Murray, Brendan | \n",
" Bloomberg News | \n",
" Managing Editor, U.S. Economy | \n",
" M | \n",
" 624 | \n",
" 382 | \n",
" 360 | \n",
" Thu Oct 27 05:34:05 +0000 2011 | \n",
" True | \n",
" False | \n",
" 3.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 5.00 | \n",
" 8.00 | \n",
"
\n",
" \n",
" 18834692 | \n",
" becca_milfeld | \n",
" Milfeld, Becca | \n",
" Agence France-Presse | \n",
" English Desk Editor and Journalist | \n",
" F | \n",
" 483 | \n",
" 993 | \n",
" 1484 | \n",
" Sat Jan 10 13:58:43 +0000 2009 | \n",
" False | \n",
" False | \n",
" 3.00 | \n",
" 14.00 | \n",
" 0.00 | \n",
" 7.00 | \n",
" 24.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"23455653 abettel Bettelheim, Adriel Politico \n",
"33919343 AshleyRParker Parker, Ashley Washington Post \n",
"18580432 b_fung Fung, Brian Washington Post \n",
"399225358 b_muzz Murray, Brendan Bloomberg News \n",
"18834692 becca_milfeld Milfeld, Becca Agence France-Presse \n",
"\n",
" position gender followers_count \\\n",
"user_id \n",
"23455653 Health Care Editor F 2664 \n",
"33919343 White House Reporter F 122382 \n",
"18580432 Tech Reporter M 16558 \n",
"399225358 Managing Editor, U.S. Economy M 624 \n",
"18834692 English Desk Editor and Journalist F 483 \n",
"\n",
" following_count tweet_count user_created_at \\\n",
"user_id \n",
"23455653 1055 15990 Mon Mar 09 16:32:20 +0000 2009 \n",
"33919343 2342 12433 Tue Apr 21 14:28:57 +0000 2009 \n",
"18580432 2062 44799 Sat Jan 03 15:15:57 +0000 2009 \n",
"399225358 382 360 Thu Oct 27 05:34:05 +0000 2011 \n",
"18834692 993 1484 Sat Jan 10 13:58:43 +0000 2009 \n",
"\n",
" verified protected original quote \\\n",
"user_id \n",
"23455653 True False 289.00 12.00 \n",
"33919343 True False 172.00 67.00 \n",
"18580432 True False 257.00 85.00 \n",
"399225358 True False 3.00 0.00 \n",
"18834692 False False 3.00 14.00 \n",
"\n",
" reply retweet tweets_in_dataset \n",
"user_id \n",
"23455653 6.00 52.00 359.00 \n",
"33919343 11.00 120.00 370.00 \n",
"18580432 205.00 82.00 629.00 \n",
"399225358 0.00 5.00 8.00 \n",
"18834692 0.00 7.00 24.00 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_summary_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### Remove users with no tweets in dataset"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"screen_name 195\n",
"name 195\n",
"organization 195\n",
"position 195\n",
"gender 194\n",
"followers_count 195\n",
"following_count 195\n",
"tweet_count 195\n",
"user_created_at 195\n",
"verified 195\n",
"protected 195\n",
"original 195\n",
"quote 195\n",
"reply 195\n",
"retweet 195\n",
"tweets_in_dataset 195\n",
"dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_summary_df[user_summary_df.tweets_in_dataset == 0].count()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"screen_name 2292\n",
"name 2292\n",
"organization 2292\n",
"position 2289\n",
"gender 2292\n",
"followers_count 2292\n",
"following_count 2292\n",
"tweet_count 2292\n",
"user_created_at 2292\n",
"verified 2292\n",
"protected 2292\n",
"original 2292\n",
"quote 2292\n",
"reply 2292\n",
"retweet 2292\n",
"tweets_in_dataset 2292\n",
"dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_summary_df = user_summary_df[user_summary_df.tweets_in_dataset != 0]\n",
"user_summary_df.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tweeter analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How many of the journalists are men / women?"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 1299 | \n",
" 56.7% | \n",
"
\n",
" \n",
" F | \n",
" 993 | \n",
" 43.3% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 1299 56.7%\n",
"F 993 43.3%"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame({'count':user_summary_df.gender.value_counts(), 'percentage':user_summary_df.gender.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Summary\n",
"\n",
"* 25%, 50%, 75% are the percentiles. (Min is equivalent to 0%. Max is equivalent to 100%. 50% is the median.)\n",
"* std is standard deviation, normalized by N-1."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### All"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" followers_count | \n",
" following_count | \n",
" tweet_count | \n",
" original | \n",
" quote | \n",
" reply | \n",
" retweet | \n",
" tweets_in_dataset | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 2,292.00 | \n",
" 2,292.00 | \n",
" 2,292.00 | \n",
" 2,292.00 | \n",
" 2,292.00 | \n",
" 2,292.00 | \n",
" 2,292.00 | \n",
" 2,292.00 | \n",
"
\n",
" \n",
" mean | \n",
" 16,467.62 | \n",
" 1,444.83 | \n",
" 9,619.69 | \n",
" 102.06 | \n",
" 48.73 | \n",
" 55.08 | \n",
" 150.64 | \n",
" 356.52 | \n",
"
\n",
" \n",
" std | \n",
" 91,886.90 | \n",
" 3,003.00 | \n",
" 16,618.09 | \n",
" 169.43 | \n",
" 135.90 | \n",
" 249.18 | \n",
" 585.08 | \n",
" 833.76 | \n",
"
\n",
" \n",
" min | \n",
" 6.00 | \n",
" 0.00 | \n",
" 1.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 831.75 | \n",
" 505.75 | \n",
" 1,449.50 | \n",
" 10.00 | \n",
" 1.00 | \n",
" 1.00 | \n",
" 8.00 | \n",
" 32.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 2,419.50 | \n",
" 998.50 | \n",
" 4,211.50 | \n",
" 41.00 | \n",
" 9.00 | \n",
" 5.00 | \n",
" 39.00 | \n",
" 122.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 7,348.75 | \n",
" 1,713.50 | \n",
" 10,817.25 | \n",
" 124.25 | \n",
" 43.00 | \n",
" 30.00 | \n",
" 129.00 | \n",
" 375.00 | \n",
"
\n",
" \n",
" max | \n",
" 2,176,578.00 | \n",
" 96,194.00 | \n",
" 208,763.00 | \n",
" 2,693.00 | \n",
" 3,069.00 | \n",
" 9,033.00 | \n",
" 21,524.00 | \n",
" 21,547.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" followers_count following_count tweet_count \\\n",
"count 2,292.00 2,292.00 2,292.00 \n",
"mean 16,467.62 1,444.83 9,619.69 \n",
"std 91,886.90 3,003.00 16,618.09 \n",
"min 6.00 0.00 1.00 \n",
"25% 831.75 505.75 1,449.50 \n",
"50% 2,419.50 998.50 4,211.50 \n",
"75% 7,348.75 1,713.50 10,817.25 \n",
"max 2,176,578.00 96,194.00 208,763.00 \n",
"\n",
" original quote reply \\\n",
"count 2,292.00 2,292.00 2,292.00 \n",
"mean 102.06 48.73 55.08 \n",
"std 169.43 135.90 249.18 \n",
"min 0.00 0.00 0.00 \n",
"25% 10.00 1.00 1.00 \n",
"50% 41.00 9.00 5.00 \n",
"75% 124.25 43.00 30.00 \n",
"max 2,693.00 3,069.00 9,033.00 \n",
"\n",
" retweet tweets_in_dataset \n",
"count 2,292.00 2,292.00 \n",
"mean 150.64 356.52 \n",
"std 585.08 833.76 \n",
"min 0.00 1.00 \n",
"25% 8.00 32.00 \n",
"50% 39.00 122.00 \n",
"75% 129.00 375.00 \n",
"max 21,524.00 21,547.00 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_summary_df[['followers_count', 'following_count', 'tweet_count', 'original', 'quote', 'reply', 'retweet', 'tweets_in_dataset']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Female"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" followers_count | \n",
" following_count | \n",
" tweet_count | \n",
" original | \n",
" quote | \n",
" reply | \n",
" retweet | \n",
" tweets_in_dataset | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 993.00 | \n",
" 993.00 | \n",
" 993.00 | \n",
" 993.00 | \n",
" 993.00 | \n",
" 993.00 | \n",
" 993.00 | \n",
" 993.00 | \n",
"
\n",
" \n",
" mean | \n",
" 11,609.53 | \n",
" 1,314.07 | \n",
" 7,498.74 | \n",
" 83.84 | \n",
" 39.27 | \n",
" 32.06 | \n",
" 135.55 | \n",
" 290.72 | \n",
"
\n",
" \n",
" std | \n",
" 65,563.72 | \n",
" 1,250.56 | \n",
" 11,312.72 | \n",
" 124.86 | \n",
" 135.05 | \n",
" 94.73 | \n",
" 724.92 | \n",
" 833.07 | \n",
"
\n",
" \n",
" min | \n",
" 6.00 | \n",
" 1.00 | \n",
" 1.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 825.00 | \n",
" 567.00 | \n",
" 1,393.00 | \n",
" 8.00 | \n",
" 1.00 | \n",
" 1.00 | \n",
" 9.00 | \n",
" 32.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 2,327.00 | \n",
" 1,034.00 | \n",
" 4,055.00 | \n",
" 39.00 | \n",
" 9.00 | \n",
" 4.00 | \n",
" 37.00 | \n",
" 111.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 6,340.00 | \n",
" 1,659.00 | \n",
" 8,983.00 | \n",
" 111.00 | \n",
" 33.00 | \n",
" 21.00 | \n",
" 115.00 | \n",
" 314.00 | \n",
"
\n",
" \n",
" max | \n",
" 1,388,543.00 | \n",
" 18,197.00 | \n",
" 118,713.00 | \n",
" 1,440.00 | \n",
" 3,069.00 | \n",
" 1,458.00 | \n",
" 21,524.00 | \n",
" 21,547.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" followers_count following_count tweet_count \\\n",
"count 993.00 993.00 993.00 \n",
"mean 11,609.53 1,314.07 7,498.74 \n",
"std 65,563.72 1,250.56 11,312.72 \n",
"min 6.00 1.00 1.00 \n",
"25% 825.00 567.00 1,393.00 \n",
"50% 2,327.00 1,034.00 4,055.00 \n",
"75% 6,340.00 1,659.00 8,983.00 \n",
"max 1,388,543.00 18,197.00 118,713.00 \n",
"\n",
" original quote reply \\\n",
"count 993.00 993.00 993.00 \n",
"mean 83.84 39.27 32.06 \n",
"std 124.86 135.05 94.73 \n",
"min 0.00 0.00 0.00 \n",
"25% 8.00 1.00 1.00 \n",
"50% 39.00 9.00 4.00 \n",
"75% 111.00 33.00 21.00 \n",
"max 1,440.00 3,069.00 1,458.00 \n",
"\n",
" retweet tweets_in_dataset \n",
"count 993.00 993.00 \n",
"mean 135.55 290.72 \n",
"std 724.92 833.07 \n",
"min 0.00 1.00 \n",
"25% 9.00 32.00 \n",
"50% 37.00 111.00 \n",
"75% 115.00 314.00 \n",
"max 21,524.00 21,547.00 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_summary_df[user_summary_df.gender == 'F'][['followers_count', 'following_count', 'tweet_count', 'original', 'quote', 'reply', 'retweet', 'tweets_in_dataset']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Male"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" followers_count | \n",
" following_count | \n",
" tweet_count | \n",
" original | \n",
" quote | \n",
" reply | \n",
" retweet | \n",
" tweets_in_dataset | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1,299.00 | \n",
" 1,299.00 | \n",
" 1,299.00 | \n",
" 1,299.00 | \n",
" 1,299.00 | \n",
" 1,299.00 | \n",
" 1,299.00 | \n",
" 1,299.00 | \n",
"
\n",
" \n",
" mean | \n",
" 20,181.31 | \n",
" 1,544.78 | \n",
" 11,241.02 | \n",
" 115.99 | \n",
" 55.96 | \n",
" 72.69 | \n",
" 162.17 | \n",
" 406.81 | \n",
"
\n",
" \n",
" std | \n",
" 107,635.37 | \n",
" 3,833.89 | \n",
" 19,584.46 | \n",
" 195.72 | \n",
" 136.16 | \n",
" 319.41 | \n",
" 449.75 | \n",
" 831.10 | \n",
"
\n",
" \n",
" min | \n",
" 10.00 | \n",
" 0.00 | \n",
" 5.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 857.50 | \n",
" 472.00 | \n",
" 1,477.00 | \n",
" 12.00 | \n",
" 0.00 | \n",
" 1.00 | \n",
" 6.00 | \n",
" 33.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 2,498.00 | \n",
" 953.00 | \n",
" 4,401.00 | \n",
" 44.00 | \n",
" 9.00 | \n",
" 6.00 | \n",
" 40.00 | \n",
" 131.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 8,341.50 | \n",
" 1,763.00 | \n",
" 12,584.50 | \n",
" 140.00 | \n",
" 50.50 | \n",
" 38.50 | \n",
" 142.00 | \n",
" 428.00 | \n",
"
\n",
" \n",
" max | \n",
" 2,176,578.00 | \n",
" 96,194.00 | \n",
" 208,763.00 | \n",
" 2,693.00 | \n",
" 1,955.00 | \n",
" 9,033.00 | \n",
" 7,528.00 | \n",
" 11,432.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" followers_count following_count tweet_count \\\n",
"count 1,299.00 1,299.00 1,299.00 \n",
"mean 20,181.31 1,544.78 11,241.02 \n",
"std 107,635.37 3,833.89 19,584.46 \n",
"min 10.00 0.00 5.00 \n",
"25% 857.50 472.00 1,477.00 \n",
"50% 2,498.00 953.00 4,401.00 \n",
"75% 8,341.50 1,763.00 12,584.50 \n",
"max 2,176,578.00 96,194.00 208,763.00 \n",
"\n",
" original quote reply \\\n",
"count 1,299.00 1,299.00 1,299.00 \n",
"mean 115.99 55.96 72.69 \n",
"std 195.72 136.16 319.41 \n",
"min 0.00 0.00 0.00 \n",
"25% 12.00 0.00 1.00 \n",
"50% 44.00 9.00 6.00 \n",
"75% 140.00 50.50 38.50 \n",
"max 2,693.00 1,955.00 9,033.00 \n",
"\n",
" retweet tweets_in_dataset \n",
"count 1,299.00 1,299.00 \n",
"mean 162.17 406.81 \n",
"std 449.75 831.10 \n",
"min 0.00 1.00 \n",
"25% 6.00 33.00 \n",
"50% 40.00 131.00 \n",
"75% 142.00 428.00 \n",
"max 7,528.00 11,432.00 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_summary_df[user_summary_df.gender == 'M'][['followers_count', 'following_count', 'tweet_count', 'original', 'quote', 'reply', 'retweet', 'tweets_in_dataset']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Verified"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of all journalists, how many are verified?"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" True | \n",
" 1240 | \n",
" 54.1% | \n",
"
\n",
" \n",
" False | \n",
" 1052 | \n",
" 45.9% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"True 1240 54.1%\n",
"False 1052 45.9%"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame({'count':user_summary_df.verified.value_counts(), 'percentage':user_summary_df.verified.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of female journalists, how many are verified?"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" True | \n",
" 512 | \n",
" 51.6% | \n",
"
\n",
" \n",
" False | \n",
" 481 | \n",
" 48.4% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"True 512 51.6%\n",
"False 481 48.4%"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Verified vs. unverified accounts among rows whose gender is 'F'\n",
"female_verified = user_summary_df[user_summary_df.gender == 'F'].verified\n",
"pd.DataFrame({\n",
"    'count': female_verified.value_counts(),\n",
"    'percentage': female_verified.value_counts(normalize=True).mul(100).round(1).astype(str) + '%',\n",
"})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of male journalists, how many are verified?"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" True | \n",
" 728 | \n",
" 56.0% | \n",
"
\n",
" \n",
" False | \n",
" 571 | \n",
" 44.0% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"True 728 56.0%\n",
"False 571 44.0%"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Verified vs. unverified accounts among rows whose gender is 'M'\n",
"male_verified = user_summary_df[user_summary_df.gender == 'M'].verified\n",
"pd.DataFrame({\n",
"    'count': male_verified.value_counts(),\n",
"    'percentage': male_verified.value_counts(normalize=True).mul(100).round(1).astype(str) + '%',\n",
"})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Mention data prep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load mentions from tweets\n",
"Including original tweets only"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Loading from tweets/642bf140607547cb9d4c6b1fc49772aa_001.json.gz\n",
"DEBUG:root:Loaded 50000\n",
"DEBUG:root:Loaded 100000\n",
"DEBUG:root:Loaded 150000\n",
"DEBUG:root:Loaded 200000\n",
"DEBUG:root:Loaded 250000\n",
"INFO:root:Loading from tweets/9f7ed17c16a1494c8690b4053609539d_001.json.gz\n",
"DEBUG:root:Loaded 300000\n",
"DEBUG:root:Loaded 350000\n",
"DEBUG:root:Loaded 400000\n",
"DEBUG:root:Loaded 450000\n",
"DEBUG:root:Loaded 500000\n",
"INFO:root:Loading from tweets/41feff28312c433ab004cd822212f4c2_001.json.gz\n",
"DEBUG:root:Loaded 550000\n",
"DEBUG:root:Loaded 600000\n",
"DEBUG:root:Loaded 650000\n",
"DEBUG:root:Loaded 700000\n",
"DEBUG:root:Loaded 750000\n",
"DEBUG:root:Loaded 800000\n"
]
},
{
"data": {
"text/plain": [
"tweet_id 118210\n",
"user_id 118210\n",
"screen_name 118210\n",
"mention_user_id 118210\n",
"mention_screen_name 118210\n",
"tweet_created_at 118210\n",
"dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
"import logging\n",
"from dateutil.parser import parse as date_parse\n",
"from utils import load_tweet_df, tweet_type\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"# Root logger at DEBUG level, so INFO and DEBUG records are emitted\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.DEBUG)\n",
"\n",
"# Fixed-point float display with thousands separators, so pandas doesn't show scientific notation\n",
"pd.set_option('display.float_format', '{:20,.2f}'.format)\n",
"\n",
"# Simplify the tweet on load\n",
"def mention_transform(tweet):\n",
"    \"\"\"Flatten the user mentions of an original tweet into one row dict per mention.\n",
"\n",
"    Args:\n",
"        tweet: Tweet dict. Reads 'id_str', 'created_at', 'user' ('id_str' and\n",
"            'screen_name') and the optional 'entities' -> 'user_mentions' list.\n",
"\n",
"    Returns:\n",
"        A list with one dict per mention, keyed by tweet_id, user_id, screen_name,\n",
"        mention_user_id, mention_screen_name and tweet_created_at. The list is empty\n",
"        when tweet_type(tweet) is not 'original' or the tweet has no user mentions.\n",
"    \"\"\"\n",
"    if tweet_type(tweet) != 'original':\n",
"        return []\n",
"    user_mentions = tweet.get('entities', {}).get('user_mentions', [])\n",
"    if not user_mentions:\n",
"        return []\n",
"    # Every mention row shares the tweet-level fields, so resolve them (in particular\n",
"    # the date parse) once per tweet instead of once per mention.\n",
"    tweet_id = tweet['id_str']\n",
"    user = tweet['user']\n",
"    created_at = date_parse(tweet['created_at'])\n",
"    return [{\n",
"        'tweet_id': tweet_id,\n",
"        'user_id': user['id_str'],\n",
"        'screen_name': user['screen_name'],\n",
"        'mention_user_id': mention['id_str'],\n",
"        'mention_screen_name': mention['screen_name'],\n",
"        'tweet_created_at': created_at\n",
"    } for mention in user_mentions]\n",
"\n",
"# Columns of the mention frame, matching the keys emitted by mention_transform\n",
"mention_columns = ['tweet_id', 'user_id', 'screen_name', 'mention_user_id',\n",
"                   'mention_screen_name', 'tweet_created_at']\n",
"# NOTE(review): dedupe_columns presumably drops repeated (tweet, mentioned user) pairs -- confirm in utils.load_tweet_df\n",
"base_mention_df = load_tweet_df(mention_transform, mention_columns,\n",
"                                dedupe_columns=['tweet_id', 'mention_user_id'])\n",
"base_mention_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tweet_id | \n",
" user_id | \n",
" screen_name | \n",
" mention_user_id | \n",
" mention_screen_name | \n",
" tweet_created_at | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 872522339962978307 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 800707492346925056 | \n",
" axios | \n",
" 2017-06-07 18:35:11+00:00 | \n",
"
\n",
" \n",
" 1 | \n",
" 872484939530461184 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 17494010 | \n",
" SenSchumer | \n",
" 2017-06-07 16:06:34+00:00 | \n",
"
\n",
" \n",
" 2 | \n",
" 872475140575170562 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 2836421 | \n",
" MSNBC | \n",
" 2017-06-07 15:27:37+00:00 | \n",
"
\n",
" \n",
" 3 | \n",
" 872475140575170562 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 800707492346925056 | \n",
" axios | \n",
" 2017-06-07 15:27:37+00:00 | \n",
"
\n",
" \n",
" 4 | \n",
" 872459457946673154 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 800707492346925056 | \n",
" axios | \n",
" 2017-06-07 14:25:18+00:00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" tweet_id user_id screen_name mention_user_id \\\n",
"0 872522339962978307 327862439 jonathanvswan 800707492346925056 \n",
"1 872484939530461184 327862439 jonathanvswan 17494010 \n",
"2 872475140575170562 327862439 jonathanvswan 2836421 \n",
"3 872475140575170562 327862439 jonathanvswan 800707492346925056 \n",
"4 872459457946673154 327862439 jonathanvswan 800707492346925056 \n",
"\n",
" mention_screen_name tweet_created_at \n",
"0 axios 2017-06-07 18:35:11+00:00 \n",
"1 SenSchumer 2017-06-07 16:06:34+00:00 \n",
"2 MSNBC 2017-06-07 15:27:37+00:00 \n",
"3 axios 2017-06-07 15:27:37+00:00 \n",
"4 axios 2017-06-07 14:25:18+00:00 "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five mention rows\n",
"base_mention_df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add gender of mentioner"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tweet_id 118210\n",
"user_id 118210\n",
"screen_name 118210\n",
"mention_user_id 118210\n",
"mention_screen_name 118210\n",
"tweet_created_at 118210\n",
"gender 118210\n",
"dtype: int64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attach the gender of the tweeting user, looked up in user_summary_df by user_id\n",
"mentioner_gender = user_summary_df['gender']\n",
"mention_df = base_mention_df.join(mentioner_gender, on='user_id')\n",
"mention_df.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How many tweets are in dataset?"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"84942"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of distinct tweets that appear in the mention frame\n",
"mention_df['tweet_id'].nunique(dropna=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How many users are mentioned? (All users, not just journalists)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"17730"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of distinct mentioned user ids\n",
"mention_df['mention_user_id'].nunique(dropna=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Limit to mentions of journalists"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tweet_id 14298\n",
"user_id 14298\n",
"screen_name 14298\n",
"mention_user_id 14298\n",
"mention_screen_name 14298\n",
"tweet_created_at 14298\n",
"gender 14298\n",
"mention_gender 14298\n",
"dtype: int64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inner join on the mentioned user's id: keeps only mentions whose target appears in\n",
"# user_summary_df and attaches that user's gender as mention_gender.\n",
"journalists_mention_df = (\n",
"    mention_df\n",
"    .join(user_summary_df['gender'], how='inner', on='mention_user_id', rsuffix='_mention')\n",
"    .rename(columns={'gender_mention': 'mention_gender'})\n",
")\n",
"journalists_mention_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tweet_id | \n",
" user_id | \n",
" screen_name | \n",
" mention_user_id | \n",
" mention_screen_name | \n",
" tweet_created_at | \n",
" gender | \n",
" mention_gender | \n",
"
\n",
" \n",
" \n",
" \n",
" 16 | \n",
" 870408075878027268 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 16031927 | \n",
" greta | \n",
" 2017-06-01 22:33:51+00:00 | \n",
" M | \n",
" F | \n",
"
\n",
" \n",
" 283 | \n",
" 872581449861541893 | \n",
" 19847765 | \n",
" sahilkapur | \n",
" 16031927 | \n",
" greta | \n",
" 2017-06-07 22:30:04+00:00 | \n",
" M | \n",
" F | \n",
"
\n",
" \n",
" 2202 | \n",
" 872578055910371328 | \n",
" 21252618 | \n",
" JakeSherman | \n",
" 16031927 | \n",
" greta | \n",
" 2017-06-07 22:16:34+00:00 | \n",
" M | \n",
" F | \n",
"
\n",
" \n",
" 15977 | \n",
" 880841069243629568 | \n",
" 70511174 | \n",
" Hadas_Gold | \n",
" 16031927 | \n",
" greta | \n",
" 2017-06-30 17:30:50+00:00 | \n",
" F | \n",
" F | \n",
"
\n",
" \n",
" 17258 | \n",
" 880183952018886661 | \n",
" 90077282 | \n",
" politicoalex | \n",
" 16031927 | \n",
" greta | \n",
" 2017-06-28 21:59:41+00:00 | \n",
" M | \n",
" F | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" tweet_id user_id screen_name mention_user_id \\\n",
"16 870408075878027268 327862439 jonathanvswan 16031927 \n",
"283 872581449861541893 19847765 sahilkapur 16031927 \n",
"2202 872578055910371328 21252618 JakeSherman 16031927 \n",
"15977 880841069243629568 70511174 Hadas_Gold 16031927 \n",
"17258 880183952018886661 90077282 politicoalex 16031927 \n",
"\n",
" mention_screen_name tweet_created_at gender mention_gender \n",
"16 greta 2017-06-01 22:33:51+00:00 M F \n",
"283 greta 2017-06-07 22:30:04+00:00 M F \n",
"2202 greta 2017-06-07 22:16:34+00:00 M F \n",
"15977 greta 2017-06-30 17:30:50+00:00 F F \n",
"17258 greta 2017-06-28 21:59:41+00:00 M F "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five journalist-to-journalist mention rows\n",
"journalists_mention_df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Functions for summarizing mentions by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# Gender of beltway journalists mentioned by beltway journalists\n",
"def journalist_mention_gender_summary(mention_df):\n",
"    \"\"\"Tabulate mentions by the gender of the mentioned user.\n",
"\n",
"    Args:\n",
"        mention_df: Frame with a mention_gender column, one row per mention.\n",
"\n",
"    Returns:\n",
"        DataFrame indexed by gender value with a 'count' column and a 'percentage'\n",
"        column (share of rows, rounded to one decimal, as a string ending in '%').\n",
"    \"\"\"\n",
"    mentioned_gender = mention_df.mention_gender\n",
"    gender_counts = mentioned_gender.value_counts()\n",
"    gender_share = mentioned_gender.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'\n",
"    return pd.DataFrame({'count': gender_counts, 'percentage': gender_share})\n",
"\n",
"def journalist_mention_summary(mention_df):\n",
"    \"\"\"Build a per-journalist table of how often, and by how many users, each one is mentioned.\n",
"\n",
"    Reads the notebook-level user_summary_df and joins two counts onto it by index:\n",
"    mention_count (rows of mention_df per mention_user_id) and mentioning_count\n",
"    (distinct user_id values per mention_user_id). Every missing value in the joined\n",
"    frame is replaced with 0, and rows are sorted by mention_count, mentioning_count\n",
"    and followers_count, all descending.\n",
"\n",
"    Args:\n",
"        mention_df: Frame with mention_user_id and user_id columns, one row per mention.\n",
"\n",
"    Returns:\n",
"        The sorted summary DataFrame.\n",
"    \"\"\"\n",
"    # How many times each user is mentioned\n",
"    times_mentioned_df = pd.DataFrame(mention_df.mention_user_id.value_counts().rename('mention_count'))\n",
"\n",
"    # How many distinct users mention each user: de-duplicate (mentioned, mentioner) pairs, then count\n",
"    distinct_pairs_df = mention_df[['mention_user_id', 'user_id']].drop_duplicates()\n",
"    mentioner_count_df = pd.DataFrame(\n",
"        distinct_pairs_df.groupby('mention_user_id').size(), columns=['mentioning_count']\n",
"    ).rename_axis('user_id')\n",
"\n",
"    # Join both counts onto the user summary; users without a count get 0\n",
"    summary_df = user_summary_df.join([times_mentioned_df, mentioner_count_df]).fillna(0)\n",
"    return summary_df.sort_values(['mention_count', 'mentioning_count', 'followers_count'], ascending=False)\n",
"\n",
"# Gender of top journalists mentioned by beltway journalists\n",
"def top_journalist_mention_gender_summary(mention_summary_df, mentioning_count_threshold=0, head=100):\n",
"    \"\"\"Tabulate the gender of the leading rows of a mention summary.\n",
"\n",
"    Args:\n",
"        mention_summary_df: Frame with gender and mentioning_count columns; rows are\n",
"            taken in their existing order.\n",
"        mentioning_count_threshold: Only rows with mentioning_count strictly greater\n",
"            than this value are considered.\n",
"        head: Number of leading rows kept after filtering.\n",
"\n",
"    Returns:\n",
"        DataFrame indexed by gender value with a 'count' column and a 'percentage'\n",
"        column (share of kept rows, rounded to one decimal, as a string ending in '%').\n",
"    \"\"\"\n",
"    above_threshold = mention_summary_df.mentioning_count > mentioning_count_threshold\n",
"    top_gender = mention_summary_df[above_threshold].head(head).gender\n",
"    gender_counts = top_gender.value_counts()\n",
"    gender_share = top_gender.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'\n",
"    return pd.DataFrame({'count': gender_counts, 'percentage': gender_share})\n",
"\n",
"\n",
"# Columns shown when displaying a journalist mention summary\n",
"journalist_mention_summary_fields = [\n",
"    'screen_name', 'name', 'organization', 'gender',\n",
"    'followers_count', 'mention_count', 'mentioning_count',\n",
"]\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Mentioned analysis\n",
"*Note that for each of these, the complete list is being written to CSV in the output directory.*\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of the original tweets, how many were posted by male journalists / female journalists?"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" original | \n",
" percentage | \n",
"
\n",
" \n",
" gender | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" F | \n",
" 83,251.00 | \n",
" 35.6% | \n",
"
\n",
" \n",
" M | \n",
" 150,675.00 | \n",
" 64.4% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" original percentage\n",
"gender \n",
"F 83,251.00 35.6%\n",
"M 150,675.00 64.4%"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Original tweets summed per gender, with each gender's share of all original tweets\n",
"# (the denominator is the total over every row of user_summary_df)\n",
"original_tweets_by_gender_df = (\n",
"    user_summary_df\n",
"    .groupby('gender')[['original']]\n",
"    .sum()\n",
"    .assign(percentage=lambda by_gender: by_gender.original.div(user_summary_df.original.sum()).mul(100).round(1).astype(str) + '%')\n",
")\n",
"original_tweets_by_gender_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Who posted the most original tweets?"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" tweet_count | \n",
" original | \n",
" tweets_in_dataset | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 16187637 | \n",
" ChadPergram | \n",
" Pergram, Chad | \n",
" Fox News | \n",
" M | \n",
" 59305 | \n",
" 61461 | \n",
" 2,693.00 | \n",
" 2,693.00 | \n",
"
\n",
" \n",
" 31127446 | \n",
" markknoller | \n",
" Knoller, Mark | \n",
" CBS News | \n",
" M | \n",
" 301474 | \n",
" 115132 | \n",
" 1,858.00 | \n",
" 2,089.00 | \n",
"
\n",
" \n",
" 16459325 | \n",
" ryanbeckwith | \n",
" Beckwith, Ryan Teague | \n",
" Time Magazine | \n",
" M | \n",
" 20947 | \n",
" 92203 | \n",
" 1,534.00 | \n",
" 5,187.00 | \n",
"
\n",
" \n",
" 19580890 | \n",
" LeeCamp | \n",
" Camp, Lee | \n",
" RTTV America | \n",
" M | \n",
" 67601 | \n",
" 52051 | \n",
" 1,517.00 | \n",
" 3,708.00 | \n",
"
\n",
" \n",
" 18825339 | \n",
" CahnEmily | \n",
" Cahn, Emily | \n",
" Mic | \n",
" F | \n",
" 16980 | \n",
" 100803 | \n",
" 1,440.00 | \n",
" 8,196.00 | \n",
"
\n",
" \n",
" 593813785 | \n",
" DonnaYoungDC | \n",
" Young, Donna | \n",
" S&P Global Market Intelligence | \n",
" F | \n",
" 5894 | \n",
" 49967 | \n",
" 1,332.00 | \n",
" 4,414.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 148143 | \n",
" 1,316.00 | \n",
" 5,078.00 | \n",
"
\n",
" \n",
" 21316253 | \n",
" ZekeJMiller | \n",
" Miller, Zeke J. | \n",
" Time Magazine | \n",
" M | \n",
" 198517 | \n",
" 161148 | \n",
" 1,271.00 | \n",
" 2,106.00 | \n",
"
\n",
" \n",
" 36246939 | \n",
" malbertnews | \n",
" Albert, Mark | \n",
" The Voyage Report | \n",
" M | \n",
" 3575 | \n",
" 28230 | \n",
" 1,078.00 | \n",
" 1,151.00 | \n",
"
\n",
" \n",
" 117467779 | \n",
" palbergo | \n",
" Albergo, Paul F. | \n",
" Bloomberg BNA | \n",
" M | \n",
" 1191 | \n",
" 18083 | \n",
" 1,043.00 | \n",
" 1,236.00 | \n",
"
\n",
" \n",
" 102171691 | \n",
" rlocker12 | \n",
" Locker, Ray | \n",
" USA Today | \n",
" M | \n",
" 3665 | \n",
" 41194 | \n",
" 1,038.00 | \n",
" 2,496.00 | \n",
"
\n",
" \n",
" 15486163 | \n",
" SimonMarksFSN | \n",
" Marks, Simon | \n",
" Feature Story News | \n",
" M | \n",
" 7767 | \n",
" 41541 | \n",
" 984.00 | \n",
" 3,432.00 | \n",
"
\n",
" \n",
" 275207082 | \n",
" AlexParkerDC | \n",
" Parker, Alexander M. | \n",
" Bloomberg BNA | \n",
" M | \n",
" 3828 | \n",
" 142150 | \n",
" 972.00 | \n",
" 3,983.00 | \n",
"
\n",
" \n",
" 190360266 | \n",
" connorobrienNH | \n",
" O’Brien, Connor | \n",
" Politico | \n",
" M | \n",
" 6158 | \n",
" 17242 | \n",
" 954.00 | \n",
" 1,944.00 | \n",
"
\n",
" \n",
" 16031927 | \n",
" greta | \n",
" Van Susteren, Greta | \n",
" MSNBC | \n",
" F | \n",
" 1186850 | \n",
" 116645 | \n",
" 907.00 | \n",
" 4,792.00 | \n",
"
\n",
" \n",
" 300497193 | \n",
" tackettdc | \n",
" Tackett, R. Michael | \n",
" New York Times | \n",
" M | \n",
" 16857 | \n",
" 38620 | \n",
" 896.00 | \n",
" 1,041.00 | \n",
"
\n",
" \n",
" 191964162 | \n",
" SamLitzinger | \n",
" Litzinger, Sam | \n",
" CBS News | \n",
" M | \n",
" 2329 | \n",
" 95236 | \n",
" 891.00 | \n",
" 7,537.00 | \n",
"
\n",
" \n",
" 118130765 | \n",
" dylanlscott | \n",
" Scott, Dylan L. | \n",
" Stat News | \n",
" M | \n",
" 20122 | \n",
" 42497 | \n",
" 885.00 | \n",
" 3,960.00 | \n",
"
\n",
" \n",
" 3817401 | \n",
" ericgeller | \n",
" Geller, Eric | \n",
" Politico | \n",
" M | \n",
" 58173 | \n",
" 208763 | \n",
" 871.00 | \n",
" 11,432.00 | \n",
"
\n",
" \n",
" 259395895 | \n",
" JohnJHarwood | \n",
" Harwood, John | \n",
" CNBC | \n",
" M | \n",
" 149040 | \n",
" 78015 | \n",
" 846.00 | \n",
" 6,377.00 | \n",
"
\n",
" \n",
" 27882000 | \n",
" jamiedupree | \n",
" Dupree, Jamie | \n",
" Cox Broadcasting | \n",
" M | \n",
" 140848 | \n",
" 46181 | \n",
" 841.00 | \n",
" 2,108.00 | \n",
"
\n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 27294 | \n",
" 836.00 | \n",
" 1,673.00 | \n",
"
\n",
" \n",
" 104299137 | \n",
" DavidMDrucker | \n",
" Drucker, David | \n",
" Washington Examiner | \n",
" M | \n",
" 35033 | \n",
" 104613 | \n",
" 824.00 | \n",
" 4,907.00 | \n",
"
\n",
" \n",
" 63149389 | \n",
" hbwx | \n",
" Bernstein, Howard | \n",
" WUSA–TV | \n",
" M | \n",
" 8337 | \n",
" 48025 | \n",
" 822.00 | \n",
" 1,604.00 | \n",
"
\n",
" \n",
" 13262862 | \n",
" HowardMortman | \n",
" Mortman, Howard | \n",
" C–SPAN | \n",
" M | \n",
" 6211 | \n",
" 38406 | \n",
" 819.00 | \n",
" 1,289.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name \\\n",
"user_id \n",
"16187637 ChadPergram Pergram, Chad \n",
"31127446 markknoller Knoller, Mark \n",
"16459325 ryanbeckwith Beckwith, Ryan Teague \n",
"19580890 LeeCamp Camp, Lee \n",
"18825339 CahnEmily Cahn, Emily \n",
"593813785 DonnaYoungDC Young, Donna \n",
"14529929 jaketapper Tapper, Jake \n",
"21316253 ZekeJMiller Miller, Zeke J. \n",
"36246939 malbertnews Albert, Mark \n",
"117467779 palbergo Albergo, Paul F. \n",
"102171691 rlocker12 Locker, Ray \n",
"15486163 SimonMarksFSN Marks, Simon \n",
"275207082 AlexParkerDC Parker, Alexander M. \n",
"190360266 connorobrienNH O’Brien, Connor \n",
"16031927 greta Van Susteren, Greta \n",
"300497193 tackettdc Tackett, R. Michael \n",
"191964162 SamLitzinger Litzinger, Sam \n",
"118130765 dylanlscott Scott, Dylan L. \n",
"3817401 ericgeller Geller, Eric \n",
"259395895 JohnJHarwood Harwood, John \n",
"27882000 jamiedupree Dupree, Jamie \n",
"407013776 burgessev Everett, John B. \n",
"104299137 DavidMDrucker Drucker, David \n",
"63149389 hbwx Bernstein, Howard \n",
"13262862 HowardMortman Mortman, Howard \n",
"\n",
" organization gender followers_count \\\n",
"user_id \n",
"16187637 Fox News M 59305 \n",
"31127446 CBS News M 301474 \n",
"16459325 Time Magazine M 20947 \n",
"19580890 RTTV America M 67601 \n",
"18825339 Mic F 16980 \n",
"593813785 S&P Global Market Intelligence F 5894 \n",
"14529929 CNN M 1305680 \n",
"21316253 Time Magazine M 198517 \n",
"36246939 The Voyage Report M 3575 \n",
"117467779 Bloomberg BNA M 1191 \n",
"102171691 USA Today M 3665 \n",
"15486163 Feature Story News M 7767 \n",
"275207082 Bloomberg BNA M 3828 \n",
"190360266 Politico M 6158 \n",
"16031927 MSNBC F 1186850 \n",
"300497193 New York Times M 16857 \n",
"191964162 CBS News M 2329 \n",
"118130765 Stat News M 20122 \n",
"3817401 Politico M 58173 \n",
"259395895 CNBC M 149040 \n",
"27882000 Cox Broadcasting M 140848 \n",
"407013776 Politico M 31010 \n",
"104299137 Washington Examiner M 35033 \n",
"63149389 WUSA–TV M 8337 \n",
"13262862 C–SPAN M 6211 \n",
"\n",
" tweet_count original tweets_in_dataset \n",
"user_id \n",
"16187637 61461 2,693.00 2,693.00 \n",
"31127446 115132 1,858.00 2,089.00 \n",
"16459325 92203 1,534.00 5,187.00 \n",
"19580890 52051 1,517.00 3,708.00 \n",
"18825339 100803 1,440.00 8,196.00 \n",
"593813785 49967 1,332.00 4,414.00 \n",
"14529929 148143 1,316.00 5,078.00 \n",
"21316253 161148 1,271.00 2,106.00 \n",
"36246939 28230 1,078.00 1,151.00 \n",
"117467779 18083 1,043.00 1,236.00 \n",
"102171691 41194 1,038.00 2,496.00 \n",
"15486163 41541 984.00 3,432.00 \n",
"275207082 142150 972.00 3,983.00 \n",
"190360266 17242 954.00 1,944.00 \n",
"16031927 116645 907.00 4,792.00 \n",
"300497193 38620 896.00 1,041.00 \n",
"191964162 95236 891.00 7,537.00 \n",
"118130765 42497 885.00 3,960.00 \n",
"3817401 208763 871.00 11,432.00 \n",
"259395895 78015 846.00 6,377.00 \n",
"27882000 46181 841.00 2,108.00 \n",
"407013776 27294 836.00 1,673.00 \n",
"104299137 104613 824.00 4,907.00 \n",
"63149389 48025 822.00 1,604.00 \n",
"13262862 38406 819.00 1,289.00 "
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The 25 rows with the highest number of original tweets\n",
"(\n",
"    user_summary_df[['screen_name', 'name', 'organization', 'gender',\n",
"                     'followers_count', 'tweet_count', 'original', 'tweets_in_dataset']]\n",
"    .sort_values('original', ascending=False)\n",
"    .head(25)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of accounts mentioned by journalists, which are mentioned the most? (All accounts, not just journalists)\n",
"This is based on screen name, which could have changed during the collection period. However, for the users at the top of this list, that seems unlikely."
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mention_count | \n",
" mentioning_count | \n",
"
\n",
" \n",
" \n",
" \n",
" realDonaldTrump | \n",
" 2876 | \n",
" 452 | \n",
"
\n",
" \n",
" POTUS | \n",
" 2265 | \n",
" 253 | \n",
"
\n",
" \n",
" wusa9 | \n",
" 2111 | \n",
" 41 | \n",
"
\n",
" \n",
" AP | \n",
" 1948 | \n",
" 143 | \n",
"
\n",
" \n",
" USATODAY | \n",
" 1235 | \n",
" 105 | \n",
"
\n",
" \n",
" nbcwashington | \n",
" 1230 | \n",
" 70 | \n",
"
\n",
" \n",
" WSJ | \n",
" 1227 | \n",
" 152 | \n",
"
\n",
" \n",
" dcexaminer | \n",
" 1034 | \n",
" 53 | \n",
"
\n",
" \n",
" SHSanders45 | \n",
" 927 | \n",
" 148 | \n",
"
\n",
" \n",
" nytimes | \n",
" 829 | \n",
" 289 | \n",
"
\n",
" \n",
" BloombergBNA | \n",
" 759 | \n",
" 45 | \n",
"
\n",
" \n",
" politico | \n",
" 747 | \n",
" 181 | \n",
"
\n",
" \n",
" SpeakerRyan | \n",
" 700 | \n",
" 181 | \n",
"
\n",
" \n",
" Scaramucci | \n",
" 657 | \n",
" 198 | \n",
"
\n",
" \n",
" PressSec | \n",
" 654 | \n",
" 178 | \n",
"
\n",
" \n",
" CNN | \n",
" 628 | \n",
" 186 | \n",
"
\n",
" \n",
" ABC7News | \n",
" 604 | \n",
" 24 | \n",
"
\n",
" \n",
" SenJohnMcCain | \n",
" 599 | \n",
" 231 | \n",
"
\n",
" \n",
" WTOP | \n",
" 529 | \n",
" 43 | \n",
"
\n",
" \n",
" BloombergLaw | \n",
" 517 | \n",
" 15 | \n",
"
\n",
" \n",
" VP | \n",
" 506 | \n",
" 140 | \n",
"
\n",
" \n",
" SteveScalise | \n",
" 505 | \n",
" 150 | \n",
"
\n",
" \n",
" MSNBC | \n",
" 486 | \n",
" 92 | \n",
"
\n",
" \n",
" Reuters | \n",
" 483 | \n",
" 84 | \n",
"
\n",
" \n",
" bpolitics | \n",
" 432 | \n",
" 69 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mention_count mentioning_count\n",
"realDonaldTrump 2876 452\n",
"POTUS 2265 253\n",
"wusa9 2111 41\n",
"AP 1948 143\n",
"USATODAY 1235 105\n",
"nbcwashington 1230 70\n",
"WSJ 1227 152\n",
"dcexaminer 1034 53\n",
"SHSanders45 927 148\n",
"nytimes 829 289\n",
"BloombergBNA 759 45\n",
"politico 747 181\n",
"SpeakerRyan 700 181\n",
"Scaramucci 657 198\n",
"PressSec 654 178\n",
"CNN 628 186\n",
"ABC7News 604 24\n",
"SenJohnMcCain 599 231\n",
"WTOP 529 43\n",
"BloombergLaw 517 15\n",
"VP 506 140\n",
"SteveScalise 505 150\n",
"MSNBC 486 92\n",
"Reuters 483 84\n",
"bpolitics 432 69"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mention count: number of mention rows per mentioned screen name\n",
"mention_count_screen_name_df = pd.DataFrame(mention_df.mention_screen_name.value_counts().rename('mention_count'))\n",
"\n",
"# Count of mentioning users: number of distinct user_id values per mentioned screen name\n",
"mention_user_id_per_user_screen_name_df = mention_df[['mention_screen_name', 'user_id']].drop_duplicates()\n",
"mentioning_count_screen_name_df = pd.DataFrame(mention_user_id_per_user_screen_name_df.groupby('mention_screen_name').size(), columns=['mentioning_count'])\n",
"mentioning_count_screen_name_df.index.name = 'screen_name'\n",
"\n",
"# The joined frame takes its index (and index name) from the left-hand frame, so the\n",
"# 'screen_name' label must be applied to the result to show up in the table and the CSV header.\n",
"all_mentioned_df = mention_count_screen_name_df.join(mentioning_count_screen_name_df).rename_axis('screen_name')\n",
"all_mentioned_df.to_csv('output/all_mentioned_by_journalists.csv')\n",
"all_mentioned_df.head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Same, but ordered by the number of journalists mentioning the account"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mention_count | \n",
" mentioning_count | \n",
"
\n",
" \n",
" \n",
" \n",
" realDonaldTrump | \n",
" 2876 | \n",
" 452 | \n",
"
\n",
" \n",
" nytimes | \n",
" 829 | \n",
" 289 | \n",
"
\n",
" \n",
" POTUS | \n",
" 2265 | \n",
" 253 | \n",
"
\n",
" \n",
" SenJohnMcCain | \n",
" 599 | \n",
" 231 | \n",
"
\n",
" \n",
" Scaramucci | \n",
" 657 | \n",
" 198 | \n",
"
\n",
" \n",
" CNN | \n",
" 628 | \n",
" 186 | \n",
"
\n",
" \n",
" politico | \n",
" 747 | \n",
" 181 | \n",
"
\n",
" \n",
" SpeakerRyan | \n",
" 700 | \n",
" 181 | \n",
"
\n",
" \n",
" PressSec | \n",
" 654 | \n",
" 178 | \n",
"
\n",
" \n",
" washingtonpost | \n",
" 413 | \n",
" 154 | \n",
"
\n",
" \n",
" WSJ | \n",
" 1227 | \n",
" 152 | \n",
"
\n",
" \n",
" SteveScalise | \n",
" 505 | \n",
" 150 | \n",
"
\n",
" \n",
" SHSanders45 | \n",
" 927 | \n",
" 148 | \n",
"
\n",
" \n",
" AP | \n",
" 1948 | \n",
" 143 | \n",
"
\n",
" \n",
" VP | \n",
" 506 | \n",
" 140 | \n",
"
\n",
" \n",
" SenateMajLdr | \n",
" 412 | \n",
" 120 | \n",
"
\n",
" \n",
" DonaldJTrumpJr | \n",
" 199 | \n",
" 110 | \n",
"
\n",
" \n",
" RandPaul | \n",
" 206 | \n",
" 107 | \n",
"
\n",
" \n",
" USATODAY | \n",
" 1235 | \n",
" 105 | \n",
"
\n",
" \n",
" LindseyGrahamSC | \n",
" 253 | \n",
" 105 | \n",
"
\n",
" \n",
" SenSchumer | \n",
" 265 | \n",
" 97 | \n",
"
\n",
" \n",
" NancyPelosi | \n",
" 266 | \n",
" 95 | \n",
"
\n",
" \n",
" MSNBC | \n",
" 486 | \n",
" 92 | \n",
"
\n",
" \n",
" CNNPolitics | \n",
" 329 | \n",
" 91 | \n",
"
\n",
" \n",
" MarkWarner | \n",
" 204 | \n",
" 89 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mention_count mentioning_count\n",
"realDonaldTrump 2876 452\n",
"nytimes 829 289\n",
"POTUS 2265 253\n",
"SenJohnMcCain 599 231\n",
"Scaramucci 657 198\n",
"CNN 628 186\n",
"politico 747 181\n",
"SpeakerRyan 700 181\n",
"PressSec 654 178\n",
"washingtonpost 413 154\n",
"WSJ 1227 152\n",
"SteveScalise 505 150\n",
"SHSanders45 927 148\n",
"AP 1948 143\n",
"VP 506 140\n",
"SenateMajLdr 412 120\n",
"DonaldJTrumpJr 199 110\n",
"RandPaul 206 107\n",
"USATODAY 1235 105\n",
"LindseyGrahamSC 253 105\n",
"SenSchumer 265 97\n",
"NancyPelosi 266 95\n",
"MSNBC 486 92\n",
"CNNPolitics 329 91\n",
"MarkWarner 204 89"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same table, ranked by number of distinct mentioning users first, then by mention count\n",
"(\n",
"    all_mentioned_df\n",
"    .sort_values(by=['mentioning_count', 'mention_count'], ascending=False)\n",
"    .head(25)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Journalists mentioning journalists"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of journalists mentioning journalists, who is mentioned the most?"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" mention_count | \n",
" mentioning_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 325050734 | \n",
" AllysonRaeWx | \n",
" Banks, Allyson | \n",
" WUSA–TV | \n",
" F | \n",
" 6918 | \n",
" 330.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 28496589 | \n",
" TenaciousTopper | \n",
" Shutt, Charles | \n",
" WUSA–TV | \n",
" M | \n",
" 15868 | \n",
" 239.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 63149389 | \n",
" hbwx | \n",
" Bernstein, Howard | \n",
" WUSA–TV | \n",
" M | \n",
" 8337 | \n",
" 235.00 | \n",
" 10.00 | \n",
"
\n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 212.00 | \n",
" 46.00 | \n",
"
\n",
" \n",
" 16018516 | \n",
" jenhab | \n",
" Haberkorn, Jennifer A. | \n",
" Politico | \n",
" F | \n",
" 20028 | \n",
" 200.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 143.00 | \n",
" 41.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 127.00 | \n",
" 51.00 | \n",
"
\n",
" \n",
" 169586280 | \n",
" WaPoSean | \n",
" Sullivan, Sean | \n",
" Washington Post | \n",
" M | \n",
" 22860 | \n",
" 117.00 | \n",
" 20.00 | \n",
"
\n",
" \n",
" 997684836 | \n",
" pkcapitol | \n",
" Kane, Paul | \n",
" Washington Post | \n",
" M | \n",
" 31300 | \n",
" 116.00 | \n",
" 47.00 | \n",
"
\n",
" \n",
" 108617810 | \n",
" DanaBashCNN | \n",
" Bash, Dana | \n",
" CNN | \n",
" F | \n",
" 281861 | \n",
" 115.00 | \n",
" 55.00 | \n",
"
\n",
" \n",
" 82151660 | \n",
" kelsey_snell | \n",
" Snell, Kelse | \n",
" Washington Post | \n",
" F | \n",
" 8108 | \n",
" 109.00 | \n",
" 22.00 | \n",
"
\n",
" \n",
" 123327472 | \n",
" peterbakernyt | \n",
" Baker, Peter | \n",
" New York Times | \n",
" M | \n",
" 96956 | \n",
" 107.00 | \n",
" 43.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 106.00 | \n",
" 42.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 105.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 15931637 | \n",
" jonkarl | \n",
" Karl, Jonathan | \n",
" ABC News | \n",
" M | \n",
" 183467 | \n",
" 104.00 | \n",
" 40.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" F | \n",
" 122382 | \n",
" 100.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
" 9126752 | \n",
" reporterjoe | \n",
" Gould, Joseph M. | \n",
" Sightline Media Group | \n",
" M | \n",
" 4702 | \n",
" 98.00 | \n",
" 16.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 95.00 | \n",
" 43.00 | \n",
"
\n",
" \n",
" 52392666 | \n",
" ZoeTillman | \n",
" Tillman, Zoe | \n",
" BuzzFeed | \n",
" F | \n",
" 15246 | \n",
" 87.00 | \n",
" 14.00 | \n",
"
\n",
" \n",
" 16930125 | \n",
" edatpost | \n",
" O’Keefe, Edward | \n",
" Washington Post | \n",
" M | \n",
" 58670 | \n",
" 84.00 | \n",
" 41.00 | \n",
"
\n",
" \n",
" 26632935 | \n",
" HopeSeck | \n",
" Hodge Seck, Hope | \n",
" Military.com | \n",
" F | \n",
" 4584 | \n",
" 83.00 | \n",
" 3.00 | \n",
"
\n",
" \n",
" 48802204 | \n",
" HardballChris | \n",
" Matthews, Chris | \n",
" NBC News | \n",
" M | \n",
" 718330 | \n",
" 80.00 | \n",
" 9.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 78.00 | \n",
" 37.00 | \n",
"
\n",
" \n",
" 217550862 | \n",
" BresPolitico | \n",
" Bresnahan, John | \n",
" Politico | \n",
" M | \n",
" 40562 | \n",
" 78.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 24439201 | \n",
" jameshohmann | \n",
" Hohmann, James P. | \n",
" Washington Post | \n",
" M | \n",
" 38708 | \n",
" 78.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"325050734 AllysonRaeWx Banks, Allyson WUSA–TV \n",
"28496589 TenaciousTopper Shutt, Charles WUSA–TV \n",
"63149389 hbwx Bernstein, Howard WUSA–TV \n",
"407013776 burgessev Everett, John B. Politico \n",
"16018516 jenhab Haberkorn, Jennifer A. Politico \n",
"19186003 seungminkim Kim, Seung Min Politico \n",
"14529929 jaketapper Tapper, Jake CNN \n",
"169586280 WaPoSean Sullivan, Sean Washington Post \n",
"997684836 pkcapitol Kane, Paul Washington Post \n",
"108617810 DanaBashCNN Bash, Dana CNN \n",
"82151660 kelsey_snell Snell, Kelse Washington Post \n",
"123327472 peterbakernyt Baker, Peter New York Times \n",
"13524182 daveweigel Weigel, David Washington Post \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News \n",
"15931637 jonkarl Karl, Jonathan ABC News \n",
"33919343 AshleyRParker Parker, Ashley Washington Post \n",
"9126752 reporterjoe Gould, Joseph M. Sightline Media Group \n",
"39155029 mkraju Raju, Manu K. CNN \n",
"52392666 ZoeTillman Tillman, Zoe BuzzFeed \n",
"16930125 edatpost O’Keefe, Edward Washington Post \n",
"26632935 HopeSeck Hodge Seck, Hope Military.com \n",
"48802204 HardballChris Matthews, Chris NBC News \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times \n",
"217550862 BresPolitico Bresnahan, John Politico \n",
"24439201 jameshohmann Hohmann, James P. Washington Post \n",
"\n",
" gender followers_count mention_count mentioning_count \n",
"user_id \n",
"325050734 F 6918 330.00 7.00 \n",
"28496589 M 15868 239.00 13.00 \n",
"63149389 M 8337 235.00 10.00 \n",
"407013776 M 31010 212.00 46.00 \n",
"16018516 F 20028 200.00 31.00 \n",
"19186003 F 33980 143.00 41.00 \n",
"14529929 M 1305680 127.00 51.00 \n",
"169586280 M 22860 117.00 20.00 \n",
"997684836 M 31300 116.00 47.00 \n",
"108617810 F 281861 115.00 55.00 \n",
"82151660 F 8108 109.00 22.00 \n",
"123327472 M 96956 107.00 43.00 \n",
"13524182 M 332344 106.00 42.00 \n",
"46557945 M 55762 105.00 27.00 \n",
"15931637 M 183467 104.00 40.00 \n",
"33919343 F 122382 100.00 31.00 \n",
"9126752 M 4702 98.00 16.00 \n",
"39155029 M 88366 95.00 43.00 \n",
"52392666 F 15246 87.00 14.00 \n",
"16930125 M 58670 84.00 41.00 \n",
"26632935 F 4584 83.00 3.00 \n",
"48802204 M 718330 80.00 9.00 \n",
"19107878 M 308181 78.00 37.00 \n",
"217550862 M 40562 78.00 27.00 \n",
"24439201 M 38708 78.00 27.00 "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_mention_summary_df = journalist_mention_summary(journalists_mention_df)\n",
"journalists_mention_summary_df.to_csv('output/journalists_mentioned_by_journalists.csv')\n",
"journalists_mention_summary_df[journalist_mention_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Same, but ordered by number of journalists mentioning"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" mention_count | \n",
" mentioning_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 108617810 | \n",
" DanaBashCNN | \n",
" Bash, Dana | \n",
" CNN | \n",
" F | \n",
" 281861 | \n",
" 115.00 | \n",
" 55.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 127.00 | \n",
" 51.00 | \n",
"
\n",
" \n",
" 997684836 | \n",
" pkcapitol | \n",
" Kane, Paul | \n",
" Washington Post | \n",
" M | \n",
" 31300 | \n",
" 116.00 | \n",
" 47.00 | \n",
"
\n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 212.00 | \n",
" 46.00 | \n",
"
\n",
" \n",
" 112526560 | \n",
" kenvogel | \n",
" Vogel, Kenneth P. | \n",
" Politico | \n",
" M | \n",
" 53894 | \n",
" 67.00 | \n",
" 45.00 | \n",
"
\n",
" \n",
" 18227519 | \n",
" morningmika | \n",
" Brzezinski, Mika | \n",
" MSNBC | \n",
" F | \n",
" 653031 | \n",
" 70.00 | \n",
" 44.00 | \n",
"
\n",
" \n",
" 123327472 | \n",
" peterbakernyt | \n",
" Baker, Peter | \n",
" New York Times | \n",
" M | \n",
" 96956 | \n",
" 107.00 | \n",
" 43.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 95.00 | \n",
" 43.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 106.00 | \n",
" 42.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 143.00 | \n",
" 41.00 | \n",
"
\n",
" \n",
" 16930125 | \n",
" edatpost | \n",
" O’Keefe, Edward | \n",
" Washington Post | \n",
" M | \n",
" 58670 | \n",
" 84.00 | \n",
" 41.00 | \n",
"
\n",
" \n",
" 15931637 | \n",
" jonkarl | \n",
" Karl, Jonathan | \n",
" ABC News | \n",
" M | \n",
" 183467 | \n",
" 104.00 | \n",
" 40.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 61.00 | \n",
" 38.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 78.00 | \n",
" 37.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 75.00 | \n",
" 37.00 | \n",
"
\n",
" \n",
" 61734492 | \n",
" Fahrenthold | \n",
" Fahrenthold, David | \n",
" Washington Post | \n",
" M | \n",
" 451778 | \n",
" 43.00 | \n",
" 32.00 | \n",
"
\n",
" \n",
" 16018516 | \n",
" jenhab | \n",
" Haberkorn, Jennifer A. | \n",
" Politico | \n",
" F | \n",
" 20028 | \n",
" 200.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" F | \n",
" 122382 | \n",
" 100.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
" 50325797 | \n",
" chucktodd | \n",
" Todd, Chuck | \n",
" NBC News | \n",
" M | \n",
" 1781247 | \n",
" 40.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
" 71294756 | \n",
" wolfblitzer | \n",
" Blitzer, Wolf | \n",
" CNN | \n",
" M | \n",
" 1281914 | \n",
" 56.00 | \n",
" 30.00 | \n",
"
\n",
" \n",
" 28181835 | \n",
" jpaceDC | \n",
" Pace, Julie | \n",
" Associated Press | \n",
" F | \n",
" 46017 | \n",
" 52.00 | \n",
" 30.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 67.00 | \n",
" 29.00 | \n",
"
\n",
" \n",
" 16031927 | \n",
" greta | \n",
" Van Susteren, Greta | \n",
" MSNBC | \n",
" F | \n",
" 1186850 | \n",
" 37.00 | \n",
" 28.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 105.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 217550862 | \n",
" BresPolitico | \n",
" Bresnahan, John | \n",
" Politico | \n",
" M | \n",
" 40562 | \n",
" 78.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization gender \\\n",
"user_id \n",
"108617810 DanaBashCNN Bash, Dana CNN F \n",
"14529929 jaketapper Tapper, Jake CNN M \n",
"997684836 pkcapitol Kane, Paul Washington Post M \n",
"407013776 burgessev Everett, John B. Politico M \n",
"112526560 kenvogel Vogel, Kenneth P. Politico M \n",
"18227519 morningmika Brzezinski, Mika MSNBC F \n",
"123327472 peterbakernyt Baker, Peter New York Times M \n",
"39155029 mkraju Raju, Manu K. CNN M \n",
"13524182 daveweigel Weigel, David Washington Post M \n",
"19186003 seungminkim Kim, Seung Min Politico F \n",
"16930125 edatpost O’Keefe, Edward Washington Post M \n",
"15931637 jonkarl Karl, Jonathan ABC News M \n",
"22771961 Acosta Acosta, Jim CNN M \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times M \n",
"18678924 jmartNYT Martin, Jonathan New York Times M \n",
"61734492 Fahrenthold Fahrenthold, David Washington Post M \n",
"16018516 jenhab Haberkorn, Jennifer A. Politico F \n",
"33919343 AshleyRParker Parker, Ashley Washington Post F \n",
"50325797 chucktodd Todd, Chuck NBC News M \n",
"71294756 wolfblitzer Blitzer, Wolf CNN M \n",
"28181835 jpaceDC Pace, Julie Associated Press F \n",
"12354832 kasie Hunt, Kasie NBC News F \n",
"16031927 greta Van Susteren, Greta MSNBC F \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News M \n",
"217550862 BresPolitico Bresnahan, John Politico M \n",
"\n",
" followers_count mention_count mentioning_count \n",
"user_id \n",
"108617810 281861 115.00 55.00 \n",
"14529929 1305680 127.00 51.00 \n",
"997684836 31300 116.00 47.00 \n",
"407013776 31010 212.00 46.00 \n",
"112526560 53894 67.00 45.00 \n",
"18227519 653031 70.00 44.00 \n",
"123327472 96956 107.00 43.00 \n",
"39155029 88366 95.00 43.00 \n",
"13524182 332344 106.00 42.00 \n",
"19186003 33980 143.00 41.00 \n",
"16930125 58670 84.00 41.00 \n",
"15931637 183467 104.00 40.00 \n",
"22771961 350650 61.00 38.00 \n",
"19107878 308181 78.00 37.00 \n",
"18678924 197322 75.00 37.00 \n",
"61734492 451778 43.00 32.00 \n",
"16018516 20028 200.00 31.00 \n",
"33919343 122382 100.00 31.00 \n",
"50325797 1781247 40.00 31.00 \n",
"71294756 1281914 56.00 30.00 \n",
"28181835 46017 52.00 30.00 \n",
"12354832 187357 67.00 29.00 \n",
"16031927 1186850 37.00 28.00 \n",
"46557945 55762 105.00 27.00 \n",
"217550862 40562 78.00 27.00 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_mention_summary_df[journalist_mention_summary_fields].sort_values(['mentioning_count', 'mention_count'], ascending=False).head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of journalists mentioning other journalists, how many are male / female?"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 8298 | \n",
" 58.0% | \n",
"
\n",
" \n",
" F | \n",
" 6000 | \n",
" 42.0% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 8298 58.0%\n",
"F 6000 42.0%"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalist_mention_gender_summary(journalists_mention_df)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### On average how many times are journalists mentioned by other journalists?"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mention_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 2,292.00 | \n",
"
\n",
" \n",
" mean | \n",
" 6.24 | \n",
"
\n",
" \n",
" std | \n",
" 17.59 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 1.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 5.00 | \n",
"
\n",
" \n",
" max | \n",
" 330.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mention_count\n",
"count 2,292.00\n",
"mean 6.24\n",
"std 17.59\n",
"min 0.00\n",
"25% 0.00\n",
"50% 1.00\n",
"75% 5.00\n",
"max 330.00"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_mention_summary_df[['mention_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of journalists mentioning female journalists which are mentioned the most?"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" mention_count | \n",
" mentioning_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 325050734 | \n",
" AllysonRaeWx | \n",
" Banks, Allyson | \n",
" WUSA–TV | \n",
" F | \n",
" 6918 | \n",
" 330.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 16018516 | \n",
" jenhab | \n",
" Haberkorn, Jennifer A. | \n",
" Politico | \n",
" F | \n",
" 20028 | \n",
" 200.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 143.00 | \n",
" 41.00 | \n",
"
\n",
" \n",
" 108617810 | \n",
" DanaBashCNN | \n",
" Bash, Dana | \n",
" CNN | \n",
" F | \n",
" 281861 | \n",
" 115.00 | \n",
" 55.00 | \n",
"
\n",
" \n",
" 82151660 | \n",
" kelsey_snell | \n",
" Snell, Kelse | \n",
" Washington Post | \n",
" F | \n",
" 8108 | \n",
" 109.00 | \n",
" 22.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" F | \n",
" 122382 | \n",
" 100.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
" 52392666 | \n",
" ZoeTillman | \n",
" Tillman, Zoe | \n",
" BuzzFeed | \n",
" F | \n",
" 15246 | \n",
" 87.00 | \n",
" 14.00 | \n",
"
\n",
" \n",
" 26632935 | \n",
" HopeSeck | \n",
" Hodge Seck, Hope | \n",
" Military.com | \n",
" F | \n",
" 4584 | \n",
" 83.00 | \n",
" 3.00 | \n",
"
\n",
" \n",
" 16441088 | \n",
" jestei | \n",
" Steinhauer, Jennifer | \n",
" New York Times | \n",
" F | \n",
" 13452 | \n",
" 76.00 | \n",
" 26.00 | \n",
"
\n",
" \n",
" 18227519 | \n",
" morningmika | \n",
" Brzezinski, Mika | \n",
" MSNBC | \n",
" F | \n",
" 653031 | \n",
" 70.00 | \n",
" 44.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 67.00 | \n",
" 29.00 | \n",
"
\n",
" \n",
" 139738464 | \n",
" mj_lee | \n",
" Lee, MJ | \n",
" CNN | \n",
" F | \n",
" 31940 | \n",
" 67.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 204599219 | \n",
" pw_cunningham | \n",
" Cunningham, Paige | \n",
" Washington Examiner | \n",
" F | \n",
" 9255 | \n",
" 67.00 | \n",
" 18.00 | \n",
"
\n",
" \n",
" 118747545 | \n",
" eilperin | \n",
" Eilperin, Juliet | \n",
" Washington Post | \n",
" F | \n",
" 20483 | \n",
" 67.00 | \n",
" 16.00 | \n",
"
\n",
" \n",
" 360080772 | \n",
" FoxReports | \n",
" Fox, Lauren | \n",
" CNN | \n",
" F | \n",
" 7282 | \n",
" 65.00 | \n",
" 15.00 | \n",
"
\n",
" \n",
" 58869089 | \n",
" margarettalev | \n",
" Talev, Margaret | \n",
" Bloomberg News | \n",
" F | \n",
" 19588 | \n",
" 58.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 313545488 | \n",
" LauraLitvan | \n",
" Litvan, Laura | \n",
" Bloomberg News | \n",
" F | \n",
" 4468 | \n",
" 58.00 | \n",
" 5.00 | \n",
"
\n",
" \n",
" 19734832 | \n",
" sarahkliff | \n",
" Kliff, Sarah L. | \n",
" Vox Media | \n",
" F | \n",
" 100090 | \n",
" 57.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 381664207 | \n",
" caitlinnowens | \n",
" Owens, Caitlin N. | \n",
" Axios | \n",
" F | \n",
" 5749 | \n",
" 57.00 | \n",
" 9.00 | \n",
"
\n",
" \n",
" 167024520 | \n",
" rachaelmbade | \n",
" Bade, Rachel M. | \n",
" Politico | \n",
" F | \n",
" 30164 | \n",
" 56.00 | \n",
" 26.00 | \n",
"
\n",
" \n",
" 247852986 | \n",
" rachanadixit | \n",
" Pradhan, Rachana D. | \n",
" Politico | \n",
" F | \n",
" 6178 | \n",
" 55.00 | \n",
" 14.00 | \n",
"
\n",
" \n",
" 237477771 | \n",
" juliehdavis | \n",
" Davis, Julie | \n",
" New York Times | \n",
" F | \n",
" 49821 | \n",
" 55.00 | \n",
" 10.00 | \n",
"
\n",
" \n",
" 36607254 | \n",
" Oriana0214 | \n",
" Pawlyk, Oriana | \n",
" Military.com | \n",
" F | \n",
" 6397 | \n",
" 55.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" 28181835 | \n",
" jpaceDC | \n",
" Pace, Julie | \n",
" Associated Press | \n",
" F | \n",
" 46017 | \n",
" 52.00 | \n",
" 30.00 | \n",
"
\n",
" \n",
" 48144950 | \n",
" JudyWoodruff | \n",
" Woodruff, Judy | \n",
" PBS NewsHour | \n",
" F | \n",
" 64294 | \n",
" 49.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization gender \\\n",
"user_id \n",
"325050734 AllysonRaeWx Banks, Allyson WUSA–TV F \n",
"16018516 jenhab Haberkorn, Jennifer A. Politico F \n",
"19186003 seungminkim Kim, Seung Min Politico F \n",
"108617810 DanaBashCNN Bash, Dana CNN F \n",
"82151660 kelsey_snell Snell, Kelse Washington Post F \n",
"33919343 AshleyRParker Parker, Ashley Washington Post F \n",
"52392666 ZoeTillman Tillman, Zoe BuzzFeed F \n",
"26632935 HopeSeck Hodge Seck, Hope Military.com F \n",
"16441088 jestei Steinhauer, Jennifer New York Times F \n",
"18227519 morningmika Brzezinski, Mika MSNBC F \n",
"12354832 kasie Hunt, Kasie NBC News F \n",
"139738464 mj_lee Lee, MJ CNN F \n",
"204599219 pw_cunningham Cunningham, Paige Washington Examiner F \n",
"118747545 eilperin Eilperin, Juliet Washington Post F \n",
"360080772 FoxReports Fox, Lauren CNN F \n",
"58869089 margarettalev Talev, Margaret Bloomberg News F \n",
"313545488 LauraLitvan Litvan, Laura Bloomberg News F \n",
"19734832 sarahkliff Kliff, Sarah L. Vox Media F \n",
"381664207 caitlinnowens Owens, Caitlin N. Axios F \n",
"167024520 rachaelmbade Bade, Rachel M. Politico F \n",
"247852986 rachanadixit Pradhan, Rachana D. Politico F \n",
"237477771 juliehdavis Davis, Julie New York Times F \n",
"36607254 Oriana0214 Pawlyk, Oriana Military.com F \n",
"28181835 jpaceDC Pace, Julie Associated Press F \n",
"48144950 JudyWoodruff Woodruff, Judy PBS NewsHour F \n",
"\n",
" followers_count mention_count mentioning_count \n",
"user_id \n",
"325050734 6918 330.00 7.00 \n",
"16018516 20028 200.00 31.00 \n",
"19186003 33980 143.00 41.00 \n",
"108617810 281861 115.00 55.00 \n",
"82151660 8108 109.00 22.00 \n",
"33919343 122382 100.00 31.00 \n",
"52392666 15246 87.00 14.00 \n",
"26632935 4584 83.00 3.00 \n",
"16441088 13452 76.00 26.00 \n",
"18227519 653031 70.00 44.00 \n",
"12354832 187357 67.00 29.00 \n",
"139738464 31940 67.00 27.00 \n",
"204599219 9255 67.00 18.00 \n",
"118747545 20483 67.00 16.00 \n",
"360080772 7282 65.00 15.00 \n",
"58869089 19588 58.00 27.00 \n",
"313545488 4468 58.00 5.00 \n",
"19734832 100090 57.00 27.00 \n",
"381664207 5749 57.00 9.00 \n",
"167024520 30164 56.00 26.00 \n",
"247852986 6178 55.00 14.00 \n",
"237477771 49821 55.00 10.00 \n",
"36607254 6397 55.00 4.00 \n",
"28181835 46017 52.00 30.00 \n",
"48144950 64294 49.00 7.00 "
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"female_journalists_mention_summary_df = journalists_mention_summary_df[journalists_mention_summary_df.gender == 'F']\n",
"female_journalists_mention_summary_df.to_csv('output/female_journalists_mentioned_by_journalists.csv')\n",
"female_journalists_mention_summary_df[journalist_mention_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### On average, how many times are female journalists mentioned by journalists?"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mention_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 993.00 | \n",
"
\n",
" \n",
" mean | \n",
" 6.04 | \n",
"
\n",
" \n",
" std | \n",
" 17.95 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 1.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 4.00 | \n",
"
\n",
" \n",
" max | \n",
" 330.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mention_count\n",
"count 993.00\n",
"mean 6.04\n",
"std 17.95\n",
"min 0.00\n",
"25% 0.00\n",
"50% 1.00\n",
"75% 4.00\n",
"max 330.00"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"female_journalists_mention_summary_df[['mention_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of journalists mentioning male journalists, who do they mention the most?"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" mention_count | \n",
" mentioning_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 28496589 | \n",
" TenaciousTopper | \n",
" Shutt, Charles | \n",
" WUSA–TV | \n",
" M | \n",
" 15868 | \n",
" 239.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 63149389 | \n",
" hbwx | \n",
" Bernstein, Howard | \n",
" WUSA–TV | \n",
" M | \n",
" 8337 | \n",
" 235.00 | \n",
" 10.00 | \n",
"
\n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 212.00 | \n",
" 46.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 127.00 | \n",
" 51.00 | \n",
"
\n",
" \n",
" 169586280 | \n",
" WaPoSean | \n",
" Sullivan, Sean | \n",
" Washington Post | \n",
" M | \n",
" 22860 | \n",
" 117.00 | \n",
" 20.00 | \n",
"
\n",
" \n",
" 997684836 | \n",
" pkcapitol | \n",
" Kane, Paul | \n",
" Washington Post | \n",
" M | \n",
" 31300 | \n",
" 116.00 | \n",
" 47.00 | \n",
"
\n",
" \n",
" 123327472 | \n",
" peterbakernyt | \n",
" Baker, Peter | \n",
" New York Times | \n",
" M | \n",
" 96956 | \n",
" 107.00 | \n",
" 43.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 106.00 | \n",
" 42.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 105.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 15931637 | \n",
" jonkarl | \n",
" Karl, Jonathan | \n",
" ABC News | \n",
" M | \n",
" 183467 | \n",
" 104.00 | \n",
" 40.00 | \n",
"
\n",
" \n",
" 9126752 | \n",
" reporterjoe | \n",
" Gould, Joseph M. | \n",
" Sightline Media Group | \n",
" M | \n",
" 4702 | \n",
" 98.00 | \n",
" 16.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 95.00 | \n",
" 43.00 | \n",
"
\n",
" \n",
" 16930125 | \n",
" edatpost | \n",
" O’Keefe, Edward | \n",
" Washington Post | \n",
" M | \n",
" 58670 | \n",
" 84.00 | \n",
" 41.00 | \n",
"
\n",
" \n",
" 48802204 | \n",
" HardballChris | \n",
" Matthews, Chris | \n",
" NBC News | \n",
" M | \n",
" 718330 | \n",
" 80.00 | \n",
" 9.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 78.00 | \n",
" 37.00 | \n",
"
\n",
" \n",
" 217550862 | \n",
" BresPolitico | \n",
" Bresnahan, John | \n",
" Politico | \n",
" M | \n",
" 40562 | \n",
" 78.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 24439201 | \n",
" jameshohmann | \n",
" Hohmann, James P. | \n",
" Washington Post | \n",
" M | \n",
" 38708 | \n",
" 78.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 75.00 | \n",
" 37.00 | \n",
"
\n",
" \n",
" 22891564 | \n",
" chrisgeidner | \n",
" Geidner, Chris | \n",
" BuzzFeed | \n",
" M | \n",
" 83316 | \n",
" 73.00 | \n",
" 15.00 | \n",
"
\n",
" \n",
" 112526560 | \n",
" kenvogel | \n",
" Vogel, Kenneth P. | \n",
" Politico | \n",
" M | \n",
" 53894 | \n",
" 67.00 | \n",
" 45.00 | \n",
"
\n",
" \n",
" 18646108 | \n",
" BretBaier | \n",
" Baier, Bret | \n",
" Fox News | \n",
" M | \n",
" 1095184 | \n",
" 66.00 | \n",
" 18.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 61.00 | \n",
" 38.00 | \n",
"
\n",
" \n",
" 16067683 | \n",
" pauldemko | \n",
" Demko, Paul Jeffrey | \n",
" Politico | \n",
" M | \n",
" 8170 | \n",
" 60.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 59676104 | \n",
" danbalz | \n",
" Balz, Daniel | \n",
" Washington Post | \n",
" M | \n",
" 90819 | \n",
" 57.00 | \n",
" 26.00 | \n",
"
\n",
" \n",
" 71294756 | \n",
" wolfblitzer | \n",
" Blitzer, Wolf | \n",
" CNN | \n",
" M | \n",
" 1281914 | \n",
" 56.00 | \n",
" 30.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization gender \\\n",
"user_id \n",
"28496589 TenaciousTopper Shutt, Charles WUSA–TV M \n",
"63149389 hbwx Bernstein, Howard WUSA–TV M \n",
"407013776 burgessev Everett, John B. Politico M \n",
"14529929 jaketapper Tapper, Jake CNN M \n",
"169586280 WaPoSean Sullivan, Sean Washington Post M \n",
"997684836 pkcapitol Kane, Paul Washington Post M \n",
"123327472 peterbakernyt Baker, Peter New York Times M \n",
"13524182 daveweigel Weigel, David Washington Post M \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News M \n",
"15931637 jonkarl Karl, Jonathan ABC News M \n",
"9126752 reporterjoe Gould, Joseph M. Sightline Media Group M \n",
"39155029 mkraju Raju, Manu K. CNN M \n",
"16930125 edatpost O’Keefe, Edward Washington Post M \n",
"48802204 HardballChris Matthews, Chris NBC News M \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times M \n",
"217550862 BresPolitico Bresnahan, John Politico M \n",
"24439201 jameshohmann Hohmann, James P. Washington Post M \n",
"18678924 jmartNYT Martin, Jonathan New York Times M \n",
"22891564 chrisgeidner Geidner, Chris BuzzFeed M \n",
"112526560 kenvogel Vogel, Kenneth P. Politico M \n",
"18646108 BretBaier Baier, Bret Fox News M \n",
"22771961 Acosta Acosta, Jim CNN M \n",
"16067683 pauldemko Demko, Paul Jeffrey Politico M \n",
"59676104 danbalz Balz, Daniel Washington Post M \n",
"71294756 wolfblitzer Blitzer, Wolf CNN M \n",
"\n",
" followers_count mention_count mentioning_count \n",
"user_id \n",
"28496589 15868 239.00 13.00 \n",
"63149389 8337 235.00 10.00 \n",
"407013776 31010 212.00 46.00 \n",
"14529929 1305680 127.00 51.00 \n",
"169586280 22860 117.00 20.00 \n",
"997684836 31300 116.00 47.00 \n",
"123327472 96956 107.00 43.00 \n",
"13524182 332344 106.00 42.00 \n",
"46557945 55762 105.00 27.00 \n",
"15931637 183467 104.00 40.00 \n",
"9126752 4702 98.00 16.00 \n",
"39155029 88366 95.00 43.00 \n",
"16930125 58670 84.00 41.00 \n",
"48802204 718330 80.00 9.00 \n",
"19107878 308181 78.00 37.00 \n",
"217550862 40562 78.00 27.00 \n",
"24439201 38708 78.00 27.00 \n",
"18678924 197322 75.00 37.00 \n",
"22891564 83316 73.00 15.00 \n",
"112526560 53894 67.00 45.00 \n",
"18646108 1095184 66.00 18.00 \n",
"22771961 350650 61.00 38.00 \n",
"16067683 8170 60.00 13.00 \n",
"59676104 90819 57.00 26.00 \n",
"71294756 1281914 56.00 30.00 "
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"male_journalists_mention_summary_df = journalists_mention_summary_df[journalists_mention_summary_df.gender == 'M']\n",
"male_journalists_mention_summary_df.to_csv('output/male_journalists_mentioned_by_journalists.csv')\n",
"male_journalists_mention_summary_df[journalist_mention_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### On average, how many times are male journalists mentioned by journalists?"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mention_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1,299.00 | \n",
"
\n",
" \n",
" mean | \n",
" 6.39 | \n",
"
\n",
" \n",
" std | \n",
" 17.31 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 1.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 5.00 | \n",
"
\n",
" \n",
" max | \n",
" 239.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mention_count\n",
"count 1,299.00\n",
"mean 6.39\n",
"std 17.31\n",
"min 0.00\n",
"25% 0.00\n",
"50% 1.00\n",
"75% 5.00\n",
"max 239.00"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"male_journalists_mention_summary_df[['mention_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of female journalists mentioning journalists, who do they mention the most?"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" mention_count | \n",
" mentioning_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 164.00 | \n",
" 20.00 | \n",
"
\n",
" \n",
" 16018516 | \n",
" jenhab | \n",
" Haberkorn, Jennifer A. | \n",
" Politico | \n",
" F | \n",
" 20028 | \n",
" 116.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 79.00 | \n",
" 10.00 | \n",
"
\n",
" \n",
" 169586280 | \n",
" WaPoSean | \n",
" Sullivan, Sean | \n",
" Washington Post | \n",
" M | \n",
" 22860 | \n",
" 71.00 | \n",
" 11.00 | \n",
"
\n",
" \n",
" 48802204 | \n",
" HardballChris | \n",
" Matthews, Chris | \n",
" NBC News | \n",
" M | \n",
" 718330 | \n",
" 70.00 | \n",
" 3.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 64.00 | \n",
" 16.00 | \n",
"
\n",
" \n",
" 22891564 | \n",
" chrisgeidner | \n",
" Geidner, Chris | \n",
" BuzzFeed | \n",
" M | \n",
" 83316 | \n",
" 61.00 | \n",
" 6.00 | \n",
"
\n",
" \n",
" 108617810 | \n",
" DanaBashCNN | \n",
" Bash, Dana | \n",
" CNN | \n",
" F | \n",
" 281861 | \n",
" 60.00 | \n",
" 26.00 | \n",
"
\n",
" \n",
" 16067683 | \n",
" pauldemko | \n",
" Demko, Paul Jeffrey | \n",
" Politico | \n",
" M | \n",
" 8170 | \n",
" 57.00 | \n",
" 10.00 | \n",
"
\n",
" \n",
" 313545488 | \n",
" LauraLitvan | \n",
" Litvan, Laura | \n",
" Bloomberg News | \n",
" F | \n",
" 4468 | \n",
" 53.00 | \n",
" 2.00 | \n",
"
\n",
" \n",
" 52392666 | \n",
" ZoeTillman | \n",
" Tillman, Zoe | \n",
" BuzzFeed | \n",
" F | \n",
" 15246 | \n",
" 52.00 | \n",
" 8.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" F | \n",
" 122382 | \n",
" 49.00 | \n",
" 11.00 | \n",
"
\n",
" \n",
" 82151660 | \n",
" kelsey_snell | \n",
" Snell, Kelse | \n",
" Washington Post | \n",
" F | \n",
" 8108 | \n",
" 47.00 | \n",
" 10.00 | \n",
"
\n",
" \n",
" 247852986 | \n",
" rachanadixit | \n",
" Pradhan, Rachana D. | \n",
" Politico | \n",
" F | \n",
" 6178 | \n",
" 43.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 9126752 | \n",
" reporterjoe | \n",
" Gould, Joseph M. | \n",
" Sightline Media Group | \n",
" M | \n",
" 4702 | \n",
" 43.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 40.00 | \n",
" 21.00 | \n",
"
\n",
" \n",
" 16930125 | \n",
" edatpost | \n",
" O’Keefe, Edward | \n",
" Washington Post | \n",
" M | \n",
" 58670 | \n",
" 40.00 | \n",
" 18.00 | \n",
"
\n",
" \n",
" 217550862 | \n",
" BresPolitico | \n",
" Bresnahan, John | \n",
" Politico | \n",
" M | \n",
" 40562 | \n",
" 37.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 16149614 | \n",
" jrovner | \n",
" Rovner, Julie | \n",
" Kaiser Health News | \n",
" F | \n",
" 21844 | \n",
" 35.00 | \n",
" 14.00 | \n",
"
\n",
" \n",
" 997684836 | \n",
" pkcapitol | \n",
" Kane, Paul | \n",
" Washington Post | \n",
" M | \n",
" 31300 | \n",
" 35.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 35.00 | \n",
" 12.00 | \n",
"
\n",
" \n",
" 158072303 | \n",
" ValerieInsinna | \n",
" Insinna, Valerie | \n",
" Defense News | \n",
" F | \n",
" 4572 | \n",
" 35.00 | \n",
" 2.00 | \n",
"
\n",
" \n",
" 15931637 | \n",
" jonkarl | \n",
" Karl, Jonathan | \n",
" ABC News | \n",
" M | \n",
" 183467 | \n",
" 33.00 | \n",
" 18.00 | \n",
"
\n",
" \n",
" 342226913 | \n",
" GregStohr | \n",
" Stohr, Greg | \n",
" Bloomberg News | \n",
" M | \n",
" 7245 | \n",
" 32.00 | \n",
" 2.00 | \n",
"
\n",
" \n",
" 297532865 | \n",
" kwelkernbc | \n",
" Welker, Kristen | \n",
" NBC News | \n",
" F | \n",
" 99234 | \n",
" 31.00 | \n",
" 9.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"407013776 burgessev Everett, John B. Politico \n",
"16018516 jenhab Haberkorn, Jennifer A. Politico \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News \n",
"169586280 WaPoSean Sullivan, Sean Washington Post \n",
"48802204 HardballChris Matthews, Chris NBC News \n",
"19186003 seungminkim Kim, Seung Min Politico \n",
"22891564 chrisgeidner Geidner, Chris BuzzFeed \n",
"108617810 DanaBashCNN Bash, Dana CNN \n",
"16067683 pauldemko Demko, Paul Jeffrey Politico \n",
"313545488 LauraLitvan Litvan, Laura Bloomberg News \n",
"52392666 ZoeTillman Tillman, Zoe BuzzFeed \n",
"33919343 AshleyRParker Parker, Ashley Washington Post \n",
"82151660 kelsey_snell Snell, Kelse Washington Post \n",
"247852986 rachanadixit Pradhan, Rachana D. Politico \n",
"9126752 reporterjoe Gould, Joseph M. Sightline Media Group \n",
"14529929 jaketapper Tapper, Jake CNN \n",
"16930125 edatpost O’Keefe, Edward Washington Post \n",
"217550862 BresPolitico Bresnahan, John Politico \n",
"16149614 jrovner Rovner, Julie Kaiser Health News \n",
"997684836 pkcapitol Kane, Paul Washington Post \n",
"12354832 kasie Hunt, Kasie NBC News \n",
"158072303 ValerieInsinna Insinna, Valerie Defense News \n",
"15931637 jonkarl Karl, Jonathan ABC News \n",
"342226913 GregStohr Stohr, Greg Bloomberg News \n",
"297532865 kwelkernbc Welker, Kristen NBC News \n",
"\n",
" gender followers_count mention_count mentioning_count \n",
"user_id \n",
"407013776 M 31010 164.00 20.00 \n",
"16018516 F 20028 116.00 13.00 \n",
"46557945 M 55762 79.00 10.00 \n",
"169586280 M 22860 71.00 11.00 \n",
"48802204 M 718330 70.00 3.00 \n",
"19186003 F 33980 64.00 16.00 \n",
"22891564 M 83316 61.00 6.00 \n",
"108617810 F 281861 60.00 26.00 \n",
"16067683 M 8170 57.00 10.00 \n",
"313545488 F 4468 53.00 2.00 \n",
"52392666 F 15246 52.00 8.00 \n",
"33919343 F 122382 49.00 11.00 \n",
"82151660 F 8108 47.00 10.00 \n",
"247852986 F 6178 43.00 7.00 \n",
"9126752 M 4702 43.00 7.00 \n",
"14529929 M 1305680 40.00 21.00 \n",
"16930125 M 58670 40.00 18.00 \n",
"217550862 M 40562 37.00 13.00 \n",
"16149614 F 21844 35.00 14.00 \n",
"997684836 M 31300 35.00 13.00 \n",
"12354832 F 187357 35.00 12.00 \n",
"158072303 F 4572 35.00 2.00 \n",
"15931637 M 183467 33.00 18.00 \n",
"342226913 M 7245 32.00 2.00 \n",
"297532865 F 99234 31.00 9.00 "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_mentioned_by_female_summary_df = journalist_mention_summary(journalists_mention_df[journalists_mention_df.gender == 'F'])\n",
"journalists_mentioned_by_female_summary_df.to_csv('output/journalists_mentioned_by_female_journalists.csv')\n",
"journalists_mentioned_by_female_summary_df[journalist_mention_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of female journalists mentioning journalists, how many are male / female?"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 3162 | \n",
" 54.8% | \n",
"
\n",
" \n",
" F | \n",
" 2605 | \n",
" 45.2% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 3162 54.8%\n",
"F 2605 45.2%"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalist_mention_gender_summary(journalists_mention_df[journalists_mention_df.gender == 'F'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of male journalists mentioning journalists, who do they mention the most?"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" mention_count | \n",
" mentioning_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 325050734 | \n",
" AllysonRaeWx | \n",
" Banks, Allyson | \n",
" WUSA–TV | \n",
" F | \n",
" 6918 | \n",
" 324.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" 28496589 | \n",
" TenaciousTopper | \n",
" Shutt, Charles | \n",
" WUSA–TV | \n",
" M | \n",
" 15868 | \n",
" 225.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 63149389 | \n",
" hbwx | \n",
" Bernstein, Howard | \n",
" WUSA–TV | \n",
" M | \n",
" 8337 | \n",
" 225.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 87.00 | \n",
" 30.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 84.00 | \n",
" 30.00 | \n",
"
\n",
" \n",
" 16018516 | \n",
" jenhab | \n",
" Haberkorn, Jennifer A. | \n",
" Politico | \n",
" F | \n",
" 20028 | \n",
" 84.00 | \n",
" 18.00 | \n",
"
\n",
" \n",
" 997684836 | \n",
" pkcapitol | \n",
" Kane, Paul | \n",
" Washington Post | \n",
" M | \n",
" 31300 | \n",
" 81.00 | \n",
" 34.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 79.00 | \n",
" 25.00 | \n",
"
\n",
" \n",
" 123327472 | \n",
" peterbakernyt | \n",
" Baker, Peter | \n",
" New York Times | \n",
" M | \n",
" 96956 | \n",
" 78.00 | \n",
" 29.00 | \n",
"
\n",
" \n",
" 26632935 | \n",
" HopeSeck | \n",
" Hodge Seck, Hope | \n",
" Military.com | \n",
" F | \n",
" 4584 | \n",
" 76.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 15931637 | \n",
" jonkarl | \n",
" Karl, Jonathan | \n",
" ABC News | \n",
" M | \n",
" 183467 | \n",
" 71.00 | \n",
" 22.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 69.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 67.00 | \n",
" 27.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 66.00 | \n",
" 29.00 | \n",
"
\n",
" \n",
" 16441088 | \n",
" jestei | \n",
" Steinhauer, Jennifer | \n",
" New York Times | \n",
" F | \n",
" 13452 | \n",
" 64.00 | \n",
" 17.00 | \n",
"
\n",
" \n",
" 82151660 | \n",
" kelsey_snell | \n",
" Snell, Kelse | \n",
" Washington Post | \n",
" F | \n",
" 8108 | \n",
" 62.00 | \n",
" 12.00 | \n",
"
\n",
" \n",
" 24439201 | \n",
" jameshohmann | \n",
" Hohmann, James P. | \n",
" Washington Post | \n",
" M | \n",
" 38708 | \n",
" 59.00 | \n",
" 17.00 | \n",
"
\n",
" \n",
" 18646108 | \n",
" BretBaier | \n",
" Baier, Bret | \n",
" Fox News | \n",
" M | \n",
" 1095184 | \n",
" 59.00 | \n",
" 14.00 | \n",
"
\n",
" \n",
" 108617810 | \n",
" DanaBashCNN | \n",
" Bash, Dana | \n",
" CNN | \n",
" F | \n",
" 281861 | \n",
" 55.00 | \n",
" 29.00 | \n",
"
\n",
" \n",
" 9126752 | \n",
" reporterjoe | \n",
" Gould, Joseph M. | \n",
" Sightline Media Group | \n",
" M | \n",
" 4702 | \n",
" 55.00 | \n",
" 9.00 | \n",
"
\n",
" \n",
" 381664207 | \n",
" caitlinnowens | \n",
" Owens, Caitlin N. | \n",
" Axios | \n",
" F | \n",
" 5749 | \n",
" 55.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" F | \n",
" 122382 | \n",
" 51.00 | \n",
" 20.00 | \n",
"
\n",
" \n",
" 204599219 | \n",
" pw_cunningham | \n",
" Cunningham, Paige | \n",
" Washington Examiner | \n",
" F | \n",
" 9255 | \n",
" 51.00 | \n",
" 9.00 | \n",
"
\n",
" \n",
" 112526560 | \n",
" kenvogel | \n",
" Vogel, Kenneth P. | \n",
" Politico | \n",
" M | \n",
" 53894 | \n",
" 50.00 | \n",
" 32.00 | \n",
"
\n",
" \n",
" 36607254 | \n",
" Oriana0214 | \n",
" Pawlyk, Oriana | \n",
" Military.com | \n",
" F | \n",
" 6397 | \n",
" 50.00 | \n",
" 3.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"325050734 AllysonRaeWx Banks, Allyson WUSA–TV \n",
"28496589 TenaciousTopper Shutt, Charles WUSA–TV \n",
"63149389 hbwx Bernstein, Howard WUSA–TV \n",
"14529929 jaketapper Tapper, Jake CNN \n",
"13524182 daveweigel Weigel, David Washington Post \n",
"16018516 jenhab Haberkorn, Jennifer A. Politico \n",
"997684836 pkcapitol Kane, Paul Washington Post \n",
"19186003 seungminkim Kim, Seung Min Politico \n",
"123327472 peterbakernyt Baker, Peter New York Times \n",
"26632935 HopeSeck Hodge Seck, Hope Military.com \n",
"15931637 jonkarl Karl, Jonathan ABC News \n",
"18678924 jmartNYT Martin, Jonathan New York Times \n",
"39155029 mkraju Raju, Manu K. CNN \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times \n",
"16441088 jestei Steinhauer, Jennifer New York Times \n",
"82151660 kelsey_snell Snell, Kelse Washington Post \n",
"24439201 jameshohmann Hohmann, James P. Washington Post \n",
"18646108 BretBaier Baier, Bret Fox News \n",
"108617810 DanaBashCNN Bash, Dana CNN \n",
"9126752 reporterjoe Gould, Joseph M. Sightline Media Group \n",
"381664207 caitlinnowens Owens, Caitlin N. Axios \n",
"33919343 AshleyRParker Parker, Ashley Washington Post \n",
"204599219 pw_cunningham Cunningham, Paige Washington Examiner \n",
"112526560 kenvogel Vogel, Kenneth P. Politico \n",
"36607254 Oriana0214 Pawlyk, Oriana Military.com \n",
"\n",
" gender followers_count mention_count mentioning_count \n",
"user_id \n",
"325050734 F 6918 324.00 4.00 \n",
"28496589 M 15868 225.00 7.00 \n",
"63149389 M 8337 225.00 4.00 \n",
"14529929 M 1305680 87.00 30.00 \n",
"13524182 M 332344 84.00 30.00 \n",
"16018516 F 20028 84.00 18.00 \n",
"997684836 M 31300 81.00 34.00 \n",
"19186003 F 33980 79.00 25.00 \n",
"123327472 M 96956 78.00 29.00 \n",
"26632935 F 4584 76.00 1.00 \n",
"15931637 M 183467 71.00 22.00 \n",
"18678924 M 197322 69.00 31.00 \n",
"39155029 M 88366 67.00 27.00 \n",
"19107878 M 308181 66.00 29.00 \n",
"16441088 F 13452 64.00 17.00 \n",
"82151660 F 8108 62.00 12.00 \n",
"24439201 M 38708 59.00 17.00 \n",
"18646108 M 1095184 59.00 14.00 \n",
"108617810 F 281861 55.00 29.00 \n",
"9126752 M 4702 55.00 9.00 \n",
"381664207 F 5749 55.00 7.00 \n",
"33919343 F 122382 51.00 20.00 \n",
"204599219 F 9255 51.00 9.00 \n",
"112526560 M 53894 50.00 32.00 \n",
"36607254 F 6397 50.00 3.00 "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_mentioned_by_male_summary_df = journalist_mention_summary(journalists_mention_df[journalists_mention_df.gender == 'M'])\n",
"journalists_mentioned_by_male_summary_df.to_csv('output/journalists_mentioned_by_male_journalists.csv')\n",
"journalists_mentioned_by_male_summary_df[journalist_mention_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of male journalists mentioning other journalists, how many are male / female?"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 5136 | \n",
" 60.2% | \n",
"
\n",
" \n",
" F | \n",
" 3395 | \n",
" 39.8% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 5136 60.2%\n",
"F 3395 39.8%"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalist_mention_gender_summary(journalists_mention_df[journalists_mention_df.gender == 'M'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Retweet data prep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load retweets from tweets\n",
"Including retweets and quotes"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Loading from tweets/642bf140607547cb9d4c6b1fc49772aa_001.json.gz\n",
"DEBUG:root:Loaded 50000\n",
"DEBUG:root:Loaded 100000\n",
"DEBUG:root:Loaded 150000\n",
"DEBUG:root:Loaded 200000\n",
"DEBUG:root:Loaded 250000\n",
"INFO:root:Loading from tweets/9f7ed17c16a1494c8690b4053609539d_001.json.gz\n",
"DEBUG:root:Loaded 300000\n",
"DEBUG:root:Loaded 350000\n",
"DEBUG:root:Loaded 400000\n",
"DEBUG:root:Loaded 450000\n",
"DEBUG:root:Loaded 500000\n",
"INFO:root:Loading from tweets/41feff28312c433ab004cd822212f4c2_001.json.gz\n",
"DEBUG:root:Loaded 550000\n",
"DEBUG:root:Loaded 600000\n",
"DEBUG:root:Loaded 650000\n",
"DEBUG:root:Loaded 700000\n",
"DEBUG:root:Loaded 750000\n",
"DEBUG:root:Loaded 800000\n"
]
},
{
"data": {
"text/plain": [
"tweet_id 456956\n",
"user_id 456956\n",
"screen_name 456956\n",
"retweet_user_id 456956\n",
"retweet_screen_name 456956\n",
"tweet_created_at 456956\n",
"dtype: int64"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Simply the tweet on load\n",
"def retweet_transform(tweet):\n",
" if tweet_type(tweet) in ('retweet', 'quote'):\n",
" retweet = tweet.get('retweeted_status') or tweet.get('quoted_status')\n",
" return {\n",
" 'tweet_id': tweet['id_str'],\n",
" 'user_id': tweet['user']['id_str'],\n",
" 'screen_name': tweet['user']['screen_name'],\n",
" 'retweet_user_id': retweet['user']['id_str'],\n",
" 'retweet_screen_name': retweet['user']['screen_name'],\n",
" 'tweet_created_at': date_parse(tweet['created_at']) \n",
" }\n",
" return None\n",
"\n",
"base_retweet_df = load_tweet_df(retweet_transform, ['tweet_id', 'user_id', 'screen_name', 'retweet_user_id',\n",
" 'retweet_screen_name', 'tweet_created_at'],\n",
" dedupe_columns=['tweet_id'])\n",
"\n",
"base_retweet_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tweet_id | \n",
" user_id | \n",
" screen_name | \n",
" retweet_user_id | \n",
" retweet_screen_name | \n",
" tweet_created_at | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 872631046088601600 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 93069110 | \n",
" maggieNYT | \n",
" 2017-06-08 01:47:08+00:00 | \n",
"
\n",
" \n",
" 1 | \n",
" 872610483647516673 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 160951141 | \n",
" TomNamako | \n",
" 2017-06-08 00:25:26+00:00 | \n",
"
\n",
" \n",
" 2 | \n",
" 872609618626826240 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 18678924 | \n",
" jmartNYT | \n",
" 2017-06-08 00:22:00+00:00 | \n",
"
\n",
" \n",
" 3 | \n",
" 872605974699311104 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 93069110 | \n",
" maggieNYT | \n",
" 2017-06-08 00:07:31+00:00 | \n",
"
\n",
" \n",
" 4 | \n",
" 872603191518646276 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 94784682 | \n",
" JonathanTurley | \n",
" 2017-06-07 23:56:27+00:00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" tweet_id user_id screen_name retweet_user_id \\\n",
"0 872631046088601600 327862439 jonathanvswan 93069110 \n",
"1 872610483647516673 327862439 jonathanvswan 160951141 \n",
"2 872609618626826240 327862439 jonathanvswan 18678924 \n",
"3 872605974699311104 327862439 jonathanvswan 93069110 \n",
"4 872603191518646276 327862439 jonathanvswan 94784682 \n",
"\n",
" retweet_screen_name tweet_created_at \n",
"0 maggieNYT 2017-06-08 01:47:08+00:00 \n",
"1 TomNamako 2017-06-08 00:25:26+00:00 \n",
"2 jmartNYT 2017-06-08 00:22:00+00:00 \n",
"3 maggieNYT 2017-06-08 00:07:31+00:00 \n",
"4 JonathanTurley 2017-06-07 23:56:27+00:00 "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base_retweet_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add gender of retweeter"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tweet_id 456956\n",
"user_id 456956\n",
"screen_name 456956\n",
"retweet_user_id 456956\n",
"retweet_screen_name 456956\n",
"tweet_created_at 456956\n",
"gender 456956\n",
"dtype: int64"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retweet_df = base_retweet_df.join(user_summary_df['gender'], on='user_id')\n",
"retweet_df.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How many users have been retweeted by journalists?"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"49154"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retweet_df['retweet_user_id'].unique().size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Limit to retweeted journalists"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tweet_id 117048\n",
"user_id 117048\n",
"screen_name 117048\n",
"retweet_user_id 117048\n",
"retweet_screen_name 117048\n",
"tweet_created_at 117048\n",
"gender 117048\n",
"retweet_gender 117048\n",
"dtype: int64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_retweet_df = retweet_df.join(user_summary_df['gender'], how='inner', on='retweet_user_id', rsuffix='_retweet')\n",
"journalists_retweet_df.rename(columns = {'gender_retweet': 'retweet_gender'}, inplace=True)\n",
"journalists_retweet_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tweet_id | \n",
" user_id | \n",
" screen_name | \n",
" retweet_user_id | \n",
" retweet_screen_name | \n",
" tweet_created_at | \n",
" gender | \n",
" retweet_gender | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" 872609618626826240 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 18678924 | \n",
" jmartNYT | \n",
" 2017-06-08 00:22:00+00:00 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 435 | \n",
" 871437820044464128 | \n",
" 242169927 | \n",
" colinwilhelm | \n",
" 18678924 | \n",
" jmartNYT | \n",
" 2017-06-04 18:45:41+00:00 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 1406 | \n",
" 872620054889857024 | \n",
" 163589845 | \n",
" PoliticoKevin | \n",
" 18678924 | \n",
" jmartNYT | \n",
" 2017-06-08 01:03:28+00:00 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 1424 | \n",
" 872240756597174272 | \n",
" 163589845 | \n",
" PoliticoKevin | \n",
" 18678924 | \n",
" jmartNYT | \n",
" 2017-06-06 23:56:16+00:00 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 1455 | \n",
" 870749993279385601 | \n",
" 163589845 | \n",
" PoliticoKevin | \n",
" 18678924 | \n",
" jmartNYT | \n",
" 2017-06-02 21:12:30+00:00 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" tweet_id user_id screen_name retweet_user_id \\\n",
"2 872609618626826240 327862439 jonathanvswan 18678924 \n",
"435 871437820044464128 242169927 colinwilhelm 18678924 \n",
"1406 872620054889857024 163589845 PoliticoKevin 18678924 \n",
"1424 872240756597174272 163589845 PoliticoKevin 18678924 \n",
"1455 870749993279385601 163589845 PoliticoKevin 18678924 \n",
"\n",
" retweet_screen_name tweet_created_at gender retweet_gender \n",
"2 jmartNYT 2017-06-08 00:22:00+00:00 M M \n",
"435 jmartNYT 2017-06-04 18:45:41+00:00 M M \n",
"1406 jmartNYT 2017-06-08 01:03:28+00:00 M M \n",
"1424 jmartNYT 2017-06-06 23:56:16+00:00 M M \n",
"1455 jmartNYT 2017-06-02 21:12:30+00:00 M M "
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_retweet_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Functions for summarizing retweets by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# Gender of beltway journalists retweeted by beltway journalists\n",
"def journalist_retweet_gender_summary(retweet_df):\n",
" return pd.DataFrame({'count':retweet_df.retweet_gender.value_counts(), \n",
" 'percentage': retweet_df.retweet_gender.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'})\n",
"\n",
"def journalist_retweet_summary(retweet_df):\n",
" # Retweet count\n",
" retweet_count_df = pd.DataFrame(retweet_df.retweet_user_id.value_counts().rename('retweet_count'))\n",
"\n",
" # Retweeting users. That is, the number of unique users retweeting each user.\n",
" retweet_user_id_per_user_df = retweet_df[['retweet_user_id', 'user_id']].drop_duplicates()\n",
" retweeting_user_count_df = pd.DataFrame(retweet_user_id_per_user_df.groupby('retweet_user_id').size(), columns=['retweeting_count'])\n",
" retweeting_user_count_df.index.name = 'user_id'\n",
"\n",
" # Join with user summary\n",
" journalist_retweet_summary_df = user_summary_df.join([retweet_count_df, retweeting_user_count_df])\n",
" journalist_retweet_summary_df.fillna(0, inplace=True)\n",
" journalist_retweet_summary_df = journalist_retweet_summary_df.sort_values(['retweet_count', 'retweeting_count', 'followers_count'], ascending=False)\n",
" return journalist_retweet_summary_df\n",
"\n",
"# Gender of top journalists retweeted by beltway journalists\n",
"def top_journalist_retweet_gender_summary(retweet_summary_df, retweeting_count_threshold=0, head=100):\n",
" top_retweet_summary_df = retweet_summary_df[retweet_summary_df.retweeting_count > retweeting_count_threshold].head(head)\n",
" return pd.DataFrame({'count': top_retweet_summary_df.gender.value_counts(), \n",
" 'percentage': top_retweet_summary_df.gender.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'})\n",
"\n",
"# Fields for displaying journalist mention summaries\n",
"journalist_retweet_summary_fields = ['screen_name', 'name', 'organization', 'gender', 'followers_count', 'retweet_count', 'retweeting_count']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Retweet analysis\n",
"*Note that for each of these, the complete list is being written to CSV in the output directory.*\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of all journalists retweeting other accounts, how many of the retweets are from males / females?\n",
"That is, by gender of retweeter."
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" retweet | \n",
" quote | \n",
" total | \n",
" percentage | \n",
"
\n",
" \n",
" gender | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" F | \n",
" 134,606.00 | \n",
" 38,998.00 | \n",
" 173,604.00 | \n",
" 38.0% | \n",
"
\n",
" \n",
" M | \n",
" 210,660.00 | \n",
" 72,692.00 | \n",
" 283,352.00 | \n",
" 62.0% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" retweet quote total \\\n",
"gender \n",
"F 134,606.00 38,998.00 173,604.00 \n",
"M 210,660.00 72,692.00 283,352.00 \n",
"\n",
" percentage \n",
"gender \n",
"F 38.0% \n",
"M 62.0% "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retweets_by_gender_df = user_summary_df[['gender', 'retweet', 'quote']].groupby('gender').sum()\n",
"retweets_by_gender_df['total'] = retweets_by_gender_df.retweet + retweets_by_gender_df.quote\n",
"retweets_by_gender_df['percentage'] = retweets_by_gender_df.total.div(retweets_by_gender_df.total.sum()).mul(100).round(1).astype(str) + '%'\n",
"retweets_by_gender_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of journalists retweeting other accounts, who retweets the most?"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" tweet_count | \n",
" retweet | \n",
" quote | \n",
" tweets_in_dataset | \n",
" retweet_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2453025128 | \n",
" gloriaminott | \n",
" Minott, Gloria | \n",
" WPFW–FM | \n",
" F | \n",
" 586 | \n",
" 61473 | \n",
" 21,524.00 | \n",
" 0.00 | \n",
" 21,547.00 | \n",
" 21,524.00 | \n",
"
\n",
" \n",
" 304988603 | \n",
" NeilWMcCabe | \n",
" McCabe, Neil | \n",
" Breitbart News | \n",
" M | \n",
" 18903 | \n",
" 64673 | \n",
" 7,528.00 | \n",
" 625.00 | \n",
" 9,370.00 | \n",
" 8,153.00 | \n",
"
\n",
" \n",
" 18825339 | \n",
" CahnEmily | \n",
" Cahn, Emily | \n",
" Mic | \n",
" F | \n",
" 16980 | \n",
" 100803 | \n",
" 4,449.00 | \n",
" 1,834.00 | \n",
" 8,196.00 | \n",
" 6,283.00 | \n",
"
\n",
" \n",
" 191964162 | \n",
" SamLitzinger | \n",
" Litzinger, Sam | \n",
" CBS News | \n",
" M | \n",
" 2329 | \n",
" 95236 | \n",
" 6,017.00 | \n",
" 225.00 | \n",
" 7,537.00 | \n",
" 6,242.00 | \n",
"
\n",
" \n",
" 21612122 | \n",
" HotlineJosh | \n",
" Kraushaar, Josh P. | \n",
" National Journal | \n",
" M | \n",
" 50438 | \n",
" 156610 | \n",
" 4,881.00 | \n",
" 893.00 | \n",
" 6,703.00 | \n",
" 5,774.00 | \n",
"
\n",
" \n",
" 259395895 | \n",
" JohnJHarwood | \n",
" Harwood, John | \n",
" CNBC | \n",
" M | \n",
" 149040 | \n",
" 78015 | \n",
" 4,570.00 | \n",
" 822.00 | \n",
" 6,377.00 | \n",
" 5,392.00 | \n",
"
\n",
" \n",
" 16031927 | \n",
" greta | \n",
" Van Susteren, Greta | \n",
" MSNBC | \n",
" F | \n",
" 1186850 | \n",
" 116645 | \n",
" 794.00 | \n",
" 3,069.00 | \n",
" 4,792.00 | \n",
" 3,863.00 | \n",
"
\n",
" \n",
" 21810329 | \n",
" sdonnan | \n",
" Donnan, Shawn | \n",
" Financial Times | \n",
" M | \n",
" 12311 | \n",
" 79125 | \n",
" 3,332.00 | \n",
" 449.00 | \n",
" 4,537.00 | \n",
" 3,781.00 | \n",
"
\n",
" \n",
" 47408060 | \n",
" JonathanLanday | \n",
" Landay, Jonathan | \n",
" McClatchy Newspapers | \n",
" M | \n",
" 11213 | \n",
" 81042 | \n",
" 3,687.00 | \n",
" 80.00 | \n",
" 4,285.00 | \n",
" 3,767.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 169908 | \n",
" 2,703.00 | \n",
" 859.00 | \n",
" 4,564.00 | \n",
" 3,562.00 | \n",
"
\n",
" \n",
" 21696279 | \n",
" brianbeutler | \n",
" Beutler, Brian Alfred | \n",
" New Republic | \n",
" M | \n",
" 74435 | \n",
" 99050 | \n",
" 2,694.00 | \n",
" 684.00 | \n",
" 4,560.00 | \n",
" 3,378.00 | \n",
"
\n",
" \n",
" 104299137 | \n",
" DavidMDrucker | \n",
" Drucker, David | \n",
" Washington Examiner | \n",
" M | \n",
" 35033 | \n",
" 104613 | \n",
" 1,377.00 | \n",
" 1,955.00 | \n",
" 4,907.00 | \n",
" 3,332.00 | \n",
"
\n",
" \n",
" 593813785 | \n",
" DonnaYoungDC | \n",
" Young, Donna | \n",
" S&P Global Market Intelligence | \n",
" F | \n",
" 5894 | \n",
" 49967 | \n",
" 1,740.00 | \n",
" 1,327.00 | \n",
" 4,414.00 | \n",
" 3,067.00 | \n",
"
\n",
" \n",
" 456994513 | \n",
" maria_e_recio | \n",
" Recio, Maria | \n",
" Austin American-Statesman | \n",
" F | \n",
" 1072 | \n",
" 40822 | \n",
" 2,613.00 | \n",
" 336.00 | \n",
" 3,370.00 | \n",
" 2,949.00 | \n",
"
\n",
" \n",
" 19576571 | \n",
" JaredRizzi | \n",
" Rizzi, Jared | \n",
" Sirius XM Satellite Radio | \n",
" M | \n",
" 13545 | \n",
" 41620 | \n",
" 2,112.00 | \n",
" 828.00 | \n",
" 5,567.00 | \n",
" 2,940.00 | \n",
"
\n",
" \n",
" 16459325 | \n",
" ryanbeckwith | \n",
" Beckwith, Ryan Teague | \n",
" Time Magazine | \n",
" M | \n",
" 20947 | \n",
" 92203 | \n",
" 2,231.00 | \n",
" 521.00 | \n",
" 5,187.00 | \n",
" 2,752.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 148143 | \n",
" 2,435.00 | \n",
" 287.00 | \n",
" 5,078.00 | \n",
" 2,722.00 | \n",
"
\n",
" \n",
" 61734492 | \n",
" Fahrenthold | \n",
" Fahrenthold, David | \n",
" Washington Post | \n",
" M | \n",
" 451778 | \n",
" 27573 | \n",
" 2,505.00 | \n",
" 184.00 | \n",
" 2,871.00 | \n",
" 2,689.00 | \n",
"
\n",
" \n",
" 19545932 | \n",
" kampeas | \n",
" Kampeas, Ron | \n",
" Jewish Telegraphic Agency | \n",
" M | \n",
" 6977 | \n",
" 53053 | \n",
" 1,988.00 | \n",
" 444.00 | \n",
" 3,249.00 | \n",
" 2,432.00 | \n",
"
\n",
" \n",
" 42352386 | \n",
" rschles | \n",
" Schlesinger, Robert | \n",
" U.S. News & World Report | \n",
" M | \n",
" 4553 | \n",
" 35375 | \n",
" 1,644.00 | \n",
" 617.00 | \n",
" 2,459.00 | \n",
" 2,261.00 | \n",
"
\n",
" \n",
" 25702314 | \n",
" EricMGarcia | \n",
" Garcia, Eric M. | \n",
" CQ Roll Call | \n",
" M | \n",
" 3094 | \n",
" 44783 | \n",
" 528.00 | \n",
" 1,723.00 | \n",
" 3,584.00 | \n",
" 2,251.00 | \n",
"
\n",
" \n",
" 18646108 | \n",
" BretBaier | \n",
" Baier, Bret | \n",
" Fox News | \n",
" M | \n",
" 1095184 | \n",
" 52271 | \n",
" 1,623.00 | \n",
" 615.00 | \n",
" 2,379.00 | \n",
" 2,238.00 | \n",
"
\n",
" \n",
" 15486163 | \n",
" SimonMarksFSN | \n",
" Marks, Simon | \n",
" Feature Story News | \n",
" M | \n",
" 7767 | \n",
" 41541 | \n",
" 1,296.00 | \n",
" 934.00 | \n",
" 3,432.00 | \n",
" 2,230.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 106970 | \n",
" 1,665.00 | \n",
" 467.00 | \n",
" 2,810.00 | \n",
" 2,132.00 | \n",
"
\n",
" \n",
" 15730608 | \n",
" edroso | \n",
" Edroso, Roy | \n",
" UCG | \n",
" M | \n",
" 4696 | \n",
" 38064 | \n",
" 1,714.00 | \n",
" 379.00 | \n",
" 2,883.00 | \n",
" 2,093.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name \\\n",
"user_id \n",
"2453025128 gloriaminott Minott, Gloria \n",
"304988603 NeilWMcCabe McCabe, Neil \n",
"18825339 CahnEmily Cahn, Emily \n",
"191964162 SamLitzinger Litzinger, Sam \n",
"21612122 HotlineJosh Kraushaar, Josh P. \n",
"259395895 JohnJHarwood Harwood, John \n",
"16031927 greta Van Susteren, Greta \n",
"21810329 sdonnan Donnan, Shawn \n",
"47408060 JonathanLanday Landay, Jonathan \n",
"13524182 daveweigel Weigel, David \n",
"21696279 brianbeutler Beutler, Brian Alfred \n",
"104299137 DavidMDrucker Drucker, David \n",
"593813785 DonnaYoungDC Young, Donna \n",
"456994513 maria_e_recio Recio, Maria \n",
"19576571 JaredRizzi Rizzi, Jared \n",
"16459325 ryanbeckwith Beckwith, Ryan Teague \n",
"14529929 jaketapper Tapper, Jake \n",
"61734492 Fahrenthold Fahrenthold, David \n",
"19545932 kampeas Kampeas, Ron \n",
"42352386 rschles Schlesinger, Robert \n",
"25702314 EricMGarcia Garcia, Eric M. \n",
"18646108 BretBaier Baier, Bret \n",
"15486163 SimonMarksFSN Marks, Simon \n",
"18678924 jmartNYT Martin, Jonathan \n",
"15730608 edroso Edroso, Roy \n",
"\n",
" organization gender followers_count \\\n",
"user_id \n",
"2453025128 WPFW–FM F 586 \n",
"304988603 Breitbart News M 18903 \n",
"18825339 Mic F 16980 \n",
"191964162 CBS News M 2329 \n",
"21612122 National Journal M 50438 \n",
"259395895 CNBC M 149040 \n",
"16031927 MSNBC F 1186850 \n",
"21810329 Financial Times M 12311 \n",
"47408060 McClatchy Newspapers M 11213 \n",
"13524182 Washington Post M 332344 \n",
"21696279 New Republic M 74435 \n",
"104299137 Washington Examiner M 35033 \n",
"593813785 S&P Global Market Intelligence F 5894 \n",
"456994513 Austin American-Statesman F 1072 \n",
"19576571 Sirius XM Satellite Radio M 13545 \n",
"16459325 Time Magazine M 20947 \n",
"14529929 CNN M 1305680 \n",
"61734492 Washington Post M 451778 \n",
"19545932 Jewish Telegraphic Agency M 6977 \n",
"42352386 U.S. News & World Report M 4553 \n",
"25702314 CQ Roll Call M 3094 \n",
"18646108 Fox News M 1095184 \n",
"15486163 Feature Story News M 7767 \n",
"18678924 New York Times M 197322 \n",
"15730608 UCG M 4696 \n",
"\n",
" tweet_count retweet quote \\\n",
"user_id \n",
"2453025128 61473 21,524.00 0.00 \n",
"304988603 64673 7,528.00 625.00 \n",
"18825339 100803 4,449.00 1,834.00 \n",
"191964162 95236 6,017.00 225.00 \n",
"21612122 156610 4,881.00 893.00 \n",
"259395895 78015 4,570.00 822.00 \n",
"16031927 116645 794.00 3,069.00 \n",
"21810329 79125 3,332.00 449.00 \n",
"47408060 81042 3,687.00 80.00 \n",
"13524182 169908 2,703.00 859.00 \n",
"21696279 99050 2,694.00 684.00 \n",
"104299137 104613 1,377.00 1,955.00 \n",
"593813785 49967 1,740.00 1,327.00 \n",
"456994513 40822 2,613.00 336.00 \n",
"19576571 41620 2,112.00 828.00 \n",
"16459325 92203 2,231.00 521.00 \n",
"14529929 148143 2,435.00 287.00 \n",
"61734492 27573 2,505.00 184.00 \n",
"19545932 53053 1,988.00 444.00 \n",
"42352386 35375 1,644.00 617.00 \n",
"25702314 44783 528.00 1,723.00 \n",
"18646108 52271 1,623.00 615.00 \n",
"15486163 41541 1,296.00 934.00 \n",
"18678924 106970 1,665.00 467.00 \n",
"15730608 38064 1,714.00 379.00 \n",
"\n",
" tweets_in_dataset retweet_count \n",
"user_id \n",
"2453025128 21,547.00 21,524.00 \n",
"304988603 9,370.00 8,153.00 \n",
"18825339 8,196.00 6,283.00 \n",
"191964162 7,537.00 6,242.00 \n",
"21612122 6,703.00 5,774.00 \n",
"259395895 6,377.00 5,392.00 \n",
"16031927 4,792.00 3,863.00 \n",
"21810329 4,537.00 3,781.00 \n",
"47408060 4,285.00 3,767.00 \n",
"13524182 4,564.00 3,562.00 \n",
"21696279 4,560.00 3,378.00 \n",
"104299137 4,907.00 3,332.00 \n",
"593813785 4,414.00 3,067.00 \n",
"456994513 3,370.00 2,949.00 \n",
"19576571 5,567.00 2,940.00 \n",
"16459325 5,187.00 2,752.00 \n",
"14529929 5,078.00 2,722.00 \n",
"61734492 2,871.00 2,689.00 \n",
"19545932 3,249.00 2,432.00 \n",
"42352386 2,459.00 2,261.00 \n",
"25702314 3,584.00 2,251.00 \n",
"18646108 2,379.00 2,238.00 \n",
"15486163 3,432.00 2,230.00 \n",
"18678924 2,810.00 2,132.00 \n",
"15730608 2,883.00 2,093.00 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retweet_user_summary_df = user_summary_df.loc[:,('screen_name', 'name', 'organization', 'gender', 'followers_count', 'tweet_count', 'retweet', 'quote', 'tweets_in_dataset')]\n",
"retweet_user_summary_df['retweet_count'] = retweet_user_summary_df.retweet + retweet_user_summary_df.quote\n",
"retweet_user_summary_df.sort_values(['retweet_count'], ascending=False).head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of journalists retweeting other accounts, who is retweeted the most?\n",
"This is based on screen name, which could have changed during collection period. However, for the users that would be at the top of this list, seems unlikely."
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" retweet_count | \n",
" retweeting_count | \n",
"
\n",
" \n",
" \n",
" \n",
" realDonaldTrump | \n",
" 6650 | \n",
" 807 | \n",
"
\n",
" \n",
" thehill | \n",
" 5424 | \n",
" 457 | \n",
"
\n",
" \n",
" BraddJaffy | \n",
" 3564 | \n",
" 554 | \n",
"
\n",
" \n",
" maggieNYT | \n",
" 3024 | \n",
" 530 | \n",
"
\n",
" \n",
" business | \n",
" 3000 | \n",
" 229 | \n",
"
\n",
" \n",
" washingtonpost | \n",
" 2638 | \n",
" 498 | \n",
"
\n",
" \n",
" AP | \n",
" 2480 | \n",
" 581 | \n",
"
\n",
" \n",
" politico | \n",
" 2335 | \n",
" 334 | \n",
"
\n",
" \n",
" nytimes | \n",
" 2268 | \n",
" 485 | \n",
"
\n",
" \n",
" WSJ | \n",
" 1949 | \n",
" 213 | \n",
"
\n",
" \n",
" burgessev | \n",
" 1836 | \n",
" 289 | \n",
"
\n",
" \n",
" kylegriffin1 | \n",
" 1803 | \n",
" 429 | \n",
"
\n",
" \n",
" ZekeJMiller | \n",
" 1723 | \n",
" 387 | \n",
"
\n",
" \n",
" CNN | \n",
" 1602 | \n",
" 366 | \n",
"
\n",
" \n",
" GlennThrush | \n",
" 1577 | \n",
" 451 | \n",
"
\n",
" \n",
" Reuters | \n",
" 1487 | \n",
" 265 | \n",
"
\n",
" \n",
" jaketapper | \n",
" 1459 | \n",
" 397 | \n",
"
\n",
" \n",
" TheEconomist | \n",
" 1458 | \n",
" 86 | \n",
"
\n",
" \n",
" StevenTDennis | \n",
" 1403 | \n",
" 280 | \n",
"
\n",
" \n",
" FoxNews | \n",
" 1400 | \n",
" 258 | \n",
"
\n",
" \n",
" seungminkim | \n",
" 1393 | \n",
" 327 | \n",
"
\n",
" \n",
" mkraju | \n",
" 1359 | \n",
" 341 | \n",
"
\n",
" \n",
" PhilipRucker | \n",
" 1349 | \n",
" 365 | \n",
"
\n",
" \n",
" markknoller | \n",
" 1343 | \n",
" 341 | \n",
"
\n",
" \n",
" MEPFuller | \n",
" 1324 | \n",
" 286 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" retweet_count retweeting_count\n",
"realDonaldTrump 6650 807\n",
"thehill 5424 457\n",
"BraddJaffy 3564 554\n",
"maggieNYT 3024 530\n",
"business 3000 229\n",
"washingtonpost 2638 498\n",
"AP 2480 581\n",
"politico 2335 334\n",
"nytimes 2268 485\n",
"WSJ 1949 213\n",
"burgessev 1836 289\n",
"kylegriffin1 1803 429\n",
"ZekeJMiller 1723 387\n",
"CNN 1602 366\n",
"GlennThrush 1577 451\n",
"Reuters 1487 265\n",
"jaketapper 1459 397\n",
"TheEconomist 1458 86\n",
"StevenTDennis 1403 280\n",
"FoxNews 1400 258\n",
"seungminkim 1393 327\n",
"mkraju 1359 341\n",
"PhilipRucker 1349 365\n",
"markknoller 1343 341\n",
"MEPFuller 1324 286"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Retweet count\n",
"retweet_count_screen_name_df = pd.DataFrame(retweet_df.retweet_screen_name.value_counts().rename('retweet_count'))\n",
"\n",
"# Count of retweeting users\n",
"retweet_user_id_per_user_screen_name_df = retweet_df[['retweet_screen_name', 'user_id']].drop_duplicates()\n",
"retweeting_count_screen_name_df = pd.DataFrame(retweet_user_id_per_user_screen_name_df.groupby('retweet_screen_name').size(), columns=['retweeting_count'])\n",
"retweeting_count_screen_name_df.index.name = 'screen_name'\n",
"\n",
"all_retweeted_df = retweet_count_screen_name_df.join(retweeting_count_screen_name_df)\n",
"all_retweeted_df.to_csv('output/all_retweeted_by_journalists.csv')\n",
"all_retweeted_df.head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of journalists retweeting other journalists, who is retweeted the most?"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" retweet_count | \n",
" retweeting_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 1,836.00 | \n",
" 289.00 | \n",
"
\n",
" \n",
" 21316253 | \n",
" ZekeJMiller | \n",
" Miller, Zeke J. | \n",
" Time Magazine | \n",
" M | \n",
" 198517 | \n",
" 1,723.00 | \n",
" 387.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 1,577.00 | \n",
" 451.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 1,459.00 | \n",
" 397.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 1,403.00 | \n",
" 280.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 1,393.00 | \n",
" 327.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 1,359.00 | \n",
" 341.00 | \n",
"
\n",
" \n",
" 31127446 | \n",
" markknoller | \n",
" Knoller, Mark | \n",
" CBS News | \n",
" M | \n",
" 301474 | \n",
" 1,343.00 | \n",
" 341.00 | \n",
"
\n",
" \n",
" 398088661 | \n",
" MEPFuller | \n",
" Fuller, Matt E. | \n",
" Huffington Post | \n",
" M | \n",
" 77919 | \n",
" 1,324.00 | \n",
" 286.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 1,221.00 | \n",
" 306.00 | \n",
"
\n",
" \n",
" 14007532 | \n",
" frankthorp | \n",
" Thorp, Frank | \n",
" NBC News | \n",
" M | \n",
" 39798 | \n",
" 1,207.00 | \n",
" 334.00 | \n",
"
\n",
" \n",
" 19847765 | \n",
" sahilkapur | \n",
" Kapur, Sahil | \n",
" Bloomberg News | \n",
" M | \n",
" 69086 | \n",
" 1,186.00 | \n",
" 296.00 | \n",
"
\n",
" \n",
" 16187637 | \n",
" ChadPergram | \n",
" Pergram, Chad | \n",
" Fox News | \n",
" M | \n",
" 59305 | \n",
" 1,177.00 | \n",
" 297.00 | \n",
"
\n",
" \n",
" 104914594 | \n",
" Phil_Mattingly | \n",
" Mattingly, Phil | \n",
" CNN | \n",
" M | \n",
" 40119 | \n",
" 1,120.00 | \n",
" 314.00 | \n",
"
\n",
" \n",
" 16006592 | \n",
" BenjySarlin | \n",
" Sarlin, Benjamin | \n",
" NBC News | \n",
" M | \n",
" 78075 | \n",
" 1,039.00 | \n",
" 215.00 | \n",
"
\n",
" \n",
" 259395895 | \n",
" JohnJHarwood | \n",
" Harwood, John | \n",
" CNBC | \n",
" M | \n",
" 149040 | \n",
" 1,011.00 | \n",
" 277.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 943.00 | \n",
" 281.00 | \n",
"
\n",
" \n",
" 33653195 | \n",
" ericawerner | \n",
" Werner, Erica | \n",
" Associated Press | \n",
" F | \n",
" 14049 | \n",
" 939.00 | \n",
" 281.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 916.00 | \n",
" 247.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 909.00 | \n",
" 388.00 | \n",
"
\n",
" \n",
" 70511174 | \n",
" Hadas_Gold | \n",
" Gold, Hadas | \n",
" Politico | \n",
" F | \n",
" 45221 | \n",
" 849.00 | \n",
" 306.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 829.00 | \n",
" 315.00 | \n",
"
\n",
" \n",
" 104299137 | \n",
" DavidMDrucker | \n",
" Drucker, David | \n",
" Washington Examiner | \n",
" M | \n",
" 35033 | \n",
" 770.00 | \n",
" 193.00 | \n",
"
\n",
" \n",
" 593813785 | \n",
" DonnaYoungDC | \n",
" Young, Donna | \n",
" S&P Global Market Intelligence | \n",
" F | \n",
" 5894 | \n",
" 708.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 118130765 | \n",
" dylanlscott | \n",
" Scott, Dylan L. | \n",
" Stat News | \n",
" M | \n",
" 20122 | \n",
" 705.00 | \n",
" 155.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"407013776 burgessev Everett, John B. Politico \n",
"21316253 ZekeJMiller Miller, Zeke J. Time Magazine \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times \n",
"14529929 jaketapper Tapper, Jake CNN \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News \n",
"19186003 seungminkim Kim, Seung Min Politico \n",
"39155029 mkraju Raju, Manu K. CNN \n",
"31127446 markknoller Knoller, Mark CBS News \n",
"398088661 MEPFuller Fuller, Matt E. Huffington Post \n",
"13524182 daveweigel Weigel, David Washington Post \n",
"14007532 frankthorp Thorp, Frank NBC News \n",
"19847765 sahilkapur Kapur, Sahil Bloomberg News \n",
"16187637 ChadPergram Pergram, Chad Fox News \n",
"104914594 Phil_Mattingly Mattingly, Phil CNN \n",
"16006592 BenjySarlin Sarlin, Benjamin NBC News \n",
"259395895 JohnJHarwood Harwood, John CNBC \n",
"21252618 JakeSherman Sherman, Jacob S. Politico \n",
"33653195 ericawerner Werner, Erica Associated Press \n",
"18678924 jmartNYT Martin, Jonathan New York Times \n",
"12354832 kasie Hunt, Kasie NBC News \n",
"70511174 Hadas_Gold Gold, Hadas Politico \n",
"22771961 Acosta Acosta, Jim CNN \n",
"104299137 DavidMDrucker Drucker, David Washington Examiner \n",
"593813785 DonnaYoungDC Young, Donna S&P Global Market Intelligence \n",
"118130765 dylanlscott Scott, Dylan L. Stat News \n",
"\n",
" gender followers_count retweet_count retweeting_count \n",
"user_id \n",
"407013776 M 31010 1,836.00 289.00 \n",
"21316253 M 198517 1,723.00 387.00 \n",
"19107878 M 308181 1,577.00 451.00 \n",
"14529929 M 1305680 1,459.00 397.00 \n",
"46557945 M 55762 1,403.00 280.00 \n",
"19186003 F 33980 1,393.00 327.00 \n",
"39155029 M 88366 1,359.00 341.00 \n",
"31127446 M 301474 1,343.00 341.00 \n",
"398088661 M 77919 1,324.00 286.00 \n",
"13524182 M 332344 1,221.00 306.00 \n",
"14007532 M 39798 1,207.00 334.00 \n",
"19847765 M 69086 1,186.00 296.00 \n",
"16187637 M 59305 1,177.00 297.00 \n",
"104914594 M 40119 1,120.00 314.00 \n",
"16006592 M 78075 1,039.00 215.00 \n",
"259395895 M 149040 1,011.00 277.00 \n",
"21252618 M 81762 943.00 281.00 \n",
"33653195 F 14049 939.00 281.00 \n",
"18678924 M 197322 916.00 247.00 \n",
"12354832 F 187357 909.00 388.00 \n",
"70511174 F 45221 849.00 306.00 \n",
"22771961 M 350650 829.00 315.00 \n",
"104299137 M 35033 770.00 193.00 \n",
"593813785 F 5894 708.00 13.00 \n",
"118130765 M 20122 705.00 155.00 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_retweet_summary_df = journalist_retweet_summary(journalists_retweet_df)\n",
"journalists_retweet_summary_df.to_csv('output/journalists_retweeted_by_journalists.csv')\n",
"journalists_retweet_summary_df[journalist_retweet_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of journalists retweeting other journalists, how many of the retweets are of males / females?"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 80634 | \n",
" 68.9% | \n",
"
\n",
" \n",
" F | \n",
" 36414 | \n",
" 31.1% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 80634 68.9%\n",
"F 36414 31.1%"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalist_retweet_gender_summary(journalists_retweet_df)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### On average, how many times are journalists retweeted by other journalists?"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" retweet_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 2,292.00 | \n",
"
\n",
" \n",
" mean | \n",
" 51.07 | \n",
"
\n",
" \n",
" std | \n",
" 149.06 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 6.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 33.00 | \n",
"
\n",
" \n",
" max | \n",
" 1,836.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" retweet_count\n",
"count 2,292.00\n",
"mean 51.07\n",
"std 149.06\n",
"min 0.00\n",
"25% 0.00\n",
"50% 6.00\n",
"75% 33.00\n",
"max 1,836.00"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_retweet_summary_df[['retweet_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of journalists retweeting female journalists, who is retweeted the most?"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" retweet_count | \n",
" retweeting_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 1,393.00 | \n",
" 327.00 | \n",
"
\n",
" \n",
" 33653195 | \n",
" ericawerner | \n",
" Werner, Erica | \n",
" Associated Press | \n",
" F | \n",
" 14049 | \n",
" 939.00 | \n",
" 281.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 909.00 | \n",
" 388.00 | \n",
"
\n",
" \n",
" 70511174 | \n",
" Hadas_Gold | \n",
" Gold, Hadas | \n",
" Politico | \n",
" F | \n",
" 45221 | \n",
" 849.00 | \n",
" 306.00 | \n",
"
\n",
" \n",
" 593813785 | \n",
" DonnaYoungDC | \n",
" Young, Donna | \n",
" S&P Global Market Intelligence | \n",
" F | \n",
" 5894 | \n",
" 708.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 167024520 | \n",
" rachaelmbade | \n",
" Bade, Rachel M. | \n",
" Politico | \n",
" F | \n",
" 30164 | \n",
" 614.00 | \n",
" 161.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" F | \n",
" 122382 | \n",
" 539.00 | \n",
" 268.00 | \n",
"
\n",
" \n",
" 139738464 | \n",
" mj_lee | \n",
" Lee, MJ | \n",
" CNN | \n",
" F | \n",
" 31940 | \n",
" 518.00 | \n",
" 189.00 | \n",
"
\n",
" \n",
" 16018516 | \n",
" jenhab | \n",
" Haberkorn, Jennifer A. | \n",
" Politico | \n",
" F | \n",
" 20028 | \n",
" 474.00 | \n",
" 136.00 | \n",
"
\n",
" \n",
" 18825339 | \n",
" CahnEmily | \n",
" Cahn, Emily | \n",
" Mic | \n",
" F | \n",
" 16980 | \n",
" 444.00 | \n",
" 118.00 | \n",
"
\n",
" \n",
" 45399148 | \n",
" jeneps | \n",
" Epstein, Jennifer | \n",
" Bloomberg News | \n",
" F | \n",
" 61242 | \n",
" 443.00 | \n",
" 189.00 | \n",
"
\n",
" \n",
" 705706292 | \n",
" rebeccaballhaus | \n",
" Ballhaus, Rebecca | \n",
" Wall Street Journal / Dow Jones | \n",
" F | \n",
" 24638 | \n",
" 409.00 | \n",
" 154.00 | \n",
"
\n",
" \n",
" 19734832 | \n",
" sarahkliff | \n",
" Kliff, Sarah L. | \n",
" Vox Media | \n",
" F | \n",
" 100090 | \n",
" 392.00 | \n",
" 136.00 | \n",
"
\n",
" \n",
" 163995093 | \n",
" AlexNBCNews | \n",
" Moe, Alexandra | \n",
" NBC News | \n",
" F | \n",
" 21689 | \n",
" 388.00 | \n",
" 134.00 | \n",
"
\n",
" \n",
" 237477771 | \n",
" juliehdavis | \n",
" Davis, Julie | \n",
" New York Times | \n",
" F | \n",
" 49821 | \n",
" 375.00 | \n",
" 194.00 | \n",
"
\n",
" \n",
" 16149614 | \n",
" jrovner | \n",
" Rovner, Julie | \n",
" Kaiser Health News | \n",
" F | \n",
" 21844 | \n",
" 351.00 | \n",
" 137.00 | \n",
"
\n",
" \n",
" 116341480 | \n",
" RosieGray | \n",
" Gray, Rosie | \n",
" The Atlantic | \n",
" F | \n",
" 96935 | \n",
" 345.00 | \n",
" 125.00 | \n",
"
\n",
" \n",
" 28181835 | \n",
" jpaceDC | \n",
" Pace, Julie | \n",
" Associated Press | \n",
" F | \n",
" 46017 | \n",
" 328.00 | \n",
" 132.00 | \n",
"
\n",
" \n",
" 52392666 | \n",
" ZoeTillman | \n",
" Tillman, Zoe | \n",
" BuzzFeed | \n",
" F | \n",
" 15246 | \n",
" 312.00 | \n",
" 70.00 | \n",
"
\n",
" \n",
" 906734342 | \n",
" KimberlyRobinsn | \n",
" Robinson, Kimberly S. | \n",
" Bloomberg BNA | \n",
" F | \n",
" 7170 | \n",
" 308.00 | \n",
" 38.00 | \n",
"
\n",
" \n",
" 188857501 | \n",
" alexis_levinson | \n",
" Levinson, Alexis R. | \n",
" BuzzFeed | \n",
" F | \n",
" 25375 | \n",
" 288.00 | \n",
" 111.00 | \n",
"
\n",
" \n",
" 56552341 | \n",
" LACaldwellDC | \n",
" Caldwell, Leigh Ann | \n",
" NBC News | \n",
" F | \n",
" 8464 | \n",
" 282.00 | \n",
" 98.00 | \n",
"
\n",
" \n",
" 151444950 | \n",
" DaviSusan | \n",
" Davis, Susan | \n",
" National Public Radio | \n",
" F | \n",
" 27297 | \n",
" 270.00 | \n",
" 150.00 | \n",
"
\n",
" \n",
" 360080772 | \n",
" FoxReports | \n",
" Fox, Lauren | \n",
" CNN | \n",
" F | \n",
" 7282 | \n",
" 269.00 | \n",
" 116.00 | \n",
"
\n",
" \n",
" 313545488 | \n",
" LauraLitvan | \n",
" Litvan, Laura | \n",
" Bloomberg News | \n",
" F | \n",
" 4468 | \n",
" 269.00 | \n",
" 115.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name \\\n",
"user_id \n",
"19186003 seungminkim Kim, Seung Min \n",
"33653195 ericawerner Werner, Erica \n",
"12354832 kasie Hunt, Kasie \n",
"70511174 Hadas_Gold Gold, Hadas \n",
"593813785 DonnaYoungDC Young, Donna \n",
"167024520 rachaelmbade Bade, Rachel M. \n",
"33919343 AshleyRParker Parker, Ashley \n",
"139738464 mj_lee Lee, MJ \n",
"16018516 jenhab Haberkorn, Jennifer A. \n",
"18825339 CahnEmily Cahn, Emily \n",
"45399148 jeneps Epstein, Jennifer \n",
"705706292 rebeccaballhaus Ballhaus, Rebecca \n",
"19734832 sarahkliff Kliff, Sarah L. \n",
"163995093 AlexNBCNews Moe, Alexandra \n",
"237477771 juliehdavis Davis, Julie \n",
"16149614 jrovner Rovner, Julie \n",
"116341480 RosieGray Gray, Rosie \n",
"28181835 jpaceDC Pace, Julie \n",
"52392666 ZoeTillman Tillman, Zoe \n",
"906734342 KimberlyRobinsn Robinson, Kimberly S. \n",
"188857501 alexis_levinson Levinson, Alexis R. \n",
"56552341 LACaldwellDC Caldwell, Leigh Ann \n",
"151444950 DaviSusan Davis, Susan \n",
"360080772 FoxReports Fox, Lauren \n",
"313545488 LauraLitvan Litvan, Laura \n",
"\n",
" organization gender followers_count \\\n",
"user_id \n",
"19186003 Politico F 33980 \n",
"33653195 Associated Press F 14049 \n",
"12354832 NBC News F 187357 \n",
"70511174 Politico F 45221 \n",
"593813785 S&P Global Market Intelligence F 5894 \n",
"167024520 Politico F 30164 \n",
"33919343 Washington Post F 122382 \n",
"139738464 CNN F 31940 \n",
"16018516 Politico F 20028 \n",
"18825339 Mic F 16980 \n",
"45399148 Bloomberg News F 61242 \n",
"705706292 Wall Street Journal / Dow Jones F 24638 \n",
"19734832 Vox Media F 100090 \n",
"163995093 NBC News F 21689 \n",
"237477771 New York Times F 49821 \n",
"16149614 Kaiser Health News F 21844 \n",
"116341480 The Atlantic F 96935 \n",
"28181835 Associated Press F 46017 \n",
"52392666 BuzzFeed F 15246 \n",
"906734342 Bloomberg BNA F 7170 \n",
"188857501 BuzzFeed F 25375 \n",
"56552341 NBC News F 8464 \n",
"151444950 National Public Radio F 27297 \n",
"360080772 CNN F 7282 \n",
"313545488 Bloomberg News F 4468 \n",
"\n",
" retweet_count retweeting_count \n",
"user_id \n",
"19186003 1,393.00 327.00 \n",
"33653195 939.00 281.00 \n",
"12354832 909.00 388.00 \n",
"70511174 849.00 306.00 \n",
"593813785 708.00 13.00 \n",
"167024520 614.00 161.00 \n",
"33919343 539.00 268.00 \n",
"139738464 518.00 189.00 \n",
"16018516 474.00 136.00 \n",
"18825339 444.00 118.00 \n",
"45399148 443.00 189.00 \n",
"705706292 409.00 154.00 \n",
"19734832 392.00 136.00 \n",
"163995093 388.00 134.00 \n",
"237477771 375.00 194.00 \n",
"16149614 351.00 137.00 \n",
"116341480 345.00 125.00 \n",
"28181835 328.00 132.00 \n",
"52392666 312.00 70.00 \n",
"906734342 308.00 38.00 \n",
"188857501 288.00 111.00 \n",
"56552341 282.00 98.00 \n",
"151444950 270.00 150.00 \n",
"360080772 269.00 116.00 \n",
"313545488 269.00 115.00 "
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"female_journalists_retweet_summary_df = journalists_retweet_summary_df[journalists_retweet_summary_df.gender == 'F']\n",
"female_journalists_retweet_summary_df.to_csv('output/female_journalists_retweeted_by_journalists.csv')\n",
"female_journalists_retweet_summary_df[journalist_retweet_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### On average, how many times are female journalists retweeted by other journalists?"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" retweet_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 993.00 | \n",
"
\n",
" \n",
" mean | \n",
" 36.67 | \n",
"
\n",
" \n",
" std | \n",
" 97.34 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 5.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 25.00 | \n",
"
\n",
" \n",
" max | \n",
" 1,393.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" retweet_count\n",
"count 993.00\n",
"mean 36.67\n",
"std 97.34\n",
"min 0.00\n",
"25% 0.00\n",
"50% 5.00\n",
"75% 25.00\n",
"max 1,393.00"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"female_journalists_retweet_summary_df[['retweet_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of journalists retweeting male journalists, who is retweeted the most?"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" retweet_count | \n",
" retweeting_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 1,836.00 | \n",
" 289.00 | \n",
"
\n",
" \n",
" 21316253 | \n",
" ZekeJMiller | \n",
" Miller, Zeke J. | \n",
" Time Magazine | \n",
" M | \n",
" 198517 | \n",
" 1,723.00 | \n",
" 387.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 1,577.00 | \n",
" 451.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 1,459.00 | \n",
" 397.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 1,403.00 | \n",
" 280.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 1,359.00 | \n",
" 341.00 | \n",
"
\n",
" \n",
" 31127446 | \n",
" markknoller | \n",
" Knoller, Mark | \n",
" CBS News | \n",
" M | \n",
" 301474 | \n",
" 1,343.00 | \n",
" 341.00 | \n",
"
\n",
" \n",
" 398088661 | \n",
" MEPFuller | \n",
" Fuller, Matt E. | \n",
" Huffington Post | \n",
" M | \n",
" 77919 | \n",
" 1,324.00 | \n",
" 286.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 1,221.00 | \n",
" 306.00 | \n",
"
\n",
" \n",
" 14007532 | \n",
" frankthorp | \n",
" Thorp, Frank | \n",
" NBC News | \n",
" M | \n",
" 39798 | \n",
" 1,207.00 | \n",
" 334.00 | \n",
"
\n",
" \n",
" 19847765 | \n",
" sahilkapur | \n",
" Kapur, Sahil | \n",
" Bloomberg News | \n",
" M | \n",
" 69086 | \n",
" 1,186.00 | \n",
" 296.00 | \n",
"
\n",
" \n",
" 16187637 | \n",
" ChadPergram | \n",
" Pergram, Chad | \n",
" Fox News | \n",
" M | \n",
" 59305 | \n",
" 1,177.00 | \n",
" 297.00 | \n",
"
\n",
" \n",
" 104914594 | \n",
" Phil_Mattingly | \n",
" Mattingly, Phil | \n",
" CNN | \n",
" M | \n",
" 40119 | \n",
" 1,120.00 | \n",
" 314.00 | \n",
"
\n",
" \n",
" 16006592 | \n",
" BenjySarlin | \n",
" Sarlin, Benjamin | \n",
" NBC News | \n",
" M | \n",
" 78075 | \n",
" 1,039.00 | \n",
" 215.00 | \n",
"
\n",
" \n",
" 259395895 | \n",
" JohnJHarwood | \n",
" Harwood, John | \n",
" CNBC | \n",
" M | \n",
" 149040 | \n",
" 1,011.00 | \n",
" 277.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 943.00 | \n",
" 281.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 916.00 | \n",
" 247.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 829.00 | \n",
" 315.00 | \n",
"
\n",
" \n",
" 104299137 | \n",
" DavidMDrucker | \n",
" Drucker, David | \n",
" Washington Examiner | \n",
" M | \n",
" 35033 | \n",
" 770.00 | \n",
" 193.00 | \n",
"
\n",
" \n",
" 118130765 | \n",
" dylanlscott | \n",
" Scott, Dylan L. | \n",
" Stat News | \n",
" M | \n",
" 20122 | \n",
" 705.00 | \n",
" 155.00 | \n",
"
\n",
" \n",
" 3817401 | \n",
" ericgeller | \n",
" Geller, Eric | \n",
" Politico | \n",
" M | \n",
" 58173 | \n",
" 704.00 | \n",
" 225.00 | \n",
"
\n",
" \n",
" 217550862 | \n",
" BresPolitico | \n",
" Bresnahan, John | \n",
" Politico | \n",
" M | \n",
" 40562 | \n",
" 699.00 | \n",
" 223.00 | \n",
"
\n",
" \n",
" 22129280 | \n",
" jimsciutto | \n",
" Sciutto, James | \n",
" CNN | \n",
" M | \n",
" 172012 | \n",
" 688.00 | \n",
" 242.00 | \n",
"
\n",
" \n",
" 61734492 | \n",
" Fahrenthold | \n",
" Fahrenthold, David | \n",
" Washington Post | \n",
" M | \n",
" 451778 | \n",
" 654.00 | \n",
" 284.00 | \n",
"
\n",
" \n",
" 15463671 | \n",
" samstein | \n",
" Stein, Sam | \n",
" Huffington Post | \n",
" M | \n",
" 313211 | \n",
" 642.00 | \n",
" 229.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization gender \\\n",
"user_id \n",
"407013776 burgessev Everett, John B. Politico M \n",
"21316253 ZekeJMiller Miller, Zeke J. Time Magazine M \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times M \n",
"14529929 jaketapper Tapper, Jake CNN M \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News M \n",
"39155029 mkraju Raju, Manu K. CNN M \n",
"31127446 markknoller Knoller, Mark CBS News M \n",
"398088661 MEPFuller Fuller, Matt E. Huffington Post M \n",
"13524182 daveweigel Weigel, David Washington Post M \n",
"14007532 frankthorp Thorp, Frank NBC News M \n",
"19847765 sahilkapur Kapur, Sahil Bloomberg News M \n",
"16187637 ChadPergram Pergram, Chad Fox News M \n",
"104914594 Phil_Mattingly Mattingly, Phil CNN M \n",
"16006592 BenjySarlin Sarlin, Benjamin NBC News M \n",
"259395895 JohnJHarwood Harwood, John CNBC M \n",
"21252618 JakeSherman Sherman, Jacob S. Politico M \n",
"18678924 jmartNYT Martin, Jonathan New York Times M \n",
"22771961 Acosta Acosta, Jim CNN M \n",
"104299137 DavidMDrucker Drucker, David Washington Examiner M \n",
"118130765 dylanlscott Scott, Dylan L. Stat News M \n",
"3817401 ericgeller Geller, Eric Politico M \n",
"217550862 BresPolitico Bresnahan, John Politico M \n",
"22129280 jimsciutto Sciutto, James CNN M \n",
"61734492 Fahrenthold Fahrenthold, David Washington Post M \n",
"15463671 samstein Stein, Sam Huffington Post M \n",
"\n",
" followers_count retweet_count retweeting_count \n",
"user_id \n",
"407013776 31010 1,836.00 289.00 \n",
"21316253 198517 1,723.00 387.00 \n",
"19107878 308181 1,577.00 451.00 \n",
"14529929 1305680 1,459.00 397.00 \n",
"46557945 55762 1,403.00 280.00 \n",
"39155029 88366 1,359.00 341.00 \n",
"31127446 301474 1,343.00 341.00 \n",
"398088661 77919 1,324.00 286.00 \n",
"13524182 332344 1,221.00 306.00 \n",
"14007532 39798 1,207.00 334.00 \n",
"19847765 69086 1,186.00 296.00 \n",
"16187637 59305 1,177.00 297.00 \n",
"104914594 40119 1,120.00 314.00 \n",
"16006592 78075 1,039.00 215.00 \n",
"259395895 149040 1,011.00 277.00 \n",
"21252618 81762 943.00 281.00 \n",
"18678924 197322 916.00 247.00 \n",
"22771961 350650 829.00 315.00 \n",
"104299137 35033 770.00 193.00 \n",
"118130765 20122 705.00 155.00 \n",
"3817401 58173 704.00 225.00 \n",
"217550862 40562 699.00 223.00 \n",
"22129280 172012 688.00 242.00 \n",
"61734492 451778 654.00 284.00 \n",
"15463671 313211 642.00 229.00 "
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"male_journalists_retweet_summary_df = journalists_retweet_summary_df[journalists_retweet_summary_df.gender == 'M']\n",
"male_journalists_retweet_summary_df.to_csv('output/male_journalists_retweeted_by_journalists.csv')\n",
"male_journalists_retweet_summary_df[journalist_retweet_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### On average, how many times are male journalists retweeted by other journalists?"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" retweet_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1,299.00 | \n",
"
\n",
" \n",
" mean | \n",
" 62.07 | \n",
"
\n",
" \n",
" std | \n",
" 178.04 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 1.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 8.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 39.50 | \n",
"
\n",
" \n",
" max | \n",
" 1,836.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" retweet_count\n",
"count 1,299.00\n",
"mean 62.07\n",
"std 178.04\n",
"min 0.00\n",
"25% 1.00\n",
"50% 8.00\n",
"75% 39.50\n",
"max 1,836.00"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"male_journalists_retweet_summary_df[['retweet_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of female journalists retweeting other journalists, who is retweeted the most?"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" retweet_count | \n",
" retweeting_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 748.00 | \n",
" 122.00 | \n",
"
\n",
" \n",
" 593813785 | \n",
" DonnaYoungDC | \n",
" Young, Donna | \n",
" S&P Global Market Intelligence | \n",
" F | \n",
" 5894 | \n",
" 704.00 | \n",
" 9.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 572.00 | \n",
" 142.00 | \n",
"
\n",
" \n",
" 31127446 | \n",
" markknoller | \n",
" Knoller, Mark | \n",
" CBS News | \n",
" M | \n",
" 301474 | \n",
" 549.00 | \n",
" 140.00 | \n",
"
\n",
" \n",
" 21316253 | \n",
" ZekeJMiller | \n",
" Miller, Zeke J. | \n",
" Time Magazine | \n",
" M | \n",
" 198517 | \n",
" 516.00 | \n",
" 149.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 503.00 | \n",
" 97.00 | \n",
"
\n",
" \n",
" 14007532 | \n",
" frankthorp | \n",
" Thorp, Frank | \n",
" NBC News | \n",
" M | \n",
" 39798 | \n",
" 470.00 | \n",
" 140.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 463.00 | \n",
" 165.00 | \n",
"
\n",
" \n",
" 33653195 | \n",
" ericawerner | \n",
" Werner, Erica | \n",
" Associated Press | \n",
" F | \n",
" 14049 | \n",
" 452.00 | \n",
" 119.00 | \n",
"
\n",
" \n",
" 398088661 | \n",
" MEPFuller | \n",
" Fuller, Matt E. | \n",
" Huffington Post | \n",
" M | \n",
" 77919 | \n",
" 447.00 | \n",
" 116.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 403.00 | \n",
" 132.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 388.00 | \n",
" 158.00 | \n",
"
\n",
" \n",
" 104914594 | \n",
" Phil_Mattingly | \n",
" Mattingly, Phil | \n",
" CNN | \n",
" M | \n",
" 40119 | \n",
" 372.00 | \n",
" 129.00 | \n",
"
\n",
" \n",
" 118130765 | \n",
" dylanlscott | \n",
" Scott, Dylan L. | \n",
" Stat News | \n",
" M | \n",
" 20122 | \n",
" 367.00 | \n",
" 67.00 | \n",
"
\n",
" \n",
" 16187637 | \n",
" ChadPergram | \n",
" Pergram, Chad | \n",
" Fox News | \n",
" M | \n",
" 59305 | \n",
" 365.00 | \n",
" 122.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 344.00 | \n",
" 164.00 | \n",
"
\n",
" \n",
" 19847765 | \n",
" sahilkapur | \n",
" Kapur, Sahil | \n",
" Bloomberg News | \n",
" M | \n",
" 69086 | \n",
" 338.00 | \n",
" 103.00 | \n",
"
\n",
" \n",
" 167024520 | \n",
" rachaelmbade | \n",
" Bade, Rachel M. | \n",
" Politico | \n",
" F | \n",
" 30164 | \n",
" 303.00 | \n",
" 59.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 302.00 | \n",
" 106.00 | \n",
"
\n",
" \n",
" 22891564 | \n",
" chrisgeidner | \n",
" Geidner, Chris | \n",
" BuzzFeed | \n",
" M | \n",
" 83316 | \n",
" 287.00 | \n",
" 61.00 | \n",
"
\n",
" \n",
" 70511174 | \n",
" Hadas_Gold | \n",
" Gold, Hadas | \n",
" Politico | \n",
" F | \n",
" 45221 | \n",
" 279.00 | \n",
" 111.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 265.00 | \n",
" 119.00 | \n",
"
\n",
" \n",
" 139738464 | \n",
" mj_lee | \n",
" Lee, MJ | \n",
" CNN | \n",
" F | \n",
" 31940 | \n",
" 259.00 | \n",
" 79.00 | \n",
"
\n",
" \n",
" 217550862 | \n",
" BresPolitico | \n",
" Bresnahan, John | \n",
" Politico | \n",
" M | \n",
" 40562 | \n",
" 256.00 | \n",
" 82.00 | \n",
"
\n",
" \n",
" 61734492 | \n",
" Fahrenthold | \n",
" Fahrenthold, David | \n",
" Washington Post | \n",
" M | \n",
" 451778 | \n",
" 253.00 | \n",
" 115.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"407013776 burgessev Everett, John B. Politico \n",
"593813785 DonnaYoungDC Young, Donna S&P Global Market Intelligence \n",
"19186003 seungminkim Kim, Seung Min Politico \n",
"31127446 markknoller Knoller, Mark CBS News \n",
"21316253 ZekeJMiller Miller, Zeke J. Time Magazine \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News \n",
"14007532 frankthorp Thorp, Frank NBC News \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times \n",
"33653195 ericawerner Werner, Erica Associated Press \n",
"398088661 MEPFuller Fuller, Matt E. Huffington Post \n",
"39155029 mkraju Raju, Manu K. CNN \n",
"14529929 jaketapper Tapper, Jake CNN \n",
"104914594 Phil_Mattingly Mattingly, Phil CNN \n",
"118130765 dylanlscott Scott, Dylan L. Stat News \n",
"16187637 ChadPergram Pergram, Chad Fox News \n",
"12354832 kasie Hunt, Kasie NBC News \n",
"19847765 sahilkapur Kapur, Sahil Bloomberg News \n",
"167024520 rachaelmbade Bade, Rachel M. Politico \n",
"21252618 JakeSherman Sherman, Jacob S. Politico \n",
"22891564 chrisgeidner Geidner, Chris BuzzFeed \n",
"70511174 Hadas_Gold Gold, Hadas Politico \n",
"22771961 Acosta Acosta, Jim CNN \n",
"139738464 mj_lee Lee, MJ CNN \n",
"217550862 BresPolitico Bresnahan, John Politico \n",
"61734492 Fahrenthold Fahrenthold, David Washington Post \n",
"\n",
" gender followers_count retweet_count retweeting_count \n",
"user_id \n",
"407013776 M 31010 748.00 122.00 \n",
"593813785 F 5894 704.00 9.00 \n",
"19186003 F 33980 572.00 142.00 \n",
"31127446 M 301474 549.00 140.00 \n",
"21316253 M 198517 516.00 149.00 \n",
"46557945 M 55762 503.00 97.00 \n",
"14007532 M 39798 470.00 140.00 \n",
"19107878 M 308181 463.00 165.00 \n",
"33653195 F 14049 452.00 119.00 \n",
"398088661 M 77919 447.00 116.00 \n",
"39155029 M 88366 403.00 132.00 \n",
"14529929 M 1305680 388.00 158.00 \n",
"104914594 M 40119 372.00 129.00 \n",
"118130765 M 20122 367.00 67.00 \n",
"16187637 M 59305 365.00 122.00 \n",
"12354832 F 187357 344.00 164.00 \n",
"19847765 M 69086 338.00 103.00 \n",
"167024520 F 30164 303.00 59.00 \n",
"21252618 M 81762 302.00 106.00 \n",
"22891564 M 83316 287.00 61.00 \n",
"70511174 F 45221 279.00 111.00 \n",
"22771961 M 350650 265.00 119.00 \n",
"139738464 F 31940 259.00 79.00 \n",
"217550862 M 40562 256.00 82.00 \n",
"61734492 M 451778 253.00 115.00 "
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summarize only the retweet rows whose gender column is 'F', write the complete\n",
"# summary to CSV, then display the first 25 rows of the display fields.\n",
"journalists_retweeted_by_female_summary_df = journalist_retweet_summary(\n",
"    journalists_retweet_df.loc[journalists_retweet_df['gender'] == 'F']\n",
")\n",
"journalists_retweeted_by_female_summary_df.to_csv('output/journalists_retweeted_by_female_journalists.csv')\n",
"journalists_retweeted_by_female_summary_df[journalist_retweet_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of female journalists retweeting other journalists, how many are male / female?"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 25410 | \n",
" 59.6% | \n",
"
\n",
" \n",
" F | \n",
" 17228 | \n",
" 40.4% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 25410 59.6%\n",
"F 17228 40.4%"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pass the retweet rows whose gender column is 'F' to journalist_retweet_gender_summary\n",
"# (defined earlier in the notebook) and display the result.\n",
"journalist_retweet_gender_summary(\n",
"    journalists_retweet_df.loc[journalists_retweet_df['gender'] == 'F']\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### On average, how many times are male / female journalists retweeted by female journalists? "
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" retweet_gender | \n",
" F | \n",
" M | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 736.00 | \n",
" 771.00 | \n",
"
\n",
" \n",
" mean | \n",
" 23.41 | \n",
" 32.96 | \n",
"
\n",
" \n",
" std | \n",
" 51.31 | \n",
" 83.17 | \n",
"
\n",
" \n",
" min | \n",
" 1.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 3.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 8.00 | \n",
" 10.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 23.00 | \n",
" 32.00 | \n",
"
\n",
" \n",
" max | \n",
" 857.00 | \n",
" 1,779.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"retweet_gender F M\n",
"count 736.00 771.00\n",
"mean 23.41 32.96\n",
"std 51.31 83.17\n",
"min 1.00 1.00\n",
"25% 3.00 4.00\n",
"50% 8.00 10.00\n",
"75% 23.00 32.00\n",
"max 857.00 1,779.00"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Retweet rows whose gender column is 'F'; then count rows per (user_id, retweet_gender),\n",
"# pivot retweet_gender into columns and describe each column.\n",
"# NOTE(review): unstack() leaves NaN where a user_id has no rows for a retweet_gender and\n",
"# describe() ignores NaN, so users with zero retweets of a gender are not averaged in.\n",
"# NOTE(review): the grouping key is user_id, not the retweeted user's id - confirm this is\n",
"# the intended reading of the heading above.\n",
"female_journalists_retweet_df = journalists_retweet_df.loc[journalists_retweet_df['gender'] == 'F']\n",
"(\n",
"    female_journalists_retweet_df\n",
"    .groupby(['user_id', 'retweet_gender'])\n",
"    .size()\n",
"    .unstack()\n",
"    .describe()\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Of male journalists retweeting other journalists, who is retweeted the most?"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" retweet_count | \n",
" retweeting_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 21316253 | \n",
" ZekeJMiller | \n",
" Miller, Zeke J. | \n",
" Time Magazine | \n",
" M | \n",
" 198517 | \n",
" 1,207.00 | \n",
" 238.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 1,114.00 | \n",
" 286.00 | \n",
"
\n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 1,088.00 | \n",
" 167.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 1,071.00 | \n",
" 239.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 975.00 | \n",
" 209.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 956.00 | \n",
" 209.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 900.00 | \n",
" 183.00 | \n",
"
\n",
" \n",
" 398088661 | \n",
" MEPFuller | \n",
" Fuller, Matt E. | \n",
" Huffington Post | \n",
" M | \n",
" 77919 | \n",
" 877.00 | \n",
" 170.00 | \n",
"
\n",
" \n",
" 19847765 | \n",
" sahilkapur | \n",
" Kapur, Sahil | \n",
" Bloomberg News | \n",
" M | \n",
" 69086 | \n",
" 848.00 | \n",
" 193.00 | \n",
"
\n",
" \n",
" 16006592 | \n",
" BenjySarlin | \n",
" Sarlin, Benjamin | \n",
" NBC News | \n",
" M | \n",
" 78075 | \n",
" 828.00 | \n",
" 141.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 821.00 | \n",
" 185.00 | \n",
"
\n",
" \n",
" 16187637 | \n",
" ChadPergram | \n",
" Pergram, Chad | \n",
" Fox News | \n",
" M | \n",
" 59305 | \n",
" 812.00 | \n",
" 175.00 | \n",
"
\n",
" \n",
" 31127446 | \n",
" markknoller | \n",
" Knoller, Mark | \n",
" CBS News | \n",
" M | \n",
" 301474 | \n",
" 794.00 | \n",
" 201.00 | \n",
"
\n",
" \n",
" 259395895 | \n",
" JohnJHarwood | \n",
" Harwood, John | \n",
" CNBC | \n",
" M | \n",
" 149040 | \n",
" 777.00 | \n",
" 196.00 | \n",
"
\n",
" \n",
" 104914594 | \n",
" Phil_Mattingly | \n",
" Mattingly, Phil | \n",
" CNN | \n",
" M | \n",
" 40119 | \n",
" 748.00 | \n",
" 185.00 | \n",
"
\n",
" \n",
" 14007532 | \n",
" frankthorp | \n",
" Thorp, Frank | \n",
" NBC News | \n",
" M | \n",
" 39798 | \n",
" 737.00 | \n",
" 194.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 726.00 | \n",
" 167.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 641.00 | \n",
" 175.00 | \n",
"
\n",
" \n",
" 104299137 | \n",
" DavidMDrucker | \n",
" Drucker, David | \n",
" Washington Examiner | \n",
" M | \n",
" 35033 | \n",
" 583.00 | \n",
" 127.00 | \n",
"
\n",
" \n",
" 70511174 | \n",
" Hadas_Gold | \n",
" Gold, Hadas | \n",
" Politico | \n",
" F | \n",
" 45221 | \n",
" 570.00 | \n",
" 195.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 565.00 | \n",
" 224.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 564.00 | \n",
" 196.00 | \n",
"
\n",
" \n",
" 19580890 | \n",
" LeeCamp | \n",
" Camp, Lee | \n",
" RTTV America | \n",
" M | \n",
" 67601 | \n",
" 560.00 | \n",
" 6.00 | \n",
"
\n",
" \n",
" 3817401 | \n",
" ericgeller | \n",
" Geller, Eric | \n",
" Politico | \n",
" M | \n",
" 58173 | \n",
" 524.00 | \n",
" 149.00 | \n",
"
\n",
" \n",
" 22129280 | \n",
" jimsciutto | \n",
" Sciutto, James | \n",
" CNN | \n",
" M | \n",
" 172012 | \n",
" 507.00 | \n",
" 151.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization gender \\\n",
"user_id \n",
"21316253 ZekeJMiller Miller, Zeke J. Time Magazine M \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times M \n",
"407013776 burgessev Everett, John B. Politico M \n",
"14529929 jaketapper Tapper, Jake CNN M \n",
"13524182 daveweigel Weigel, David Washington Post M \n",
"39155029 mkraju Raju, Manu K. CNN M \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News M \n",
"398088661 MEPFuller Fuller, Matt E. Huffington Post M \n",
"19847765 sahilkapur Kapur, Sahil Bloomberg News M \n",
"16006592 BenjySarlin Sarlin, Benjamin NBC News M \n",
"19186003 seungminkim Kim, Seung Min Politico F \n",
"16187637 ChadPergram Pergram, Chad Fox News M \n",
"31127446 markknoller Knoller, Mark CBS News M \n",
"259395895 JohnJHarwood Harwood, John CNBC M \n",
"104914594 Phil_Mattingly Mattingly, Phil CNN M \n",
"14007532 frankthorp Thorp, Frank NBC News M \n",
"18678924 jmartNYT Martin, Jonathan New York Times M \n",
"21252618 JakeSherman Sherman, Jacob S. Politico M \n",
"104299137 DavidMDrucker Drucker, David Washington Examiner M \n",
"70511174 Hadas_Gold Gold, Hadas Politico F \n",
"12354832 kasie Hunt, Kasie NBC News F \n",
"22771961 Acosta Acosta, Jim CNN M \n",
"19580890 LeeCamp Camp, Lee RTTV America M \n",
"3817401 ericgeller Geller, Eric Politico M \n",
"22129280 jimsciutto Sciutto, James CNN M \n",
"\n",
" followers_count retweet_count retweeting_count \n",
"user_id \n",
"21316253 198517 1,207.00 238.00 \n",
"19107878 308181 1,114.00 286.00 \n",
"407013776 31010 1,088.00 167.00 \n",
"14529929 1305680 1,071.00 239.00 \n",
"13524182 332344 975.00 209.00 \n",
"39155029 88366 956.00 209.00 \n",
"46557945 55762 900.00 183.00 \n",
"398088661 77919 877.00 170.00 \n",
"19847765 69086 848.00 193.00 \n",
"16006592 78075 828.00 141.00 \n",
"19186003 33980 821.00 185.00 \n",
"16187637 59305 812.00 175.00 \n",
"31127446 301474 794.00 201.00 \n",
"259395895 149040 777.00 196.00 \n",
"104914594 40119 748.00 185.00 \n",
"14007532 39798 737.00 194.00 \n",
"18678924 197322 726.00 167.00 \n",
"21252618 81762 641.00 175.00 \n",
"104299137 35033 583.00 127.00 \n",
"70511174 45221 570.00 195.00 \n",
"12354832 187357 565.00 224.00 \n",
"22771961 350650 564.00 196.00 \n",
"19580890 67601 560.00 6.00 \n",
"3817401 58173 524.00 149.00 \n",
"22129280 172012 507.00 151.00 "
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summarize only the retweet rows whose gender column is 'M', write the complete\n",
"# summary to CSV, then display the first 25 rows of the display fields.\n",
"journalists_retweeted_by_male_summary_df = journalist_retweet_summary(\n",
"    journalists_retweet_df.loc[journalists_retweet_df['gender'] == 'M']\n",
")\n",
"journalists_retweeted_by_male_summary_df.to_csv('output/journalists_retweeted_by_male_journalists.csv')\n",
"journalists_retweeted_by_male_summary_df[journalist_retweet_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Of male journalists retweeting other journalists, how many are male / female?"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 55224 | \n",
" 74.2% | \n",
"
\n",
" \n",
" F | \n",
" 19186 | \n",
" 25.8% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 55224 74.2%\n",
"F 19186 25.8%"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pass the retweet rows whose gender column is 'M' to journalist_retweet_gender_summary\n",
"# (defined earlier in the notebook) and display the result.\n",
"journalist_retweet_gender_summary(\n",
"    journalists_retweet_df.loc[journalists_retweet_df['gender'] == 'M']\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### On average, how many times are male / female journalists retweeted by male journalists?"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" retweet_gender | \n",
" F | \n",
" M | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 886.00 | \n",
" 1,002.00 | \n",
"
\n",
" \n",
" mean | \n",
" 21.65 | \n",
" 55.11 | \n",
"
\n",
" \n",
" std | \n",
" 38.69 | \n",
" 118.80 | \n",
"
\n",
" \n",
" min | \n",
" 1.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 3.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 8.00 | \n",
" 15.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 23.00 | \n",
" 52.00 | \n",
"
\n",
" \n",
" max | \n",
" 442.00 | \n",
" 1,414.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"retweet_gender F M\n",
"count 886.00 1,002.00\n",
"mean 21.65 55.11\n",
"std 38.69 118.80\n",
"min 1.00 1.00\n",
"25% 3.00 4.00\n",
"50% 8.00 15.00\n",
"75% 23.00 52.00\n",
"max 442.00 1,414.00"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Retweet rows whose gender column is 'M'; then count rows per (user_id, retweet_gender),\n",
"# pivot retweet_gender into columns and describe each column.\n",
"# NOTE(review): unstack() leaves NaN where a user_id has no rows for a retweet_gender and\n",
"# describe() ignores NaN, so users with zero retweets of a gender are not averaged in.\n",
"# NOTE(review): the grouping key is user_id, not the retweeted user's id - confirm this is\n",
"# the intended reading of the heading above.\n",
"male_journalists_retweet_df = journalists_retweet_df.loc[journalists_retweet_df['gender'] == 'M']\n",
"(\n",
"    male_journalists_retweet_df\n",
"    .groupby(['user_id', 'retweet_gender'])\n",
"    .size()\n",
"    .unstack()\n",
"    .describe()\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Reply data prep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load replies from tweets"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Loading from tweets/642bf140607547cb9d4c6b1fc49772aa_001.json.gz\n",
"DEBUG:root:Loaded 50000\n",
"DEBUG:root:Loaded 100000\n",
"DEBUG:root:Loaded 150000\n",
"DEBUG:root:Loaded 200000\n",
"DEBUG:root:Loaded 250000\n",
"INFO:root:Loading from tweets/9f7ed17c16a1494c8690b4053609539d_001.json.gz\n",
"DEBUG:root:Loaded 300000\n",
"DEBUG:root:Loaded 350000\n",
"DEBUG:root:Loaded 400000\n",
"DEBUG:root:Loaded 450000\n",
"DEBUG:root:Loaded 500000\n",
"INFO:root:Loading from tweets/41feff28312c433ab004cd822212f4c2_001.json.gz\n",
"DEBUG:root:Loaded 550000\n",
"DEBUG:root:Loaded 600000\n",
"DEBUG:root:Loaded 650000\n",
"DEBUG:root:Loaded 700000\n",
"DEBUG:root:Loaded 750000\n",
"DEBUG:root:Loaded 800000\n"
]
},
{
"data": {
"text/plain": [
"tweet_id 126254\n",
"user_id 126254\n",
"screen_name 126254\n",
"reply_to_user_id 126254\n",
"reply_to_screen_name 126254\n",
"tweet_created_at 126254\n",
"dtype: int64"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Simplify the tweet on load: keep only the fields used by the reply analysis.\n",
"# Returns a flat dict for tweets that tweet_type() labels 'reply', otherwise None.\n",
"def reply_transform(tweet):\n",
"    # Anything that is not a reply is discarded.\n",
"    if tweet_type(tweet) != 'reply':\n",
"        return None\n",
"    reply_record = {\n",
"        'tweet_id': tweet['id_str'],\n",
"        'user_id': tweet['user']['id_str'],\n",
"        'screen_name': tweet['user']['screen_name'],\n",
"        'reply_to_user_id': tweet['in_reply_to_user_id_str'],\n",
"        'reply_to_screen_name': tweet['in_reply_to_screen_name'],\n",
"        'tweet_created_at': date_parse(tweet['created_at'])\n",
"    }\n",
"    return reply_record\n",
"\n",
"# Build the reply DataFrame through utils.load_tweet_df using reply_transform and the\n",
"# column list below, then show the non-null count per column.\n",
"# NOTE(review): dedupe_columns presumably drops repeated tweet_id rows - confirm in utils.\n",
"base_reply_df = load_tweet_df(\n",
"    reply_transform,\n",
"    [\n",
"        'tweet_id',\n",
"        'user_id',\n",
"        'screen_name',\n",
"        'reply_to_user_id',\n",
"        'reply_to_screen_name',\n",
"        'tweet_created_at'\n",
"    ],\n",
"    dedupe_columns=['tweet_id']\n",
")\n",
"\n",
"base_reply_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tweet_id | \n",
" user_id | \n",
" screen_name | \n",
" reply_to_user_id | \n",
" reply_to_screen_name | \n",
" tweet_created_at | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 872495244062978048 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 59331128 | \n",
" PhilipRucker | \n",
" 2017-06-07 16:47:31+00:00 | \n",
"
\n",
" \n",
" 1 | \n",
" 872473152160399361 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 2856617865 | \n",
" RPhuket | \n",
" 2017-06-07 15:19:43+00:00 | \n",
"
\n",
" \n",
" 2 | \n",
" 872266930341728256 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 1854392378 | \n",
" hrm_1973 | \n",
" 2017-06-07 01:40:16+00:00 | \n",
"
\n",
" \n",
" 3 | \n",
" 872250430109175809 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 390985197 | \n",
" MikeBastasch | \n",
" 2017-06-07 00:34:42+00:00 | \n",
"
\n",
" \n",
" 4 | \n",
" 872218322187767808 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 407013776 | \n",
" burgessev | \n",
" 2017-06-06 22:27:07+00:00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" tweet_id user_id screen_name reply_to_user_id \\\n",
"0 872495244062978048 327862439 jonathanvswan 59331128 \n",
"1 872473152160399361 327862439 jonathanvswan 2856617865 \n",
"2 872266930341728256 327862439 jonathanvswan 1854392378 \n",
"3 872250430109175809 327862439 jonathanvswan 390985197 \n",
"4 872218322187767808 327862439 jonathanvswan 407013776 \n",
"\n",
" reply_to_screen_name tweet_created_at \n",
"0 PhilipRucker 2017-06-07 16:47:31+00:00 \n",
"1 RPhuket 2017-06-07 15:19:43+00:00 \n",
"2 hrm_1973 2017-06-07 01:40:16+00:00 \n",
"3 MikeBastasch 2017-06-07 00:34:42+00:00 \n",
"4 burgessev 2017-06-06 22:27:07+00:00 "
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five reply rows.\n",
"base_reply_df.head(n=5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add gender of replier"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tweet_id 126254\n",
"user_id 126254\n",
"screen_name 126254\n",
"reply_to_user_id 126254\n",
"reply_to_screen_name 126254\n",
"tweet_created_at 126254\n",
"gender 126254\n",
"dtype: int64"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attach the replier's gender by looking up user_id in the index of user_summary_df\n",
"# (left join, so every reply row is kept), then show the non-null count per column.\n",
"reply_df = base_reply_df.join(user_summary_df['gender'], on='user_id', how='left')\n",
"reply_df.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Distinct replied to users"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"31034"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of distinct reply_to_user_id values across all replies.\n",
"len(reply_df['reply_to_user_id'].unique())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Limit to beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tweet_id 43390\n",
"user_id 43390\n",
"screen_name 43390\n",
"reply_to_user_id 43390\n",
"reply_to_screen_name 43390\n",
"tweet_created_at 43390\n",
"gender 43390\n",
"reply_to_gender 43390\n",
"dtype: int64"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The inner join on reply_to_user_id keeps only replies whose target is present in\n",
"# user_summary_df; the joined gender column (suffixed '_reply') becomes reply_to_gender.\n",
"journalists_reply_df = (\n",
"    reply_df\n",
"    .join(user_summary_df['gender'], how='inner', on='reply_to_user_id', rsuffix='_reply')\n",
"    .rename(columns={'gender_reply': 'reply_to_gender'})\n",
")\n",
"journalists_reply_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tweet_id | \n",
" user_id | \n",
" screen_name | \n",
" reply_to_user_id | \n",
" reply_to_screen_name | \n",
" tweet_created_at | \n",
" gender | \n",
" reply_to_gender | \n",
"
\n",
" \n",
" \n",
" \n",
" 4 | \n",
" 872218322187767808 | \n",
" 327862439 | \n",
" jonathanvswan | \n",
" 407013776 | \n",
" burgessev | \n",
" 2017-06-06 22:27:07+00:00 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 234 | \n",
" 871795694020984833 | \n",
" 195840597 | \n",
" JNicholsonInDC | \n",
" 407013776 | \n",
" burgessev | \n",
" 2017-06-05 18:27:45+00:00 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 572 | \n",
" 870371176866041856 | \n",
" 163589845 | \n",
" PoliticoKevin | \n",
" 407013776 | \n",
" burgessev | \n",
" 2017-06-01 20:07:13+00:00 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 728 | \n",
" 870659438901940224 | \n",
" 115564212 | \n",
" IsaacDovere | \n",
" 407013776 | \n",
" burgessev | \n",
" 2017-06-02 15:12:40+00:00 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 731 | \n",
" 872473152143667201 | \n",
" 167024520 | \n",
" rachaelmbade | \n",
" 407013776 | \n",
" burgessev | \n",
" 2017-06-07 15:19:43+00:00 | \n",
" F | \n",
" M | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" tweet_id user_id screen_name reply_to_user_id \\\n",
"4 872218322187767808 327862439 jonathanvswan 407013776 \n",
"234 871795694020984833 195840597 JNicholsonInDC 407013776 \n",
"572 870371176866041856 163589845 PoliticoKevin 407013776 \n",
"728 870659438901940224 115564212 IsaacDovere 407013776 \n",
"731 872473152143667201 167024520 rachaelmbade 407013776 \n",
"\n",
" reply_to_screen_name tweet_created_at gender reply_to_gender \n",
"4 burgessev 2017-06-06 22:27:07+00:00 M M \n",
"234 burgessev 2017-06-05 18:27:45+00:00 M M \n",
"572 burgessev 2017-06-01 20:07:13+00:00 M M \n",
"728 burgessev 2017-06-02 15:12:40+00:00 M M \n",
"731 burgessev 2017-06-07 15:19:43+00:00 F M "
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five journalist-to-journalist reply rows.\n",
"journalists_reply_df.head(n=5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Functions for summarizing replies by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"# Gender of beltway journalists replied to by beltway journalists\n",
"def journalist_reply_gender_summary(reply_df):\n",
" return pd.DataFrame({'count':reply_df.reply_to_gender.value_counts(), \n",
" 'percentage': reply_df.reply_to_gender.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'})\n",
"\n",
"# Reply to beltway journalists by beltway journalists.\n",
"# Returns the notebook-level user_summary_df extended with two columns and sorted in\n",
"# descending order of reply_to_count, replying_count, followers_count:\n",
"#   reply_to_count - number of rows in reply_df addressed to the user\n",
"#   replying_count - number of distinct user_id values that replied to the user\n",
"# fillna(0) is applied to the whole joined frame, so users with no replies get 0.\n",
"def journalist_reply_summary(reply_df):\n",
"    # Total replies received per replied-to user.\n",
"    received_df = reply_df['reply_to_user_id'].value_counts().rename('reply_to_count').to_frame()\n",
"\n",
"    # Distinct repliers per replied-to user: reduce to unique (target, replier) pairs, then count.\n",
"    unique_pairs_df = reply_df[['reply_to_user_id', 'user_id']].drop_duplicates()\n",
"    repliers_df = unique_pairs_df.groupby('reply_to_user_id').size().to_frame('replying_count')\n",
"    repliers_df.index.name = 'user_id'\n",
"\n",
"    # Join both counts onto the user summary (index-aligned), zero-fill, and rank.\n",
"    summary_df = user_summary_df.join([received_df, repliers_df]).fillna(0)\n",
"    return summary_df.sort_values(['reply_to_count', 'replying_count', 'followers_count'], ascending=False)\n",
"\n",
"# Gender of top journalists replied to by beltway journalists\n",
"def top_journalist_reply_gender_summary(reply_summary_df, replying_count_threshold=0, head=100):\n",
" top_reply_summary_df = reply_summary_df[reply_summary_df.replying_count > replying_count_threshold].head(head)\n",
" return pd.DataFrame({'count': top_reply_summary_df.gender.value_counts(), \n",
" 'percentage': top_reply_summary_df.gender.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'})\n",
"\n",
"# Fields for displaying journalist mention summaries\n",
"journalist_reply_summary_fields = ['screen_name', 'name', 'organization', 'gender', 'followers_count', 'reply_to_count', 'replying_count']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Reply analysis\n",
"*Note that for each of these, the complete list is being written to CSV in the output directory.*\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Replies by gender"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" reply | \n",
" percentage | \n",
"
\n",
" \n",
" gender | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" F | \n",
" 31,831.00 | \n",
" 25.2% | \n",
"
\n",
" \n",
" M | \n",
" 94,423.00 | \n",
" 74.8% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" reply percentage\n",
"gender \n",
"F 31,831.00 25.2%\n",
"M 94,423.00 74.8%"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum the per-user reply column by gender, then add each gender's share of the total\n",
"# as a one-decimal percentage string.\n",
"replies_by_gender_df = user_summary_df.groupby('gender')[['reply']].sum()\n",
"replies_by_gender_df['percentage'] = (\n",
"    replies_by_gender_df['reply']\n",
"    .div(replies_by_gender_df['reply'].sum())\n",
"    .mul(100)\n",
"    .round(1)\n",
"    .astype(str)\n",
"    + '%'\n",
")\n",
"replies_by_gender_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Most prolific repliers"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" tweet_count | \n",
" reply | \n",
" tweets_in_dataset | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 3817401 | \n",
" ericgeller | \n",
" Geller, Eric | \n",
" Politico | \n",
" M | \n",
" 58173 | \n",
" 208763 | \n",
" 9,033.00 | \n",
" 11,432.00 | \n",
"
\n",
" \n",
" 22891564 | \n",
" chrisgeidner | \n",
" Geidner, Chris | \n",
" BuzzFeed | \n",
" M | \n",
" 83316 | \n",
" 205504 | \n",
" 3,917.00 | \n",
" 6,244.00 | \n",
"
\n",
" \n",
" 118130765 | \n",
" dylanlscott | \n",
" Scott, Dylan L. | \n",
" Stat News | \n",
" M | \n",
" 20122 | \n",
" 42497 | \n",
" 2,040.00 | \n",
" 3,960.00 | \n",
"
\n",
" \n",
" 19576571 | \n",
" JaredRizzi | \n",
" Rizzi, Jared | \n",
" Sirius XM Satellite Radio | \n",
" M | \n",
" 13545 | \n",
" 41620 | \n",
" 1,949.00 | \n",
" 5,567.00 | \n",
"
\n",
" \n",
" 275207082 | \n",
" AlexParkerDC | \n",
" Parker, Alexander M. | \n",
" Bloomberg BNA | \n",
" M | \n",
" 3828 | \n",
" 142150 | \n",
" 1,714.00 | \n",
" 3,983.00 | \n",
"
\n",
" \n",
" 63717541 | \n",
" phillyrich1 | \n",
" Weinstein, Richard | \n",
" C–SPAN | \n",
" M | \n",
" 3827 | \n",
" 27341 | \n",
" 1,532.00 | \n",
" 2,261.00 | \n",
"
\n",
" \n",
" 203226736 | \n",
" SharylAttkisson | \n",
" Attkisson, Sharyl | \n",
" Sinclair Broadcast Group | \n",
" F | \n",
" 132973 | \n",
" 24539 | \n",
" 1,458.00 | \n",
" 2,154.00 | \n",
"
\n",
" \n",
" 16812908 | \n",
" crousselle | \n",
" Rousselle, Christine | \n",
" Townhall | \n",
" F | \n",
" 5327 | \n",
" 118713 | \n",
" 1,089.00 | \n",
" 2,351.00 | \n",
"
\n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 148143 | \n",
" 1,040.00 | \n",
" 5,078.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 67526 | \n",
" 1,026.00 | \n",
" 3,066.00 | \n",
"
\n",
" \n",
" 27882000 | \n",
" jamiedupree | \n",
" Dupree, Jamie | \n",
" Cox Broadcasting | \n",
" M | \n",
" 140848 | \n",
" 46181 | \n",
" 993.00 | \n",
" 2,108.00 | \n",
"
\n",
" \n",
" 3372900155 | \n",
" samtayrey | \n",
" Reyes, Samantha | \n",
" CNN | \n",
" F | \n",
" 10344 | \n",
" 4783 | \n",
" 933.00 | \n",
" 1,349.00 | \n",
"
\n",
" \n",
" 132482136 | \n",
" Yaro_RT | \n",
" Yaroshevsky, Alexey | \n",
" RTTV America | \n",
" M | \n",
" 12968 | \n",
" 26795 | \n",
" 910.00 | \n",
" 1,199.00 | \n",
"
\n",
" \n",
" 46955476 | \n",
" GrahamDavidA | \n",
" Graham, David A. | \n",
" The Atlantic | \n",
" M | \n",
" 22112 | \n",
" 93391 | \n",
" 908.00 | \n",
" 1,566.00 | \n",
"
\n",
" \n",
" 16459325 | \n",
" ryanbeckwith | \n",
" Beckwith, Ryan Teague | \n",
" Time Magazine | \n",
" M | \n",
" 20947 | \n",
" 92203 | \n",
" 901.00 | \n",
" 5,187.00 | \n",
"
\n",
" \n",
" 25702314 | \n",
" EricMGarcia | \n",
" Garcia, Eric M. | \n",
" CQ Roll Call | \n",
" M | \n",
" 3094 | \n",
" 44783 | \n",
" 863.00 | \n",
" 3,584.00 | \n",
"
\n",
" \n",
" 12245632 | \n",
" jackshafer | \n",
" Shafer, Jack | \n",
" Politico | \n",
" M | \n",
" 73996 | \n",
" 44726 | \n",
" 861.00 | \n",
" 2,016.00 | \n",
"
\n",
" \n",
" 273540698 | \n",
" MKTWgoldstein | \n",
" Goldstein, Steven | \n",
" MarketWatch | \n",
" M | \n",
" 10185 | \n",
" 41497 | \n",
" 857.00 | \n",
" 1,897.00 | \n",
"
\n",
" \n",
" 19847765 | \n",
" sahilkapur | \n",
" Kapur, Sahil | \n",
" Bloomberg News | \n",
" M | \n",
" 69086 | \n",
" 51628 | \n",
" 853.00 | \n",
" 2,022.00 | \n",
"
\n",
" \n",
" 6904552 | \n",
" juliemason | \n",
" Mason, Julie | \n",
" Sirius XM Satellite Radio | \n",
" F | \n",
" 31276 | \n",
" 29214 | \n",
" 852.00 | \n",
" 1,213.00 | \n",
"
\n",
" \n",
" 225265639 | \n",
" ddale8 | \n",
" Dale, Daniel | \n",
" Toronto Star | \n",
" M | \n",
" 180671 | \n",
" 69807 | \n",
" 848.00 | \n",
" 2,496.00 | \n",
"
\n",
" \n",
" 15837659 | \n",
" jbendery | \n",
" Bendery, Jennifer | \n",
" Huffington Post | \n",
" M | \n",
" 41000 | \n",
" 65406 | \n",
" 844.00 | \n",
" 2,600.00 | \n",
"
\n",
" \n",
" 15146659 | \n",
" JSwiftTWS | \n",
" Swift, James A. | \n",
" Weekly Standard | \n",
" M | \n",
" 5691 | \n",
" 84245 | \n",
" 830.00 | \n",
" 2,612.00 | \n",
"
\n",
" \n",
" 227790723 | \n",
" RichardRubinDC | \n",
" Rubin, Richard | \n",
" Bloomberg News | \n",
" M | \n",
" 13015 | \n",
" 17796 | \n",
" 807.00 | \n",
" 1,312.00 | \n",
"
\n",
" \n",
" 14517538 | \n",
" derekwillis | \n",
" Willis, Derek | \n",
" ProPublica | \n",
" M | \n",
" 18049 | \n",
" 79502 | \n",
" 781.00 | \n",
" 1,811.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"3817401 ericgeller Geller, Eric Politico \n",
"22891564 chrisgeidner Geidner, Chris BuzzFeed \n",
"118130765 dylanlscott Scott, Dylan L. Stat News \n",
"19576571 JaredRizzi Rizzi, Jared Sirius XM Satellite Radio \n",
"275207082 AlexParkerDC Parker, Alexander M. Bloomberg BNA \n",
"63717541 phillyrich1 Weinstein, Richard C–SPAN \n",
"203226736 SharylAttkisson Attkisson, Sharyl Sinclair Broadcast Group \n",
"16812908 crousselle Rousselle, Christine Townhall \n",
"14529929 jaketapper Tapper, Jake CNN \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News \n",
"27882000 jamiedupree Dupree, Jamie Cox Broadcasting \n",
"3372900155 samtayrey Reyes, Samantha CNN \n",
"132482136 Yaro_RT Yaroshevsky, Alexey RTTV America \n",
"46955476 GrahamDavidA Graham, David A. The Atlantic \n",
"16459325 ryanbeckwith Beckwith, Ryan Teague Time Magazine \n",
"25702314 EricMGarcia Garcia, Eric M. CQ Roll Call \n",
"12245632 jackshafer Shafer, Jack Politico \n",
"273540698 MKTWgoldstein Goldstein, Steven MarketWatch \n",
"19847765 sahilkapur Kapur, Sahil Bloomberg News \n",
"6904552 juliemason Mason, Julie Sirius XM Satellite Radio \n",
"225265639 ddale8 Dale, Daniel Toronto Star \n",
"15837659 jbendery Bendery, Jennifer Huffington Post \n",
"15146659 JSwiftTWS Swift, James A. Weekly Standard \n",
"227790723 RichardRubinDC Rubin, Richard Bloomberg News \n",
"14517538 derekwillis Willis, Derek ProPublica \n",
"\n",
" gender followers_count tweet_count reply \\\n",
"user_id \n",
"3817401 M 58173 208763 9,033.00 \n",
"22891564 M 83316 205504 3,917.00 \n",
"118130765 M 20122 42497 2,040.00 \n",
"19576571 M 13545 41620 1,949.00 \n",
"275207082 M 3828 142150 1,714.00 \n",
"63717541 M 3827 27341 1,532.00 \n",
"203226736 F 132973 24539 1,458.00 \n",
"16812908 F 5327 118713 1,089.00 \n",
"14529929 M 1305680 148143 1,040.00 \n",
"46557945 M 55762 67526 1,026.00 \n",
"27882000 M 140848 46181 993.00 \n",
"3372900155 F 10344 4783 933.00 \n",
"132482136 M 12968 26795 910.00 \n",
"46955476 M 22112 93391 908.00 \n",
"16459325 M 20947 92203 901.00 \n",
"25702314 M 3094 44783 863.00 \n",
"12245632 M 73996 44726 861.00 \n",
"273540698 M 10185 41497 857.00 \n",
"19847765 M 69086 51628 853.00 \n",
"6904552 F 31276 29214 852.00 \n",
"225265639 M 180671 69807 848.00 \n",
"15837659 M 41000 65406 844.00 \n",
"15146659 M 5691 84245 830.00 \n",
"227790723 M 13015 17796 807.00 \n",
"14517538 M 18049 79502 781.00 \n",
"\n",
" tweets_in_dataset \n",
"user_id \n",
"3817401 11,432.00 \n",
"22891564 6,244.00 \n",
"118130765 3,960.00 \n",
"19576571 5,567.00 \n",
"275207082 3,983.00 \n",
"63717541 2,261.00 \n",
"203226736 2,154.00 \n",
"16812908 2,351.00 \n",
"14529929 5,078.00 \n",
"46557945 3,066.00 \n",
"27882000 2,108.00 \n",
"3372900155 1,349.00 \n",
"132482136 1,199.00 \n",
"46955476 1,566.00 \n",
"16459325 5,187.00 \n",
"25702314 3,584.00 \n",
"12245632 2,016.00 \n",
"273540698 1,897.00 \n",
"19847765 2,022.00 \n",
"6904552 1,213.00 \n",
"225265639 2,496.00 \n",
"15837659 2,600.00 \n",
"15146659 2,612.00 \n",
"227790723 1,312.00 \n",
"14517538 1,811.00 "
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_summary_df[['screen_name', 'name', 'organization', 'gender', 'followers_count', 'tweet_count', 'reply', 'tweets_in_dataset']].sort_values(['reply'], ascending=False).head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### All replied to by beltway journalists\n",
"This is based on screen name, which could have changed during collection period. However, for the users that would be at the top of this list, seems unlikely."
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" reply_to_count | \n",
" replying_count | \n",
"
\n",
" \n",
" \n",
" \n",
" ericgeller | \n",
" 1980 | \n",
" 75 | \n",
"
\n",
" \n",
" chrisgeidner | \n",
" 1901 | \n",
" 37 | \n",
"
\n",
" \n",
" dylanlscott | \n",
" 1091 | \n",
" 65 | \n",
"
\n",
" \n",
" JaredRizzi | \n",
" 750 | \n",
" 46 | \n",
"
\n",
" \n",
" StevenTDennis | \n",
" 745 | \n",
" 93 | \n",
"
\n",
" \n",
" AlexParkerDC | \n",
" 720 | \n",
" 23 | \n",
"
\n",
" \n",
" sahilkapur | \n",
" 662 | \n",
" 35 | \n",
"
\n",
" \n",
" jseldin | \n",
" 653 | \n",
" 2 | \n",
"
\n",
" \n",
" MEPFuller | \n",
" 522 | \n",
" 92 | \n",
"
\n",
" \n",
" amaxsmith | \n",
" 498 | \n",
" 6 | \n",
"
\n",
" \n",
" ddale8 | \n",
" 495 | \n",
" 20 | \n",
"
\n",
" \n",
" CraigCaplan | \n",
" 388 | \n",
" 8 | \n",
"
\n",
" \n",
" ChuckWendig | \n",
" 372 | \n",
" 1 | \n",
"
\n",
" \n",
" pbump | \n",
" 355 | \n",
" 43 | \n",
"
\n",
" \n",
" kelmej | \n",
" 340 | \n",
" 29 | \n",
"
\n",
" \n",
" benjamin_oc | \n",
" 322 | \n",
" 11 | \n",
"
\n",
" \n",
" KimberlyRobinsn | \n",
" 321 | \n",
" 7 | \n",
"
\n",
" \n",
" darth | \n",
" 315 | \n",
" 32 | \n",
"
\n",
" \n",
" ZoeTillman | \n",
" 311 | \n",
" 8 | \n",
"
\n",
" \n",
" RichardRubinDC | \n",
" 305 | \n",
" 41 | \n",
"
\n",
" \n",
" sdonnan | \n",
" 304 | \n",
" 7 | \n",
"
\n",
" \n",
" AaronMehta | \n",
" 304 | \n",
" 35 | \n",
"
\n",
" \n",
" MikeSacksEsq | \n",
" 299 | \n",
" 18 | \n",
"
\n",
" \n",
" heathdwilliams | \n",
" 298 | \n",
" 1 | \n",
"
\n",
" \n",
" ryanbeckwith | \n",
" 297 | \n",
" 49 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" reply_to_count replying_count\n",
"ericgeller 1980 75\n",
"chrisgeidner 1901 37\n",
"dylanlscott 1091 65\n",
"JaredRizzi 750 46\n",
"StevenTDennis 745 93\n",
"AlexParkerDC 720 23\n",
"sahilkapur 662 35\n",
"jseldin 653 2\n",
"MEPFuller 522 92\n",
"amaxsmith 498 6\n",
"ddale8 495 20\n",
"CraigCaplan 388 8\n",
"ChuckWendig 372 1\n",
"pbump 355 43\n",
"kelmej 340 29\n",
"benjamin_oc 322 11\n",
"KimberlyRobinsn 321 7\n",
"darth 315 32\n",
"ZoeTillman 311 8\n",
"RichardRubinDC 305 41\n",
"sdonnan 304 7\n",
"AaronMehta 304 35\n",
"MikeSacksEsq 299 18\n",
"heathdwilliams 298 1\n",
"ryanbeckwith 297 49"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reply to count\n",
"reply_to_count_screen_name_df = pd.DataFrame(reply_df.reply_to_screen_name.value_counts().rename('reply_to_count'))\n",
"\n",
"# Count of replying users\n",
"reply_to_user_id_per_user_screen_name_df = reply_df[['reply_to_screen_name', 'user_id']].drop_duplicates()\n",
"replying_count_screen_name_df = pd.DataFrame(reply_to_user_id_per_user_screen_name_df.groupby('reply_to_screen_name').size(), columns=['replying_count'])\n",
"replying_count_screen_name_df.index.name = 'screen_name'\n",
"\n",
"all_replied_to_df = reply_to_count_screen_name_df.join(replying_count_screen_name_df)\n",
"all_replied_to_df.to_csv('output/all_replied_to_by_journalists.csv')\n",
"all_replied_to_df.head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### All beltway journalists replied to by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" reply_to_count | \n",
" replying_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 3817401 | \n",
" ericgeller | \n",
" Geller, Eric | \n",
" Politico | \n",
" M | \n",
" 58173 | \n",
" 1,980.00 | \n",
" 75.00 | \n",
"
\n",
" \n",
" 22891564 | \n",
" chrisgeidner | \n",
" Geidner, Chris | \n",
" BuzzFeed | \n",
" M | \n",
" 83316 | \n",
" 1,901.00 | \n",
" 37.00 | \n",
"
\n",
" \n",
" 118130765 | \n",
" dylanlscott | \n",
" Scott, Dylan L. | \n",
" Stat News | \n",
" M | \n",
" 20122 | \n",
" 1,091.00 | \n",
" 65.00 | \n",
"
\n",
" \n",
" 19576571 | \n",
" JaredRizzi | \n",
" Rizzi, Jared | \n",
" Sirius XM Satellite Radio | \n",
" M | \n",
" 13545 | \n",
" 750.00 | \n",
" 46.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 745.00 | \n",
" 93.00 | \n",
"
\n",
" \n",
" 275207082 | \n",
" AlexParkerDC | \n",
" Parker, Alexander M. | \n",
" Bloomberg BNA | \n",
" M | \n",
" 3828 | \n",
" 720.00 | \n",
" 23.00 | \n",
"
\n",
" \n",
" 19847765 | \n",
" sahilkapur | \n",
" Kapur, Sahil | \n",
" Bloomberg News | \n",
" M | \n",
" 69086 | \n",
" 662.00 | \n",
" 35.00 | \n",
"
\n",
" \n",
" 583821006 | \n",
" jseldin | \n",
" Seldin, Jeff | \n",
" Voice of America | \n",
" M | \n",
" 5365 | \n",
" 653.00 | \n",
" 2.00 | \n",
"
\n",
" \n",
" 398088661 | \n",
" MEPFuller | \n",
" Fuller, Matt E. | \n",
" Huffington Post | \n",
" M | \n",
" 77919 | \n",
" 522.00 | \n",
" 92.00 | \n",
"
\n",
" \n",
" 44951698 | \n",
" amaxsmith | \n",
" Smith, Max | \n",
" WTOP Radio | \n",
" M | \n",
" 4726 | \n",
" 498.00 | \n",
" 6.00 | \n",
"
\n",
" \n",
" 225265639 | \n",
" ddale8 | \n",
" Dale, Daniel | \n",
" Toronto Star | \n",
" M | \n",
" 180671 | \n",
" 495.00 | \n",
" 20.00 | \n",
"
\n",
" \n",
" 317980134 | \n",
" CraigCaplan | \n",
" Caplan, Craig | \n",
" C–SPAN | \n",
" M | \n",
" 6143 | \n",
" 388.00 | \n",
" 8.00 | \n",
"
\n",
" \n",
" 16061946 | \n",
" kelmej | \n",
" Mejdrich, Kellie | \n",
" CQ Roll Call | \n",
" F | \n",
" 4146 | \n",
" 340.00 | \n",
" 29.00 | \n",
"
\n",
" \n",
" 15365623 | \n",
" benjamin_oc | \n",
" O’Connell, Benjamin | \n",
" C–SPAN | \n",
" M | \n",
" 1455 | \n",
" 322.00 | \n",
" 11.00 | \n",
"
\n",
" \n",
" 906734342 | \n",
" KimberlyRobinsn | \n",
" Robinson, Kimberly S. | \n",
" Bloomberg BNA | \n",
" F | \n",
" 7170 | \n",
" 321.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 52392666 | \n",
" ZoeTillman | \n",
" Tillman, Zoe | \n",
" BuzzFeed | \n",
" F | \n",
" 15246 | \n",
" 311.00 | \n",
" 8.00 | \n",
"
\n",
" \n",
" 227790723 | \n",
" RichardRubinDC | \n",
" Rubin, Richard | \n",
" Bloomberg News | \n",
" M | \n",
" 13015 | \n",
" 305.00 | \n",
" 41.00 | \n",
"
\n",
" \n",
" 103016675 | \n",
" AaronMehta | \n",
" Mehta, Aaron | \n",
" Sightline Media Group | \n",
" M | \n",
" 11124 | \n",
" 304.00 | \n",
" 35.00 | \n",
"
\n",
" \n",
" 21810329 | \n",
" sdonnan | \n",
" Donnan, Shawn | \n",
" Financial Times | \n",
" M | \n",
" 12311 | \n",
" 304.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 90478926 | \n",
" MikeSacksEsq | \n",
" Sacks, Mike | \n",
" Scripps Howard News Service | \n",
" M | \n",
" 9289 | \n",
" 299.00 | \n",
" 18.00 | \n",
"
\n",
" \n",
" 16459325 | \n",
" ryanbeckwith | \n",
" Beckwith, Ryan Teague | \n",
" Time Magazine | \n",
" M | \n",
" 20947 | \n",
" 297.00 | \n",
" 49.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 283.00 | \n",
" 72.00 | \n",
"
\n",
" \n",
" 11771512 | \n",
" OKnox | \n",
" Knox, Olivier | \n",
" Yahoo News | \n",
" M | \n",
" 44715 | \n",
" 269.00 | \n",
" 45.00 | \n",
"
\n",
" \n",
" 21696279 | \n",
" brianbeutler | \n",
" Beutler, Brian Alfred | \n",
" New Republic | \n",
" M | \n",
" 74435 | \n",
" 269.00 | \n",
" 34.00 | \n",
"
\n",
" \n",
" 21212087 | \n",
" Olivianuzzi | \n",
" Nuzzi, Olivia | \n",
" New York | \n",
" F | \n",
" 136276 | \n",
" 243.00 | \n",
" 25.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name \\\n",
"user_id \n",
"3817401 ericgeller Geller, Eric \n",
"22891564 chrisgeidner Geidner, Chris \n",
"118130765 dylanlscott Scott, Dylan L. \n",
"19576571 JaredRizzi Rizzi, Jared \n",
"46557945 StevenTDennis Dennis, Steven T. \n",
"275207082 AlexParkerDC Parker, Alexander M. \n",
"19847765 sahilkapur Kapur, Sahil \n",
"583821006 jseldin Seldin, Jeff \n",
"398088661 MEPFuller Fuller, Matt E. \n",
"44951698 amaxsmith Smith, Max \n",
"225265639 ddale8 Dale, Daniel \n",
"317980134 CraigCaplan Caplan, Craig \n",
"16061946 kelmej Mejdrich, Kellie \n",
"15365623 benjamin_oc O’Connell, Benjamin \n",
"906734342 KimberlyRobinsn Robinson, Kimberly S. \n",
"52392666 ZoeTillman Tillman, Zoe \n",
"227790723 RichardRubinDC Rubin, Richard \n",
"103016675 AaronMehta Mehta, Aaron \n",
"21810329 sdonnan Donnan, Shawn \n",
"90478926 MikeSacksEsq Sacks, Mike \n",
"16459325 ryanbeckwith Beckwith, Ryan Teague \n",
"21252618 JakeSherman Sherman, Jacob S. \n",
"11771512 OKnox Knox, Olivier \n",
"21696279 brianbeutler Beutler, Brian Alfred \n",
"21212087 Olivianuzzi Nuzzi, Olivia \n",
"\n",
" organization gender followers_count \\\n",
"user_id \n",
"3817401 Politico M 58173 \n",
"22891564 BuzzFeed M 83316 \n",
"118130765 Stat News M 20122 \n",
"19576571 Sirius XM Satellite Radio M 13545 \n",
"46557945 Bloomberg News M 55762 \n",
"275207082 Bloomberg BNA M 3828 \n",
"19847765 Bloomberg News M 69086 \n",
"583821006 Voice of America M 5365 \n",
"398088661 Huffington Post M 77919 \n",
"44951698 WTOP Radio M 4726 \n",
"225265639 Toronto Star M 180671 \n",
"317980134 C–SPAN M 6143 \n",
"16061946 CQ Roll Call F 4146 \n",
"15365623 C–SPAN M 1455 \n",
"906734342 Bloomberg BNA F 7170 \n",
"52392666 BuzzFeed F 15246 \n",
"227790723 Bloomberg News M 13015 \n",
"103016675 Sightline Media Group M 11124 \n",
"21810329 Financial Times M 12311 \n",
"90478926 Scripps Howard News Service M 9289 \n",
"16459325 Time Magazine M 20947 \n",
"21252618 Politico M 81762 \n",
"11771512 Yahoo News M 44715 \n",
"21696279 New Republic M 74435 \n",
"21212087 New York F 136276 \n",
"\n",
" reply_to_count replying_count \n",
"user_id \n",
"3817401 1,980.00 75.00 \n",
"22891564 1,901.00 37.00 \n",
"118130765 1,091.00 65.00 \n",
"19576571 750.00 46.00 \n",
"46557945 745.00 93.00 \n",
"275207082 720.00 23.00 \n",
"19847765 662.00 35.00 \n",
"583821006 653.00 2.00 \n",
"398088661 522.00 92.00 \n",
"44951698 498.00 6.00 \n",
"225265639 495.00 20.00 \n",
"317980134 388.00 8.00 \n",
"16061946 340.00 29.00 \n",
"15365623 322.00 11.00 \n",
"906734342 321.00 7.00 \n",
"52392666 311.00 8.00 \n",
"227790723 305.00 41.00 \n",
"103016675 304.00 35.00 \n",
"21810329 304.00 7.00 \n",
"90478926 299.00 18.00 \n",
"16459325 297.00 49.00 \n",
"21252618 283.00 72.00 \n",
"11771512 269.00 45.00 \n",
"21696279 269.00 34.00 \n",
"21212087 243.00 25.00 "
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_reply_summary_df = journalist_reply_summary(journalists_reply_df)\n",
"journalists_reply_summary_df.to_csv('output/journalists_replied_to_by_journalists.csv')\n",
"journalists_reply_summary_df[journalist_reply_summary_fields].head(25)"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 33178 | \n",
" 76.5% | \n",
"
\n",
" \n",
" F | \n",
" 10212 | \n",
" 23.5% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 33178 76.5%\n",
"F 10212 23.5%"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalist_reply_gender_summary(journalists_reply_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Stats of beltway journalists replied to by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" reply_to_count | \n",
" replying_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 2,292.00 | \n",
" 2,292.00 | \n",
"
\n",
" \n",
" mean | \n",
" 18.93 | \n",
" 3.81 | \n",
"
\n",
" \n",
" std | \n",
" 81.76 | \n",
" 8.41 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.00 | \n",
" 0.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 1.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 8.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" max | \n",
" 1,980.00 | \n",
" 93.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" reply_to_count replying_count\n",
"count 2,292.00 2,292.00\n",
"mean 18.93 3.81\n",
"std 81.76 8.41\n",
"min 0.00 0.00\n",
"25% 0.00 0.00\n",
"50% 1.00 1.00\n",
"75% 8.00 4.00\n",
"max 1,980.00 93.00"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_reply_summary_df[['reply_to_count', 'replying_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Top 100 journalists replied to by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 81 | \n",
" 81.0% | \n",
"
\n",
" \n",
" F | \n",
" 19 | \n",
" 19.0% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 81 81.0%\n",
"F 19 19.0%"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_journalist_reply_gender_summary(journalists_reply_summary_df, replying_count_threshold=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Female beltway journalists replied to by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" reply_to_count | \n",
" replying_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 16061946 | \n",
" kelmej | \n",
" Mejdrich, Kellie | \n",
" CQ Roll Call | \n",
" F | \n",
" 4146 | \n",
" 340.00 | \n",
" 29.00 | \n",
"
\n",
" \n",
" 906734342 | \n",
" KimberlyRobinsn | \n",
" Robinson, Kimberly S. | \n",
" Bloomberg BNA | \n",
" F | \n",
" 7170 | \n",
" 321.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 52392666 | \n",
" ZoeTillman | \n",
" Tillman, Zoe | \n",
" BuzzFeed | \n",
" F | \n",
" 15246 | \n",
" 311.00 | \n",
" 8.00 | \n",
"
\n",
" \n",
" 21212087 | \n",
" Olivianuzzi | \n",
" Nuzzi, Olivia | \n",
" New York | \n",
" F | \n",
" 136276 | \n",
" 243.00 | \n",
" 25.00 | \n",
"
\n",
" \n",
" 83462293 | \n",
" SarahMMimms | \n",
" Mimms, Sarah | \n",
" BuzzFeed | \n",
" F | \n",
" 6216 | \n",
" 236.00 | \n",
" 24.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 233.00 | \n",
" 84.00 | \n",
"
\n",
" \n",
" 3372900155 | \n",
" samtayrey | \n",
" Reyes, Samantha | \n",
" CNN | \n",
" F | \n",
" 10344 | \n",
" 219.00 | \n",
" 18.00 | \n",
"
\n",
" \n",
" 18825339 | \n",
" CahnEmily | \n",
" Cahn, Emily | \n",
" Mic | \n",
" F | \n",
" 16980 | \n",
" 212.00 | \n",
" 48.00 | \n",
"
\n",
" \n",
" 1132012321 | \n",
" DaniellaMicaela | \n",
" Diaz, Daniella | \n",
" CNN | \n",
" F | \n",
" 14612 | \n",
" 181.00 | \n",
" 36.00 | \n",
"
\n",
" \n",
" 158072303 | \n",
" ValerieInsinna | \n",
" Insinna, Valerie | \n",
" Defense News | \n",
" F | \n",
" 4572 | \n",
" 175.00 | \n",
" 20.00 | \n",
"
\n",
" \n",
" 36607254 | \n",
" Oriana0214 | \n",
" Pawlyk, Oriana | \n",
" Military.com | \n",
" F | \n",
" 6397 | \n",
" 174.00 | \n",
" 21.00 | \n",
"
\n",
" \n",
" 96405362 | \n",
" laurenonthehill | \n",
" Camera, Lauren S. | \n",
" U.S. News & World Report | \n",
" F | \n",
" 3396 | \n",
" 162.00 | \n",
" 6.00 | \n",
"
\n",
" \n",
" 16812908 | \n",
" crousselle | \n",
" Rousselle, Christine | \n",
" Townhall | \n",
" F | \n",
" 5327 | \n",
" 149.00 | \n",
" 5.00 | \n",
"
\n",
" \n",
" 47758416 | \n",
" marissaaevans | \n",
" Evans, Marissa | \n",
" Texas Tribune | \n",
" F | \n",
" 6850 | \n",
" 137.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 45399148 | \n",
" jeneps | \n",
" Epstein, Jennifer | \n",
" Bloomberg News | \n",
" F | \n",
" 61242 | \n",
" 134.00 | \n",
" 23.00 | \n",
"
\n",
" \n",
" 16434028 | \n",
" gabbilevy | \n",
" Levy, Gabrielle F. | \n",
" U.S. News & World Report | \n",
" F | \n",
" 2209 | \n",
" 132.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" 14870670 | \n",
" KateNocera | \n",
" Nocera, Kate | \n",
" BuzzFeed | \n",
" F | \n",
" 27714 | \n",
" 116.00 | \n",
" 36.00 | \n",
"
\n",
" \n",
" 18501487 | \n",
" leighmunsil | \n",
" Munsil, Leigh | \n",
" CNN | \n",
" F | \n",
" 11059 | \n",
" 107.00 | \n",
" 30.00 | \n",
"
\n",
" \n",
" 313545488 | \n",
" LauraLitvan | \n",
" Litvan, Laura | \n",
" Bloomberg News | \n",
" F | \n",
" 4468 | \n",
" 104.00 | \n",
" 12.00 | \n",
"
\n",
" \n",
" 116341480 | \n",
" RosieGray | \n",
" Gray, Rosie | \n",
" The Atlantic | \n",
" F | \n",
" 96935 | \n",
" 99.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
" 82151660 | \n",
" kelsey_snell | \n",
" Snell, Kelse | \n",
" Washington Post | \n",
" F | \n",
" 8108 | \n",
" 96.00 | \n",
" 44.00 | \n",
"
\n",
" \n",
" 70511174 | \n",
" Hadas_Gold | \n",
" Gold, Hadas | \n",
" Politico | \n",
" F | \n",
" 45221 | \n",
" 95.00 | \n",
" 47.00 | \n",
"
\n",
" \n",
" 38855868 | \n",
" brennawilliams | \n",
" Williams, Brenna | \n",
" CNN | \n",
" F | \n",
" 7299 | \n",
" 93.00 | \n",
" 22.00 | \n",
"
\n",
" \n",
" 273700859 | \n",
" kpolantz | \n",
" Polantz, Katelyn J. | \n",
" National Law Journal | \n",
" F | \n",
" 2483 | \n",
" 91.00 | \n",
" 6.00 | \n",
"
\n",
" \n",
" 3273220608 | \n",
" KatherineBScott | \n",
" Scott, Katherine | \n",
" Bloomberg Government | \n",
" F | \n",
" 1841 | \n",
" 85.00 | \n",
" 14.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"16061946 kelmej Mejdrich, Kellie CQ Roll Call \n",
"906734342 KimberlyRobinsn Robinson, Kimberly S. Bloomberg BNA \n",
"52392666 ZoeTillman Tillman, Zoe BuzzFeed \n",
"21212087 Olivianuzzi Nuzzi, Olivia New York \n",
"83462293 SarahMMimms Mimms, Sarah BuzzFeed \n",
"19186003 seungminkim Kim, Seung Min Politico \n",
"3372900155 samtayrey Reyes, Samantha CNN \n",
"18825339 CahnEmily Cahn, Emily Mic \n",
"1132012321 DaniellaMicaela Diaz, Daniella CNN \n",
"158072303 ValerieInsinna Insinna, Valerie Defense News \n",
"36607254 Oriana0214 Pawlyk, Oriana Military.com \n",
"96405362 laurenonthehill Camera, Lauren S. U.S. News & World Report \n",
"16812908 crousselle Rousselle, Christine Townhall \n",
"47758416 marissaaevans Evans, Marissa Texas Tribune \n",
"45399148 jeneps Epstein, Jennifer Bloomberg News \n",
"16434028 gabbilevy Levy, Gabrielle F. U.S. News & World Report \n",
"14870670 KateNocera Nocera, Kate BuzzFeed \n",
"18501487 leighmunsil Munsil, Leigh CNN \n",
"313545488 LauraLitvan Litvan, Laura Bloomberg News \n",
"116341480 RosieGray Gray, Rosie The Atlantic \n",
"82151660 kelsey_snell Snell, Kelse Washington Post \n",
"70511174 Hadas_Gold Gold, Hadas Politico \n",
"38855868 brennawilliams Williams, Brenna CNN \n",
"273700859 kpolantz Polantz, Katelyn J. National Law Journal \n",
"3273220608 KatherineBScott Scott, Katherine Bloomberg Government \n",
"\n",
" gender followers_count reply_to_count replying_count \n",
"user_id \n",
"16061946 F 4146 340.00 29.00 \n",
"906734342 F 7170 321.00 7.00 \n",
"52392666 F 15246 311.00 8.00 \n",
"21212087 F 136276 243.00 25.00 \n",
"83462293 F 6216 236.00 24.00 \n",
"19186003 F 33980 233.00 84.00 \n",
"3372900155 F 10344 219.00 18.00 \n",
"18825339 F 16980 212.00 48.00 \n",
"1132012321 F 14612 181.00 36.00 \n",
"158072303 F 4572 175.00 20.00 \n",
"36607254 F 6397 174.00 21.00 \n",
"96405362 F 3396 162.00 6.00 \n",
"16812908 F 5327 149.00 5.00 \n",
"47758416 F 6850 137.00 1.00 \n",
"45399148 F 61242 134.00 23.00 \n",
"16434028 F 2209 132.00 4.00 \n",
"14870670 F 27714 116.00 36.00 \n",
"18501487 F 11059 107.00 30.00 \n",
"313545488 F 4468 104.00 12.00 \n",
"116341480 F 96935 99.00 31.00 \n",
"82151660 F 8108 96.00 44.00 \n",
"70511174 F 45221 95.00 47.00 \n",
"38855868 F 7299 93.00 22.00 \n",
"273700859 F 2483 91.00 6.00 \n",
"3273220608 F 1841 85.00 14.00 "
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"female_journalists_reply_summary_df = journalists_reply_summary_df[journalists_reply_summary_df.gender == 'F']\n",
"female_journalists_reply_summary_df.to_csv('output/female_journalists_replied_to_by_journalists.csv')\n",
"female_journalists_reply_summary_df[journalist_reply_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Stats of female beltway journalists replied to by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" reply_to_count | \n",
" replying_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 993.00 | \n",
" 993.00 | \n",
"
\n",
" \n",
" mean | \n",
" 10.28 | \n",
" 2.95 | \n",
"
\n",
" \n",
" std | \n",
" 31.00 | \n",
" 6.33 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.00 | \n",
" 0.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 1.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 6.00 | \n",
" 3.00 | \n",
"
\n",
" \n",
" max | \n",
" 340.00 | \n",
" 84.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" reply_to_count replying_count\n",
"count 993.00 993.00\n",
"mean 10.28 2.95\n",
"std 31.00 6.33\n",
"min 0.00 0.00\n",
"25% 0.00 0.00\n",
"50% 1.00 1.00\n",
"75% 6.00 3.00\n",
"max 340.00 84.00"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"female_journalists_reply_summary_df[['reply_to_count', 'replying_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Male beltway journalists replied to by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" reply_to_count | \n",
" replying_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 3817401 | \n",
" ericgeller | \n",
" Geller, Eric | \n",
" Politico | \n",
" M | \n",
" 58173 | \n",
" 1,980.00 | \n",
" 75.00 | \n",
"
\n",
" \n",
" 22891564 | \n",
" chrisgeidner | \n",
" Geidner, Chris | \n",
" BuzzFeed | \n",
" M | \n",
" 83316 | \n",
" 1,901.00 | \n",
" 37.00 | \n",
"
\n",
" \n",
" 118130765 | \n",
" dylanlscott | \n",
" Scott, Dylan L. | \n",
" Stat News | \n",
" M | \n",
" 20122 | \n",
" 1,091.00 | \n",
" 65.00 | \n",
"
\n",
" \n",
" 19576571 | \n",
" JaredRizzi | \n",
" Rizzi, Jared | \n",
" Sirius XM Satellite Radio | \n",
" M | \n",
" 13545 | \n",
" 750.00 | \n",
" 46.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 745.00 | \n",
" 93.00 | \n",
"
\n",
" \n",
" 275207082 | \n",
" AlexParkerDC | \n",
" Parker, Alexander M. | \n",
" Bloomberg BNA | \n",
" M | \n",
" 3828 | \n",
" 720.00 | \n",
" 23.00 | \n",
"
\n",
" \n",
" 19847765 | \n",
" sahilkapur | \n",
" Kapur, Sahil | \n",
" Bloomberg News | \n",
" M | \n",
" 69086 | \n",
" 662.00 | \n",
" 35.00 | \n",
"
\n",
" \n",
" 583821006 | \n",
" jseldin | \n",
" Seldin, Jeff | \n",
" Voice of America | \n",
" M | \n",
" 5365 | \n",
" 653.00 | \n",
" 2.00 | \n",
"
\n",
" \n",
" 398088661 | \n",
" MEPFuller | \n",
" Fuller, Matt E. | \n",
" Huffington Post | \n",
" M | \n",
" 77919 | \n",
" 522.00 | \n",
" 92.00 | \n",
"
\n",
" \n",
" 44951698 | \n",
" amaxsmith | \n",
" Smith, Max | \n",
" WTOP Radio | \n",
" M | \n",
" 4726 | \n",
" 498.00 | \n",
" 6.00 | \n",
"
\n",
" \n",
" 225265639 | \n",
" ddale8 | \n",
" Dale, Daniel | \n",
" Toronto Star | \n",
" M | \n",
" 180671 | \n",
" 495.00 | \n",
" 20.00 | \n",
"
\n",
" \n",
" 317980134 | \n",
" CraigCaplan | \n",
" Caplan, Craig | \n",
" C–SPAN | \n",
" M | \n",
" 6143 | \n",
" 388.00 | \n",
" 8.00 | \n",
"
\n",
" \n",
" 15365623 | \n",
" benjamin_oc | \n",
" O’Connell, Benjamin | \n",
" C–SPAN | \n",
" M | \n",
" 1455 | \n",
" 322.00 | \n",
" 11.00 | \n",
"
\n",
" \n",
" 227790723 | \n",
" RichardRubinDC | \n",
" Rubin, Richard | \n",
" Bloomberg News | \n",
" M | \n",
" 13015 | \n",
" 305.00 | \n",
" 41.00 | \n",
"
\n",
" \n",
" 103016675 | \n",
" AaronMehta | \n",
" Mehta, Aaron | \n",
" Sightline Media Group | \n",
" M | \n",
" 11124 | \n",
" 304.00 | \n",
" 35.00 | \n",
"
\n",
" \n",
" 21810329 | \n",
" sdonnan | \n",
" Donnan, Shawn | \n",
" Financial Times | \n",
" M | \n",
" 12311 | \n",
" 304.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 90478926 | \n",
" MikeSacksEsq | \n",
" Sacks, Mike | \n",
" Scripps Howard News Service | \n",
" M | \n",
" 9289 | \n",
" 299.00 | \n",
" 18.00 | \n",
"
\n",
" \n",
" 16459325 | \n",
" ryanbeckwith | \n",
" Beckwith, Ryan Teague | \n",
" Time Magazine | \n",
" M | \n",
" 20947 | \n",
" 297.00 | \n",
" 49.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 283.00 | \n",
" 72.00 | \n",
"
\n",
" \n",
" 11771512 | \n",
" OKnox | \n",
" Knox, Olivier | \n",
" Yahoo News | \n",
" M | \n",
" 44715 | \n",
" 269.00 | \n",
" 45.00 | \n",
"
\n",
" \n",
" 21696279 | \n",
" brianbeutler | \n",
" Beutler, Brian Alfred | \n",
" New Republic | \n",
" M | \n",
" 74435 | \n",
" 269.00 | \n",
" 34.00 | \n",
"
\n",
" \n",
" 190360266 | \n",
" connorobrienNH | \n",
" O’Brien, Connor | \n",
" Politico | \n",
" M | \n",
" 6158 | \n",
" 241.00 | \n",
" 35.00 | \n",
"
\n",
" \n",
" 63717541 | \n",
" phillyrich1 | \n",
" Weinstein, Richard | \n",
" C–SPAN | \n",
" M | \n",
" 3827 | \n",
" 241.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 238.00 | \n",
" 79.00 | \n",
"
\n",
" \n",
" 80111587 | \n",
" JeffYoung | \n",
" Young, Jeffrey | \n",
" Huffington Post | \n",
" M | \n",
" 26497 | \n",
" 238.00 | \n",
" 31.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"3817401 ericgeller Geller, Eric Politico \n",
"22891564 chrisgeidner Geidner, Chris BuzzFeed \n",
"118130765 dylanlscott Scott, Dylan L. Stat News \n",
"19576571 JaredRizzi Rizzi, Jared Sirius XM Satellite Radio \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News \n",
"275207082 AlexParkerDC Parker, Alexander M. Bloomberg BNA \n",
"19847765 sahilkapur Kapur, Sahil Bloomberg News \n",
"583821006 jseldin Seldin, Jeff Voice of America \n",
"398088661 MEPFuller Fuller, Matt E. Huffington Post \n",
"44951698 amaxsmith Smith, Max WTOP Radio \n",
"225265639 ddale8 Dale, Daniel Toronto Star \n",
"317980134 CraigCaplan Caplan, Craig C–SPAN \n",
"15365623 benjamin_oc O’Connell, Benjamin C–SPAN \n",
"227790723 RichardRubinDC Rubin, Richard Bloomberg News \n",
"103016675 AaronMehta Mehta, Aaron Sightline Media Group \n",
"21810329 sdonnan Donnan, Shawn Financial Times \n",
"90478926 MikeSacksEsq Sacks, Mike Scripps Howard News Service \n",
"16459325 ryanbeckwith Beckwith, Ryan Teague Time Magazine \n",
"21252618 JakeSherman Sherman, Jacob S. Politico \n",
"11771512 OKnox Knox, Olivier Yahoo News \n",
"21696279 brianbeutler Beutler, Brian Alfred New Republic \n",
"190360266 connorobrienNH O’Brien, Connor Politico \n",
"63717541 phillyrich1 Weinstein, Richard C–SPAN \n",
"407013776 burgessev Everett, John B. Politico \n",
"80111587 JeffYoung Young, Jeffrey Huffington Post \n",
"\n",
" gender followers_count reply_to_count replying_count \n",
"user_id \n",
"3817401 M 58173 1,980.00 75.00 \n",
"22891564 M 83316 1,901.00 37.00 \n",
"118130765 M 20122 1,091.00 65.00 \n",
"19576571 M 13545 750.00 46.00 \n",
"46557945 M 55762 745.00 93.00 \n",
"275207082 M 3828 720.00 23.00 \n",
"19847765 M 69086 662.00 35.00 \n",
"583821006 M 5365 653.00 2.00 \n",
"398088661 M 77919 522.00 92.00 \n",
"44951698 M 4726 498.00 6.00 \n",
"225265639 M 180671 495.00 20.00 \n",
"317980134 M 6143 388.00 8.00 \n",
"15365623 M 1455 322.00 11.00 \n",
"227790723 M 13015 305.00 41.00 \n",
"103016675 M 11124 304.00 35.00 \n",
"21810329 M 12311 304.00 7.00 \n",
"90478926 M 9289 299.00 18.00 \n",
"16459325 M 20947 297.00 49.00 \n",
"21252618 M 81762 283.00 72.00 \n",
"11771512 M 44715 269.00 45.00 \n",
"21696279 M 74435 269.00 34.00 \n",
"190360266 M 6158 241.00 35.00 \n",
"63717541 M 3827 241.00 4.00 \n",
"407013776 M 31010 238.00 79.00 \n",
"80111587 M 26497 238.00 31.00 "
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Restrict the journalist reply summary to rows whose gender is 'M', export that subset to CSV,\n",
"# then display its first 25 rows using the shared summary columns.\n",
"male_journalists_reply_summary_df = journalists_reply_summary_df.loc[journalists_reply_summary_df.gender.eq('M')]\n",
"male_journalists_reply_summary_df.to_csv('output/male_journalists_replied_to_by_journalists.csv')\n",
"male_journalists_reply_summary_df.loc[:, journalist_reply_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Stats of male beltway journalists replied to by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" reply_to_count | \n",
" replying_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1,299.00 | \n",
" 1,299.00 | \n",
"
\n",
" \n",
" mean | \n",
" 25.54 | \n",
" 4.46 | \n",
"
\n",
" \n",
" std | \n",
" 104.71 | \n",
" 9.66 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.00 | \n",
" 0.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 1.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 11.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" max | \n",
" 1,980.00 | \n",
" 93.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" reply_to_count replying_count\n",
"count 1,299.00 1,299.00\n",
"mean 25.54 4.46\n",
"std 104.71 9.66\n",
"min 0.00 0.00\n",
"25% 0.00 0.00\n",
"50% 1.00 1.00\n",
"75% 11.00 4.00\n",
"max 1,980.00 93.00"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"male_journalists_reply_summary_df[['reply_to_count', 'replying_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Beltway journalists replied to by female beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" reply_to_count | \n",
" replying_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 906734342 | \n",
" KimberlyRobinsn | \n",
" Robinson, Kimberly S. | \n",
" Bloomberg BNA | \n",
" F | \n",
" 7170 | \n",
" 313.00 | \n",
" 2.00 | \n",
"
\n",
" \n",
" 52392666 | \n",
" ZoeTillman | \n",
" Tillman, Zoe | \n",
" BuzzFeed | \n",
" F | \n",
" 15246 | \n",
" 305.00 | \n",
" 3.00 | \n",
"
\n",
" \n",
" 16061946 | \n",
" kelmej | \n",
" Mejdrich, Kellie | \n",
" CQ Roll Call | \n",
" F | \n",
" 4146 | \n",
" 295.00 | \n",
" 15.00 | \n",
"
\n",
" \n",
" 83462293 | \n",
" SarahMMimms | \n",
" Mimms, Sarah | \n",
" BuzzFeed | \n",
" F | \n",
" 6216 | \n",
" 195.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 21212087 | \n",
" Olivianuzzi | \n",
" Nuzzi, Olivia | \n",
" New York | \n",
" F | \n",
" 136276 | \n",
" 190.00 | \n",
" 9.00 | \n",
"
\n",
" \n",
" 3372900155 | \n",
" samtayrey | \n",
" Reyes, Samantha | \n",
" CNN | \n",
" F | \n",
" 10344 | \n",
" 179.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 96405362 | \n",
" laurenonthehill | \n",
" Camera, Lauren S. | \n",
" U.S. News & World Report | \n",
" F | \n",
" 3396 | \n",
" 159.00 | \n",
" 5.00 | \n",
"
\n",
" \n",
" 18825339 | \n",
" CahnEmily | \n",
" Cahn, Emily | \n",
" Mic | \n",
" F | \n",
" 16980 | \n",
" 148.00 | \n",
" 18.00 | \n",
"
\n",
" \n",
" 1132012321 | \n",
" DaniellaMicaela | \n",
" Diaz, Daniella | \n",
" CNN | \n",
" F | \n",
" 14612 | \n",
" 144.00 | \n",
" 22.00 | \n",
"
\n",
" \n",
" 16812908 | \n",
" crousselle | \n",
" Rousselle, Christine | \n",
" Townhall | \n",
" F | \n",
" 5327 | \n",
" 144.00 | \n",
" 3.00 | \n",
"
\n",
" \n",
" 47758416 | \n",
" marissaaevans | \n",
" Evans, Marissa | \n",
" Texas Tribune | \n",
" F | \n",
" 6850 | \n",
" 137.00 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 36607254 | \n",
" Oriana0214 | \n",
" Pawlyk, Oriana | \n",
" Military.com | \n",
" F | \n",
" 6397 | \n",
" 133.00 | \n",
" 5.00 | \n",
"
\n",
" \n",
" 16434028 | \n",
" gabbilevy | \n",
" Levy, Gabrielle F. | \n",
" U.S. News & World Report | \n",
" F | \n",
" 2209 | \n",
" 130.00 | \n",
" 2.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 108.00 | \n",
" 36.00 | \n",
"
\n",
" \n",
" 45399148 | \n",
" jeneps | \n",
" Epstein, Jennifer | \n",
" Bloomberg News | \n",
" F | \n",
" 61242 | \n",
" 103.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
" 158072303 | \n",
" ValerieInsinna | \n",
" Insinna, Valerie | \n",
" Defense News | \n",
" F | \n",
" 4572 | \n",
" 97.00 | \n",
" 8.00 | \n",
"
\n",
" \n",
" 313545488 | \n",
" LauraLitvan | \n",
" Litvan, Laura | \n",
" Bloomberg News | \n",
" F | \n",
" 4468 | \n",
" 97.00 | \n",
" 5.00 | \n",
"
\n",
" \n",
" 18501487 | \n",
" leighmunsil | \n",
" Munsil, Leigh | \n",
" CNN | \n",
" F | \n",
" 11059 | \n",
" 88.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 273700859 | \n",
" kpolantz | \n",
" Polantz, Katelyn J. | \n",
" National Law Journal | \n",
" F | \n",
" 2483 | \n",
" 84.00 | \n",
" 2.00 | \n",
"
\n",
" \n",
" 114670081 | \n",
" rebleber | \n",
" Leber, Rebecca J. | \n",
" Mother Jones | \n",
" F | \n",
" 16467 | \n",
" 79.00 | \n",
" 3.00 | \n",
"
\n",
" \n",
" 407013776 | \n",
" burgessev | \n",
" Everett, John B. | \n",
" Politico | \n",
" M | \n",
" 31010 | \n",
" 78.00 | \n",
" 30.00 | \n",
"
\n",
" \n",
" 118130765 | \n",
" dylanlscott | \n",
" Scott, Dylan L. | \n",
" Stat News | \n",
" M | \n",
" 20122 | \n",
" 78.00 | \n",
" 20.00 | \n",
"
\n",
" \n",
" 116341480 | \n",
" RosieGray | \n",
" Gray, Rosie | \n",
" The Atlantic | \n",
" F | \n",
" 96935 | \n",
" 73.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 103016675 | \n",
" AaronMehta | \n",
" Mehta, Aaron | \n",
" Sightline Media Group | \n",
" M | \n",
" 11124 | \n",
" 72.00 | \n",
" 10.00 | \n",
"
\n",
" \n",
" 48038024 | \n",
" karentravers | \n",
" Travers, Karen | \n",
" ABC News | \n",
" F | \n",
" 17155 | \n",
" 71.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"906734342 KimberlyRobinsn Robinson, Kimberly S. Bloomberg BNA \n",
"52392666 ZoeTillman Tillman, Zoe BuzzFeed \n",
"16061946 kelmej Mejdrich, Kellie CQ Roll Call \n",
"83462293 SarahMMimms Mimms, Sarah BuzzFeed \n",
"21212087 Olivianuzzi Nuzzi, Olivia New York \n",
"3372900155 samtayrey Reyes, Samantha CNN \n",
"96405362 laurenonthehill Camera, Lauren S. U.S. News & World Report \n",
"18825339 CahnEmily Cahn, Emily Mic \n",
"1132012321 DaniellaMicaela Diaz, Daniella CNN \n",
"16812908 crousselle Rousselle, Christine Townhall \n",
"47758416 marissaaevans Evans, Marissa Texas Tribune \n",
"36607254 Oriana0214 Pawlyk, Oriana Military.com \n",
"16434028 gabbilevy Levy, Gabrielle F. U.S. News & World Report \n",
"19186003 seungminkim Kim, Seung Min Politico \n",
"45399148 jeneps Epstein, Jennifer Bloomberg News \n",
"158072303 ValerieInsinna Insinna, Valerie Defense News \n",
"313545488 LauraLitvan Litvan, Laura Bloomberg News \n",
"18501487 leighmunsil Munsil, Leigh CNN \n",
"273700859 kpolantz Polantz, Katelyn J. National Law Journal \n",
"114670081 rebleber Leber, Rebecca J. Mother Jones \n",
"407013776 burgessev Everett, John B. Politico \n",
"118130765 dylanlscott Scott, Dylan L. Stat News \n",
"116341480 RosieGray Gray, Rosie The Atlantic \n",
"103016675 AaronMehta Mehta, Aaron Sightline Media Group \n",
"48038024 karentravers Travers, Karen ABC News \n",
"\n",
" gender followers_count reply_to_count replying_count \n",
"user_id \n",
"906734342 F 7170 313.00 2.00 \n",
"52392666 F 15246 305.00 3.00 \n",
"16061946 F 4146 295.00 15.00 \n",
"83462293 F 6216 195.00 7.00 \n",
"21212087 F 136276 190.00 9.00 \n",
"3372900155 F 10344 179.00 7.00 \n",
"96405362 F 3396 159.00 5.00 \n",
"18825339 F 16980 148.00 18.00 \n",
"1132012321 F 14612 144.00 22.00 \n",
"16812908 F 5327 144.00 3.00 \n",
"47758416 F 6850 137.00 1.00 \n",
"36607254 F 6397 133.00 5.00 \n",
"16434028 F 2209 130.00 2.00 \n",
"19186003 F 33980 108.00 36.00 \n",
"45399148 F 61242 103.00 7.00 \n",
"158072303 F 4572 97.00 8.00 \n",
"313545488 F 4468 97.00 5.00 \n",
"18501487 F 11059 88.00 13.00 \n",
"273700859 F 2483 84.00 2.00 \n",
"114670081 F 16467 79.00 3.00 \n",
"407013776 M 31010 78.00 30.00 \n",
"118130765 M 20122 78.00 20.00 \n",
"116341480 F 96935 73.00 13.00 \n",
"103016675 M 11124 72.00 10.00 \n",
"48038024 F 17155 71.00 7.00 "
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summarize only the replies whose gender column is 'F' (per the section heading, replies made by\n",
"# female journalists), export the summary to CSV, then display its first 25 rows.\n",
"journalists_replied_to_by_female_summary_df = journalist_reply_summary(\n",
"    journalists_reply_df.loc[journalists_reply_df.gender.eq('F')]\n",
")\n",
"journalists_replied_to_by_female_summary_df.to_csv('output/journalists_replied_to_by_female_journalists.csv')\n",
"journalists_replied_to_by_female_summary_df.loc[:, journalist_reply_summary_fields].head(25)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" F | \n",
" 7412 | \n",
" 72.1% | \n",
"
\n",
" \n",
" M | \n",
" 2864 | \n",
" 27.9% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"F 7412 72.1%\n",
"M 2864 27.9%"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalist_reply_gender_summary(journalists_reply_df[journalists_reply_df.gender == 'F'])\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Top 100 journalists replied to by female beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" F | \n",
" 75 | \n",
" 75.0% | \n",
"
\n",
" \n",
" M | \n",
" 25 | \n",
" 25.0% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"F 75 75.0%\n",
"M 25 25.0%"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_journalist_reply_gender_summary(journalists_replied_to_by_female_summary_df, replying_count_threshold=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Beltway journalists replied to by male beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" reply_to_count | \n",
" replying_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 3817401 | \n",
" ericgeller | \n",
" Geller, Eric | \n",
" Politico | \n",
" M | \n",
" 58173 | \n",
" 1,926.00 | \n",
" 58.00 | \n",
"
\n",
" \n",
" 22891564 | \n",
" chrisgeidner | \n",
" Geidner, Chris | \n",
" BuzzFeed | \n",
" M | \n",
" 83316 | \n",
" 1,864.00 | \n",
" 28.00 | \n",
"
\n",
" \n",
" 118130765 | \n",
" dylanlscott | \n",
" Scott, Dylan L. | \n",
" Stat News | \n",
" M | \n",
" 20122 | \n",
" 1,013.00 | \n",
" 45.00 | \n",
"
\n",
" \n",
" 19576571 | \n",
" JaredRizzi | \n",
" Rizzi, Jared | \n",
" Sirius XM Satellite Radio | \n",
" M | \n",
" 13545 | \n",
" 726.00 | \n",
" 35.00 | \n",
"
\n",
" \n",
" 275207082 | \n",
" AlexParkerDC | \n",
" Parker, Alexander M. | \n",
" Bloomberg BNA | \n",
" M | \n",
" 3828 | \n",
" 709.00 | \n",
" 20.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 686.00 | \n",
" 61.00 | \n",
"
\n",
" \n",
" 583821006 | \n",
" jseldin | \n",
" Seldin, Jeff | \n",
" Voice of America | \n",
" M | \n",
" 5365 | \n",
" 653.00 | \n",
" 2.00 | \n",
"
\n",
" \n",
" 19847765 | \n",
" sahilkapur | \n",
" Kapur, Sahil | \n",
" Bloomberg News | \n",
" M | \n",
" 69086 | \n",
" 646.00 | \n",
" 24.00 | \n",
"
\n",
" \n",
" 44951698 | \n",
" amaxsmith | \n",
" Smith, Max | \n",
" WTOP Radio | \n",
" M | \n",
" 4726 | \n",
" 495.00 | \n",
" 4.00 | \n",
"
\n",
" \n",
" 225265639 | \n",
" ddale8 | \n",
" Dale, Daniel | \n",
" Toronto Star | \n",
" M | \n",
" 180671 | \n",
" 490.00 | \n",
" 16.00 | \n",
"
\n",
" \n",
" 398088661 | \n",
" MEPFuller | \n",
" Fuller, Matt E. | \n",
" Huffington Post | \n",
" M | \n",
" 77919 | \n",
" 456.00 | \n",
" 64.00 | \n",
"
\n",
" \n",
" 317980134 | \n",
" CraigCaplan | \n",
" Caplan, Craig | \n",
" C–SPAN | \n",
" M | \n",
" 6143 | \n",
" 388.00 | \n",
" 8.00 | \n",
"
\n",
" \n",
" 15365623 | \n",
" benjamin_oc | \n",
" O’Connell, Benjamin | \n",
" C–SPAN | \n",
" M | \n",
" 1455 | \n",
" 318.00 | \n",
" 8.00 | \n",
"
\n",
" \n",
" 21810329 | \n",
" sdonnan | \n",
" Donnan, Shawn | \n",
" Financial Times | \n",
" M | \n",
" 12311 | \n",
" 303.00 | \n",
" 6.00 | \n",
"
\n",
" \n",
" 90478926 | \n",
" MikeSacksEsq | \n",
" Sacks, Mike | \n",
" Scripps Howard News Service | \n",
" M | \n",
" 9289 | \n",
" 294.00 | \n",
" 13.00 | \n",
"
\n",
" \n",
" 227790723 | \n",
" RichardRubinDC | \n",
" Rubin, Richard | \n",
" Bloomberg News | \n",
" M | \n",
" 13015 | \n",
" 284.00 | \n",
" 33.00 | \n",
"
\n",
" \n",
" 21696279 | \n",
" brianbeutler | \n",
" Beutler, Brian Alfred | \n",
" New Republic | \n",
" M | \n",
" 74435 | \n",
" 262.00 | \n",
" 29.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 249.00 | \n",
" 52.00 | \n",
"
\n",
" \n",
" 16459325 | \n",
" ryanbeckwith | \n",
" Beckwith, Ryan Teague | \n",
" Time Magazine | \n",
" M | \n",
" 20947 | \n",
" 241.00 | \n",
" 30.00 | \n",
"
\n",
" \n",
" 11771512 | \n",
" OKnox | \n",
" Knox, Olivier | \n",
" Yahoo News | \n",
" M | \n",
" 44715 | \n",
" 240.00 | \n",
" 35.00 | \n",
"
\n",
" \n",
" 63717541 | \n",
" phillyrich1 | \n",
" Weinstein, Richard | \n",
" C–SPAN | \n",
" M | \n",
" 3827 | \n",
" 240.00 | \n",
" 3.00 | \n",
"
\n",
" \n",
" 103016675 | \n",
" AaronMehta | \n",
" Mehta, Aaron | \n",
" Sightline Media Group | \n",
" M | \n",
" 11124 | \n",
" 232.00 | \n",
" 25.00 | \n",
"
\n",
" \n",
" 26559241 | \n",
" fordm | \n",
" Ford, Matt S. | \n",
" The Atlantic | \n",
" M | \n",
" 27571 | \n",
" 232.00 | \n",
" 15.00 | \n",
"
\n",
" \n",
" 437019753 | \n",
" TimothyNoah1 | \n",
" Noah, Timothy R. | \n",
" Politico | \n",
" M | \n",
" 15090 | \n",
" 231.00 | \n",
" 12.00 | \n",
"
\n",
" \n",
" 23332846 | \n",
" mattzap | \n",
" Zapotosky, Matt | \n",
" Washington Post | \n",
" M | \n",
" 56887 | \n",
" 230.00 | \n",
" 7.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"3817401 ericgeller Geller, Eric Politico \n",
"22891564 chrisgeidner Geidner, Chris BuzzFeed \n",
"118130765 dylanlscott Scott, Dylan L. Stat News \n",
"19576571 JaredRizzi Rizzi, Jared Sirius XM Satellite Radio \n",
"275207082 AlexParkerDC Parker, Alexander M. Bloomberg BNA \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News \n",
"583821006 jseldin Seldin, Jeff Voice of America \n",
"19847765 sahilkapur Kapur, Sahil Bloomberg News \n",
"44951698 amaxsmith Smith, Max WTOP Radio \n",
"225265639 ddale8 Dale, Daniel Toronto Star \n",
"398088661 MEPFuller Fuller, Matt E. Huffington Post \n",
"317980134 CraigCaplan Caplan, Craig C–SPAN \n",
"15365623 benjamin_oc O’Connell, Benjamin C–SPAN \n",
"21810329 sdonnan Donnan, Shawn Financial Times \n",
"90478926 MikeSacksEsq Sacks, Mike Scripps Howard News Service \n",
"227790723 RichardRubinDC Rubin, Richard Bloomberg News \n",
"21696279 brianbeutler Beutler, Brian Alfred New Republic \n",
"21252618 JakeSherman Sherman, Jacob S. Politico \n",
"16459325 ryanbeckwith Beckwith, Ryan Teague Time Magazine \n",
"11771512 OKnox Knox, Olivier Yahoo News \n",
"63717541 phillyrich1 Weinstein, Richard C–SPAN \n",
"103016675 AaronMehta Mehta, Aaron Sightline Media Group \n",
"26559241 fordm Ford, Matt S. The Atlantic \n",
"437019753 TimothyNoah1 Noah, Timothy R. Politico \n",
"23332846 mattzap Zapotosky, Matt Washington Post \n",
"\n",
" gender followers_count reply_to_count replying_count \n",
"user_id \n",
"3817401 M 58173 1,926.00 58.00 \n",
"22891564 M 83316 1,864.00 28.00 \n",
"118130765 M 20122 1,013.00 45.00 \n",
"19576571 M 13545 726.00 35.00 \n",
"275207082 M 3828 709.00 20.00 \n",
"46557945 M 55762 686.00 61.00 \n",
"583821006 M 5365 653.00 2.00 \n",
"19847765 M 69086 646.00 24.00 \n",
"44951698 M 4726 495.00 4.00 \n",
"225265639 M 180671 490.00 16.00 \n",
"398088661 M 77919 456.00 64.00 \n",
"317980134 M 6143 388.00 8.00 \n",
"15365623 M 1455 318.00 8.00 \n",
"21810329 M 12311 303.00 6.00 \n",
"90478926 M 9289 294.00 13.00 \n",
"227790723 M 13015 284.00 33.00 \n",
"21696279 M 74435 262.00 29.00 \n",
"21252618 M 81762 249.00 52.00 \n",
"16459325 M 20947 241.00 30.00 \n",
"11771512 M 44715 240.00 35.00 \n",
"63717541 M 3827 240.00 3.00 \n",
"103016675 M 11124 232.00 25.00 \n",
"26559241 M 27571 232.00 15.00 \n",
"437019753 M 15090 231.00 12.00 \n",
"23332846 M 56887 230.00 7.00 "
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summarize only the replies whose gender column is 'M' (per the section heading, replies made by\n",
"# male journalists), export the summary to CSV, then display its first 25 rows.\n",
"journalists_replied_to_by_male_summary_df = journalist_reply_summary(\n",
"    journalists_reply_df.loc[journalists_reply_df.gender.eq('M')]\n",
")\n",
"journalists_replied_to_by_male_summary_df.to_csv('output/journalists_replied_to_by_male_journalists.csv')\n",
"journalists_replied_to_by_male_summary_df.loc[:, journalist_reply_summary_fields].head(25)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"screen_name 2292\n",
"name 2292\n",
"organization 2292\n",
"position 2292\n",
"gender 2292\n",
"followers_count 2292\n",
"following_count 2292\n",
"tweet_count 2292\n",
"user_created_at 2292\n",
"verified 2292\n",
"protected 2292\n",
"original 2292\n",
"quote 2292\n",
"reply 2292\n",
"retweet 2292\n",
"tweets_in_dataset 2292\n",
"reply_to_count 2292\n",
"replying_count 2292\n",
"dtype: int64"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalists_replied_to_by_male_summary_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 30314 | \n",
" 91.5% | \n",
"
\n",
" \n",
" F | \n",
" 2800 | \n",
" 8.5% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 30314 91.5%\n",
"F 2800 8.5%"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalist_reply_gender_summary(journalists_reply_df[journalists_reply_df.gender == 'M'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Top 100 journalists replied to by male beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 98 | \n",
" 98.0% | \n",
"
\n",
" \n",
" F | \n",
" 2 | \n",
" 2.0% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 98 98.0%\n",
"F 2 2.0%"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_journalist_reply_gender_summary(journalists_replied_to_by_male_summary_df, replying_count_threshold=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Following data prep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load following\n",
"Users that are followed by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"follower_user_id 3417018\n",
"followed_user_id 3417018\n",
"dtype: int64"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the follower -> followed pairs, reading both id columns as strings, and drop exact\n",
"# duplicate pairs. The builtin str is used for the dtypes because np.str was only a deprecated\n",
"# alias for it and was removed in NumPy 1.24.\n",
"base_follower_to_followed_df = pd.read_csv('source_data/follower_to_followed.csv', \n",
"                                           names=['follower_user_id', 'followed_user_id'],\n",
"                                           dtype={'follower_user_id': str, 'followed_user_id': str}).drop_duplicates()\n",
"base_follower_to_followed_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" follower_user_id | \n",
" followed_user_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 91156486 | \n",
" 3092427779 | \n",
"
\n",
" \n",
" 1 | \n",
" 91156486 | \n",
" 36953109 | \n",
"
\n",
" \n",
" 2 | \n",
" 91156486 | \n",
" 424274008 | \n",
"
\n",
" \n",
" 3 | \n",
" 91156486 | \n",
" 779044378929168384 | \n",
"
\n",
" \n",
" 4 | \n",
" 91156486 | \n",
" 339834914 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" follower_user_id followed_user_id\n",
"0 91156486 3092427779\n",
"1 91156486 36953109\n",
"2 91156486 424274008\n",
"3 91156486 779044378929168384\n",
"4 91156486 339834914"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base_follower_to_followed_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" organization | \n",
" position | \n",
" gender | \n",
" followers_count | \n",
" following_count | \n",
" tweet_count | \n",
" user_created_at | \n",
" verified | \n",
" protected | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 20711445 | \n",
" Glinski, Nina | \n",
" NaN | \n",
" Freelance Reporter | \n",
" F | \n",
" 963 | \n",
" 507 | \n",
" 909 | \n",
" Thu Feb 12 20:00:53 +0000 2009 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 258917371 | \n",
" Enders, David | \n",
" NaN | \n",
" Journalist | \n",
" M | \n",
" 1444 | \n",
" 484 | \n",
" 6296 | \n",
" Mon Feb 28 19:52:03 +0000 2011 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 297046834 | \n",
" Barakat, Matthew | \n",
" Associated Press | \n",
" Northern Virginia Correspondent | \n",
" M | \n",
" 759 | \n",
" 352 | \n",
" 631 | \n",
" Wed May 11 20:55:24 +0000 2011 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 455585786 | \n",
" Atkins, Kimberly | \n",
" Boston Herald | \n",
" Chief Washington Reporter/Columnist | \n",
" F | \n",
" 2944 | \n",
" 2691 | \n",
" 6277 | \n",
" Thu Jan 05 08:26:46 +0000 2012 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 42584840 | \n",
" Vlahou, Toula | \n",
" CQ Roll Call | \n",
" Editor & Podcast Producer | \n",
" F | \n",
" 2703 | \n",
" 201 | \n",
" 6366 | \n",
" Tue May 26 07:41:38 +0000 2009 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name organization \\\n",
"user_id \n",
"20711445 Glinski, Nina NaN \n",
"258917371 Enders, David NaN \n",
"297046834 Barakat, Matthew Associated Press \n",
"455585786 Atkins, Kimberly Boston Herald \n",
"42584840 Vlahou, Toula CQ Roll Call \n",
"\n",
" position gender followers_count \\\n",
"user_id \n",
"20711445 Freelance Reporter F 963 \n",
"258917371 Journalist M 1444 \n",
"297046834 Northern Virginia Correspondent M 759 \n",
"455585786 Chief Washington Reporter/Columnist F 2944 \n",
"42584840 Editor & Podcast Producer F 2703 \n",
"\n",
" following_count tweet_count user_created_at \\\n",
"user_id \n",
"20711445 507 909 Thu Feb 12 20:00:53 +0000 2009 \n",
"258917371 484 6296 Mon Feb 28 19:52:03 +0000 2011 \n",
"297046834 352 631 Wed May 11 20:55:24 +0000 2011 \n",
"455585786 2691 6277 Thu Jan 05 08:26:46 +0000 2012 \n",
"42584840 201 6366 Tue May 26 07:41:38 +0000 2009 \n",
"\n",
" verified protected \n",
"user_id \n",
"20711445 False False \n",
"258917371 True False \n",
"297046834 True False \n",
"455585786 True False \n",
"42584840 False False "
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_info_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"follower_user_id 3311406\n",
"followed_user_id 3311406\n",
"gender 3311406\n",
"dtype: int64"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tag every follow pair with the follower's gender taken from user_summary_df.\n",
"# Because this is an inner join, pairs whose follower_user_id is absent from user_summary_df\n",
"# are dropped (per the original note: journalists that have no tweets).\n",
"follower_to_followed_df = base_follower_to_followed_df.join(\n",
"    user_summary_df['gender'],\n",
"    how='inner',\n",
"    on='follower_user_id',\n",
")\n",
"follower_to_followed_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" follower_user_id | \n",
" followed_user_id | \n",
" gender | \n",
"
\n",
" \n",
" \n",
" \n",
" 261 | \n",
" 15219888 | \n",
" 3291076716 | \n",
" F | \n",
"
\n",
" \n",
" 262 | \n",
" 15219888 | \n",
" 119175339 | \n",
" F | \n",
"
\n",
" \n",
" 263 | \n",
" 15219888 | \n",
" 418837047 | \n",
" F | \n",
"
\n",
" \n",
" 264 | \n",
" 15219888 | \n",
" 259817885 | \n",
" F | \n",
"
\n",
" \n",
" 265 | \n",
" 15219888 | \n",
" 287263845 | \n",
" F | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" follower_user_id followed_user_id gender\n",
"261 15219888 3291076716 F\n",
"262 15219888 119175339 F\n",
"263 15219888 418837047 F\n",
"264 15219888 259817885 F\n",
"265 15219888 287263845 F"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"follower_to_followed_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load followed users"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 17665874 | \n",
" onlinehigh | \n",
"
\n",
" \n",
" 2389275799 | \n",
" HLSPOLICY | \n",
"
\n",
" \n",
" 314728983 | \n",
" Veolia_NA | \n",
"
\n",
" \n",
" 239409802 | \n",
" fishingbuk | \n",
"
\n",
" \n",
" 522799320 | \n",
" GoldsmithBev | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name\n",
"user_id \n",
"17665874 onlinehigh\n",
"2389275799 HLSPOLICY\n",
"314728983 Veolia_NA\n",
"239409802 fishingbuk\n",
"522799320 GoldsmithBev"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Lookup table of screen_name indexed by user_id, with user_id read as a string.\n",
"# The builtin str is used for the dtype because np.str was only a deprecated alias for it\n",
"# and was removed in NumPy 1.24.\n",
"followed_screen_name_lookup_df = pd.read_csv('source_data/followed.csv', \n",
"                                             names=['screen_name', 'user_id'],\n",
"                                             dtype={'user_id': str}).set_index(['user_id'])\n",
"followed_screen_name_lookup_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Limit to beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"follower_user_id 280340\n",
"followed_user_id 280340\n",
"gender 280340\n",
"followed_gender 280340\n",
"dtype: int64"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the follow pairs whose followed_user_id appears in user_summary_df (inner join) and\n",
"# carry that account's gender along in a column named followed_gender.\n",
"follower_to_journalist_followed_df = (\n",
"    follower_to_followed_df\n",
"    .join(user_summary_df['gender'], on='followed_user_id', how='inner', rsuffix='_followed')\n",
"    .rename(columns={'gender_followed': 'followed_gender'})\n",
")\n",
"follower_to_journalist_followed_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" follower_user_id | \n",
" followed_user_id | \n",
" gender | \n",
" followed_gender | \n",
"
\n",
" \n",
" \n",
" \n",
" 287 | \n",
" 15219888 | \n",
" 46582653 | \n",
" F | \n",
" M | \n",
"
\n",
" \n",
" 21810 | \n",
" 15780280 | \n",
" 46582653 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 24153 | \n",
" 14245722 | \n",
" 46582653 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
" 40694 | \n",
" 37865281 | \n",
" 46582653 | \n",
" F | \n",
" M | \n",
"
\n",
" \n",
" 66585 | \n",
" 165204211 | \n",
" 46582653 | \n",
" M | \n",
" M | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" follower_user_id followed_user_id gender followed_gender\n",
"287 15219888 46582653 F M\n",
"21810 15780280 46582653 M M\n",
"24153 14245722 46582653 M M\n",
"40694 37865281 46582653 F M\n",
"66585 165204211 46582653 M M"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"follower_to_journalist_followed_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Functions for summarizing following by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"# Gender of beltway journalists followed by beltway journalists.\n",
"#\n",
"# follower_to_followed_df: one row per follow pair, with a followed_gender column.\n",
"# Returns a DataFrame indexed by gender (index named 'index') with the columns:\n",
"#   count        - number of rows with that followed_gender\n",
"#   percentage   - share of rows, formatted as a string such as '72.1%'\n",
"#   avg_followed - count divided by the number of user_info_df rows of that gender\n",
"#                  that have a non-null name\n",
"def journalist_followed_gender_summary(follower_to_followed_df):\n",
"    followed_gender = follower_to_followed_df.followed_gender\n",
"    gender_summary_df = pd.DataFrame({\n",
"        'count': followed_gender.value_counts(),\n",
"        'percentage': followed_gender.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'})\n",
"    # Count the journalists of each gender once instead of re-filtering user_info_df for every row.\n",
"    journalists_by_gender = user_info_df.groupby('gender')['name'].count()\n",
"    # Divide aligned on the gender labels rather than going through reset_index(): from pandas 2.0\n",
"    # on, value_counts() names its index after the column, so the reset column is not 'index'.\n",
"    gender_summary_df['avg_followed'] = gender_summary_df['count'] / journalists_by_gender\n",
"    # Keep the index name that the old reset_index()/set_index('index') round trip produced.\n",
"    return gender_summary_df.rename_axis('index')\n",
"\n",
"def journalist_following_summary(follower_to_followed_df):\n",
"    \"\"\"Add a per-user follower count to the user summary.\n",
"\n",
"    Args:\n",
"        follower_to_followed_df: DataFrame with one row per follow relationship\n",
"            and a `followed_user_id` column.\n",
"\n",
"    Returns:\n",
"        New DataFrame built from the notebook-level `user_summary_df` with an\n",
"        added `journalist_follower_count` column (rows of\n",
"        `follower_to_followed_df` per `followed_user_id`, 0 when there are\n",
"        none), sorted descending by `journalist_follower_count` and then\n",
"        `followers_count`.\n",
"    \"\"\"\n",
"    # Following count\n",
"    following_count = follower_to_followed_df.followed_user_id.value_counts().rename('journalist_follower_count')\n",
"\n",
"    # Join with user summary (matched on the index of user_summary_df)\n",
"    journalist_following_summary_df = user_summary_df.join(following_count)\n",
"\n",
"    # Users with no matching rows get 0. Only the new column is filled so that missing\n",
"    # values in the other user_summary_df columns are not overwritten with 0.\n",
"    journalist_following_summary_df['journalist_follower_count'] = journalist_following_summary_df['journalist_follower_count'].fillna(0)\n",
"    return journalist_following_summary_df.sort_values(['journalist_follower_count', 'followers_count'], ascending=False)\n",
"\n",
"# Gender of top journalists followed by beltway journalists\n",
"def top_journalist_followed_gender_summary(followed_summary_df, head=100):\n",
"    \"\"\"Break down the first `head` rows of a summary frame by gender.\n",
"\n",
"    Args:\n",
"        followed_summary_df: DataFrame with a `gender` column; the caller is\n",
"            expected to have sorted it already, since rows are taken from the top.\n",
"        head: Number of leading rows to include.\n",
"\n",
"    Returns:\n",
"        DataFrame indexed by gender with `count` and `percentage` columns, the\n",
"        latter formatted as a string such as '76.0%'.\n",
"    \"\"\"\n",
"    top_genders = followed_summary_df.head(head).gender\n",
"    counts = top_genders.value_counts()\n",
"    shares = top_genders.value_counts(normalize=True).mul(100).round(1)\n",
"    pct_labels = shares.astype(str) + '%'\n",
"    return pd.DataFrame({'count': counts, 'percentage': pct_labels})\n",
"\n",
"# Columns shown when displaying journalist following summaries\n",
"journalist_following_summary_fields = [\n",
"    'screen_name',\n",
"    'name',\n",
"    'organization',\n",
"    'gender',\n",
"    'followers_count',\n",
"    'journalist_follower_count',\n",
"]\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Following analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### All followed by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" following_count | \n",
" screen_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 813286 | \n",
" 1671 | \n",
" BarackObama | \n",
"
\n",
" \n",
" 51241574 | \n",
" 1629 | \n",
" AP | \n",
"
\n",
" \n",
" 25073877 | \n",
" 1613 | \n",
" realDonaldTrump | \n",
"
\n",
" \n",
" 807095 | \n",
" 1581 | \n",
" nytimes | \n",
"
\n",
" \n",
" 2467791 | \n",
" 1532 | \n",
" washingtonpost | \n",
"
\n",
" \n",
" 1339835893 | \n",
" 1531 | \n",
" HillaryClinton | \n",
"
\n",
" \n",
" 818927131883356161 | \n",
" 1522 | \n",
" PressSec | \n",
"
\n",
" \n",
" 822215673812119553 | \n",
" 1507 | \n",
" WhiteHouse | \n",
"
\n",
" \n",
" 822215679726100480 | \n",
" 1488 | \n",
" POTUS | \n",
"
\n",
" \n",
" 9300262 | \n",
" 1457 | \n",
" politico | \n",
"
\n",
" \n",
" 30313925 | \n",
" 1402 | \n",
" ObamaWhiteHouse | \n",
"
\n",
" \n",
" 14246001 | \n",
" 1384 | \n",
" mikeallen | \n",
"
\n",
" \n",
" 93069110 | \n",
" 1368 | \n",
" maggieNYT | \n",
"
\n",
" \n",
" 14529929 | \n",
" 1337 | \n",
" jaketapper | \n",
"
\n",
" \n",
" 428333 | \n",
" 1289 | \n",
" cnnbrk | \n",
"
\n",
" \n",
" 1536791610 | \n",
" 1279 | \n",
" POTUS44 | \n",
"
\n",
" \n",
" 3108351 | \n",
" 1279 | \n",
" WSJ | \n",
"
\n",
" \n",
" 50325797 | \n",
" 1258 | \n",
" chucktodd | \n",
"
\n",
" \n",
" 113420831 | \n",
" 1258 | \n",
" PressSec44 | \n",
"
\n",
" \n",
" 16017475 | \n",
" 1234 | \n",
" NateSilver538 | \n",
"
\n",
" \n",
" 18622869 | \n",
" 1231 | \n",
" ezraklein | \n",
"
\n",
" \n",
" 86129724 | \n",
" 1173 | \n",
" costareports | \n",
"
\n",
" \n",
" 1652541 | \n",
" 1144 | \n",
" Reuters | \n",
"
\n",
" \n",
" 1330457336 | \n",
" 1128 | \n",
" billclinton | \n",
"
\n",
" \n",
" 5392522 | \n",
" 1124 | \n",
" NPR | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" following_count screen_name\n",
"813286 1671 BarackObama\n",
"51241574 1629 AP\n",
"25073877 1613 realDonaldTrump\n",
"807095 1581 nytimes\n",
"2467791 1532 washingtonpost\n",
"1339835893 1531 HillaryClinton\n",
"818927131883356161 1522 PressSec\n",
"822215673812119553 1507 WhiteHouse\n",
"822215679726100480 1488 POTUS\n",
"9300262 1457 politico\n",
"30313925 1402 ObamaWhiteHouse\n",
"14246001 1384 mikeallen\n",
"93069110 1368 maggieNYT\n",
"14529929 1337 jaketapper\n",
"428333 1289 cnnbrk\n",
"1536791610 1279 POTUS44\n",
"3108351 1279 WSJ\n",
"50325797 1258 chucktodd\n",
"113420831 1258 PressSec44\n",
"16017475 1234 NateSilver538\n",
"18622869 1231 ezraklein\n",
"86129724 1173 costareports\n",
"1652541 1144 Reuters\n",
"1330457336 1128 billclinton\n",
"5392522 1124 NPR"
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the rows per followed_user_id, then join the screen-name lookup on the index\n",
"all_followed_df = (\n",
"    follower_to_followed_df['followed_user_id']\n",
"    .value_counts()\n",
"    .to_frame('following_count')\n",
"    .join(followed_screen_name_lookup_df)\n",
")\n",
"all_followed_df.to_csv('output/all_followed_by_journalists.csv')\n",
"all_followed_df.head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### All beltway journalists followed by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" journalist_follower_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 1,337.00 | \n",
"
\n",
" \n",
" 50325797 | \n",
" chucktodd | \n",
" Todd, Chuck | \n",
" NBC News | \n",
" M | \n",
" 1781247 | \n",
" 1,258.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 1,116.00 | \n",
"
\n",
" \n",
" 31127446 | \n",
" markknoller | \n",
" Knoller, Mark | \n",
" CBS News | \n",
" M | \n",
" 301474 | \n",
" 1,107.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 1,106.00 | \n",
"
\n",
" \n",
" 61734492 | \n",
" Fahrenthold | \n",
" Fahrenthold, David | \n",
" Washington Post | \n",
" M | \n",
" 451778 | \n",
" 1,082.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 1,032.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 977.00 | \n",
"
\n",
" \n",
" 16930125 | \n",
" edatpost | \n",
" O’Keefe, Edward | \n",
" Washington Post | \n",
" M | \n",
" 58670 | \n",
" 973.00 | \n",
"
\n",
" \n",
" 85131054 | \n",
" jeffzeleny | \n",
" Zeleny, Jeff | \n",
" CNN | \n",
" M | \n",
" 244114 | \n",
" 970.00 | \n",
"
\n",
" \n",
" 21316253 | \n",
" ZekeJMiller | \n",
" Miller, Zeke J. | \n",
" Time Magazine | \n",
" M | \n",
" 198517 | \n",
" 915.00 | \n",
"
\n",
" \n",
" 89820928 | \n",
" mitchellreports | \n",
" Mitchell, Andrea | \n",
" NBC News | \n",
" F | \n",
" 1388543 | \n",
" 909.00 | \n",
"
\n",
" \n",
" 59676104 | \n",
" danbalz | \n",
" Balz, Daniel | \n",
" Washington Post | \n",
" M | \n",
" 90819 | \n",
" 892.00 | \n",
"
\n",
" \n",
" 108617810 | \n",
" DanaBashCNN | \n",
" Bash, Dana | \n",
" CNN | \n",
" F | \n",
" 281861 | \n",
" 884.00 | \n",
"
\n",
" \n",
" 15463671 | \n",
" samstein | \n",
" Stein, Sam | \n",
" Huffington Post | \n",
" M | \n",
" 313211 | \n",
" 880.00 | \n",
"
\n",
" \n",
" 130945778 | \n",
" mollyesque | \n",
" Ball, Molly | \n",
" The Atlantic | \n",
" F | \n",
" 116857 | \n",
" 877.00 | \n",
"
\n",
" \n",
" 46176168 | \n",
" MajorCBS | \n",
" Garrett, Major | \n",
" CBS News | \n",
" M | \n",
" 178640 | \n",
" 872.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 868.00 | \n",
"
\n",
" \n",
" 16187637 | \n",
" ChadPergram | \n",
" Pergram, Chad | \n",
" Fox News | \n",
" M | \n",
" 59305 | \n",
" 866.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 860.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 860.00 | \n",
"
\n",
" \n",
" 123327472 | \n",
" peterbakernyt | \n",
" Baker, Peter | \n",
" New York Times | \n",
" M | \n",
" 96956 | \n",
" 856.00 | \n",
"
\n",
" \n",
" 15931637 | \n",
" jonkarl | \n",
" Karl, Jonathan | \n",
" ABC News | \n",
" M | \n",
" 183467 | \n",
" 830.00 | \n",
"
\n",
" \n",
" 11771512 | \n",
" OKnox | \n",
" Knox, Olivier | \n",
" Yahoo News | \n",
" M | \n",
" 44715 | \n",
" 788.00 | \n",
"
\n",
" \n",
" 259395895 | \n",
" JohnJHarwood | \n",
" Harwood, John | \n",
" CNBC | \n",
" M | \n",
" 149040 | \n",
" 783.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization gender \\\n",
"user_id \n",
"14529929 jaketapper Tapper, Jake CNN M \n",
"50325797 chucktodd Todd, Chuck NBC News M \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times M \n",
"31127446 markknoller Knoller, Mark CBS News M \n",
"13524182 daveweigel Weigel, David Washington Post M \n",
"61734492 Fahrenthold Fahrenthold, David Washington Post M \n",
"18678924 jmartNYT Martin, Jonathan New York Times M \n",
"39155029 mkraju Raju, Manu K. CNN M \n",
"16930125 edatpost O’Keefe, Edward Washington Post M \n",
"85131054 jeffzeleny Zeleny, Jeff CNN M \n",
"21316253 ZekeJMiller Miller, Zeke J. Time Magazine M \n",
"89820928 mitchellreports Mitchell, Andrea NBC News F \n",
"59676104 danbalz Balz, Daniel Washington Post M \n",
"108617810 DanaBashCNN Bash, Dana CNN F \n",
"15463671 samstein Stein, Sam Huffington Post M \n",
"130945778 mollyesque Ball, Molly The Atlantic F \n",
"46176168 MajorCBS Garrett, Major CBS News M \n",
"21252618 JakeSherman Sherman, Jacob S. Politico M \n",
"16187637 ChadPergram Pergram, Chad Fox News M \n",
"22771961 Acosta Acosta, Jim CNN M \n",
"12354832 kasie Hunt, Kasie NBC News F \n",
"123327472 peterbakernyt Baker, Peter New York Times M \n",
"15931637 jonkarl Karl, Jonathan ABC News M \n",
"11771512 OKnox Knox, Olivier Yahoo News M \n",
"259395895 JohnJHarwood Harwood, John CNBC M \n",
"\n",
" followers_count journalist_follower_count \n",
"user_id \n",
"14529929 1305680 1,337.00 \n",
"50325797 1781247 1,258.00 \n",
"19107878 308181 1,116.00 \n",
"31127446 301474 1,107.00 \n",
"13524182 332344 1,106.00 \n",
"61734492 451778 1,082.00 \n",
"18678924 197322 1,032.00 \n",
"39155029 88366 977.00 \n",
"16930125 58670 973.00 \n",
"85131054 244114 970.00 \n",
"21316253 198517 915.00 \n",
"89820928 1388543 909.00 \n",
"59676104 90819 892.00 \n",
"108617810 281861 884.00 \n",
"15463671 313211 880.00 \n",
"130945778 116857 877.00 \n",
"46176168 178640 872.00 \n",
"21252618 81762 868.00 \n",
"16187637 59305 866.00 \n",
"22771961 350650 860.00 \n",
"12354832 187357 860.00 \n",
"123327472 96956 856.00 \n",
"15931637 183467 830.00 \n",
"11771512 44715 788.00 \n",
"259395895 149040 783.00 "
]
},
"execution_count": 107,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the per-journalist follower summary, export it, and show the 25 rows at the top\n",
"follower_to_journalist_followed_summary_df = journalist_following_summary(\n",
"    follower_to_journalist_followed_df\n",
")\n",
"follower_to_journalist_followed_summary_df.to_csv('output/journalists_followed_by_journalists.csv')\n",
"follower_to_journalist_followed_summary_df.loc[:, journalist_following_summary_fields].head(25)"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
" avg_followed | \n",
"
\n",
" \n",
" index | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 174283 | \n",
" 62.2% | \n",
" 124.04 | \n",
"
\n",
" \n",
" F | \n",
" 106057 | \n",
" 37.8% | \n",
" 96.42 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage avg_followed\n",
"index \n",
"M 174283 62.2% 124.04\n",
"F 106057 37.8% 96.42"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Counts, shares and per-gender averages of the joined follow relationships\n",
"journalist_followed_gender_summary(follower_to_followed_df=follower_to_journalist_followed_df)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Stats of beltway journalists followed by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" journalist_follower_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 2,292.00 | \n",
"
\n",
" \n",
" mean | \n",
" 122.31 | \n",
"
\n",
" \n",
" std | \n",
" 161.53 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 26.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 64.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 145.00 | \n",
"
\n",
" \n",
" max | \n",
" 1,337.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" journalist_follower_count\n",
"count 2,292.00\n",
"mean 122.31\n",
"std 161.53\n",
"min 0.00\n",
"25% 26.00\n",
"50% 64.00\n",
"75% 145.00\n",
"max 1,337.00"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Descriptive statistics of journalist_follower_count over the whole summary\n",
"follower_to_journalist_followed_summary_df.loc[:, ['journalist_follower_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Top 100 journalists followed by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 76 | \n",
" 76.0% | \n",
"
\n",
" \n",
" F | \n",
" 24 | \n",
" 24.0% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 76 76.0%\n",
"F 24 24.0%"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Gender split of the 100 rows at the top of the sorted summary\n",
"top_journalist_followed_gender_summary(follower_to_journalist_followed_summary_df, head=100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Female beltway journalists followed by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" journalist_follower_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 89820928 | \n",
" mitchellreports | \n",
" Mitchell, Andrea | \n",
" NBC News | \n",
" F | \n",
" 1388543 | \n",
" 909.00 | \n",
"
\n",
" \n",
" 108617810 | \n",
" DanaBashCNN | \n",
" Bash, Dana | \n",
" CNN | \n",
" F | \n",
" 281861 | \n",
" 884.00 | \n",
"
\n",
" \n",
" 130945778 | \n",
" mollyesque | \n",
" Ball, Molly | \n",
" The Atlantic | \n",
" F | \n",
" 116857 | \n",
" 877.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 860.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" F | \n",
" 122382 | \n",
" 777.00 | \n",
"
\n",
" \n",
" 28181835 | \n",
" jpaceDC | \n",
" Pace, Julie | \n",
" Associated Press | \n",
" F | \n",
" 46017 | \n",
" 738.00 | \n",
"
\n",
" \n",
" 70511174 | \n",
" Hadas_Gold | \n",
" Gold, Hadas | \n",
" Politico | \n",
" F | \n",
" 45221 | \n",
" 679.00 | \n",
"
\n",
" \n",
" 21307076 | \n",
" SusanPage | \n",
" Page, Susan | \n",
" USA Today | \n",
" F | \n",
" 48675 | \n",
" 670.00 | \n",
"
\n",
" \n",
" 19186003 | \n",
" seungminkim | \n",
" Kim, Seung Min | \n",
" Politico | \n",
" F | \n",
" 33980 | \n",
" 664.00 | \n",
"
\n",
" \n",
" 45399148 | \n",
" jeneps | \n",
" Epstein, Jennifer | \n",
" Bloomberg News | \n",
" F | \n",
" 61242 | \n",
" 631.00 | \n",
"
\n",
" \n",
" 224320485 | \n",
" KellyO | \n",
" O’Donnell, Kelly | \n",
" NBC News | \n",
" F | \n",
" 148476 | \n",
" 630.00 | \n",
"
\n",
" \n",
" 20776497 | \n",
" BFischerMartin | \n",
" Fischer Martin, Betsy | \n",
" Bloomberg News | \n",
" F | \n",
" 50890 | \n",
" 609.00 | \n",
"
\n",
" \n",
" 77032777 | \n",
" apalmerdc | \n",
" Palmer, Anna A. | \n",
" Politico | \n",
" F | \n",
" 30523 | \n",
" 591.00 | \n",
"
\n",
" \n",
" 116341480 | \n",
" RosieGray | \n",
" Gray, Rosie | \n",
" The Atlantic | \n",
" F | \n",
" 96935 | \n",
" 589.00 | \n",
"
\n",
" \n",
" 237477771 | \n",
" juliehdavis | \n",
" Davis, Julie | \n",
" New York Times | \n",
" F | \n",
" 49821 | \n",
" 570.00 | \n",
"
\n",
" \n",
" 58869089 | \n",
" margarettalev | \n",
" Talev, Margaret | \n",
" Bloomberg News | \n",
" F | \n",
" 19588 | \n",
" 569.00 | \n",
"
\n",
" \n",
" 14870670 | \n",
" KateNocera | \n",
" Nocera, Kate | \n",
" BuzzFeed | \n",
" F | \n",
" 27714 | \n",
" 567.00 | \n",
"
\n",
" \n",
" 46817943 | \n",
" brikeilarcnn | \n",
" Keilar, Brianna | \n",
" CNN | \n",
" F | \n",
" 105276 | \n",
" 557.00 | \n",
"
\n",
" \n",
" 22772264 | \n",
" carolelee | \n",
" Lee, Carol | \n",
" Wall Street Journal / Dow Jones | \n",
" F | \n",
" 31840 | \n",
" 552.00 | \n",
"
\n",
" \n",
" 15159913 | \n",
" JFKucinich | \n",
" Kucinich, Jacqueline | \n",
" Daily Beast | \n",
" F | \n",
" 31210 | \n",
" 549.00 | \n",
"
\n",
" \n",
" 297532865 | \n",
" kwelkernbc | \n",
" Welker, Kristen | \n",
" NBC News | \n",
" F | \n",
" 99234 | \n",
" 537.00 | \n",
"
\n",
" \n",
" 15727317 | \n",
" aterkel | \n",
" Terkel, Amanda | \n",
" Huffington Post | \n",
" F | \n",
" 78736 | \n",
" 527.00 | \n",
"
\n",
" \n",
" 17881467 | \n",
" rebeccagberg | \n",
" Berg, Rebecca | \n",
" RealClearPolitics | \n",
" F | \n",
" 48798 | \n",
" 516.00 | \n",
"
\n",
" \n",
" 151444950 | \n",
" DaviSusan | \n",
" Davis, Susan | \n",
" National Public Radio | \n",
" F | \n",
" 27297 | \n",
" 506.00 | \n",
"
\n",
" \n",
" 27055034 | \n",
" SabrinaSiddiqui | \n",
" Siddiqui, Sabrina | \n",
" Guardian US | \n",
" F | \n",
" 53835 | \n",
" 474.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name \\\n",
"user_id \n",
"89820928 mitchellreports Mitchell, Andrea \n",
"108617810 DanaBashCNN Bash, Dana \n",
"130945778 mollyesque Ball, Molly \n",
"12354832 kasie Hunt, Kasie \n",
"33919343 AshleyRParker Parker, Ashley \n",
"28181835 jpaceDC Pace, Julie \n",
"70511174 Hadas_Gold Gold, Hadas \n",
"21307076 SusanPage Page, Susan \n",
"19186003 seungminkim Kim, Seung Min \n",
"45399148 jeneps Epstein, Jennifer \n",
"224320485 KellyO O’Donnell, Kelly \n",
"20776497 BFischerMartin Fischer Martin, Betsy \n",
"77032777 apalmerdc Palmer, Anna A. \n",
"116341480 RosieGray Gray, Rosie \n",
"237477771 juliehdavis Davis, Julie \n",
"58869089 margarettalev Talev, Margaret \n",
"14870670 KateNocera Nocera, Kate \n",
"46817943 brikeilarcnn Keilar, Brianna \n",
"22772264 carolelee Lee, Carol \n",
"15159913 JFKucinich Kucinich, Jacqueline \n",
"297532865 kwelkernbc Welker, Kristen \n",
"15727317 aterkel Terkel, Amanda \n",
"17881467 rebeccagberg Berg, Rebecca \n",
"151444950 DaviSusan Davis, Susan \n",
"27055034 SabrinaSiddiqui Siddiqui, Sabrina \n",
"\n",
" organization gender followers_count \\\n",
"user_id \n",
"89820928 NBC News F 1388543 \n",
"108617810 CNN F 281861 \n",
"130945778 The Atlantic F 116857 \n",
"12354832 NBC News F 187357 \n",
"33919343 Washington Post F 122382 \n",
"28181835 Associated Press F 46017 \n",
"70511174 Politico F 45221 \n",
"21307076 USA Today F 48675 \n",
"19186003 Politico F 33980 \n",
"45399148 Bloomberg News F 61242 \n",
"224320485 NBC News F 148476 \n",
"20776497 Bloomberg News F 50890 \n",
"77032777 Politico F 30523 \n",
"116341480 The Atlantic F 96935 \n",
"237477771 New York Times F 49821 \n",
"58869089 Bloomberg News F 19588 \n",
"14870670 BuzzFeed F 27714 \n",
"46817943 CNN F 105276 \n",
"22772264 Wall Street Journal / Dow Jones F 31840 \n",
"15159913 Daily Beast F 31210 \n",
"297532865 NBC News F 99234 \n",
"15727317 Huffington Post F 78736 \n",
"17881467 RealClearPolitics F 48798 \n",
"151444950 National Public Radio F 27297 \n",
"27055034 Guardian US F 53835 \n",
"\n",
" journalist_follower_count \n",
"user_id \n",
"89820928 909.00 \n",
"108617810 884.00 \n",
"130945778 877.00 \n",
"12354832 860.00 \n",
"33919343 777.00 \n",
"28181835 738.00 \n",
"70511174 679.00 \n",
"21307076 670.00 \n",
"19186003 664.00 \n",
"45399148 631.00 \n",
"224320485 630.00 \n",
"20776497 609.00 \n",
"77032777 591.00 \n",
"116341480 589.00 \n",
"237477771 570.00 \n",
"58869089 569.00 \n",
"14870670 567.00 \n",
"46817943 557.00 \n",
"22772264 552.00 \n",
"15159913 549.00 \n",
"297532865 537.00 \n",
"15727317 527.00 \n",
"17881467 516.00 \n",
"151444950 506.00 \n",
"27055034 474.00 "
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the summary rows whose gender is 'F', export them, and show the 25 rows at the top\n",
"follower_to_female_journalist_followed_df = follower_to_journalist_followed_summary_df.loc[\n",
"    lambda summary_df: summary_df['gender'] == 'F'\n",
"]\n",
"follower_to_female_journalist_followed_df.to_csv('output/female_journalists_followed_by_journalists.csv')\n",
"follower_to_female_journalist_followed_df.loc[:, journalist_following_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Stats of female beltway journalists followed by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" journalist_follower_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 993.00 | \n",
"
\n",
" \n",
" mean | \n",
" 106.80 | \n",
"
\n",
" \n",
" std | \n",
" 131.81 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 24.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 59.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 131.00 | \n",
"
\n",
" \n",
" max | \n",
" 909.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" journalist_follower_count\n",
"count 993.00\n",
"mean 106.80\n",
"std 131.81\n",
"min 0.00\n",
"25% 24.00\n",
"50% 59.00\n",
"75% 131.00\n",
"max 909.00"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Descriptive statistics of journalist_follower_count for the gender == 'F' rows\n",
"follower_to_female_journalist_followed_df.loc[:, ['journalist_follower_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Male beltway journalists followed by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" journalist_follower_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 1,337.00 | \n",
"
\n",
" \n",
" 50325797 | \n",
" chucktodd | \n",
" Todd, Chuck | \n",
" NBC News | \n",
" M | \n",
" 1781247 | \n",
" 1,258.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 1,116.00 | \n",
"
\n",
" \n",
" 31127446 | \n",
" markknoller | \n",
" Knoller, Mark | \n",
" CBS News | \n",
" M | \n",
" 301474 | \n",
" 1,107.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 1,106.00 | \n",
"
\n",
" \n",
" 61734492 | \n",
" Fahrenthold | \n",
" Fahrenthold, David | \n",
" Washington Post | \n",
" M | \n",
" 451778 | \n",
" 1,082.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 1,032.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 977.00 | \n",
"
\n",
" \n",
" 16930125 | \n",
" edatpost | \n",
" O’Keefe, Edward | \n",
" Washington Post | \n",
" M | \n",
" 58670 | \n",
" 973.00 | \n",
"
\n",
" \n",
" 85131054 | \n",
" jeffzeleny | \n",
" Zeleny, Jeff | \n",
" CNN | \n",
" M | \n",
" 244114 | \n",
" 970.00 | \n",
"
\n",
" \n",
" 21316253 | \n",
" ZekeJMiller | \n",
" Miller, Zeke J. | \n",
" Time Magazine | \n",
" M | \n",
" 198517 | \n",
" 915.00 | \n",
"
\n",
" \n",
" 59676104 | \n",
" danbalz | \n",
" Balz, Daniel | \n",
" Washington Post | \n",
" M | \n",
" 90819 | \n",
" 892.00 | \n",
"
\n",
" \n",
" 15463671 | \n",
" samstein | \n",
" Stein, Sam | \n",
" Huffington Post | \n",
" M | \n",
" 313211 | \n",
" 880.00 | \n",
"
\n",
" \n",
" 46176168 | \n",
" MajorCBS | \n",
" Garrett, Major | \n",
" CBS News | \n",
" M | \n",
" 178640 | \n",
" 872.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 868.00 | \n",
"
\n",
" \n",
" 16187637 | \n",
" ChadPergram | \n",
" Pergram, Chad | \n",
" Fox News | \n",
" M | \n",
" 59305 | \n",
" 866.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 860.00 | \n",
"
\n",
" \n",
" 123327472 | \n",
" peterbakernyt | \n",
" Baker, Peter | \n",
" New York Times | \n",
" M | \n",
" 96956 | \n",
" 856.00 | \n",
"
\n",
" \n",
" 15931637 | \n",
" jonkarl | \n",
" Karl, Jonathan | \n",
" ABC News | \n",
" M | \n",
" 183467 | \n",
" 830.00 | \n",
"
\n",
" \n",
" 11771512 | \n",
" OKnox | \n",
" Knox, Olivier | \n",
" Yahoo News | \n",
" M | \n",
" 44715 | \n",
" 788.00 | \n",
"
\n",
" \n",
" 259395895 | \n",
" JohnJHarwood | \n",
" Harwood, John | \n",
" CNBC | \n",
" M | \n",
" 149040 | \n",
" 783.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 781.00 | \n",
"
\n",
" \n",
" 18172905 | \n",
" rickklein | \n",
" Klein, Richard | \n",
" ABC News | \n",
" M | \n",
" 109170 | \n",
" 737.00 | \n",
"
\n",
" \n",
" 21768766 | \n",
" jonathanweisman | \n",
" Weisman, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 57549 | \n",
" 728.00 | \n",
"
\n",
" \n",
" 997684836 | \n",
" pkcapitol | \n",
" Kane, Paul | \n",
" Washington Post | \n",
" M | \n",
" 31300 | \n",
" 728.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization gender \\\n",
"user_id \n",
"14529929 jaketapper Tapper, Jake CNN M \n",
"50325797 chucktodd Todd, Chuck NBC News M \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times M \n",
"31127446 markknoller Knoller, Mark CBS News M \n",
"13524182 daveweigel Weigel, David Washington Post M \n",
"61734492 Fahrenthold Fahrenthold, David Washington Post M \n",
"18678924 jmartNYT Martin, Jonathan New York Times M \n",
"39155029 mkraju Raju, Manu K. CNN M \n",
"16930125 edatpost O’Keefe, Edward Washington Post M \n",
"85131054 jeffzeleny Zeleny, Jeff CNN M \n",
"21316253 ZekeJMiller Miller, Zeke J. Time Magazine M \n",
"59676104 danbalz Balz, Daniel Washington Post M \n",
"15463671 samstein Stein, Sam Huffington Post M \n",
"46176168 MajorCBS Garrett, Major CBS News M \n",
"21252618 JakeSherman Sherman, Jacob S. Politico M \n",
"16187637 ChadPergram Pergram, Chad Fox News M \n",
"22771961 Acosta Acosta, Jim CNN M \n",
"123327472 peterbakernyt Baker, Peter New York Times M \n",
"15931637 jonkarl Karl, Jonathan ABC News M \n",
"11771512 OKnox Knox, Olivier Yahoo News M \n",
"259395895 JohnJHarwood Harwood, John CNBC M \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News M \n",
"18172905 rickklein Klein, Richard ABC News M \n",
"21768766 jonathanweisman Weisman, Jonathan New York Times M \n",
"997684836 pkcapitol Kane, Paul Washington Post M \n",
"\n",
" followers_count journalist_follower_count \n",
"user_id \n",
"14529929 1305680 1,337.00 \n",
"50325797 1781247 1,258.00 \n",
"19107878 308181 1,116.00 \n",
"31127446 301474 1,107.00 \n",
"13524182 332344 1,106.00 \n",
"61734492 451778 1,082.00 \n",
"18678924 197322 1,032.00 \n",
"39155029 88366 977.00 \n",
"16930125 58670 973.00 \n",
"85131054 244114 970.00 \n",
"21316253 198517 915.00 \n",
"59676104 90819 892.00 \n",
"15463671 313211 880.00 \n",
"46176168 178640 872.00 \n",
"21252618 81762 868.00 \n",
"16187637 59305 866.00 \n",
"22771961 350650 860.00 \n",
"123327472 96956 856.00 \n",
"15931637 183467 830.00 \n",
"11771512 44715 788.00 \n",
"259395895 149040 783.00 \n",
"46557945 55762 781.00 \n",
"18172905 109170 737.00 \n",
"21768766 57549 728.00 \n",
"997684836 31300 728.00 "
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the summary rows whose gender is 'M', export them, and show the 25 rows at the top\n",
"follower_to_male_journalist_followed_df = follower_to_journalist_followed_summary_df.loc[\n",
"    lambda summary_df: summary_df['gender'] == 'M'\n",
"]\n",
"follower_to_male_journalist_followed_df.to_csv('output/male_journalists_followed_by_journalists.csv')\n",
"follower_to_male_journalist_followed_df.loc[:, journalist_following_summary_fields].head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Stats of male beltway journalists followed by beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" journalist_follower_count | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1,299.00 | \n",
"
\n",
" \n",
" mean | \n",
" 134.17 | \n",
"
\n",
" \n",
" std | \n",
" 180.14 | \n",
"
\n",
" \n",
" min | \n",
" 0.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 28.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 67.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 156.00 | \n",
"
\n",
" \n",
" max | \n",
" 1,337.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" journalist_follower_count\n",
"count 1,299.00\n",
"mean 134.17\n",
"std 180.14\n",
"min 0.00\n",
"25% 28.00\n",
"50% 67.00\n",
"75% 156.00\n",
"max 1,337.00"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Descriptive statistics of journalist_follower_count for the gender == 'M' rows\n",
"follower_to_male_journalist_followed_df.loc[:, ['journalist_follower_count']].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Beltway journalists followed by female beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" journalist_follower_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 619.00 | \n",
"
\n",
" \n",
" 50325797 | \n",
" chucktodd | \n",
" Todd, Chuck | \n",
" NBC News | \n",
" M | \n",
" 1781247 | \n",
" 569.00 | \n",
"
\n",
" \n",
" 31127446 | \n",
" markknoller | \n",
" Knoller, Mark | \n",
" CBS News | \n",
" M | \n",
" 301474 | \n",
" 505.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 490.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 484.00 | \n",
"
\n",
" \n",
" 61734492 | \n",
" Fahrenthold | \n",
" Fahrenthold, David | \n",
" Washington Post | \n",
" M | \n",
" 451778 | \n",
" 474.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 445.00 | \n",
"
\n",
" \n",
" 16930125 | \n",
" edatpost | \n",
" O’Keefe, Edward | \n",
" Washington Post | \n",
" M | \n",
" 58670 | \n",
" 444.00 | \n",
"
\n",
" \n",
" 89820928 | \n",
" mitchellreports | \n",
" Mitchell, Andrea | \n",
" NBC News | \n",
" F | \n",
" 1388543 | \n",
" 441.00 | \n",
"
\n",
" \n",
" 85131054 | \n",
" jeffzeleny | \n",
" Zeleny, Jeff | \n",
" CNN | \n",
" M | \n",
" 244114 | \n",
" 435.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 434.00 | \n",
"
\n",
" \n",
" 108617810 | \n",
" DanaBashCNN | \n",
" Bash, Dana | \n",
" CNN | \n",
" F | \n",
" 281861 | \n",
" 430.00 | \n",
"
\n",
" \n",
" 21316253 | \n",
" ZekeJMiller | \n",
" Miller, Zeke J. | \n",
" Time Magazine | \n",
" M | \n",
" 198517 | \n",
" 420.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 402.00 | \n",
"
\n",
" \n",
" 15463671 | \n",
" samstein | \n",
" Stein, Sam | \n",
" Huffington Post | \n",
" M | \n",
" 313211 | \n",
" 398.00 | \n",
"
\n",
" \n",
" 16187637 | \n",
" ChadPergram | \n",
" Pergram, Chad | \n",
" Fox News | \n",
" M | \n",
" 59305 | \n",
" 397.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 394.00 | \n",
"
\n",
" \n",
" 46176168 | \n",
" MajorCBS | \n",
" Garrett, Major | \n",
" CBS News | \n",
" M | \n",
" 178640 | \n",
" 390.00 | \n",
"
\n",
" \n",
" 15931637 | \n",
" jonkarl | \n",
" Karl, Jonathan | \n",
" ABC News | \n",
" M | \n",
" 183467 | \n",
" 389.00 | \n",
"
\n",
" \n",
" 130945778 | \n",
" mollyesque | \n",
" Ball, Molly | \n",
" The Atlantic | \n",
" F | \n",
" 116857 | \n",
" 386.00 | \n",
"
\n",
" \n",
" 59676104 | \n",
" danbalz | \n",
" Balz, Daniel | \n",
" Washington Post | \n",
" M | \n",
" 90819 | \n",
" 382.00 | \n",
"
\n",
" \n",
" 123327472 | \n",
" peterbakernyt | \n",
" Baker, Peter | \n",
" New York Times | \n",
" M | \n",
" 96956 | \n",
" 379.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 366.00 | \n",
"
\n",
" \n",
" 11771512 | \n",
" OKnox | \n",
" Knox, Olivier | \n",
" Yahoo News | \n",
" M | \n",
" 44715 | \n",
" 354.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" F | \n",
" 122382 | \n",
" 339.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization gender \\\n",
"user_id \n",
"14529929 jaketapper Tapper, Jake CNN M \n",
"50325797 chucktodd Todd, Chuck NBC News M \n",
"31127446 markknoller Knoller, Mark CBS News M \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times M \n",
"13524182 daveweigel Weigel, David Washington Post M \n",
"61734492 Fahrenthold Fahrenthold, David Washington Post M \n",
"18678924 jmartNYT Martin, Jonathan New York Times M \n",
"16930125 edatpost O’Keefe, Edward Washington Post M \n",
"89820928 mitchellreports Mitchell, Andrea NBC News F \n",
"85131054 jeffzeleny Zeleny, Jeff CNN M \n",
"39155029 mkraju Raju, Manu K. CNN M \n",
"108617810 DanaBashCNN Bash, Dana CNN F \n",
"21316253 ZekeJMiller Miller, Zeke J. Time Magazine M \n",
"22771961 Acosta Acosta, Jim CNN M \n",
"15463671 samstein Stein, Sam Huffington Post M \n",
"16187637 ChadPergram Pergram, Chad Fox News M \n",
"21252618 JakeSherman Sherman, Jacob S. Politico M \n",
"46176168 MajorCBS Garrett, Major CBS News M \n",
"15931637 jonkarl Karl, Jonathan ABC News M \n",
"130945778 mollyesque Ball, Molly The Atlantic F \n",
"59676104 danbalz Balz, Daniel Washington Post M \n",
"123327472 peterbakernyt Baker, Peter New York Times M \n",
"12354832 kasie Hunt, Kasie NBC News F \n",
"11771512 OKnox Knox, Olivier Yahoo News M \n",
"33919343 AshleyRParker Parker, Ashley Washington Post F \n",
"\n",
" followers_count journalist_follower_count \n",
"user_id \n",
"14529929 1305680 619.00 \n",
"50325797 1781247 569.00 \n",
"31127446 301474 505.00 \n",
"19107878 308181 490.00 \n",
"13524182 332344 484.00 \n",
"61734492 451778 474.00 \n",
"18678924 197322 445.00 \n",
"16930125 58670 444.00 \n",
"89820928 1388543 441.00 \n",
"85131054 244114 435.00 \n",
"39155029 88366 434.00 \n",
"108617810 281861 430.00 \n",
"21316253 198517 420.00 \n",
"22771961 350650 402.00 \n",
"15463671 313211 398.00 \n",
"16187637 59305 397.00 \n",
"21252618 81762 394.00 \n",
"46176168 178640 390.00 \n",
"15931637 183467 389.00 \n",
"130945778 116857 386.00 \n",
"59676104 90819 382.00 \n",
"123327472 96956 379.00 \n",
"12354832 187357 366.00 \n",
"11771512 44715 354.00 \n",
"33919343 122382 339.00 "
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"female_follower_to_journalist_followed_df = journalist_following_summary(follower_to_journalist_followed_df[follower_to_journalist_followed_df.gender == 'F'])\n",
"female_follower_to_journalist_followed_df.to_csv('output/journalists_followed_by_female_journalists.csv')\n",
"female_follower_to_journalist_followed_df[journalist_following_summary_fields].head(25)"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
" avg_followed | \n",
"
\n",
" \n",
" index | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 73950 | \n",
" 62.0% | \n",
" 52.63 | \n",
"
\n",
" \n",
" F | \n",
" 45300 | \n",
" 38.0% | \n",
" 41.18 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage avg_followed\n",
"index \n",
"M 73950 62.0% 52.63\n",
"F 45300 38.0% 41.18"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalist_followed_gender_summary(follower_to_journalist_followed_df[follower_to_journalist_followed_df.gender == 'F'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Top 100 journalists followed by female beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 74 | \n",
" 74.0% | \n",
"
\n",
" \n",
" F | \n",
" 26 | \n",
" 26.0% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 74 74.0%\n",
"F 26 26.0%"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_journalist_followed_gender_summary(female_follower_to_journalist_followed_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Beltway journalists followed by male beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" gender | \n",
" followers_count | \n",
" journalist_follower_count | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 14529929 | \n",
" jaketapper | \n",
" Tapper, Jake | \n",
" CNN | \n",
" M | \n",
" 1305680 | \n",
" 718.00 | \n",
"
\n",
" \n",
" 50325797 | \n",
" chucktodd | \n",
" Todd, Chuck | \n",
" NBC News | \n",
" M | \n",
" 1781247 | \n",
" 689.00 | \n",
"
\n",
" \n",
" 19107878 | \n",
" GlennThrush | \n",
" Thrush, Glenn H. | \n",
" New York Times | \n",
" M | \n",
" 308181 | \n",
" 626.00 | \n",
"
\n",
" \n",
" 13524182 | \n",
" daveweigel | \n",
" Weigel, David | \n",
" Washington Post | \n",
" M | \n",
" 332344 | \n",
" 622.00 | \n",
"
\n",
" \n",
" 61734492 | \n",
" Fahrenthold | \n",
" Fahrenthold, David | \n",
" Washington Post | \n",
" M | \n",
" 451778 | \n",
" 608.00 | \n",
"
\n",
" \n",
" 31127446 | \n",
" markknoller | \n",
" Knoller, Mark | \n",
" CBS News | \n",
" M | \n",
" 301474 | \n",
" 602.00 | \n",
"
\n",
" \n",
" 18678924 | \n",
" jmartNYT | \n",
" Martin, Jonathan | \n",
" New York Times | \n",
" M | \n",
" 197322 | \n",
" 587.00 | \n",
"
\n",
" \n",
" 39155029 | \n",
" mkraju | \n",
" Raju, Manu K. | \n",
" CNN | \n",
" M | \n",
" 88366 | \n",
" 543.00 | \n",
"
\n",
" \n",
" 85131054 | \n",
" jeffzeleny | \n",
" Zeleny, Jeff | \n",
" CNN | \n",
" M | \n",
" 244114 | \n",
" 535.00 | \n",
"
\n",
" \n",
" 16930125 | \n",
" edatpost | \n",
" O’Keefe, Edward | \n",
" Washington Post | \n",
" M | \n",
" 58670 | \n",
" 529.00 | \n",
"
\n",
" \n",
" 59676104 | \n",
" danbalz | \n",
" Balz, Daniel | \n",
" Washington Post | \n",
" M | \n",
" 90819 | \n",
" 510.00 | \n",
"
\n",
" \n",
" 21316253 | \n",
" ZekeJMiller | \n",
" Miller, Zeke J. | \n",
" Time Magazine | \n",
" M | \n",
" 198517 | \n",
" 495.00 | \n",
"
\n",
" \n",
" 12354832 | \n",
" kasie | \n",
" Hunt, Kasie | \n",
" NBC News | \n",
" F | \n",
" 187357 | \n",
" 494.00 | \n",
"
\n",
" \n",
" 130945778 | \n",
" mollyesque | \n",
" Ball, Molly | \n",
" The Atlantic | \n",
" F | \n",
" 116857 | \n",
" 491.00 | \n",
"
\n",
" \n",
" 15463671 | \n",
" samstein | \n",
" Stein, Sam | \n",
" Huffington Post | \n",
" M | \n",
" 313211 | \n",
" 482.00 | \n",
"
\n",
" \n",
" 46176168 | \n",
" MajorCBS | \n",
" Garrett, Major | \n",
" CBS News | \n",
" M | \n",
" 178640 | \n",
" 482.00 | \n",
"
\n",
" \n",
" 123327472 | \n",
" peterbakernyt | \n",
" Baker, Peter | \n",
" New York Times | \n",
" M | \n",
" 96956 | \n",
" 477.00 | \n",
"
\n",
" \n",
" 21252618 | \n",
" JakeSherman | \n",
" Sherman, Jacob S. | \n",
" Politico | \n",
" M | \n",
" 81762 | \n",
" 474.00 | \n",
"
\n",
" \n",
" 16187637 | \n",
" ChadPergram | \n",
" Pergram, Chad | \n",
" Fox News | \n",
" M | \n",
" 59305 | \n",
" 469.00 | \n",
"
\n",
" \n",
" 89820928 | \n",
" mitchellreports | \n",
" Mitchell, Andrea | \n",
" NBC News | \n",
" F | \n",
" 1388543 | \n",
" 468.00 | \n",
"
\n",
" \n",
" 259395895 | \n",
" JohnJHarwood | \n",
" Harwood, John | \n",
" CNBC | \n",
" M | \n",
" 149040 | \n",
" 464.00 | \n",
"
\n",
" \n",
" 22771961 | \n",
" Acosta | \n",
" Acosta, Jim | \n",
" CNN | \n",
" M | \n",
" 350650 | \n",
" 458.00 | \n",
"
\n",
" \n",
" 108617810 | \n",
" DanaBashCNN | \n",
" Bash, Dana | \n",
" CNN | \n",
" F | \n",
" 281861 | \n",
" 454.00 | \n",
"
\n",
" \n",
" 46557945 | \n",
" StevenTDennis | \n",
" Dennis, Steven T. | \n",
" Bloomberg News | \n",
" M | \n",
" 55762 | \n",
" 446.00 | \n",
"
\n",
" \n",
" 15931637 | \n",
" jonkarl | \n",
" Karl, Jonathan | \n",
" ABC News | \n",
" M | \n",
" 183467 | \n",
" 441.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" screen_name name organization gender \\\n",
"user_id \n",
"14529929 jaketapper Tapper, Jake CNN M \n",
"50325797 chucktodd Todd, Chuck NBC News M \n",
"19107878 GlennThrush Thrush, Glenn H. New York Times M \n",
"13524182 daveweigel Weigel, David Washington Post M \n",
"61734492 Fahrenthold Fahrenthold, David Washington Post M \n",
"31127446 markknoller Knoller, Mark CBS News M \n",
"18678924 jmartNYT Martin, Jonathan New York Times M \n",
"39155029 mkraju Raju, Manu K. CNN M \n",
"85131054 jeffzeleny Zeleny, Jeff CNN M \n",
"16930125 edatpost O’Keefe, Edward Washington Post M \n",
"59676104 danbalz Balz, Daniel Washington Post M \n",
"21316253 ZekeJMiller Miller, Zeke J. Time Magazine M \n",
"12354832 kasie Hunt, Kasie NBC News F \n",
"130945778 mollyesque Ball, Molly The Atlantic F \n",
"15463671 samstein Stein, Sam Huffington Post M \n",
"46176168 MajorCBS Garrett, Major CBS News M \n",
"123327472 peterbakernyt Baker, Peter New York Times M \n",
"21252618 JakeSherman Sherman, Jacob S. Politico M \n",
"16187637 ChadPergram Pergram, Chad Fox News M \n",
"89820928 mitchellreports Mitchell, Andrea NBC News F \n",
"259395895 JohnJHarwood Harwood, John CNBC M \n",
"22771961 Acosta Acosta, Jim CNN M \n",
"108617810 DanaBashCNN Bash, Dana CNN F \n",
"46557945 StevenTDennis Dennis, Steven T. Bloomberg News M \n",
"15931637 jonkarl Karl, Jonathan ABC News M \n",
"\n",
" followers_count journalist_follower_count \n",
"user_id \n",
"14529929 1305680 718.00 \n",
"50325797 1781247 689.00 \n",
"19107878 308181 626.00 \n",
"13524182 332344 622.00 \n",
"61734492 451778 608.00 \n",
"31127446 301474 602.00 \n",
"18678924 197322 587.00 \n",
"39155029 88366 543.00 \n",
"85131054 244114 535.00 \n",
"16930125 58670 529.00 \n",
"59676104 90819 510.00 \n",
"21316253 198517 495.00 \n",
"12354832 187357 494.00 \n",
"130945778 116857 491.00 \n",
"15463671 313211 482.00 \n",
"46176168 178640 482.00 \n",
"123327472 96956 477.00 \n",
"21252618 81762 474.00 \n",
"16187637 59305 469.00 \n",
"89820928 1388543 468.00 \n",
"259395895 149040 464.00 \n",
"22771961 350650 458.00 \n",
"108617810 281861 454.00 \n",
"46557945 55762 446.00 \n",
"15931637 183467 441.00 "
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"male_follower_to_journalist_followed_df = journalist_following_summary(follower_to_journalist_followed_df[follower_to_journalist_followed_df.gender == 'M'])\n",
"male_follower_to_journalist_followed_df.to_csv('output/journalists_followed_by_male_journalists.csv')\n",
"male_follower_to_journalist_followed_df[journalist_following_summary_fields].head(25)"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
" avg_followed | \n",
"
\n",
" \n",
" index | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 100333 | \n",
" 62.3% | \n",
" 71.41 | \n",
"
\n",
" \n",
" F | \n",
" 60757 | \n",
" 37.7% | \n",
" 55.23 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage avg_followed\n",
"index \n",
"M 100333 62.3% 71.41\n",
"F 60757 37.7% 55.23"
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journalist_followed_gender_summary(follower_to_journalist_followed_df[follower_to_journalist_followed_df.gender == 'M'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Top 100 journalists followed by male beltway journalists"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" M | \n",
" 77 | \n",
" 77.0% | \n",
"
\n",
" \n",
" F | \n",
" 23 | \n",
" 23.0% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count percentage\n",
"M 77 77.0%\n",
"F 23 23.0%"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_journalist_followed_gender_summary(male_follower_to_journalist_followed_df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Merge all"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"screen_name 2292\n",
"name 2292\n",
"organization 2292\n",
"position 2292\n",
"gender 2292\n",
"followers_count 2292\n",
"following_count 2292\n",
"tweet_count 2292\n",
"user_created_at 2292\n",
"verified 2292\n",
"protected 2292\n",
"original 2292\n",
"quote 2292\n",
"reply 2292\n",
"retweet 2292\n",
"tweets_in_dataset 2292\n",
"mention_count 2292\n",
"mentioning_count 2292\n",
"mention_count_by_female 2292\n",
"mentioning_count_by_female 2292\n",
"mention_count_by_male 2292\n",
"mentioning_count_by_male 2292\n",
"retweet_count 2292\n",
"retweeting_count 2292\n",
"retweet_count_by_female 2292\n",
"retweeting_count_by_female 2292\n",
"retweet_count_by_male 2292\n",
"retweeting_count_by_male 2292\n",
"reply_to_count 2292\n",
"replying_count 2292\n",
"reply_to_count_by_female 2292\n",
"replying_count_by_female 2292\n",
"reply_to_count_by_male 2292\n",
"replying_count_by_male 2292\n",
"journalist_follower_count 2292\n",
"journalist_follower_count_by_female 2292\n",
"journalist_follower_count_by_male 2292\n",
"dtype: int64"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mention by all\n",
"user_merge_df = user_summary_df.join(journalists_mention_summary_df[['mention_count', 'mentioning_count']])\n",
"# Mention by female\n",
"user_merge_df = user_merge_df.join(journalists_mentioned_by_female_summary_df[['mention_count', 'mentioning_count']], rsuffix='_by_female')\n",
"# Mention by male\n",
"user_merge_df = user_merge_df.join(journalists_mentioned_by_male_summary_df[['mention_count', 'mentioning_count']], rsuffix='_by_male')\n",
"# Retweet by all\n",
"user_merge_df = user_merge_df.join(journalists_retweet_summary_df[['retweet_count', 'retweeting_count']])\n",
"# Retweet by female\n",
"user_merge_df = user_merge_df.join(journalists_retweeted_by_female_summary_df[['retweet_count', 'retweeting_count']], rsuffix='_by_female')\n",
"# Retweet by male\n",
"user_merge_df = user_merge_df.join(journalists_retweeted_by_male_summary_df[['retweet_count', 'retweeting_count']], rsuffix='_by_male')\n",
"# Reply by all\n",
"user_merge_df = user_merge_df.join(journalists_reply_summary_df[['reply_to_count', 'replying_count']])\n",
"# Reply by female\n",
"user_merge_df = user_merge_df.join(journalists_replied_to_by_female_summary_df[['reply_to_count', 'replying_count']], rsuffix='_by_female')\n",
"# Reply by male\n",
"user_merge_df = user_merge_df.join(journalists_replied_to_by_male_summary_df[['reply_to_count', 'replying_count']], rsuffix='_by_male')\n",
"# Follows all\n",
"user_merge_df = user_merge_df.join(follower_to_journalist_followed_summary_df[['journalist_follower_count']])\n",
"# Follows female\n",
"user_merge_df = user_merge_df.join(female_follower_to_journalist_followed_df[['journalist_follower_count']], rsuffix='_by_female')\n",
"# Follows male\n",
"user_merge_df = user_merge_df.join(male_follower_to_journalist_followed_df[['journalist_follower_count']], rsuffix='_by_male')\n",
"user_merge_df.fillna(0, inplace=True)\n",
"# Write to CSV\n",
"user_merge_df.to_csv('output/journalists_summary.csv')\n",
"user_merge_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" screen_name | \n",
" name | \n",
" organization | \n",
" position | \n",
" gender | \n",
" followers_count | \n",
" following_count | \n",
" tweet_count | \n",
" user_created_at | \n",
" verified | \n",
" ... | \n",
" retweeting_count_by_male | \n",
" reply_to_count | \n",
" replying_count | \n",
" reply_to_count_by_female | \n",
" replying_count_by_female | \n",
" reply_to_count_by_male | \n",
" replying_count_by_male | \n",
" journalist_follower_count | \n",
" journalist_follower_count_by_female | \n",
" journalist_follower_count_by_male | \n",
"
\n",
" \n",
" user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 23455653 | \n",
" abettel | \n",
" Bettelheim, Adriel | \n",
" Politico | \n",
" Health Care Editor | \n",
" F | \n",
" 2664 | \n",
" 1055 | \n",
" 15990 | \n",
" Mon Mar 09 16:32:20 +0000 2009 | \n",
" True | \n",
" ... | \n",
" 16.00 | \n",
" 3.00 | \n",
" 3.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 3.00 | \n",
" 3.00 | \n",
" 179.00 | \n",
" 80.00 | \n",
" 99.00 | \n",
"
\n",
" \n",
" 33919343 | \n",
" AshleyRParker | \n",
" Parker, Ashley | \n",
" Washington Post | \n",
" White House Reporter | \n",
" F | \n",
" 122382 | \n",
" 2342 | \n",
" 12433 | \n",
" Tue Apr 21 14:28:57 +0000 2009 | \n",
" True | \n",
" ... | \n",
" 172.00 | \n",
" 26.00 | \n",
" 18.00 | \n",
" 4.00 | \n",
" 4.00 | \n",
" 22.00 | \n",
" 14.00 | \n",
" 777.00 | \n",
" 339.00 | \n",
" 438.00 | \n",
"
\n",
" \n",
" 18580432 | \n",
" b_fung | \n",
" Fung, Brian | \n",
" Washington Post | \n",
" Tech Reporter | \n",
" M | \n",
" 16558 | \n",
" 2062 | \n",
" 44799 | \n",
" Sat Jan 03 15:15:57 +0000 2009 | \n",
" True | \n",
" ... | \n",
" 22.00 | \n",
" 93.00 | \n",
" 17.00 | \n",
" 10.00 | \n",
" 6.00 | \n",
" 83.00 | \n",
" 11.00 | \n",
" 221.00 | \n",
" 94.00 | \n",
" 127.00 | \n",
"
\n",
" \n",
" 399225358 | \n",
" b_muzz | \n",
" Murray, Brendan | \n",
" Bloomberg News | \n",
" Managing Editor, U.S. Economy | \n",
" M | \n",
" 624 | \n",
" 382 | \n",
" 360 | \n",
" Thu Oct 27 05:34:05 +0000 2011 | \n",
" True | \n",
" ... | \n",
" 2.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 45.00 | \n",
" 13.00 | \n",
" 32.00 | \n",
"
\n",
" \n",
" 18834692 | \n",
" becca_milfeld | \n",
" Milfeld, Becca | \n",
" Agence France-Presse | \n",
" English Desk Editor and Journalist | \n",
" F | \n",
" 483 | \n",
" 993 | \n",
" 1484 | \n",
" Sat Jan 10 13:58:43 +0000 2009 | \n",
" False | \n",
" ... | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 18.00 | \n",
" 9.00 | \n",
" 9.00 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 37 columns
\n",
"
"
],
"text/plain": [
" screen_name name organization \\\n",
"user_id \n",
"23455653 abettel Bettelheim, Adriel Politico \n",
"33919343 AshleyRParker Parker, Ashley Washington Post \n",
"18580432 b_fung Fung, Brian Washington Post \n",
"399225358 b_muzz Murray, Brendan Bloomberg News \n",
"18834692 becca_milfeld Milfeld, Becca Agence France-Presse \n",
"\n",
" position gender followers_count \\\n",
"user_id \n",
"23455653 Health Care Editor F 2664 \n",
"33919343 White House Reporter F 122382 \n",
"18580432 Tech Reporter M 16558 \n",
"399225358 Managing Editor, U.S. Economy M 624 \n",
"18834692 English Desk Editor and Journalist F 483 \n",
"\n",
" following_count tweet_count user_created_at \\\n",
"user_id \n",
"23455653 1055 15990 Mon Mar 09 16:32:20 +0000 2009 \n",
"33919343 2342 12433 Tue Apr 21 14:28:57 +0000 2009 \n",
"18580432 2062 44799 Sat Jan 03 15:15:57 +0000 2009 \n",
"399225358 382 360 Thu Oct 27 05:34:05 +0000 2011 \n",
"18834692 993 1484 Sat Jan 10 13:58:43 +0000 2009 \n",
"\n",
" verified ... \\\n",
"user_id ... \n",
"23455653 True ... \n",
"33919343 True ... \n",
"18580432 True ... \n",
"399225358 True ... \n",
"18834692 False ... \n",
"\n",
" retweeting_count_by_male reply_to_count replying_count \\\n",
"user_id \n",
"23455653 16.00 3.00 3.00 \n",
"33919343 172.00 26.00 18.00 \n",
"18580432 22.00 93.00 17.00 \n",
"399225358 2.00 0.00 0.00 \n",
"18834692 0.00 0.00 0.00 \n",
"\n",
" reply_to_count_by_female replying_count_by_female \\\n",
"user_id \n",
"23455653 0.00 0.00 \n",
"33919343 4.00 4.00 \n",
"18580432 10.00 6.00 \n",
"399225358 0.00 0.00 \n",
"18834692 0.00 0.00 \n",
"\n",
" reply_to_count_by_male replying_count_by_male \\\n",
"user_id \n",
"23455653 3.00 3.00 \n",
"33919343 22.00 14.00 \n",
"18580432 83.00 11.00 \n",
"399225358 0.00 0.00 \n",
"18834692 0.00 0.00 \n",
"\n",
" journalist_follower_count journalist_follower_count_by_female \\\n",
"user_id \n",
"23455653 179.00 80.00 \n",
"33919343 777.00 339.00 \n",
"18580432 221.00 94.00 \n",
"399225358 45.00 13.00 \n",
"18834692 18.00 9.00 \n",
"\n",
" journalist_follower_count_by_male \n",
"user_id \n",
"23455653 99.00 \n",
"33919343 438.00 \n",
"18580432 127.00 \n",
"399225358 32.00 \n",
"18834692 9.00 \n",
"\n",
"[5 rows x 37 columns]"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_merge_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Describe a row"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"screen_name burgessev\n",
"name Everett, John B.\n",
"organization Politico\n",
"position Congressional Reporter\n",
"gender M\n",
"followers_count 31010\n",
"following_count 1782\n",
"tweet_count 27294\n",
"user_created_at Mon Nov 07 14:22:19 +0000 2011\n",
"verified True\n",
"protected False\n",
"original 836.00\n",
"quote 344.00\n",
"reply 275.00\n",
"retweet 218.00\n",
"tweets_in_dataset 1,673.00\n",
"mention_count 212.00\n",
"mentioning_count 46.00\n",
"mention_count_by_female 164.00\n",
"mentioning_count_by_female 20.00\n",
"mention_count_by_male 48.00\n",
"mentioning_count_by_male 26.00\n",
"retweet_count 1,836.00\n",
"retweeting_count 289.00\n",
"retweet_count_by_female 748.00\n",
"retweeting_count_by_female 122.00\n",
"retweet_count_by_male 1,088.00\n",
"retweeting_count_by_male 167.00\n",
"reply_to_count 238.00\n",
"replying_count 79.00\n",
"reply_to_count_by_female 78.00\n",
"replying_count_by_female 30.00\n",
"reply_to_count_by_male 160.00\n",
"replying_count_by_male 49.00\n",
"journalist_follower_count 570.00\n",
"journalist_follower_count_by_female 265.00\n",
"journalist_follower_count_by_male 305.00\n",
"Name: 407013776, dtype: object"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_merge_df.loc['407013776']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* followers_count: The number of followers (including non-journalists)\n",
"* following_count: The number of friends, i.e., accounts that @burgessev is following including non-journalists)\n",
"* tweet_count: The number of lifetime tweets posted by @burgessev.\n",
"* original: The number of original tweets posted by @burgessev in this dataset.\n",
"* quote: The number of quote tweets posted by @burgessev in this dataset.\n",
"* reply: The number of reply tweets posted by @burgessev in this dataset.\n",
"* retweet: The number of retweets posted by @burgessev in this dataset.\n",
"* tweets_in_dataset: The total number of tweets posted by @burgessev in this dataset. Equal to original + quote + reply + retweet\n",
"* mention_count: The number of mentions of @burgessev by journalists. Equal to mention_count_by_female + mention_count_by_male.\n",
"* mentioning_count: The number of journalists that mentioned @burgessev. Equal to mentioning_count_by_female + mentioning_count_by_male.\n",
"* mention_count_by_female / mention_count_by_male: The number of mentions of @burgessev by female / male journalists.\n",
"* mentioning_count_by_female / mentioning_count_by_male: The number of female / male journalists that mentioned @burgessev.\n",
"* retweet_count: The number of retweets of @burgessev by journalists. Equal to retweet_count_by_female + retweet_count_by_male.\n",
"* retweeting_count: The number of journalists that retweeted @burgessev. Equal to retweeting_count_by_female + retweeting_count_by_male.\n",
"* retweet_count_by_female / retweet_count_by_male: The number of retweets of @burgessev by female / male journalists.\n",
"* retweeting_count_by_female / retweeting_count_by_male: The number of female / male journalists that retweeted @burgessev.\n",
"* reply_count: The number of replies to @burgessev by journalists. Equal to reply_count_by_female + reply_count_by_male.\n",
"* replying_count: The number of journalists that replied to @burgessev. Equal to replying_count_by_female + replying_count_by_male.\n",
"* reply_count_by_female / reply_count_by_male: The number of replies to @burgessev by female / male journalists.\n",
"* replying_count_by_female / replying_count_by_male: The number of female / male journalists that replied to @burgessev.\n",
"* journalist_follower_count: The number of journalists following @burgessev. Equal to journalist_follower_count_by_female + journalist_follower_count_by_male.\n",
"* journalist_follower_count_by_female / journalist_follower_count_by_female: The number of female / male journalists following @burgessev."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"navigate_num": "#000000",
"navigate_text": "#333333",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700",
"sidebar_border": "#EEEEEE",
"wrapper_background": "#FFFFFF"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "512px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": true,
"toc_section_display": "block",
"toc_window_display": false,
"widenNotebook": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}