{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Retweets and quotes\n", "Hereafter referring to retweets and quotes as retweets." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data prep" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load the data and count." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:root:Loading from tweets/6eea2088e010437da4b6031c2abffdc9_001.json.gz\n", "DEBUG:root:Loaded 50000\n", "DEBUG:root:Loaded 100000\n", "DEBUG:root:Loaded 150000\n", "DEBUG:root:Loaded 200000\n", "DEBUG:root:Loaded 250000\n", "DEBUG:root:Loaded 300000\n", "INFO:root:Loading from tweets/a7bcdbde7a104285b92fe26e286f2543_001.json.gz\n", "DEBUG:root:Loaded 350000\n", "DEBUG:root:Loaded 400000\n", "DEBUG:root:Loaded 450000\n", "DEBUG:root:Loaded 500000\n", "DEBUG:root:Loaded 550000\n", "DEBUG:root:Loaded 600000\n", "INFO:root:Loading from tweets/e1c824ff2b3c4c5a9a93a16e5036d09a_001.json.gz\n", "DEBUG:root:Loaded 650000\n", "DEBUG:root:Loaded 700000\n", "DEBUG:root:Loaded 750000\n" ] } ], "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import logging\n",
    "from dateutil.parser import parse as date_parse\n",
    "from utils import load_tweet_df, tweet_type\n",
    "\n",
    "logger = logging.getLogger()\n",
    "logger.setLevel(logging.DEBUG)\n",
    "\n",
    "# Simplify the tweet on load: keep only the fields needed for the retweet analysis.\n",
    "# Returns None when the tweet carries neither a 'retweeted_status' nor a 'quoted_status'.\n",
    "def retweet_transform(tweet):\n",
    "    # Use the retweeted tweet if present, otherwise fall back to the quoted tweet.\n",
    "    retweet = tweet.get('retweeted_status') or tweet.get('quoted_status')\n",
    "    if retweet:\n",
    "        return {\n",
    "            'tweet_id': tweet['id_str'],\n",
    "            'user_id': tweet['user']['id_str'],\n",
    "            'screen_name': tweet['user']['screen_name'],\n",
    "            'retweet_user_id': retweet['user']['id_str'],\n",
    "            'retweet_screen_name': retweet['user']['screen_name'],\n",
    "            'tweet_created_at': date_parse(tweet['created_at'])\n",
    "        }\n",
    "    return None\n",
    "\n",
    "retweet_df = load_tweet_df(retweet_transform, ['tweet_id', 'user_id', 'screen_name', 'retweet_user_id',\n",
    "                                               'retweet_screen_name', 'tweet_created_at'])\n"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Number of retweets found in the dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "retweet_user_id 398988\n", "dtype: int64" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_df[['retweet_user_id']].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The retweet data\n", "Each retweet consists of the tweet id, the screen name and user id that is retweeting,\n", "and the screen_name and user_id that is retweeted." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_iduser_idscreen_nameretweet_user_idretweet_screen_nametweet_created_at
0847787664963239936285772181akesslerdc85131054jeffzeleny2017-03-31 12:28:25+00:00
1847634105118318594285772181akesslerdc128558424erin_pelton2017-03-31 02:18:13+00:00
2847617579627630592285772181akesslerdc318502583ksacknyt2017-03-31 01:12:33+00:00
3847601029654880258285772181akesslerdc58504135shaneharris2017-03-31 00:06:47+00:00
4847388672785694720285772181akesslerdc22772264carolelee2017-03-30 10:02:57+00:00
\n", "
" ], "text/plain": [ " tweet_id user_id screen_name retweet_user_id \\\n", "0 847787664963239936 285772181 akesslerdc 85131054 \n", "1 847634105118318594 285772181 akesslerdc 128558424 \n", "2 847617579627630592 285772181 akesslerdc 318502583 \n", "3 847601029654880258 285772181 akesslerdc 58504135 \n", "4 847388672785694720 285772181 akesslerdc 22772264 \n", "\n", " retweet_screen_name tweet_created_at \n", "0 jeffzeleny 2017-03-31 12:28:25+00:00 \n", "1 erin_pelton 2017-03-31 02:18:13+00:00 \n", "2 ksacknyt 2017-03-31 01:12:33+00:00 \n", "3 shaneharris 2017-03-31 00:06:47+00:00 \n", "4 carolelee 2017-03-30 10:02:57+00:00 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create lookup of retweeted user ids to screen names" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "retweet_screen_name 45104\n", "dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# From the retweets, extract map of user ids to screen names,\n",
    "# taking the screen name from each retweeted user's most recent retweet.\n",
    "latest_retweet_idx = retweet_df.groupby('retweet_user_id')['tweet_created_at'].idxmax()\n",
    "# Select rows and columns in a single .loc call (the .ix indexer no longer exists in pandas).\n",
    "retweet_user_id_lookup_df = retweet_df.loc[latest_retweet_idx, ['retweet_user_id', 'retweet_screen_name']].set_index(['retweet_user_id'])\n",
    "retweet_user_id_lookup_df.count()"
 ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_screen_name
retweet_user_id
100002112whyyradiotimes
100005598hotelkeys
1000228238adwooldridge
100026898tvnewzted
1000318321AJGuglielmi
\n", "
" ], "text/plain": [ " retweet_screen_name\n", "retweet_user_id \n", "100002112 whyyradiotimes\n", "100005598 hotelkeys\n", "1000228238 adwooldridge\n", "100026898 tvnewzted\n", "1000318321 AJGuglielmi" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_user_id_lookup_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create lookup of user ids to screen names" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "screen_name 1836\n", "dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# From the users (not the retweets), extract map of user ids to screen names,\n",
    "# taking the screen name from each user's most recent retweet.\n",
    "latest_tweet_idx = retweet_df.groupby('user_id')['tweet_created_at'].idxmax()\n",
    "# Select rows and columns in a single .loc call (the .ix indexer no longer exists in pandas).\n",
    "user_id_lookup_df = retweet_df.loc[latest_tweet_idx, ['user_id', 'screen_name']].set_index(['user_id'])\n",
    "user_id_lookup_df.count()"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Group retweets by retweeted user id" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "retweet_count 45104\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# Group by retweeted user id\n",
    "# This count should match the retweeted user id lookup count\n",
    "retweet_summary_user_id_df = pd.DataFrame(retweet_df.groupby('retweet_user_id').size(), columns=['retweet_count'])\n",
    "retweet_summary_user_id_df.count()"
 ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_count
retweet_user_id
1000021121
1000055981
10002282382
1000268981
10003183212
\n", "
" ], "text/plain": [ " retweet_count\n", "retweet_user_id \n", "100002112 1\n", "100005598 1\n", "1000228238 2\n", "100026898 1\n", "1000318321 2" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_summary_user_id_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add back in the retweet screen names" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "retweet_count 45104\n", "retweet_screen_name 45104\n", "dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# Attach each retweeted user's screen name via an index join with the user id map\n",
    "retweet_summary_screen_name_df = retweet_summary_user_id_df.join(retweet_user_id_lookup_df, how='left')\n",
    "retweet_summary_screen_name_df.count()"
 ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_countretweet_screen_name
retweet_user_id
1000021121whyyradiotimes
1000055981hotelkeys
10002282382adwooldridge
1000268981tvnewzted
10003183212AJGuglielmi
\n", "
" ], "text/plain": [ " retweet_count retweet_screen_name\n", "retweet_user_id \n", "100002112 1 whyyradiotimes\n", "100005598 1 hotelkeys\n", "1000228238 2 adwooldridge\n", "100026898 1 tvnewzted\n", "1000318321 2 AJGuglielmi" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_summary_screen_name_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add user types for retweets" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "type 13160\n", "dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load lookups of known users\n", "from utils import load_user_type_lookup_df\n", "\n", "user_type_lookup_df = load_user_type_lookup_df()[['type']]\n", "user_type_lookup_df.count()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
type
user_id
2345626885journalists
780221130journalists
285772181journalists
29607664journalists
9484732journalists
\n", "
" ], "text/plain": [ " type\n", "user_id \n", "2345626885 journalists\n", "780221130 journalists\n", "285772181 journalists\n", "29607664 journalists\n", "9484732 journalists" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_type_lookup_df.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "media 4538\n", "journalists 3576\n", "government 3055\n", "politicians 817\n", "ngo 250\n", "pundit 195\n", "other 160\n", "other_political 156\n", "cultural 131\n", "academic 129\n", "business 125\n", "foreign_political 28\n", "Name: type, dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_type_lookup_df['type'].value_counts()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "retweet_count 45104\n", "retweet_screen_name 45104\n", "type 45104\n", "dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# Join the retweets and the known users\n",
    "retweet_summary_type_df = retweet_summary_screen_name_df.join(user_type_lookup_df, how='left')\n",
    "# Retweeted users missing from the lookup get type 'unknown'. The filled column is\n",
    "# assigned back because inplace=True on a column selection is unreliable under\n",
    "# pandas copy-on-write.\n",
    "retweet_summary_type_df['type'] = retweet_summary_type_df['type'].fillna('unknown')\n",
    "retweet_summary_type_df.index.name = 'user_id'\n",
    "retweet_summary_type_df.count()"
 ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_countretweet_screen_nametype
user_id
1000021121whyyradiotimesunknown
1000055981hotelkeysunknown
10002282382adwooldridgeunknown
1000268981tvnewztedunknown
10003183212AJGuglielmiunknown
\n", "
" ], "text/plain": [ " retweet_count retweet_screen_name type\n", "user_id \n", "100002112 1 whyyradiotimes unknown\n", "100005598 1 hotelkeys unknown\n", "1000228238 2 adwooldridge unknown\n", "100026898 1 tvnewzted unknown\n", "1000318321 2 AJGuglielmi unknown" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_summary_type_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add number of users retweeting\n", "Which is different than the number of retweets." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_countretweet_screen_nametypeusers_retweeting_countpercent_of_users_retweeting
user_id
1000021121whyyradiotimesunknown10.000545
1000055981hotelkeysunknown10.000545
10002282382adwooldridgeunknown20.001089
1000268981tvnewztedunknown10.000545
10003183212AJGuglielmiunknown20.001089
\n", "
" ], "text/plain": [ " retweet_count retweet_screen_name type \\\n", "user_id \n", "100002112 1 whyyradiotimes unknown \n", "100005598 1 hotelkeys unknown \n", "1000228238 2 adwooldridge unknown \n", "100026898 1 tvnewzted unknown \n", "1000318321 2 AJGuglielmi unknown \n", "\n", " users_retweeting_count percent_of_users_retweeting \n", "user_id \n", "100002112 1 0.000545 \n", "100005598 1 0.000545 \n", "1000228238 2 0.001089 \n", "100026898 1 0.000545 \n", "1000318321 2 0.001089 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# One row per (retweeted user, retweeting user) pair, so that size() below counts\n",
    "# distinct retweeting users rather than retweets.\n",
    "retweet_user_id_per_user_df = retweet_df[['retweet_user_id', 'user_id']].drop_duplicates()\n",
    "retweet_user_id_per_user_summary_df = pd.DataFrame(retweet_user_id_per_user_df.groupby('retweet_user_id').size(), columns=['users_retweeting_count'])\n",
    "# Name the index 'user_id' to match the index name of retweet_summary_type_df.\n",
    "retweet_user_id_per_user_summary_df.index.name = 'user_id'\n",
    "# Join with retweet_summary_type_df\n",
    "retweet_summary_df = retweet_summary_type_df.join(retweet_user_id_per_user_summary_df)\n",
    "# Despite the column name, this is a fraction (0-1) of all retweeting users; it is not multiplied by 100.\n",
    "total_retweeting_users = user_id_lookup_df['screen_name'].count()\n",
    "retweet_summary_df['percent_of_users_retweeting'] = retweet_summary_df['users_retweeting_count'] / total_retweeting_users\n",
    "retweet_summary_df.head()\n"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Retweet summary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Retweets per user\n", "For users that made any retweets. It is also possible to figure this out for all users."
] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 1836.000000\n", "mean 217.313725\n", "std 456.459939\n", "min 1.000000\n", "25% 17.000000\n", "50% 70.000000\n", "75% 219.000000\n", "max 6483.000000\n", "Name: user_id, dtype: float64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_df['user_id'].value_counts().describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### How long is the tail?" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_countcumulative_retweet_count_sumcumulative_retweet_count_sum_percentagecumulative_retweeted_userscumulative_retweeted_users_percentage
sumsize
users_retweeting_count
13874928998387490.097118289980.642914
2158605534546090.136869345320.765608
3117162592663250.166233371240.823076
491121536754370.189071386600.857130
585301070839670.210450397300.880853
66824703907910.227553404330.896439
76330541971210.243418409740.908434
859624361030830.258361414100.918100
946083161076910.269910417260.925106
1045352671122260.281277419930.931026
1143412491165670.292157422420.936547
1242492081208160.302806424500.941158
1345681941253840.314255426440.945459
1441541731295380.324666428170.949295
1534811351330190.333391429520.952288
1628061041358250.340424430560.954594
173368971391930.348865431530.956744
183010921422030.356409432450.958784
192537761447400.362768433210.960469
203361841481010.371192434050.962332
212955851510560.378598434900.964216
222963791540190.386024435690.965968
232508581565270.392310436270.967253
242467561589940.398493436830.968495
252514541615080.404794437370.969692
262566551640740.411225437920.970912
272349411664230.417113438330.971821
282665471690880.423792438800.972863
292575431716630.430246439230.973816
302473441741360.436444439670.974792
.....................
25558213499240.877029450680.999202
25785613507800.879174450690.999224
259123713520170.882275450700.999246
26188513529020.884493450710.999268
263104513539470.887112450720.999291
266217723561240.892568450740.999335
268205923581830.897729450760.999379
26998713591700.900203450770.999401
27286113600310.902360450780.999424
27967913607100.904062450790.999446
287198423626940.909035450810.999490
293125413639480.912178450820.999512
294228623662340.917907450840.999557
29896913672030.920336450850.999579
300111913683220.923141450860.999601
303126013695820.926299450870.999623
308205723716390.931454450890.999667
312231123739500.937246450910.999712
33896013749100.939652450920.999734
360135213762620.943041450930.999756
363117313774350.945981450940.999778
366187613793110.950683450950.999800
398300523823160.958214450970.999845
403239813847140.964224450980.999867
411167913863930.968433450990.999889
437163213880250.972523451000.999911
483208513901100.977749451010.999933
489250813926180.984035451020.999956
492271513953330.990839451030.999978
603365513989881.000000451041.000000
\n", "

229 rows × 6 columns

\n", "
" ], "text/plain": [ " retweet_count cumulative_retweet_count_sum \\\n", " sum size \n", "users_retweeting_count \n", "1 38749 28998 38749 \n", "2 15860 5534 54609 \n", "3 11716 2592 66325 \n", "4 9112 1536 75437 \n", "5 8530 1070 83967 \n", "6 6824 703 90791 \n", "7 6330 541 97121 \n", "8 5962 436 103083 \n", "9 4608 316 107691 \n", "10 4535 267 112226 \n", "11 4341 249 116567 \n", "12 4249 208 120816 \n", "13 4568 194 125384 \n", "14 4154 173 129538 \n", "15 3481 135 133019 \n", "16 2806 104 135825 \n", "17 3368 97 139193 \n", "18 3010 92 142203 \n", "19 2537 76 144740 \n", "20 3361 84 148101 \n", "21 2955 85 151056 \n", "22 2963 79 154019 \n", "23 2508 58 156527 \n", "24 2467 56 158994 \n", "25 2514 54 161508 \n", "26 2566 55 164074 \n", "27 2349 41 166423 \n", "28 2665 47 169088 \n", "29 2575 43 171663 \n", "30 2473 44 174136 \n", "... ... ... ... \n", "255 582 1 349924 \n", "257 856 1 350780 \n", "259 1237 1 352017 \n", "261 885 1 352902 \n", "263 1045 1 353947 \n", "266 2177 2 356124 \n", "268 2059 2 358183 \n", "269 987 1 359170 \n", "272 861 1 360031 \n", "279 679 1 360710 \n", "287 1984 2 362694 \n", "293 1254 1 363948 \n", "294 2286 2 366234 \n", "298 969 1 367203 \n", "300 1119 1 368322 \n", "303 1260 1 369582 \n", "308 2057 2 371639 \n", "312 2311 2 373950 \n", "338 960 1 374910 \n", "360 1352 1 376262 \n", "363 1173 1 377435 \n", "366 1876 1 379311 \n", "398 3005 2 382316 \n", "403 2398 1 384714 \n", "411 1679 1 386393 \n", "437 1632 1 388025 \n", "483 2085 1 390110 \n", "489 2508 1 392618 \n", "492 2715 1 395333 \n", "603 3655 1 398988 \n", "\n", " cumulative_retweet_count_sum_percentage \\\n", " \n", "users_retweeting_count \n", "1 0.097118 \n", "2 0.136869 \n", "3 0.166233 \n", "4 0.189071 \n", "5 0.210450 \n", "6 0.227553 \n", "7 0.243418 \n", "8 0.258361 \n", "9 0.269910 \n", "10 0.281277 \n", "11 0.292157 \n", "12 0.302806 \n", "13 0.314255 \n", "14 0.324666 \n", "15 0.333391 \n", "16 0.340424 \n", "17 0.348865 \n", "18 0.356409 \n", "19 0.362768 
\n", "20 0.371192 \n", "21 0.378598 \n", "22 0.386024 \n", "23 0.392310 \n", "24 0.398493 \n", "25 0.404794 \n", "26 0.411225 \n", "27 0.417113 \n", "28 0.423792 \n", "29 0.430246 \n", "30 0.436444 \n", "... ... \n", "255 0.877029 \n", "257 0.879174 \n", "259 0.882275 \n", "261 0.884493 \n", "263 0.887112 \n", "266 0.892568 \n", "268 0.897729 \n", "269 0.900203 \n", "272 0.902360 \n", "279 0.904062 \n", "287 0.909035 \n", "293 0.912178 \n", "294 0.917907 \n", "298 0.920336 \n", "300 0.923141 \n", "303 0.926299 \n", "308 0.931454 \n", "312 0.937246 \n", "338 0.939652 \n", "360 0.943041 \n", "363 0.945981 \n", "366 0.950683 \n", "398 0.958214 \n", "403 0.964224 \n", "411 0.968433 \n", "437 0.972523 \n", "483 0.977749 \n", "489 0.984035 \n", "492 0.990839 \n", "603 1.000000 \n", "\n", " cumulative_retweeted_users \\\n", " \n", "users_retweeting_count \n", "1 28998 \n", "2 34532 \n", "3 37124 \n", "4 38660 \n", "5 39730 \n", "6 40433 \n", "7 40974 \n", "8 41410 \n", "9 41726 \n", "10 41993 \n", "11 42242 \n", "12 42450 \n", "13 42644 \n", "14 42817 \n", "15 42952 \n", "16 43056 \n", "17 43153 \n", "18 43245 \n", "19 43321 \n", "20 43405 \n", "21 43490 \n", "22 43569 \n", "23 43627 \n", "24 43683 \n", "25 43737 \n", "26 43792 \n", "27 43833 \n", "28 43880 \n", "29 43923 \n", "30 43967 \n", "... ... 
\n", "255 45068 \n", "257 45069 \n", "259 45070 \n", "261 45071 \n", "263 45072 \n", "266 45074 \n", "268 45076 \n", "269 45077 \n", "272 45078 \n", "279 45079 \n", "287 45081 \n", "293 45082 \n", "294 45084 \n", "298 45085 \n", "300 45086 \n", "303 45087 \n", "308 45089 \n", "312 45091 \n", "338 45092 \n", "360 45093 \n", "363 45094 \n", "366 45095 \n", "398 45097 \n", "403 45098 \n", "411 45099 \n", "437 45100 \n", "483 45101 \n", "489 45102 \n", "492 45103 \n", "603 45104 \n", "\n", " cumulative_retweeted_users_percentage \n", " \n", "users_retweeting_count \n", "1 0.642914 \n", "2 0.765608 \n", "3 0.823076 \n", "4 0.857130 \n", "5 0.880853 \n", "6 0.896439 \n", "7 0.908434 \n", "8 0.918100 \n", "9 0.925106 \n", "10 0.931026 \n", "11 0.936547 \n", "12 0.941158 \n", "13 0.945459 \n", "14 0.949295 \n", "15 0.952288 \n", "16 0.954594 \n", "17 0.956744 \n", "18 0.958784 \n", "19 0.960469 \n", "20 0.962332 \n", "21 0.964216 \n", "22 0.965968 \n", "23 0.967253 \n", "24 0.968495 \n", "25 0.969692 \n", "26 0.970912 \n", "27 0.971821 \n", "28 0.972863 \n", "29 0.973816 \n", "30 0.974792 \n", "... ... 
\n", "255 0.999202 \n", "257 0.999224 \n", "259 0.999246 \n", "261 0.999268 \n", "263 0.999291 \n", "266 0.999335 \n", "268 0.999379 \n", "269 0.999401 \n", "272 0.999424 \n", "279 0.999446 \n", "287 0.999490 \n", "293 0.999512 \n", "294 0.999557 \n", "298 0.999579 \n", "300 0.999601 \n", "303 0.999623 \n", "308 0.999667 \n", "312 0.999712 \n", "338 0.999734 \n", "360 0.999756 \n", "363 0.999778 \n", "366 0.999800 \n", "398 0.999845 \n", "403 0.999867 \n", "411 0.999889 \n", "437 0.999911 \n", "483 0.999933 \n", "489 0.999956 \n", "492 0.999978 \n", "603 1.000000 \n", "\n", "[229 rows x 6 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# For each distinct users_retweeting_count value: total retweets ('sum') and number of\n",
    "# retweeted users ('size'). String aggregation names are used instead of NumPy callables,\n",
    "# which pandas warns about; the resulting column labels are the same.\n",
    "retweet_grouped_by_users_retweeting_df = retweet_summary_df[['retweet_count', 'users_retweeting_count']].groupby(by='users_retweeting_count').agg(['sum', 'size'])\n",
    "# Running totals and their share of the overall total (stored as fractions, 0-1).\n",
    "retweet_grouped_by_users_retweeting_df['cumulative_retweet_count_sum'] = retweet_grouped_by_users_retweeting_df['retweet_count', 'sum'].cumsum()\n",
    "retweet_grouped_by_users_retweeting_df['cumulative_retweet_count_sum_percentage'] = retweet_grouped_by_users_retweeting_df['cumulative_retweet_count_sum'] / retweet_grouped_by_users_retweeting_df['retweet_count', 'sum'].sum()\n",
    "retweet_grouped_by_users_retweeting_df['cumulative_retweeted_users'] = retweet_grouped_by_users_retweeting_df['retweet_count', 'size'].cumsum()\n",
    "retweet_grouped_by_users_retweeting_df['cumulative_retweeted_users_percentage'] = retweet_grouped_by_users_retweeting_df['cumulative_retweeted_users'] / retweet_grouped_by_users_retweeting_df['retweet_count', 'size'].sum()\n",
    "retweet_grouped_by_users_retweeting_df"
 ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAXsAAAELCAYAAAA4HCbKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xd4FVX6wPHvSS8kAUJApIVAAAMkAUIgAtKWJohtLYiC\niouu3R+i2LGtura1l12xgYJiQ2QXBFGKtAQiJfTQAgiBFNJzy/n9cSeXACkXSDI3N+/nee5zZ87M\nnXnP3Mubw5mZM0prjRBCCM/mZXYAQgghap8keyGEaAAk2QshRAMgyV4IIRoASfZCCNEASLIXQogG\nQJK9EEI0AJLshRCiAZBkL4QQDYCPWTtu1qyZjoyMNGv3QghRL6WkpBzTWkec7edMS/aRkZEkJyeb\ntXshhKiXlFL7zuVz0o0jhBANgCR7IYRoACTZCyFEAyDJXgghGgBJ9kII0QBUm+yVUjOUUkeVUpsr\nWa6UUm8qpXYppTYqpXrWfJhCCCHOhyst+0+AkVUsHwVEG6/JwHvnH5YQQoiaVO119lrrZUqpyCpW\nuRz4TDueb7haKdVYKdVSa324hmIUZtDaeNkdL8pNa/upy7Suerlz2envlW2bM5eXxXQywFoqw8X1\nKig7pbwuynBxvdrYb0XH4XzV0LbqUUw2rbHaNFa7xmKzY7VrrDY7Fpvj3Tlv187pc1UTN1W1Ag6U\nm88wys5I9kqpyTha/7Rt27YGdm0SrcFSCKUFjndLEZQWgq0EbBawW8BuOzltsxrv57PM6ni5ukzb\nqkiqFSTd0xOuEKLWeRsv/zrYV53eQau1/hD4ECAhIcE9nnRus0BBpuOVb7wXHD05X3gcSvKgNN/x\nXnLC8W631mwcXj7Gyxe8y959HWVl76cv8wkA/5CTn/X2PblMeZ32UienUaeVqVPX5bR5RTXLT9vO\nKcsr2nZ1y8svUyc/A47pMqaVnTFRbr1y5W5RVkvxnVJewf7OVUWxV+BgThG/7z5GscVGqVVTYrVR\narFTYrNTYrFRYtOUWu2UWO2OZVY7JWXrWe2UWm2UWB3rWKppLetq6ufrrfD38cLPxws/H2/ntL+v\nF/7ejjI/b4W/r4+jvOzl64Wft3e59bzw9/V2TPt6EVBumwHGu5+PgqfPraFcE8n+INCm3Hxro8w9\nFGZB9l7I2X/mK/9PKMqu+HPe/tCoOQSFOxJq43bg3wj8QyEg1PHuF+x4+QaCTyD4+J9MuF4+lSTt\nSpa5+CMXoqFbtzeL2z7dSG6RxVnm46WMBOpIjP6+Ps55fx8v/IO8CPb1pqmPF/4+3gT4Ot79fb0I\nMN79fbwIKFvfSNoBvhUvK/u8n48X3l71499uTST7ecDdSqnZQB8g15T+emspZG6Fw3/A4Y2Quc3x\nKsg8db2AMGjcFsI7QGR/R0IPbgbBERBsTDdqDn6NJAEL4Ua2/5nHzNX7mJN8gNZNApl7RxIXNg7E\n38cLH2+5irw61SZ7pdSXwCCgmVIqA3gK8AXQWr8PLAAuBXYBhcAttRXsKWxW2P87pP8G+36HgymO\nPnMAvxBo3gU6jYBmnR2JvXFbCGsDgY3rJDwhROVKrXYKS60Ultqc7wUlNoosRlmJo7yg1EZRqY21\ne7NYuycLP28vxsS15InRMTQJ9jO7GvWKK1fjjKtmuQbuqrGIqmKzwr4VsOV72PojFB4D5Q0t4yDx\nb9Cql2O6SXvwkr/0QtSUEquNvGIrJ4osnHC+W8grtlJQUpa0bRSVS9AFpyXzsgReWGrDanf9lJ2X\ngnbhwTwyqgvXJLShqST5c2LaEMdnxVIMqTNhxb8g9wD4Bjta7V2vgA5DHH3qQoizYrHZ2f5nHhsz\nctmfVehM3mWJvHxiL7FWf4WWn48XQX7eBPl6E+Tv45j286Z5SIBzOsjPUR7s70Ogr1Hm72N8xrE8\n2M+bQD9vgv18CPRz9JMr6VI9b+6f7PeugO/ucCT51okw/DmIH
g5+QWZHJkS9s+94ATNX7yN5XzZp\nh044k7ivtyIs0JfQAF9CAn0JDfDhwrBAQgN9CA3wJdQoCw30JSTgZFlIgA/BRrKWfnP35r7J3m6D\n316C3/4JTaNgwg/QfqCcNBXCRVprCkptZOaVcDiniNnrDjB/4yF8vLyIb9OYm/q2I7ZNY+Jah9G2\naZC0nj2ceyb7omz4+hZIXwpx4+DSVxyXPQohKLY4EnhmfgmZeSUcM95PmTbeiy0nu1+C/by5bUAU\nk/q3p0VogIk1EGZwv2RffAI+vwqObIbL3oSeE6Q1Lxoku10zNyWD33ZmOhK5kdDzSiq+oa9JkC8R\nIf5EhPjTs20TIho5pps18qdZiD/xrRsTFuRbx7UQ7sL9kv3/pjmulb9uJnS51OxohKhzVpud33Zk\n8vbSXWzYn0OrxoG0ahLIRS1DuaRTWQL3cyT2RgFEhPjTNNgPPx/pMxeVc69kv2c5pM6C/v8niV7U\nSxabnSKLjWKLjeJSO8VWx2WIecVW8ootp13xYjWueilb5njPLiiloNRGRIg/r10bx5U9Wkl/ujhv\n7pPstYafn4SwtnDJVLOjEcKp2GJjxso9rN+X40jiFpsziRdbHGOvFJXaKLbasbl4/bhSEOLvQ0i5\nq1paNQ4kNCCE0EBfkjqEM6RLc3zlChdRQ9wn2e/8GQ6th7FvyWWVwi2UWu3M33iIVxft4GBOEZ1a\nNKKRvw8Bvt6EBfo6x00J9PUmwNfbeHeMoRJwWpkjqRvJPcCHYD8fvOrJmCrCM7hPsl/5hqNVH1fl\nDbtC1LoDWYV8uXY/XyUf4Fh+KTEtQ3n12jj6RoWbHZoQ58w9kv2xnY5hEIY+5RgFUggTpOzL5u1f\ndvLrjkwUMKRLC27s25ZLoiOkFS7qPfdI9us/c4xxEz/e7EhEA7P9zzxmrdnH2j1ZbPszj2aN/Lh7\ncEeuT2xLq8aBZocnRI1xj2S/9UfoOBRCWpgdiWhAii02Jn26juP5pfRq14RHL23F+D7tCPZ3j38W\nQtQk83/VeX9C9h7oPcnsSISHO5BVyNo9WaTsz2ZjRg7b/8zDYtN8cFMvRnS9wOzwhKhV5if7/asd\n722TzI1DeASrze4cKuDoiRIOnyhmw75s1uzJ4mBOEQAhAT7Etg5jUv8o+kQ1ZXDn5iZHLUTtc49k\n7xMIF8SaHYmox1btPs6/l6ezavdxiiy2U5aFB/vRJ6opky+JIrF9Uzq3CJETrqLBcYNkvwpaJ4CP\nPJBAnJul249y+2cphDfy45qE1nS5IJTmxhgxESH+tAwLkDtQRYNnbrK3WeFoGvS5w9QwRP2ktear\n5AM88f0Wols0YtZtfWgcJI0GISpibrLP3gu2Umh+kalhiPpn258neHXRDn5OO0JSVDjv3dhTEr0Q\nVTA32Wduc7xHdDY1DFF/7M7M54UF21i89QhBft5MG9WFvw2Iwlv64IWokrnJ/vhOx3t4tKlhCPdV\nYrXxx4Fc9h4rYPWe4/z4xyECfL25/y/R3HxxpLTmhXCRucn+xCHwD4OAUFPDEO4pr9jC+P+sYWNG\nLuB40tK4xLbcMySaiBB/k6MTon4xP9mHtjQ1BOF+1u3N4qPle0jel01OYSkvXtWdpA7htGocKA+1\nFuIcmZ/sQyTZN2RaazKyi9iVmc/B7CJSD+QwNyWDiBB/+ncM5+perRkQHWF2mELUe+Ym+7zD0DzG\n1BBE3csuKGXJtqP8vvsYa9JP3tkK4OftxS39Ipk6ojNBfubfBiKEpzD3X1P+EQiRMUkagh1H8pi5\neh9bD59g/f4cbHZN02A/+kY15Y6BUcRcGEqrxkFEhPjLlTVC1ALzkr3dCtoOwfJfdE91othCRlYR\nyfuyeGXhdqx2zUUtQ5l8SRSXdmtJ1wtDZdgCIeqIicneGL8kqKlpIYjakVtk4el5W/h2w0FnWWL7\nprx6TRxtmsojJ4Uwg7kte
4BASfb1ndaa7Ufy+ONADumZBcxNySCnyMKk/u3p2bYJ7cKD6HphqIxP\nI4SJzE/2QU1MC0Gcn4zsQhanHWHeH4dYvz8HAF9vRUK7pjw2+iK6tQozOUIhRBnzu3GkZV/vbD18\ngunztrBmTxYAHSKCeXJMDEO6NKdN0yA5wSqEG3KDlr0k+/pk3d4sbvl4HX4+Xjw8sgsju11A+2bB\nZoclhKiGucneywf8ZagEd3Uop4hNB3M5kFVIRnYRGzNyWL8/h4gQf76782JaN5GTrULUF+Yle21z\nJHo5aeeW9h8vZMS/ljmf+hTs502bpkE8MSaG63u3kYdyC1HPuPQvVik1EngD8Ab+o7V+8bTlbYFP\ngcbGOtO01guq3Ki2g1+jc4lZ1LKiUhvTvt0IwFe3JxHdvBGNg3zlahoh6rFqk71Syht4BxgGZADr\nlFLztNZp5VZ7HPhKa/2eUioGWABEVrlhbQc/6QZwJ5l5Jbz76y6+23CQnEILL/81lsT2ck5FCE/g\nSss+EdiltU4HUErNBi4Hyid7DZR1vocBh6rdqt0OfnJizx0UldqY8nUqi7ceRWvN8K4XcMvFkSRE\nSqIXwlO4kuxbAQfKzWcAfU5bZzqwSCl1DxAM/KXarWo7+ErL3kz7jxcyc80+fkg9yNG8EiYmRTK+\nT1uiW4SYHZoQoobV1Fm2ccAnWutXlVJJwOdKqW5aa3v5lZRSk4HJAHEXBkjL3iRFpTZWpR/jobkb\nySm0MKhzBDclRTKwk4xTJISnciXZHwTalJtvbZSVNwkYCaC1XqWUCgCaAUfLr6S1/hD4ECChbZCW\nZF+3MvNK+GnjIV79eQd5xVYaB/ny4z39uailXP4qhKdzJdmvA6KVUu1xJPnrgRtOW2c/MBT4RCl1\nERAAZFa5VenGqROFpVYWbPqTr5IPsNa44/XiDuHcPrADfdo3JcDX2+QIhRB1odpkr7W2KqXuBhbi\nuKxyhtZ6i1LqGSBZaz0PmAL8Wyn1AI6TtTdrrXXVG5ZLL2vT5oO5PPzNRrb/mYfVrolqFszUEZ3p\n0bYxfdqHy5AGQjQwLvXZG9fMLzit7Mly02lAv7Pas10uvawNxRYbj367iW83HCQ82I/Jl0QxuEtz\nEto1kevkhWjATLwNUks3Tg3LK7Yw+bMUVqUf5++DOvC3AVE0DfYzOywhhBsw95536capEcl7s3j/\nt3RS9mWRV2zlX9fFc0WPVmaHJYRwI+Yme99AU3df3+UWWvjflsNMn5dGSIAPl3SK4Ka+7eRmKCHE\nGcxN9j7+pu6+vtp1NI9/Ld7Joi1HKLXZ6dYqlBk396Z5SIDZoQkh3JS5yd7b19Td1ycFJVZ+SD3E\nlkO5LNzyJ6VWO+P7tuWyuAuJb91YHtwthKiSucneS5J9dbTWrNmTxWPfbWJ3ZgEhAT50vTCU6WO7\n0uUCuRlKCOEaadm7MbtdM+XrP/huw0GaNfLj80mJ9O/YTC6hFEKcNUn2bshm12z78wQfLkvnh9RD\n3DW4A3cPjibQT+52FUKcG+nGcRN2u+aXbUf5dNVeUvZlU1hqw8/bi7sHd2TK8E7SmhdCnBdp2Zus\nxGrj+w0H+XBZOrszC2jVOJBrE9oQ1yaMfh2byRU2QogaYXKyb7h3dx7LL+GpeVtYtiOTvGIrMS1D\neeP6eC7t3hJfby+zwxNCeBiTu3Ea3kOrj+WXMC/1EK8u2o7FrrkyvhWXxV1Iv47h0lUjhKg10o1T\nh77fcJAnvt9MXomVhHZNeOGq7vJUKCFEnZATtHUgt8jCE99vZt4fh0ho14RnLu/GRS1DpCUvhKgz\n0rKvRZl5Jfy2I5PXFm3nSF4JU4Z14u+DOuAjffJCiDomyb6WHMgq5PJ3VpJVUEpkeBBz70iiR9sm\nZoclhGigpBunlry8cDvFFhtz70givk1jac0LIUxlbgbywJa93a75cNlu5v1xiIkXR5IQ2VQ
SvRDC\ndNKNU4OKSm3c8+V6Fm89SlJUOPcNjTY7JCGEAMxO9h7UjVNQYuUfC7ayeOtRHrv0Im4b0F6uthFC\nuA1p2deAA1mF3PCf1RzIKmJcYlv+dkmU2SEJIcQpTEz2Crzq/yiOGdmFXP/havKKLXz5t74kdQg3\nOyQhhDiDecneA7o4dh3N54E5qZwotvDFbX3p3jrM7JCEEKJC5rbs67ElW49w22fJ+Hp58cb18ZLo\nhRBuTVr25+A/y9N58b/b6BjRiM8mJdIyLNDskIQQokrSsj9LC7f8yXM/bWV4TAv++ddYGgc13GGa\nhRD1h7Tsz0JWQSkPzEklrnUYb47rQYBv/T/BLIRoGEy8tbP+JfsZK/ZQZLHx6rXxkuiFEPWKecm+\nnrXs0zPz+fT3vYzqdgEdmzcyOxwhhDgrkuxdkHboBJe/sxJfHy+mjuhidjhCCHHWzOuzD2tj2q7P\nxrH8Em6fmUyQnzdf334xbcODzA5JCCHOmnnJ3i/YtF27KmVfFnd/sYHswlK++FtfSfRCiHpLxt6t\nROqBHCbOWIefjxezJyfRUx48IoSox8wdCM1N7TtewMQZa2kS7MvsyX3lpikhRL0nLfvTFFts3Dlr\nPQCzJkmiF0J4BpeSvVJqpFJqu1Jql1JqWiXrXKuUSlNKbVFKfVGzYdYNrTWPfbeZLYdO8Nq1cdJH\nL4TwGNV24yilvIF3gGFABrBOKTVPa51Wbp1o4BGgn9Y6WynVvLYCri1aa/65cDvfrM/gvqHRDL2o\nhdkhCSFEjXGlZZ8I7NJap2utS4HZwOWnrfM34B2tdTaA1vpozYZZ+z79fS/v/bqbG/q0lccJCiE8\njivJvhVwoNx8hlFWXiegk1JqpVJqtVJqZE0FWBcO5RTx6s87GBDdjOev6IaXV/254UsIIVxRU1fj\n+ADRwCCgNbBMKdVda51TfiWl1GRgMkDbtm1raNfnx27X3D8nFbtd8+zl3eS5sUIIj+RKy/4gUP52\n19ZGWXkZwDyttUVrvQfYgSP5n0Jr/aHWOkFrnRAREXGuMdeoZTszWbsni8fHxBDZzP1v9BJCiHPh\nSrJfB0QrpdorpfyA64F5p63zPY5WPUqpZji6ddJrMM5aUWq18/LC7USE+HN1z9ZmhyOEELWm2mSv\ntbYCdwMLga3AV1rrLUqpZ5RSY43VFgLHlVJpwFJgqtb6eG0FXVMWbDrMlkMneGZsV/x85JYDIYTn\ncqnPXmu9AFhwWtmT5aY18H/Gq16w2zUfr9xDu/AgRnS9wOxwhBCiVjXY5uzc9Rn8kZHLvUOi5eob\nIYTHa5DJPrfIwj//t41e7ZpwZY/TryIVQgjP0yAHQvvX4h0cLyjlk1sSpVUvhGgQGlzLfvufeXy2\nah83JLalW6sws8MRQog60aCSvdaa6fO2EBLgw4PDO5sdjhBC1JkGlez/t/lPVqUfZ8qwTjQJ9jM7\nHCGEqDMNJtkXlFh57qetdLkghHGJ7jFUgxBC1JUGkeztds3fZ63ncG4RT4/tio93g6i2EEI4NYir\ncT75fS/LdmTy7BXd6BMVbnY4QghR5zy+iZuemc8/F25jcOcIbuwj3TdCiIbJo5O9za558Os/8Pfx\n5sWrY2X4YiFEg+XR3Tj/WZ7O+v05/Ou6eFqEBpgdjhBCmMZjW/Y7j+Tx6s87GNG1BZfHX2h2OEII\nYSqPTPZaa574YTPBft48f2V36b4RQjR4HpnsV+46zur0LO4bGk2zRv5mhyOEEKbzuGSvteblRdtp\n1TiQcXL1jRBCAB6Y7H9OO8IfB3K4d2hH/H28zQ5HCCHcgkcl+5zCUl787zbaNwuWZ8oKIUQ5HnXp\n5dS5G8nILuLTWxNlSAQhhCjHYzJi2qET/Jx2hHuHdiSpgwyJIIQQ5XlMsv9i7T78fby4sW87s0MR\nQgi34xHJPr/EyvcbDjE6tiWNg2SceiGEOJ1HJPvPVu0
lv8TKzRdHmh2KEEK4pXqf7AtKrPx7WTqD\nO0cQ27qx2eEIIYRbqvfJfubqfWQXWrhnaLTZoQghhNuq18m+qNTGh8vSGRDdjJ5tm5gdjhBCuK16\nnexnrdnH8YJS7v+LtOqFEKIq9TbZF1tsvP9bOv06htOrXVOzwxFCCLdWb5P9zNX7OJZfwr1DpFUv\nhBDVqZfJPrfIwttLdzEgupk8QFwIIVxQL5P9R8vTyS2yMG1UF7NDEUKIeqHeJXuLzc4Xaw8wpHNz\nul4YZnY4QghRL9S7ZP9z2hGO5Zcwvq88mEQIIVxV75L9F2v206pxIAM7NTc7FCGEqDfqVbLfe6yA\nFbuOcV3vNnh7yUPEhRDCVS4le6XUSKXUdqXULqXUtCrWu1oppZVSCTUX4klfrtuPt5fiut5tamPz\nQgjhsapN9kopb+AdYBQQA4xTSsVUsF4IcB+wpqaDBCix2pibnMHQLs1pERpQG7sQQgiP5UrLPhHY\npbVO11qXArOByytY71ngJaC4BuNzWrjlCMcLShkvDycRQoiz5kqybwUcKDefYZQ5KaV6Am201j/V\nYGyn+GLNPto0DWRAx2a1tQshhPBY532CVinlBbwGTHFh3clKqWSlVHJmZqbL+9idmc/q9Cyu790W\nLzkxK4QQZ82VZH8QKH9GtLVRViYE6Ab8qpTaC/QF5lV0klZr/aHWOkFrnRAREeFykF+u2Y+Pl+Ka\nhNYuf0YIIcRJriT7dUC0Uqq9UsoPuB6YV7ZQa52rtW6mtY7UWkcCq4GxWuvkmgiw1Grnm/UZDO/a\nguYhcmJWCCHORbXJXmttBe4GFgJbga+01luUUs8opcbWdoDLd2aSXWjhr72kVS+EEOfKx5WVtNYL\ngAWnlT1ZybqDzj+sk+ZvPExYoC/9O7re7SOEEOJUbn0HbbHFxqItfzKy6wX4+bh1qEII4dbcOoP+\nuv0oBaU2Lou70OxQhBCiXnPrZP/jH4cJD/ajb5Q8dlAIIc6H2yb7ghIrS7Yd4dLuLfHxdtswhRCi\nXnDbLLp46xGKLXbGxLY0OxQhhKj33DbZz994mBah/vSOlC4cIYQ4X26Z7POKLfy2I5NR3VrK8AhC\nCFED3DLZ/7LtKKVWO6OlC0cIIWqEWyb7/276k+Yh/vRq28TsUIQQwiO4XbIvtdr5bUcmw2JaSBeO\nEELUELdL9hv2Z1NksXFJJxkeQQghaorbJfuVu47hpaBvVLjZoQghhMdwu2S/YtcxYls3JizQ1+xQ\nhBDCY7hVsi8osfJHRi79OkqrXgghapJbJfs/DuRgs2u5kUoIIWqYWyX7lH3ZAPSQSy6FEKJGufTw\nkrqSsj+bTi0aeWR/vcViISMjg+LiYrNDEULUAwEBAbRu3Rpf35rJh26T7O12zfp92R5712xGRgYh\nISFERkailNw/IISonNaa48ePk5GRQfv27Wtkm27TjbMrM58TxVZ6tfPM/vri4mLCw8Ml0QshqqWU\nIjw8vEZ7Atwm2Zf11/dq57n99ZLohRCuqul84VbJvmmwH5HhQWaHIgRKKaZMmeKcf+WVV5g+fXqt\n7jMyMpKrr77aOT937lxuvvnmWt2naDjcJtmv35dNz7ZNpPUr3IK/vz/ffvstx44dq9P9pqSkkJaW\nVqf7FA2DWyT7rIJS0o8VeHQXjqhffHx8mDx5Mq+//voZy/bu3cuQIUOIjY1l6NCh7N+/H4Cbb76Z\ne++9l4svvpioqCjmzp3r/MzLL79M7969iY2N5amnnqp0v1OmTOH5558/ozwrK4srrriC2NhY+vbt\ny8aNGwGYPn06t956K4MGDSIqKoo333zT+ZmZM2eSmJhIfHw8t99+Ozab7ZyPh6j/3CLZrzf66xMi\nJdkL93HXXXcxa9YscnNzTym/5557mDhxIhs3bmT8+PHce++9zmWHDx9mxYoVzJ8/n2nTpgGwaNEi\ndu7cydq1a0lNTSU
lJYVly5ZVuM9rr72W9evXs2vXrlPKn3rqKXr06MHGjRv5xz/+wYQJE5zLtm3b\nxsKFC1m7di1PP/00FouFrVu3MmfOHFauXElqaire3t7MmjWrpg6NqIfc4tLL5H3Z+HorurcKMzsU\nIZxCQ0OZMGECb775JoGBgc7yVatW8e233wJw00038dBDDzmXXXHFFXh5eRETE8ORI0cAR7JftGgR\nPXr0ACA/P5+dO3dyySWXnLFPb29vpk6dygsvvMCoUaOc5StWrOCbb74BYMiQIRw/fpwTJ04AMHr0\naPz9/fH396d58+YcOXKEJUuWkJKSQu/evQEoKiqiefPmNXl4RD3jFsl+w/5sYi4MI8DX2+xQhDjF\n/fffT8+ePbnllltcWt/f3985rbV2vj/yyCPcfvvtLm3jpptu4oUXXqBbt25nvU9vb2+sVitaayZO\nnMgLL7zg0jaE53OLbpwdR/KIaRlqdhhCnKFp06Zce+21fPTRR86yiy++mNmzZwMwa9YsBgwYUOU2\nRowYwYwZM8jPzwfg4MGDHD16FIChQ4dy8ODBU9b39fXlgQceOOV8wYABA5zdML/++ivNmjUjNLTy\nfzNDhw5l7ty5zv1kZWWxb98+V6stPJDpyf54fgnZhRY6RASbHYoQFZoyZcopV+W89dZbfPzxx8TG\nxvL555/zxhtvVPn54cOHc8MNN5CUlET37t3561//Sl5eHna7nV27dtG06Zk3Ek6aNAmr1eqcnz59\nOikpKcTGxjJt2jQ+/fTTKvcZExPDc889x/Dhw4mNjWXYsGEcPnz4LGsuPIkq+69mXUtISNDJycms\n3ZPFtR+s4pNbejOos+f2KW7dupWLLrrI7DCEG9m8eTMzZszgtddeMzsU4aYqyhtKqRStdcLZbsv0\nlv2uo47/2nZs3sjkSISoW926dZNEL+qMWyT7QF9vLgwLrH5lIYQQ58T8ZJ+ZT1REMF5ecuesEELU\nFtOT/e6j+dKFI4QQtczUZF9YauVgThEdIiTZCyFEbTI12adnFgByclYIIWqbS8leKTVSKbVdKbVL\nKTWtguX/p5RKU0ptVEotUUq1c2W7ciWOEELUjWqTvVLKG3gHGAXEAOOUUjGnrbYBSNBaxwJzgX+6\nsvO9xx0t+7ZNZQz7ulBUVMTAgQNNGf3w119/ZcyYMVWuk5qayoIFC5zz8+bN48UXX6zt0KqNo6bl\n5OTw7rvv1tr2XfWPf/zD7BDcTm1/9xV5++23mTFjRq3vx5WWfSKwS2udrrUuBWYDl5dfQWu9VGtd\naMyuBlqDavITAAAcz0lEQVS7svPDOcU0a+QvY+LUkRkzZnDVVVfh7e2ex/v0f2hjx451jhxZ08rf\nnVpdHDVNkn3Nquq7PFtmJPtbb72Vt956q9b340qybwUcKDefYZRVZhLw34oWKKUmK6WSlVLJmZmZ\nHMotolXjANejFedl1qxZXH75yb/TL730Et27dycuLs6ZVAcNGkRycjIAx44dIzIyEoBPPvmEK664\ngmHDhhEZGcnbb7/Na6+9Ro8ePejbty9ZWVlVfr68tWvXkpSURI8ePbj44ovZvn07paWlPPnkk8yZ\nM4f4+HjmzJnDJ598wt13301ubi7t2rXDbrcDUFBQQJs2bbBYLOzevZuRI0fSq1cvBgwYwLZt2yqt\n/80338wdd9xBnz59eOihhygoKODWW28lMTGRHj168MMPP1QYR/fu3cnJyUFrTXh4OJ999hkAEyZM\n4Oeff8ZmszF16lTnePUffPCBc58VjWM/bdo0du/eTXx8PFOnTq003oq+n9TUVPr27UtsbCxXXnkl\n2dnZ1X5vV111FSNHjiQ6Oto5Que0adMoKioiPj6e8ePHV7j/goICRo8eTVxcHN26dWPOnDmA44la\nZcNHJCcnM2jQIMAxpMPEiRMZMGAA7dq149tvv+Whhx6ie/fujBw5EovFUmldIyMjn
esmJiY6h3jO\nzMzk6quvpnfv3vTu3ZuVK1c693XTTTfRr18/brrpJmw2Gw8++CDdunUjNjbWmTxTUlIYOHAgvXr1\nYsSIEc4hIwYNGsTDDz9MYmIinTp1Yvny5RV+9xX9VgEKCwu59tpriYmJ4corr6RPnz7O479o0SKS\nkpLo2bMn11xzjXNMpMoEBQURGRnJ2rVrq1zvfNXoqJdKqRuBBGBgRcu11h8CH4JjuIRDOUVENw+p\nyRDqhad/3ELaoRM1us2YC0N56rKulS4vLS0lPT3dmQT++9//8sMPP7BmzRqCgoKcyboqmzdvZsOG\nDRQXF9OxY0deeuklNmzYwAMPPMBnn33G/fff71KsXbp0Yfny5fj4+LB48WIeffRRvvnmG5555hmS\nk5N5++23AUeiAggLCyM+Pp7ffvuNwYMHM3/+fEaMGIGvry+TJ0/m/fffJzo6mjVr1nDnnXfyyy+/\nVLrvjIwMfv/9d7y9vXn00UcZMmQIM2bMICcnh8TERP7yl7+cEcfSpUtZuXIl7dq1IyoqiuXLlzNh\nwgRWrVrFe++9x0cffURYWBjr1q2jpKSEfv36MXz4cHbu3Okcx15rzdixY1m2bBkvvvgimzdvJjU1\ntdI4K/t+JkyYwFtvvcXAgQN58sknefrpp/nXv/5V5fFOTU1lw4YN+Pv707lzZ+655x5efPFF3n77\n7Spj+N///seFF17ITz/9BHDGuP4V2b17N0uXLiUtLY2kpCS++eYb/vnPf3LllVfy008/ccUVV1T6\n2bCwMDZt2uT8Lc2fP5/77ruPBx54gP79+7N//35GjBjB1q1bAUhLS2PFihUEBgby3nvvsXfvXlJT\nU/Hx8SErKwuLxcI999zDDz/8QEREBHPmzOGxxx5zdplYrVbWrl3LggULePrpp1m8ePEZ3/2JEycq\n/K2+++67NGnShLS0NDZv3kx8fDzg+EP73HPPsXjxYoKDg3nppZd47bXXePLJJ6s8bgkJCSxfvpzE\nxMRqj/G5ciXZHwTalJtvbZSdQin1F+AxYKDWusSVnR/OLWZgJ88dD8edHDt2jMaNGzvnFy9ezC23\n3EJQkON8SUWDcZ1u8ODBhISEEBISQlhYGJdddhkA3bt3dz45yRW5ublMnDiRnTt3opSqssVX5rrr\nrmPOnDkMHjyY2bNnc+edd5Kfn8/vv//ONddc41yvpKTqn94111zj7MZatGgR8+bN45VXXgGguLjY\n+dSp8gYMGMCyZcto164df//73/nwww85ePAgTZo0ITg4mEWLFrFx40bnk6lyc3PZuXNnpePYt23b\nttr6VvT95ObmkpOTw8CBjrbUxIkTT6l7ZYYOHUpYmONZETExMezbt482bdpU8ynH9zplyhQefvhh\nxowZU+3ongCjRo3C19eX7t27Y7PZGDlypHNbe/furfKz48aNc74/8MADgOM4lH9M44kTJ5wt5bFj\nxzqfM7B48WLuuOMOfHwcKa1p06Zs3ryZzZs3M2zYMABsNhstW7Z0buuqq64CoFevXpXGVtlvdcWK\nFdx3330Azv9NAKxevZq0tDT69esHOBpZSUlJ1R02mjdvXuX/SmuCK8l+HRCtlGqPI8lfD9xQfgWl\nVA/gA2Ck1vqoKzu22TWFpTYubIDdOFW1wGtLYGAgxcXF1a7n4+Pj7C45ff3y46Z7eXk55728vJz9\nplV9vswTTzzB4MGD+e6779i7d6+zG6AqY8eO5dFHHyUrK4uUlBSGDBlCQUEBjRs3rrJ1errg4JOj\nq2qt+eabb+jcufMp66xZs+aU+UsuuYR33nmH/fv38/zzz/Pdd98xd+5cZ/LTWvPWW28xYsSIUz63\ncOHCCsexry7pnQtXv7ey8e5d0alTJ9avX8+CBQt4/PHHGTp0KE8++aRL+/Ly8sLX19f5TOnyv5HK\nlH/+dNm03W5n9erVBAScmSfKf5cV0VrTtWtXV
q1aVeHyslirOiZn+1vVWjNs2DC+/PLLKtc7XXFx\n8SkPyKkN1fbZa62twN3AQmAr8JXWeotS6hml1FhjtZeBRsDXSqlUpdS86rZrsTl+LBc2ljFx6kKT\nJk2w2WzOf5zDhg3j448/prDQcV69rJsgMjKSlJQUgFOeoeoqVz6fm5tLq1aO0z5lXTUAISEh5OXl\nVfiZRo0a0bt3b+677z7GjBmDt7c3oaGhtG/fnq+//hpw/EP7448/XI51xIgRvPXWW86HjGzYsKHC\nONq0acOxY8fYuXMnUVFR9O/fn1deecX5pKkRI0bw3nvvOVt9O3bsoKCgoNJx7KuqZ5mKvp+wsDCa\nNGnC8uXLAfj888+drfxz+d58fX2r/F/VoUOHCAoK4sYbb2Tq1KmsX7/+jH2VPT2rJpSdE5gzZ46z\nNTx8+PBTTl5W9od92LBhfPDBB86knZWVRefOncnMzHQme4vFwpYtW6qM4fTvprLfar9+/fjqq68A\nR3fSpk2bAOjbty8rV650nnMoKChgx44dADzyyCN89913Fe53x44dLj+s5ly5dJ291nqB1rqT1rqD\n1vp5o+xJrfU8Y/ovWusWWut44zW26i2eTPYtwxpey94sw4cPZ8WKFQCMHDmSsWPHkpCQQHx8vLMr\n48EHH+S9996jR48ep4zh7ipXPv/QQw/xyCOP0KNHj1NaVIMHDyYtLc15cux01113HTNnzuS6665z\nls2aNYuPPvqIuLg4unbtyg8//OByrE888QQWi4XY2Fi6du3KE088UWkcffr0oVOnToCjW+fgwYP0\n798fgNtuu42YmBh69uxJt27duP3227FarZWOYx8eHk6/fv3o1q1bpSdoK/t+Pv30U6ZOnUpsbCyp\nqanOvuBz+d4mT55MbGxspSdoN23a5Hxg+dNPP83jjz8OOJ6He99995GQkFCjV3ZlZ2cTGxvLG2+8\n4Xxwy5tvvklycjKxsbHExMTw/vvvV/jZ2267jbZt2xIbG0tcXBxffPEFfn5+zJ07l4cffpi4uDji\n4+P5/fffq4zh9O++st/qnXfeSWZmJjExMTz++ON07dqVsLAwIiIi+OSTTxg3bhyxsbEkJSU5u2c2\nbdrEBRdcUOF+V65c6exuqjVaa1NekV2663YPz9d/5hbphiAtLc3sEHRKSoq+8cYbzQ5DiDO0a9dO\nZ2Zmmh2Gy6xWqy4qcuSuXbt26cjISF1SUlLlZ4YPH15h+fr16yv9d1lR3gCS9TnkXNOeQWux2Qnw\nUjRr5F/9yqJG9OzZk8GDB2Oz2dz2Wnsh6oPCwkIGDx6MxWJBa827776Ln59flZ9ZuHBhheXHjh3j\n2WefrY0wT2FasrfaNM1D/PGWoY3r1K233mp2CLXu+eefd/bjl7nmmmt47LHHTIqocps2beKmm246\npczf3/+Mk8S16fjx4wwdOvSM8iVLlhAeHl6j+7ryyivZs2fPKWUvvfRSrZy0rk0hISHO6+rPV613\n3xhMeyxhs8iLdNKUf/PjPf1N2X9dk8cSCiHOlkc8ltBq14Q3qvq/PUIIIWqGuck+WPrrhRCiLpiX\n7G12mknLXggh6oRpyV6DdOMIIUQdMfVJVU2lG6dOyXj2rnHXIY6nT5/uvLlKVM6MIaQ3bdrEzTff\nXKf7PFumJvuwQF8zd9/gyHj2J8l49q6pq4ZBTY5Jb8bx7d69OxkZGRUOpOcuTE32oQGmXeZvrv9O\ng49H1+zrv9UnRRnP3n3Hs69oPXDcM9CpUyf69+/vHEu9MpUd+y1btjiHPYiNjWXnzp0AzJw501l+\n++23OxN7o0aNmDJlCnFxcaxatYpp06YRExNDbGwsDz74YLXHNyEhgU6dOjF//nyASo/Pr7/+yoAB\nAxg7diwxMY6H33322WfOIQ/K7j+oakz7W2+9lUGDBhEVFcWbb75Z4fHNz89n6NCh9OzZk+7du58y\npMazzz5L5
86d6d+/P+PGjXP+z+lsfldlLrvsMmbPnl3teqY5l9tua+Lld0FHvfVwbpW3F3uSU257\nXvCw1jMurdnXgoer3H9JSYlu0aLFyRAWLNBJSUm6oKBAa6318ePHtdZaDxw4UK9bt05rrXVmZqZu\n166d1lrrjz/+WHfo0EGfOHFCHz16VIeGhur33ntPa631/fffr19//fUqP7906VI9evRorbXWubm5\n2mKxaK21/vnnn/VVV13l3Mddd93ljLH8/NixY/Uvv/yitdZ69uzZetKkSVprrYcMGaJ37NihtdZ6\n9erVevDgwZUeg4kTJ+rRo0drq9Wqtdb6kUce0Z9//rnWWuvs7GwdHR2t8/Pzz4jj9ttv1/Pnz9eb\nNm3SCQkJ+rbbbtNaa92xY0edn5+vP/jgA/3ss89qrbUuLi7WvXr10unp6XrhwoX6b3/7m7bb7dpm\ns+nRo0fr3377Te/Zs0d37drVuf3K1ktOTtbdunXTBQUFOjc3V3fo0EG//PLLldavsmN/991365kz\nZ2qtHb+DwsJCnZaWpseMGaNLS0u11lr//e9/159++qnWWmtAz5kzR2ut9bFjx3SnTp203W53Hqeq\nju+IESO0zWbTO3bs0K1atdJFRUWVHp+lS5fqoKAgnZ6errXWevPmzTo6Oto5bELZb3LcuHF6+fLl\nWmut9+3bp7t06aK11vqpp57SSUlJuri4WGdmZuqmTZvq0tLSM46vxWLRubm5zuPSoUMHbbfb9dq1\na3VcXJwuKirSJ06c0B07dnQe37P5XZVZsWKFHjNmTLXrnQ2PGC4BIDSggXbjjKr7fmgZz97BHcez\nr2y9vLw8rrzySud3NHZsteMLVigpKYnnn3+ejIwMrrrqKqKjo1myZAkpKSn07t0bcJzPad7c8WwJ\nb29vrr76asDxQJGAgAAmTZrEmDFjqj3vcu211+Ll5UV0dDRRUVFs27at0uPj5+dHYmIi7du3B+CX\nX37hmmuuoVmzZsDJ32RVY9qPHj0af39//P39ad68OUeOHDkjJq01jz76KMuWLcPLy4uDBw9y5MgR\nVq5cyeWXX05AQAABAQHO3/O5/K7AMSb9oUOHql3PLOYme+mzrzMynr2DO45nr7WucL3qnkB1usqO\n/Q033ECfPn346aefuPTSS/nggw/QWjNx4kReeOGFM7YTEBDg/IPo4+PD2rVrWbJkCXPnzuXtt9+u\n8klg5cekL5uv7Pj8+uuv1Y5JD1WPae/KWP2zZs0iMzOTlJQUfH19iYyMrPLfgt1uP+vfFdTNmPTn\nw9Q++2A/9zxR6IlkPPszuct49pWtd8kll/D9999TVFREXl4eP/74Y5X1qezYp6enExUVxb333svl\nl1/Oxo0bGTp0KHPnzuXoUcezhrKysti3b98Z28zPzyc3N5dLL72U119/vdrj+/XXX2O329m9ezfp\n6el07ty50uNzuiFDhvD1119z/PhxZ0zg+pj2ZSoak7558+b4+vqydOlSZz379evHjz/+SHFxMfn5\n+c5zDFX9rr777jseeeSRCvdbF2PSnw/Tkr23lzqjFSBql4xnfyp3Gc++svV69uzJddddR1xcHKNG\njXJ2uZztsf/qq6/o1q0b8fHxbN68mQkTJhATE8Nzzz3H8OHDiY2NZdiwYc6HcZeXl5fHmDFjiI2N\npX///rz22mtVxtC2bVsSExMZNWoU77//PgEBAZUen9N17dqVxx57jIEDBxIXF8f//d//Aa6PaV/m\n9OM7fvx4kpOT6d69O5999hldunQBoHfv3owdO5bY2FhGjRpF9+7dnY9vrOx3tXv3bkJDQyvc79Kl\nSxk9enSVsZnqXDr6a+LVqFWn8zx1Ub/IePbC002cOFF//fXXZodxVvLy8rTWWhcUFOhevXrplJSU\nKtcfP368Pnr06BnlxcXFuk+fPs4LD2qKR5ygDfKXLpy6JuPZC3GqyZMnk5a
WRnFxMRMnTqRnz55V\nrj9z5swKy/fv38+LL77ofOC5OzItsjZNgszadYMm49nXf3fddZfzWvMy9913H7fcckud7L+y41v+\n/Et98cUXX9TIdqKjo4mOjq6RbdUW08azT0hI0DU1+H99IOPZCyHOlkeMZ98QmfWHVQhR/9R0vpBk\nX0cCAgI4fvy4JHwhRLW01hw/frzCewvOlfueTfAwrVu3JiMjg8zMTLNDEULUAwEBAbRu3brGtifJ\nvo74+vo6bwsXQoi6Jt04QgjRAEiyF0KIBkCSvRBCNACmXWevlMoEzhx5yTM0A85+YJn6w5Pr58l1\nA6lffVZWt3Za64iz/bBpyd6TKaWSz+Wmh/rCk+vnyXUDqV99dr51k24cIYRoACTZCyFEAyDJvnZ8\naHYAtcyT6+fJdQOpX312XnWTPnshhGgApGUvhBANgCT7c6CUmqGUOqqU2lyurKlS6mel1E7jvYlR\nrpRSbyqldimlNiqlqn46gsmUUm2UUkuVUmlKqS1KqfuMck+pX4BSaq1S6g+jfk8b5e2VUmuMesxR\nSvkZ5f7G/C5jeaSZ8btCKeWtlNqglJpvzHtS3fYqpTYppVKVUslGmUf8NgGUUo2VUnOVUtuUUluV\nUkk1VT9J9ufmE2DkaWXTgCVa62hgiTEPMAqINl6TgffqKMZzZQWmaK1jgL7AXUqpGDynfiXAEK11\nHBAPjFRK9QVeAl7XWncEsoFJxvqTgGyj/HVjPXd3H7C13Lwn1Q1gsNY6vtxliJ7y2wR4A/if1roL\nEIfje6yZ+p3LswzlpQEigc3l5rcDLY3plsB2Y/oDYFxF69WHF/ADMMwT6wcEAeuBPjhuVvExypOA\nhcb0QiDJmPYx1lNmx15FnVobCWEIMB9QnlI3I869QLPTyjzitwmEAXtO/w5qqn7Ssq85LbTWh43p\nP4EWxnQr4EC59TKMMrdn/Le+B7AGD6qf0c2RChwFfgZ2Azlaa6uxSvk6OOtnLM8Fwus24rPyL+Ah\nwG7Mh+M5dQPQwCKlVIpSarJR5im/zfZAJvCx0Q33H6VUMDVUP0n2tUA7/szW68uclFKNgG+A+7XW\nJ8ovq+/101rbtNbxOFrBiUAXk0OqEUqpMcBRrXWK2bHUov5a6544ujDuUkpdUn5hPf9t+gA9gfe0\n1j2AAk522QDnVz9J9jXniFKqJYDxftQoPwi0Kbdea6PMbSmlfHEk+lla62+NYo+pXxmtdQ6wFEfX\nRmOlVNnzHcrXwVk/Y3kYcLyOQ3VVP2CsUmovMBtHV84beEbdANBaHzTejwLf4fhj7Sm/zQwgQ2u9\nxpifiyP510j9JNnXnHnARGN6Io6+7rLyCcaZ875Abrn/krkdpZQCPgK2aq1fK7fIU+oXoZRqbEwH\n4jgfsRVH0v+rsdrp9Sur91+BX4zWldvRWj+itW6ttY4ErscR63g8oG4ASqlgpVRI2TQwHNiMh/w2\ntdZ/AgeUUp2NoqFAGjVVP7NPStTHF/AlcBiw4PhrPAlHX+cSYCewGGhqrKuAd3D0C28CEsyOv5q6\n9cfx38SNQKrxutSD6hcLbDDqtxl40iiPAtYCu4CvAX+jPMCY32UsjzK7Di7WcxAw35PqZtTjD+O1\nBXjMKPeI36YRczyQbPw+vwea1FT95A5aIYRoAKQbRwghGgBJ9kII0QBIshdCiAZAkr0QQjQAkuyF\nEKIBkGQvhBANgCR74XGUUjcrpS6sxe0PUkpdXG7+DqXUhNraX22o7WMk3I8ke2Gacrfwn8tnvatY\nfDNQm4lsEOBM9lrr97XWn9Xi/mrDzdTuMRJuRpK9cJlSKlKd+sCWB5VS05VS9yrHw042KqVmG8uC\nleMhL2uNEfwuN8pvVkrNU0r9AixRSrVUSi0zHkaxWSk1oIr95yulXlVK/QEkKaV6KaV+M0ZAXGhs\n669AAjDL2OYApdS3xucvV0oVKaX8lOM
hJulGeQel1P+M7SxXSnUxyiOUUt8opdYZr37GSKB3AA+U\n2/50pdSDxmd+VUq9ZNR7R1l9lFJBSqmvjOP0nXI8LCThjEqerOtIpdR65XjIyhKjrKlS6nvjOK9W\nSsUa5c79G/Obje8qUjkegPFv5XhQyyKlVGAFxyjw7H4Jol4y+/ZgedWfF2eO4f8gMB04xMlb8Bsb\n7/8AbiwrA3YAwThalBmcvOV7Cidve/cGQqrYvwauNaZ9gd+BCGP+OmCGMf0rxq3jOEYSTDemXwHW\n4RgwbCDwpVG+BIg2pvvgGCMG4AscoywCtMUxXhBGnR8sF5dz3tj3q8b0pcDicsfqA2O6G46HxFR4\nezsQgWPo2vbGfNmxegt4ypgeAqRWEs9m47uKNPYTb5R/Ve47+bWy/cvLM1/n/N9oIcrZiKOV+D2O\n8TzAMUjV2HItzgAcCRPgZ611ljG9DpihHCNtfq+1Tq1iPzYco3ECdMaRNH92jN2GN47xik6htbYq\npXYrpS7CMULia8AlxvrLlWMo54uBr43tAPgb738BYsqVhxrrV6dspNAUHAkXHGMOvWHEtFkptbGK\nz/cFlmmt9xjrlx2r/sDVRtkvSqlwpVRoNbHsKXdMy8cjGhhJ9uJsWDm16y/AeB+NI4FeBjymlOqO\nY5Cmq7XW28tvQCnVB8c43QBorZcpx5jko4FPlFKv6cr7v4u11rayTQFbtNZJLsS9DMf45xYcA0l9\ngiPZTzXqk6Md49ufzgvoq7UuPq0O1e2vxHi3UTf/xir7XsrHUhaPdNk0UNJnL87GEaC50aL0B8bg\n+A210VovBR7GMSZ6IxyPvLtHGZlRKdWjog0qpdoBR7TW/wb+g2P8bldsByKUUknGdnyVUl2NZXlA\nSLl1lwP3A6u01pk4RhHsjKNL6gSwRyl1jbEdpZSKMz63CLinXKxlfxBO374rVgLXGtuJAbpXse5q\n4BKlVHtj/abl6jHeKBsEHDPi34tx3JTjodPtXYjnXOog6jFJ9sJlWmsL8AyO4XB/BrbhaCHPVEpt\nwjF08Jva8VCQZ3H0q29USm0x5isyCPhDKbUBR7/7Gy7GUopjDPaXjBO2qZy8QuYT4P1yJx/X4HiU\n2zJj+UZgk9a6bMjX8cAkYztbgMuN8nuBBOOEaBqOE7MAPwJXlp2gdSVe4F0cf5zSgOeM/eRWUrdM\nHA+Q/taIaY6xaDrQy+gCepGTY5x/AzQ1jvPdOM6PVOcTTj1GwsPJEMdC1AHluFTUV2tdrJTqgKM7\nqbPxR0uIWid99kLUjSBgqXEiWgF3SqIXdUla9sLtKKXWcPKKmDI3aa03mRFPbWpIdRXmkmQvhBAN\ngJygFUKIBkCSvRBCNACS7IUQogGQZC+EEA2AJHshhGgA/h+AuR0RjSuGwAAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "retweet_grouped_by_users_retweeting_df[['cumulative_retweet_count_sum_percentage', 'cumulative_retweeted_users_percentage']].plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cut off the tail.\n", "Removes users that were only retweeted by 5 or less users." 
] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5374" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_summary_df.drop(retweet_summary_df[retweet_summary_df.users_retweeting_count <= 5].index, inplace=True)\n", "retweet_summary_df['retweet_screen_name'].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Approach 1: By retweet count" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Top accounts (by retweet count)\n", "A type of `unknown` indicates that the account is not matched with a known Twitter account." ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_countretweet_screen_nametypeusers_retweeting_countpercent_of_users_retweeting
user_id
250738773655realDonaldTrumppoliticians6030.328431
930691102715maggieNYTjournalists4920.267974
2998022772508BraddJaffymedia4890.266340
19177312398thehillmedia4030.219499
512415742085APmedia4830.263072
7592511876CNNmedia3660.199346
93002621683politicomedia3980.216776
145299291679jaketapperjournalists4110.223856
24677911632washingtonpostmedia4370.238017
3980886611418MEPFullerjournalists2660.144880
213162531352ZekeJMillerjournalists3600.196078
8070951322nytimesmedia3980.216776
1049145941260Phil_Mattinglyjournalists3030.165033
465579451254StevenTDennisjournalists2930.159586
4268028331237AP_Politicsmedia2590.141068
135241821220daveweigeljournalists2940.160131
138504221206CNNPoliticsmedia2680.145969
861297241173costareportsjournalists3630.197712
16525411171Reutersmedia3120.169935
347133621161businessmedia2460.133987
154334521140JenniferJJacobsjournalists3120.169935
232322041124ShaneGoldmacherjournalists2870.156318
328710861119kylegriffin1journalists3000.163399
311274461066markknollerjournalists2940.160131
391550291048mkrajujournalists3080.167756
198477651045sahilkapurjournalists2630.143246
4070137761035burgessevjournalists2260.123094
13675311012FoxNewsmedia2320.126362
212526181009JakeShermanjournalists3080.167756
16187637987ChadPergramjournalists2690.146514
259395895969JohnJHarwoodjournalists2980.162309
19107878960GlennThrushjournalists3380.184096
90614279906EENewsUpdatesmedia360.019608
19186003892seungminkimjournalists2380.129630
217550862885BresPoliticojournalists2610.142157
28785486861ABCmedia2720.148148
59331128860PhilipRuckerjournalists2870.156318
17243582856blakehounshelljournalists2570.139978
38936142853jdawsey1journalists2680.145969
136550204843scottwongDCjournalists2020.110022
33653195817ericawernerjournalists2460.133987
15446531796mattyglesiasjournalists1580.086057
15463671791samsteinhpjournalists2510.136710
18678924767jmartNYTjournalists2190.119281
18956073761dcexaminermedia1050.057190
22129280759jimsciuttojournalists2660.144880
2312829909716CQnowmedia610.033224
326255267713KFILEjournalists2510.136710
22891564697chrisgeidnerjournalists1890.102941
15012486686CBSNewsmedia2040.111111
\n", "
" ], "text/plain": [ " retweet_count retweet_screen_name type \\\n", "user_id \n", "25073877 3655 realDonaldTrump politicians \n", "93069110 2715 maggieNYT journalists \n", "299802277 2508 BraddJaffy media \n", "1917731 2398 thehill media \n", "51241574 2085 AP media \n", "759251 1876 CNN media \n", "9300262 1683 politico media \n", "14529929 1679 jaketapper journalists \n", "2467791 1632 washingtonpost media \n", "398088661 1418 MEPFuller journalists \n", "21316253 1352 ZekeJMiller journalists \n", "807095 1322 nytimes media \n", "104914594 1260 Phil_Mattingly journalists \n", "46557945 1254 StevenTDennis journalists \n", "426802833 1237 AP_Politics media \n", "13524182 1220 daveweigel journalists \n", "13850422 1206 CNNPolitics media \n", "86129724 1173 costareports journalists \n", "1652541 1171 Reuters media \n", "34713362 1161 business media \n", "15433452 1140 JenniferJJacobs journalists \n", "23232204 1124 ShaneGoldmacher journalists \n", "32871086 1119 kylegriffin1 journalists \n", "31127446 1066 markknoller journalists \n", "39155029 1048 mkraju journalists \n", "19847765 1045 sahilkapur journalists \n", "407013776 1035 burgessev journalists \n", "1367531 1012 FoxNews media \n", "21252618 1009 JakeSherman journalists \n", "16187637 987 ChadPergram journalists \n", "259395895 969 JohnJHarwood journalists \n", "19107878 960 GlennThrush journalists \n", "90614279 906 EENewsUpdates media \n", "19186003 892 seungminkim journalists \n", "217550862 885 BresPolitico journalists \n", "28785486 861 ABC media \n", "59331128 860 PhilipRucker journalists \n", "17243582 856 blakehounshell journalists \n", "38936142 853 jdawsey1 journalists \n", "136550204 843 scottwongDC journalists \n", "33653195 817 ericawerner journalists \n", "15446531 796 mattyglesias journalists \n", "15463671 791 samsteinhp journalists \n", "18678924 767 jmartNYT journalists \n", "18956073 761 dcexaminer media \n", "22129280 759 jimsciutto journalists \n", "2312829909 716 CQnow media \n", 
"326255267 713 KFILE journalists \n", "22891564 697 chrisgeidner journalists \n", "15012486 686 CBSNews media \n", "\n", " users_retweeting_count percent_of_users_retweeting \n", "user_id \n", "25073877 603 0.328431 \n", "93069110 492 0.267974 \n", "299802277 489 0.266340 \n", "1917731 403 0.219499 \n", "51241574 483 0.263072 \n", "759251 366 0.199346 \n", "9300262 398 0.216776 \n", "14529929 411 0.223856 \n", "2467791 437 0.238017 \n", "398088661 266 0.144880 \n", "21316253 360 0.196078 \n", "807095 398 0.216776 \n", "104914594 303 0.165033 \n", "46557945 293 0.159586 \n", "426802833 259 0.141068 \n", "13524182 294 0.160131 \n", "13850422 268 0.145969 \n", "86129724 363 0.197712 \n", "1652541 312 0.169935 \n", "34713362 246 0.133987 \n", "15433452 312 0.169935 \n", "23232204 287 0.156318 \n", "32871086 300 0.163399 \n", "31127446 294 0.160131 \n", "39155029 308 0.167756 \n", "19847765 263 0.143246 \n", "407013776 226 0.123094 \n", "1367531 232 0.126362 \n", "21252618 308 0.167756 \n", "16187637 269 0.146514 \n", "259395895 298 0.162309 \n", "19107878 338 0.184096 \n", "90614279 36 0.019608 \n", "19186003 238 0.129630 \n", "217550862 261 0.142157 \n", "28785486 272 0.148148 \n", "59331128 287 0.156318 \n", "17243582 257 0.139978 \n", "38936142 268 0.145969 \n", "136550204 202 0.110022 \n", "33653195 246 0.133987 \n", "15446531 158 0.086057 \n", "15463671 251 0.136710 \n", "18678924 219 0.119281 \n", "18956073 105 0.057190 \n", "22129280 266 0.144880 \n", "2312829909 61 0.033224 \n", "326255267 251 0.136710 \n", "22891564 189 0.102941 \n", "15012486 204 0.111111 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_summary_df.sort_values('retweet_count', ascending=False).head(50)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Account types (by retweet count)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_counttype_percentage
type
journalists1854940.588831
media552390.175350
unknown388930.123462
politicians133120.042258
academic42940.013631
ngo40700.012920
pundit40530.012866
other_political38170.012117
government23200.007365
cultural13670.004339
business11050.003508
other8700.002762
foreign_political1870.000594
\n", "
" ], "text/plain": [ " retweet_count type_percentage\n", "type \n", "journalists 185494 0.588831\n", "media 55239 0.175350\n", "unknown 38893 0.123462\n", "politicians 13312 0.042258\n", "academic 4294 0.013631\n", "ngo 4070 0.012920\n", "pundit 4053 0.012866\n", "other_political 3817 0.012117\n", "government 2320 0.007365\n", "cultural 1367 0.004339\n", "business 1105 0.003508\n", "other 870 0.002762\n", "foreign_political 187 0.000594" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "types_by_retweet_count_df = retweet_summary_df[['type', 'retweet_count']].groupby('type').sum()\n", "types_by_retweet_count_df['type_percentage']= types_by_retweet_count_df['retweet_count'] / types_by_retweet_count_df['retweet_count'].sum()\n", "types_by_retweet_count_df.sort_values('retweet_count', ascending=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Approach 2: Per user\n", "Retweets by type per user." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add type by merging screen name lookup" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_iduser_idscreen_nameretweet_user_idretweet_screen_nametweet_created_attype
0847787664963239936285772181akesslerdc85131054jeffzeleny2017-03-31 12:28:25+00:00journalists
1847634105118318594285772181akesslerdc128558424erin_pelton2017-03-31 02:18:13+00:00unknown
3847601029654880258285772181akesslerdc58504135shaneharris2017-03-31 00:06:47+00:00journalists
4847388672785694720285772181akesslerdc22772264carolelee2017-03-30 10:02:57+00:00journalists
5847200340613189633285772181akesslerdc23911915joshledermanAP2017-03-29 21:34:36+00:00journalists
\n", "
" ], "text/plain": [ " tweet_id user_id screen_name retweet_user_id \\\n", "0 847787664963239936 285772181 akesslerdc 85131054 \n", "1 847634105118318594 285772181 akesslerdc 128558424 \n", "3 847601029654880258 285772181 akesslerdc 58504135 \n", "4 847388672785694720 285772181 akesslerdc 22772264 \n", "5 847200340613189633 285772181 akesslerdc 23911915 \n", "\n", " retweet_screen_name tweet_created_at type \n", "0 jeffzeleny 2017-03-31 12:28:25+00:00 journalists \n", "1 erin_pelton 2017-03-31 02:18:13+00:00 unknown \n", "3 shaneharris 2017-03-31 00:06:47+00:00 journalists \n", "4 carolelee 2017-03-30 10:02:57+00:00 journalists \n", "5 joshledermanAP 2017-03-29 21:34:36+00:00 journalists " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_all_join_df = pd.merge(retweet_df, user_type_lookup_df[['type']], how='left', left_on='retweet_user_id', right_index=True)\n", "retweet_all_join_df['type'].fillna('unknown', inplace=True)\n", "# Drop tail\n", "retweet_all_join_limited_df = retweet_all_join_df[retweet_all_join_df.retweet_user_id.isin(retweet_summary_df.index)]\n", "retweet_all_join_limited_df.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typeacademicbusinessculturalforeign_politicalgovernmentjournalistsmediangootherother_political...foreign_political_percentgovernment_percentjournalists_percentmedia_percentngo_percentother_percentother_political_percentpoliticians_percentpundit_percentunknown_percent
user_id
1001653781.01.02.00.00.017.07.00.00.01.0...0.0000000.0000000.3863640.1590910.0000000.00000.0227270.0681820.0681820.204545
10019918650.00.00.00.00.04.013.00.00.00.0...0.0000000.0000000.2352940.7647060.0000000.00000.0000000.0000000.0000000.000000
10022298620.00.00.00.01.058.010.00.00.00.0...0.0000000.0128210.7435900.1282050.0000000.00000.0000000.0128210.0000000.102564
1008020890.00.00.00.00.04.01.00.00.00.0...0.0000000.0000000.5714290.1428570.0000000.00000.0000000.1428570.0000000.142857
1008607902.00.00.01.01.093.039.01.00.02.0...0.0047170.0047170.4386790.1839620.0047170.00000.0094340.0141510.0047170.325472
10097492294.00.00.00.06.0133.014.010.04.00.0...0.0000000.0298510.6616920.0696520.0497510.01990.0000000.0000000.0199000.129353
10137852200.00.00.00.00.020.06.00.00.00.0...0.0000000.0000000.5405410.1621620.0000000.00000.0000000.1081080.0000000.189189
10217169117.02.01.00.01.0494.065.09.00.019.0...0.0000000.0014290.7057140.0928570.0128570.00000.0271430.0214290.0042860.105714
1022389970.00.00.00.00.00.02.00.00.00.0...0.0000000.0000000.0000001.0000000.0000000.00000.0000000.0000000.0000000.000000
1027894880.00.00.00.00.028.01.00.00.01.0...0.0000000.0000000.8484850.0303030.0000000.00000.0303030.0000000.0000000.090909
\n", "

10 rows × 27 columns

\n", "
" ], "text/plain": [ "type academic business cultural foreign_political government \\\n", "user_id \n", "100165378 1.0 1.0 2.0 0.0 0.0 \n", "1001991865 0.0 0.0 0.0 0.0 0.0 \n", "1002229862 0.0 0.0 0.0 0.0 1.0 \n", "100802089 0.0 0.0 0.0 0.0 0.0 \n", "100860790 2.0 0.0 0.0 1.0 1.0 \n", "1009749229 4.0 0.0 0.0 0.0 6.0 \n", "1013785220 0.0 0.0 0.0 0.0 0.0 \n", "102171691 17.0 2.0 1.0 0.0 1.0 \n", "102238997 0.0 0.0 0.0 0.0 0.0 \n", "102789488 0.0 0.0 0.0 0.0 0.0 \n", "\n", "type journalists media ngo other other_political ... \\\n", "user_id ... \n", "100165378 17.0 7.0 0.0 0.0 1.0 ... \n", "1001991865 4.0 13.0 0.0 0.0 0.0 ... \n", "1002229862 58.0 10.0 0.0 0.0 0.0 ... \n", "100802089 4.0 1.0 0.0 0.0 0.0 ... \n", "100860790 93.0 39.0 1.0 0.0 2.0 ... \n", "1009749229 133.0 14.0 10.0 4.0 0.0 ... \n", "1013785220 20.0 6.0 0.0 0.0 0.0 ... \n", "102171691 494.0 65.0 9.0 0.0 19.0 ... \n", "102238997 0.0 2.0 0.0 0.0 0.0 ... \n", "102789488 28.0 1.0 0.0 0.0 1.0 ... \n", "\n", "type foreign_political_percent government_percent \\\n", "user_id \n", "100165378 0.000000 0.000000 \n", "1001991865 0.000000 0.000000 \n", "1002229862 0.000000 0.012821 \n", "100802089 0.000000 0.000000 \n", "100860790 0.004717 0.004717 \n", "1009749229 0.000000 0.029851 \n", "1013785220 0.000000 0.000000 \n", "102171691 0.000000 0.001429 \n", "102238997 0.000000 0.000000 \n", "102789488 0.000000 0.000000 \n", "\n", "type journalists_percent media_percent ngo_percent other_percent \\\n", "user_id \n", "100165378 0.386364 0.159091 0.000000 0.0000 \n", "1001991865 0.235294 0.764706 0.000000 0.0000 \n", "1002229862 0.743590 0.128205 0.000000 0.0000 \n", "100802089 0.571429 0.142857 0.000000 0.0000 \n", "100860790 0.438679 0.183962 0.004717 0.0000 \n", "1009749229 0.661692 0.069652 0.049751 0.0199 \n", "1013785220 0.540541 0.162162 0.000000 0.0000 \n", "102171691 0.705714 0.092857 0.012857 0.0000 \n", "102238997 0.000000 1.000000 0.000000 0.0000 \n", "102789488 0.848485 0.030303 0.000000 0.0000 \n", "\n", 
"type other_political_percent politicians_percent pundit_percent \\\n", "user_id \n", "100165378 0.022727 0.068182 0.068182 \n", "1001991865 0.000000 0.000000 0.000000 \n", "1002229862 0.000000 0.012821 0.000000 \n", "100802089 0.000000 0.142857 0.000000 \n", "100860790 0.009434 0.014151 0.004717 \n", "1009749229 0.000000 0.000000 0.019900 \n", "1013785220 0.000000 0.108108 0.000000 \n", "102171691 0.027143 0.021429 0.004286 \n", "102238997 0.000000 0.000000 0.000000 \n", "102789488 0.030303 0.000000 0.000000 \n", "\n", "type unknown_percent \n", "user_id \n", "100165378 0.204545 \n", "1001991865 0.000000 \n", "1002229862 0.102564 \n", "100802089 0.142857 \n", "100860790 0.325472 \n", "1009749229 0.129353 \n", "1013785220 0.189189 \n", "102171691 0.105714 \n", "102238997 0.000000 \n", "102789488 0.090909 \n", "\n", "[10 rows x 27 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_summary_by_user_df = retweet_all_join_limited_df.groupby([retweet_all_join_limited_df.user_id, retweet_all_join_limited_df.type]).size().unstack().fillna(0)\n", "# Add a total column\n", "retweet_summary_by_user_df['total'] = retweet_summary_by_user_df.sum(axis=1)\n", "for col_name in retweet_summary_by_user_df.columns[:-1]:\n", " retweet_summary_by_user_df['{}_percent'.format(col_name)] = retweet_summary_by_user_df[col_name] / retweet_summary_by_user_df.total\n", "retweet_summary_by_user_df.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Average of percent of retweets by type for each user\n", "That is, for each user determine the percent of retweets by type. Then take the average of each type.\n", "\n", "Thus, this retweet analysis is on a per-user basis, accounting for how prolific a tweeter a user is. 
(That is, users who tweet more aren't weighted more heavily.)\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "type\n", "academic_percent 0.011965\n", "business_percent 0.003702\n", "cultural_percent 0.006935\n", "foreign_political_percent 0.000697\n", "government_percent 0.011508\n", "journalists_percent 0.509036\n", "media_percent 0.232894\n", "ngo_percent 0.013284\n", "other_percent 0.003179\n", "other_political_percent 0.007644\n", "politicians_percent 0.043010\n", "pundit_percent 0.007771\n", "unknown_percent 0.148374\n", "dtype: float64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_summary_by_user_df.filter(axis=1, regex=\"_percent$\").mean()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Approach 3: By count of users retweeting\n", "The number of users that retweeted an account. Thus, each user counts as 1, even if that user made multiple retweets of the account.\n", "\n", "This weights an account that is retweeted by 100 users more heavily than an account that is retweeted 100 times by a single user." ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_countretweet_screen_nametypeusers_retweeting_countpercent_of_users_retweeting
user_id
250738773655realDonaldTrumppoliticians6030.328431
930691102715maggieNYTjournalists4920.267974
2998022772508BraddJaffymedia4890.266340
512415742085APmedia4830.263072
24677911632washingtonpostmedia4370.238017
145299291679jaketapperjournalists4110.223856
19177312398thehillmedia4030.219499
93002621683politicomedia3980.216776
8070951322nytimesmedia3980.216776
7592511876CNNmedia3660.199346
861297241173costareportsjournalists3630.197712
213162531352ZekeJMillerjournalists3600.196078
19107878960GlennThrushjournalists3380.184096
154334521140JenniferJJacobsjournalists3120.169935
16525411171Reutersmedia3120.169935
391550291048mkrajujournalists3080.167756
212526181009JakeShermanjournalists3080.167756
1049145941260Phil_Mattinglyjournalists3030.165033
328710861119kylegriffin1journalists3000.163399
259395895969JohnJHarwoodjournalists2980.162309
\n", "
" ], "text/plain": [ " retweet_count retweet_screen_name type \\\n", "user_id \n", "25073877 3655 realDonaldTrump politicians \n", "93069110 2715 maggieNYT journalists \n", "299802277 2508 BraddJaffy media \n", "51241574 2085 AP media \n", "2467791 1632 washingtonpost media \n", "14529929 1679 jaketapper journalists \n", "1917731 2398 thehill media \n", "9300262 1683 politico media \n", "807095 1322 nytimes media \n", "759251 1876 CNN media \n", "86129724 1173 costareports journalists \n", "21316253 1352 ZekeJMiller journalists \n", "19107878 960 GlennThrush journalists \n", "15433452 1140 JenniferJJacobs journalists \n", "1652541 1171 Reuters media \n", "39155029 1048 mkraju journalists \n", "21252618 1009 JakeSherman journalists \n", "104914594 1260 Phil_Mattingly journalists \n", "32871086 1119 kylegriffin1 journalists \n", "259395895 969 JohnJHarwood journalists \n", "\n", " users_retweeting_count percent_of_users_retweeting \n", "user_id \n", "25073877 603 0.328431 \n", "93069110 492 0.267974 \n", "299802277 489 0.266340 \n", "51241574 483 0.263072 \n", "2467791 437 0.238017 \n", "14529929 411 0.223856 \n", "1917731 403 0.219499 \n", "9300262 398 0.216776 \n", "807095 398 0.216776 \n", "759251 366 0.199346 \n", "86129724 363 0.197712 \n", "21316253 360 0.196078 \n", "19107878 338 0.184096 \n", "15433452 312 0.169935 \n", "1652541 312 0.169935 \n", "39155029 308 0.167756 \n", "21252618 308 0.167756 \n", "104914594 303 0.165033 \n", "32871086 300 0.163399 \n", "259395895 298 0.162309 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_summary_df.sort_values('users_retweeting_count', ascending=False).head(20)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "### Account types (by count of users retweeting)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
users_retweeting_counttype_percentage
type
journalists792910.559404
unknown252360.178042
media176310.124388
politicians73000.051502
other_political23400.016509
ngo22420.015817
academic21160.014929
pundit19610.013835
government14850.010477
cultural8400.005926
business7110.005016
other4500.003175
foreign_political1390.000981
\n", "
" ], "text/plain": [ " users_retweeting_count type_percentage\n", "type \n", "journalists 79291 0.559404\n", "unknown 25236 0.178042\n", "media 17631 0.124388\n", "politicians 7300 0.051502\n", "other_political 2340 0.016509\n", "ngo 2242 0.015817\n", "academic 2116 0.014929\n", "pundit 1961 0.013835\n", "government 1485 0.010477\n", "cultural 840 0.005926\n", "business 711 0.005016\n", "other 450 0.003175\n", "foreign_political 139 0.000981" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "types_by_users_retweeting_df = retweet_summary_df[['type', 'users_retweeting_count']].groupby('type').sum()\n", "types_by_users_retweeting_df['type_percentage']= types_by_users_retweeting_df['users_retweeting_count'] / types_by_users_retweeting_df['users_retweeting_count'].sum()\n", "types_by_users_retweeting_df.sort_values('users_retweeting_count', ascending=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Unknown accounts\n", "Remember, the tail has been cut off" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Number of unknown accounts" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "retweet_count 2167\n", "retweet_screen_name 2167\n", "type 2167\n", "users_retweeting_count 2167\n", "percent_of_users_retweeting 2167\n", "dtype: int64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retweet_summary_df[retweet_summary_df.type == 'unknown'].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Number of known accounts" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "retweet_count 3207\n", "retweet_screen_name 3207\n", "type 3207\n", "users_retweeting_count 3207\n", "percent_of_users_retweeting 3207\n", "dtype: int64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"retweet_summary_df[retweet_summary_df.type != 'unknown'].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Top unknown by retweet count that are retweeted by at least 5 users" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
retweet_screen_nameretweet_countusers_retweeting_count
user_id
26574283CNBCnow21085
18028893JesseRodriguez195104
36397873FoxNewsResearch19013
327484803WSJCentralBanks1899
2316383071RVAwonk16660
39279821brianklaas16275
3066084185APBusiness15829
738767160395321345ChadBown15112
15110357ReutersBiz11838
218347440chrisdonovan11570
17470695jacobkornbluh11532
371889510TeddyDavisCNN11265
286998245Phil_Lewis_11273
1267887043RusEmbUSA10879
20017835evanmcmurry10759
564069706NPRKelly10659
4276158575NixonLibrary105101
78400475michikokakutani9957
297100174anneapplebaum9861
31997610AP_CorpComm9823
20097201eorden9859
21344549jonshorman9839
109369991MarkZuckerman9623
4267082849APEastRegion9655
229599399MicahZenko9465
3889878142ChrisSnyderFox9423
449588356Kasparov639349
269314519MichaelLaRosaDC9156
424385350APCentralRegion9150
135173872DafnaLinzer8761
9567972CNNnewsroom8543
16827148ChristopherJM8432
813311743NumbersMuncher8350
47233194TreyYingst8364
14146966aravosis7929
954590804planetepics7913
4091551984tribelaw7944
51263592AdamSchefter7846
474232856AP_Planner7760
539665155lyman_brian7616
3223426134SethAbramson7628
1626294277spectatorindex7554
1754641nytimesbusiness7531
824473943931293697RoguePOTUSStaff728
788697546mgerrydoyle7210
1222716350RCDefense7129
90275200ASLuhn7139
15111062thomaswright087158
92854623hannahdreier7040
1767741NYTNational6930
\n", "
" ], "text/plain": [ " retweet_screen_name retweet_count users_retweeting_count\n", "user_id \n", "26574283 CNBCnow 210 85\n", "18028893 JesseRodriguez 195 104\n", "36397873 FoxNewsResearch 190 13\n", "327484803 WSJCentralBanks 189 9\n", "2316383071 RVAwonk 166 60\n", "39279821 brianklaas 162 75\n", "3066084185 APBusiness 158 29\n", "738767160395321345 ChadBown 151 12\n", "15110357 ReutersBiz 118 38\n", "218347440 chrisdonovan 115 70\n", "17470695 jacobkornbluh 115 32\n", "371889510 TeddyDavisCNN 112 65\n", "286998245 Phil_Lewis_ 112 73\n", "1267887043 RusEmbUSA 108 79\n", "20017835 evanmcmurry 107 59\n", "564069706 NPRKelly 106 59\n", "4276158575 NixonLibrary 105 101\n", "78400475 michikokakutani 99 57\n", "297100174 anneapplebaum 98 61\n", "31997610 AP_CorpComm 98 23\n", "20097201 eorden 98 59\n", "21344549 jonshorman 98 39\n", "109369991 MarkZuckerman 96 23\n", "4267082849 APEastRegion 96 55\n", "229599399 MicahZenko 94 65\n", "3889878142 ChrisSnyderFox 94 23\n", "449588356 Kasparov63 93 49\n", "269314519 MichaelLaRosaDC 91 56\n", "424385350 APCentralRegion 91 50\n", "135173872 DafnaLinzer 87 61\n", "9567972 CNNnewsroom 85 43\n", "16827148 ChristopherJM 84 32\n", "813311743 NumbersMuncher 83 50\n", "47233194 TreyYingst 83 64\n", "14146966 aravosis 79 29\n", "954590804 planetepics 79 13\n", "4091551984 tribelaw 79 44\n", "51263592 AdamSchefter 78 46\n", "474232856 AP_Planner 77 60\n", "539665155 lyman_brian 76 16\n", "3223426134 SethAbramson 76 28\n", "1626294277 spectatorindex 75 54\n", "1754641 nytimesbusiness 75 31\n", "824473943931293697 RoguePOTUSStaff 72 8\n", "788697546 mgerrydoyle 72 10\n", "1222716350 RCDefense 71 29\n", "90275200 ASLuhn 71 39\n", "15111062 thomaswright08 71 58\n", "92854623 hannahdreier 70 40\n", "1767741 NYTNational 69 30" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_not_known_retweet_df = retweet_summary_df[(retweet_summary_df.type == 'unknown') & 
(retweet_summary_df.users_retweeting_count >= 5)].sort_values('retweet_count', ascending=False)[['retweet_screen_name', 'retweet_count', 'users_retweeting_count']]\n", "top_not_known_retweet_df.head(50)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Write top accounts to file" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": true }, "outputs": [], "source": [ "top_not_known_retweet_df.to_csv('unknown_retweets.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }