{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Replies" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data prep" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load the data and count." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:root:Loading from tweets/6eea2088e010437da4b6031c2abffdc9_001.json.gz\n", "DEBUG:root:Loaded 50000\n", "DEBUG:root:Loaded 100000\n", "DEBUG:root:Loaded 150000\n", "DEBUG:root:Loaded 200000\n", "DEBUG:root:Loaded 250000\n", "DEBUG:root:Loaded 300000\n", "INFO:root:Loading from tweets/a7bcdbde7a104285b92fe26e286f2543_001.json.gz\n", "DEBUG:root:Loaded 350000\n", "DEBUG:root:Loaded 400000\n", "DEBUG:root:Loaded 450000\n", "DEBUG:root:Loaded 500000\n", "DEBUG:root:Loaded 550000\n", "DEBUG:root:Loaded 600000\n", "INFO:root:Loading from tweets/e1c824ff2b3c4c5a9a93a16e5036d09a_001.json.gz\n", "DEBUG:root:Loaded 650000\n", "DEBUG:root:Loaded 700000\n", "DEBUG:root:Loaded 750000\n" ] } ], "source": [
 "import pandas as pd\n",
 "import numpy as np\n",
 "import logging\n",
 "from dateutil.parser import parse as date_parse\n",
 "from utils import load_tweet_df, tweet_type\n",
 "\n",
 "logger = logging.getLogger()\n",
 "logger.setLevel(logging.DEBUG)\n",
 "\n",
 "\n",
 "def reply_transform(tweet):\n",
 "    \"\"\"Simplify a tweet on load, keeping only the fields used for reply analysis.\n",
 "\n",
 "    Returns a flat dict for tweets with a truthy 'in_reply_to_status_id';\n",
 "    returns None for every other tweet.\n",
 "    \"\"\"\n",
 "    # Guard clause: anything that is not a reply to a status yields None.\n",
 "    # NOTE(review): presumably load_tweet_df drops None results - confirm in utils.\n",
 "    if not tweet.get('in_reply_to_status_id'):\n",
 "        return None\n",
 "    return {\n",
 "        'tweet_id': tweet['id_str'],\n",
 "        'user_id': tweet['user']['id_str'],\n",
 "        'screen_name': tweet['user']['screen_name'],\n",
 "        'reply_to_user_id': tweet['in_reply_to_user_id_str'],\n",
 "        'reply_to_screen_name': tweet['in_reply_to_screen_name'],\n",
 "        'reply_to_tweet_id': tweet['in_reply_to_status_id_str'],\n",
 "        'tweet_created_at': date_parse(tweet['created_at'])\n",
 "    }\n",
 "\n",
 "\n",
 "reply_df = load_tweet_df(\n",
 "    reply_transform,\n",
 "    ['tweet_id', 'user_id', 'screen_name', 'reply_to_user_id',\n",
 "     'reply_to_screen_name', 'reply_to_tweet_id', 'tweet_created_at'])\n"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Number of replies found in the dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "reply_to_user_id 118570\n", "dtype: int64" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_df[['reply_to_user_id']].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The reply data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_iduser_idscreen_namereply_to_user_idreply_to_screen_namereply_to_tweet_idtweet_created_at
0847428582821449730780221130loren_duggan140286364nielslesniewski8474245770093690942017-03-30 12:41:33+00:00
184647217990255001729607664adamliptak106729916espinsegall8464716747699363842017-03-27 21:21:09+00:00
284635729001809920029607664adamliptak147586500EdWhelanEPPC8463565763992125442017-03-27 13:44:37+00:00
38477898856920186909484732amacker26117379scottpllc8470462842970316812017-03-31 12:37:14+00:00
48474864917270855689484732amacker9484732amacker8474862111742197762017-03-30 16:31:39+00:00
\n", "
" ], "text/plain": [ " tweet_id user_id screen_name reply_to_user_id \\\n", "0 847428582821449730 780221130 loren_duggan 140286364 \n", "1 846472179902550017 29607664 adamliptak 106729916 \n", "2 846357290018099200 29607664 adamliptak 147586500 \n", "3 847789885692018690 9484732 amacker 26117379 \n", "4 847486491727085568 9484732 amacker 9484732 \n", "\n", " reply_to_screen_name reply_to_tweet_id tweet_created_at \n", "0 nielslesniewski 847424577009369094 2017-03-30 12:41:33+00:00 \n", "1 espinsegall 846471674769936384 2017-03-27 21:21:09+00:00 \n", "2 EdWhelanEPPC 846356576399212544 2017-03-27 13:44:37+00:00 \n", "3 scottpllc 847046284297031681 2017-03-31 12:37:14+00:00 \n", "4 amacker 847486211174219776 2017-03-30 16:31:39+00:00 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create lookup of replied user ids to screen names" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "reply_to_screen_name 27041\n", "dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# From the replies, extract map of replied-to user ids to screen names.\n",
 "# For each reply_to_user_id keep the row of the most recent reply (max\n",
 "# tweet_created_at), so the lookup holds the latest screen name seen.\n",
 "# Row and column selection are done in one .loc call; the old\n",
 "# .loc[rows].ix[:, cols] form fails because .ix was removed in pandas 1.0.\n",
 "reply_to_user_id_lookup_df = (\n",
 "    reply_df\n",
 "    .loc[reply_df.groupby('reply_to_user_id')['tweet_created_at'].idxmax(),\n",
 "         ['reply_to_user_id', 'reply_to_screen_name']]\n",
 "    .set_index(['reply_to_user_id'])\n",
 ")\n",
 "reply_to_user_id_lookup_df.count()"
 ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_screen_name
reply_to_user_id
100005598hotelkeys
10000772JMoLawre
100025240itsbull
100028531Stevempars
100036032Mitch_Tischler
\n", "
" ], "text/plain": [ " reply_to_screen_name\n", "reply_to_user_id \n", "100005598 hotelkeys\n", "10000772 JMoLawre\n", "100025240 itsbull\n", "100028531 Stevempars\n", "100036032 Mitch_Tischler" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_to_user_id_lookup_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create lookup of user ids to screen names" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "screen_name 1510\n", "dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# From the replying users (not the replied-to users), extract map of user ids\n",
 "# to screen names, keeping the screen name from each user's most recent reply.\n",
 "# Single .loc[rows, cols] call: .ix was removed in pandas 1.0.\n",
 "user_id_lookup_df = (\n",
 "    reply_df\n",
 "    .loc[reply_df.groupby('user_id')['tweet_created_at'].idxmax(),\n",
 "         ['user_id', 'screen_name']]\n",
 "    .set_index(['user_id'])\n",
 ")\n",
 "user_id_lookup_df.count()"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Group replies by reply to user id" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "reply_to_count 27041\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Group by reply_to_user_id: one row per replied-to user with the number of\n",
 "# replies addressed to them.\n",
 "reply_to_summary_user_id_df = (\n",
 "    reply_df\n",
 "    .groupby('reply_to_user_id')\n",
 "    .size()\n",
 "    .to_frame('reply_to_count')\n",
 ")\n",
 "# This count should match the reply_to_user_id lookup count (both have one\n",
 "# row per distinct reply_to_user_id).\n",
 "assert len(reply_to_summary_user_id_df) == len(reply_to_user_id_lookup_df), \\\n",
 "    'reply-to summary and reply-to user id lookup should have the same number of rows'\n",
 "reply_to_summary_user_id_df.count()"
 ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_count
reply_to_user_id
1000055985
100007721
1000252401
1000285313
1000360321
\n", "
" ], "text/plain": [ " reply_to_count\n", "reply_to_user_id \n", "100005598 5\n", "10000772 1\n", "100025240 1\n", "100028531 3\n", "100036032 1" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_to_summary_user_id_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add back in the reply to screen names" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "reply_to_count 27041\n", "reply_to_screen_name 27041\n", "dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join with user id map\n", "reply_to_summary_screen_name_df = reply_to_summary_user_id_df.join(reply_to_user_id_lookup_df)\n", "reply_to_summary_screen_name_df.count()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_countreply_to_screen_name
reply_to_user_id
1000055985hotelkeys
100007721JMoLawre
1000252401itsbull
1000285313Stevempars
1000360321Mitch_Tischler
\n", "
" ], "text/plain": [ " reply_to_count reply_to_screen_name\n", "reply_to_user_id \n", "100005598 5 hotelkeys\n", "10000772 1 JMoLawre\n", "100025240 1 itsbull\n", "100028531 3 Stevempars\n", "100036032 1 Mitch_Tischler" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_to_summary_screen_name_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add users types for replies to" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "type 13160\n", "dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load lookups of known users\n", "from utils import load_user_type_lookup_df\n", "\n", "user_type_lookup_df = load_user_type_lookup_df()[['type']]\n", "user_type_lookup_df.count()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
type
user_id
2345626885journalists
780221130journalists
285772181journalists
29607664journalists
9484732journalists
\n", "
" ], "text/plain": [ " type\n", "user_id \n", "2345626885 journalists\n", "780221130 journalists\n", "285772181 journalists\n", "29607664 journalists\n", "9484732 journalists" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_type_lookup_df.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "media 4538\n", "journalists 3576\n", "government 3055\n", "politicians 817\n", "ngo 250\n", "pundit 195\n", "other 160\n", "other_political 156\n", "cultural 131\n", "academic 129\n", "business 125\n", "foreign_political 28\n", "Name: type, dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_type_lookup_df['type'].value_counts()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "reply_to_count 27041\n", "reply_to_screen_name 27041\n", "type 27041\n", "dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Join the replied-to users with the known user types.\n",
 "# The left join keeps every replied-to user; users missing from the lookup\n",
 "# get type 'unknown'.\n",
 "# fillna is applied to the frame and the result assigned, instead of\n",
 "# df['type'].fillna(..., inplace=True): that form is chained assignment,\n",
 "# which warns on recent pandas and has no effect under Copy-on-Write.\n",
 "reply_to_summary_type_df = (\n",
 "    reply_to_summary_screen_name_df\n",
 "    .join(user_type_lookup_df, how='left')\n",
 "    .fillna({'type': 'unknown'})\n",
 "    .rename_axis('user_id')\n",
 ")\n",
 "reply_to_summary_type_df.count()"
 ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_countreply_to_screen_nametype
user_id
1000055985hotelkeysunknown
100007721JMoLawreunknown
1000252401itsbullunknown
1000285313Stevemparsunknown
1000360321Mitch_Tischlerunknown
\n", "
" ], "text/plain": [ " reply_to_count reply_to_screen_name type\n", "user_id \n", "100005598 5 hotelkeys unknown\n", "10000772 1 JMoLawre unknown\n", "100025240 1 itsbull unknown\n", "100028531 3 Stevempars unknown\n", "100036032 1 Mitch_Tischler unknown" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_to_summary_type_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add number of users replying to\n", "Which is different than the number of replies to." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_countreply_to_screen_nametypeusers_replying_to_countpercent_of_users_replying_to
user_id
1000055985hotelkeysunknown30.001987
100007721JMoLawreunknown10.000662
1000252401itsbullunknown10.000662
1000285313Stevemparsunknown30.001987
1000360321Mitch_Tischlerunknown10.000662
\n", "
" ], "text/plain": [ " reply_to_count reply_to_screen_name type \\\n", "user_id \n", "100005598 5 hotelkeys unknown \n", "10000772 1 JMoLawre unknown \n", "100025240 1 itsbull unknown \n", "100028531 3 Stevempars unknown \n", "100036032 1 Mitch_Tischler unknown \n", "\n", " users_replying_to_count percent_of_users_replying_to \n", "user_id \n", "100005598 3 0.001987 \n", "10000772 1 0.000662 \n", "100025240 1 0.000662 \n", "100028531 3 0.001987 \n", "100036032 1 0.000662 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Count distinct replying users per replied-to user: de-duplicate the\n",
 "# (reply_to_user_id, user_id) pairs first so repeat replies count once.\n",
 "reply_to_user_id_per_user_df = reply_df[['reply_to_user_id', 'user_id']].drop_duplicates()\n",
 "reply_to_user_id_per_user_summary_df = (\n",
 "    reply_to_user_id_per_user_df\n",
 "    .groupby('reply_to_user_id')\n",
 "    .size()\n",
 "    .to_frame('users_replying_to_count')\n",
 "    .rename_axis('user_id')\n",
 ")\n",
 "# Join with reply_to_summary_type_df (aligned on the index values)\n",
 "reply_to_summary_df = reply_to_summary_type_df.join(reply_to_user_id_per_user_summary_df)\n",
 "# Despite the column name this is a fraction, not a 0-100 percentage:\n",
 "# distinct repliers divided by the number of users in user_id_lookup_df.\n",
 "reply_to_summary_df['percent_of_users_replying_to'] = (\n",
 "    reply_to_summary_df['users_replying_to_count'] / user_id_lookup_df['screen_name'].count()\n",
 ")\n",
 "reply_to_summary_df.head()\n"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reply summary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Replies per user\n", "For users that made any replies. It is also possible to figure this out for all users." 
] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 1510.000000\n", "mean 78.523179\n", "std 290.394805\n", "min 1.000000\n", "25% 3.000000\n", "50% 13.000000\n", "75% 57.000000\n", "max 8009.000000\n", "Name: user_id, dtype: float64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_df['user_id'].value_counts().describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### How long is the tail?" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_countcumulative_reply_to_count_sumcumulative_reply_to_count_sum_percentagecumulative_replied_to_userscumulative_replied_to_users_percentage
sumsize
users_replying_to_count
13633921420363390.306477214200.792130
2110642616474030.399789240360.888872
369091045543120.458059250810.927517
44561467588730.496525255480.944788
54022348628950.530446258960.957657
63277206661720.558084261020.965275
73144133693160.584600262350.970193
82111109714270.602404263440.974224
9294088743670.627199264320.977479
10292681772930.651877265130.980474
11179472790870.667007265850.983137
12199058810770.683790266430.985282
1393831820150.691701266740.986428
14121437832290.701940267110.987796
15164632848750.715822267430.988980
16143325863080.727908267680.989904
17181429881220.743207267970.990977
18131226894340.754272268230.991938
19132925907630.765480268480.992863
20129919920620.776436268670.993565
21132715933890.787628268820.994120
22171619951050.802100269010.994823
233707954750.805221269080.995082
24151311969880.817981269190.995488
257307977180.824138269260.995747
264788981960.828169269340.996043
2710198992150.836763269420.996339
2879761000120.843485269480.996561
291046101010580.852307269580.996931
3015731012150.853631269610.997042
.....................
3888271061320.895100269980.998410
3956931067010.899899270010.998521
4043511071360.903568270020.998558
4124311073790.905617270030.998595
4336331077420.908678270060.998706
447211078140.909286270070.998743
4527621080900.911613270090.998817
4611911082090.912617270100.998854
4723711084460.914616270110.998891
4828121087270.916986270130.998965
50274441114710.940128270170.999112
5137131118420.943257270200.999223
5322421120660.945146270220.999297
5412111121870.946167270230.999334
5537011125570.949287270240.999371
5615011127070.950552270250.999408
5712411128310.951598270260.999445
5821311130440.953395270270.999482
59172521147690.967943270290.999556
6021411149830.969748270300.999593
6212311151060.970785270310.999630
6384811159540.977937270320.999667
6411511160690.978907270330.999704
6511311161820.979860270340.999741
6721211163940.981648270350.999778
7199721173910.990057270370.999852
7223011176210.991996270380.999889
7614711177680.993236270390.999926
7818611179540.994805270400.999963
9461611185701.000000270411.000000
\n", "

67 rows × 6 columns

\n", "
" ], "text/plain": [ " reply_to_count cumulative_reply_to_count_sum \\\n", " sum size \n", "users_replying_to_count \n", "1 36339 21420 36339 \n", "2 11064 2616 47403 \n", "3 6909 1045 54312 \n", "4 4561 467 58873 \n", "5 4022 348 62895 \n", "6 3277 206 66172 \n", "7 3144 133 69316 \n", "8 2111 109 71427 \n", "9 2940 88 74367 \n", "10 2926 81 77293 \n", "11 1794 72 79087 \n", "12 1990 58 81077 \n", "13 938 31 82015 \n", "14 1214 37 83229 \n", "15 1646 32 84875 \n", "16 1433 25 86308 \n", "17 1814 29 88122 \n", "18 1312 26 89434 \n", "19 1329 25 90763 \n", "20 1299 19 92062 \n", "21 1327 15 93389 \n", "22 1716 19 95105 \n", "23 370 7 95475 \n", "24 1513 11 96988 \n", "25 730 7 97718 \n", "26 478 8 98196 \n", "27 1019 8 99215 \n", "28 797 6 100012 \n", "29 1046 10 101058 \n", "30 157 3 101215 \n", "... ... ... ... \n", "38 882 7 106132 \n", "39 569 3 106701 \n", "40 435 1 107136 \n", "41 243 1 107379 \n", "43 363 3 107742 \n", "44 72 1 107814 \n", "45 276 2 108090 \n", "46 119 1 108209 \n", "47 237 1 108446 \n", "48 281 2 108727 \n", "50 2744 4 111471 \n", "51 371 3 111842 \n", "53 224 2 112066 \n", "54 121 1 112187 \n", "55 370 1 112557 \n", "56 150 1 112707 \n", "57 124 1 112831 \n", "58 213 1 113044 \n", "59 1725 2 114769 \n", "60 214 1 114983 \n", "62 123 1 115106 \n", "63 848 1 115954 \n", "64 115 1 116069 \n", "65 113 1 116182 \n", "67 212 1 116394 \n", "71 997 2 117391 \n", "72 230 1 117621 \n", "76 147 1 117768 \n", "78 186 1 117954 \n", "94 616 1 118570 \n", "\n", " cumulative_reply_to_count_sum_percentage \\\n", " \n", "users_replying_to_count \n", "1 0.306477 \n", "2 0.399789 \n", "3 0.458059 \n", "4 0.496525 \n", "5 0.530446 \n", "6 0.558084 \n", "7 0.584600 \n", "8 0.602404 \n", "9 0.627199 \n", "10 0.651877 \n", "11 0.667007 \n", "12 0.683790 \n", "13 0.691701 \n", "14 0.701940 \n", "15 0.715822 \n", "16 0.727908 \n", "17 0.743207 \n", "18 0.754272 \n", "19 0.765480 \n", "20 0.776436 \n", "21 0.787628 \n", "22 0.802100 \n", "23 0.805221 \n", "24 
0.817981 \n", "25 0.824138 \n", "26 0.828169 \n", "27 0.836763 \n", "28 0.843485 \n", "29 0.852307 \n", "30 0.853631 \n", "... ... \n", "38 0.895100 \n", "39 0.899899 \n", "40 0.903568 \n", "41 0.905617 \n", "43 0.908678 \n", "44 0.909286 \n", "45 0.911613 \n", "46 0.912617 \n", "47 0.914616 \n", "48 0.916986 \n", "50 0.940128 \n", "51 0.943257 \n", "53 0.945146 \n", "54 0.946167 \n", "55 0.949287 \n", "56 0.950552 \n", "57 0.951598 \n", "58 0.953395 \n", "59 0.967943 \n", "60 0.969748 \n", "62 0.970785 \n", "63 0.977937 \n", "64 0.978907 \n", "65 0.979860 \n", "67 0.981648 \n", "71 0.990057 \n", "72 0.991996 \n", "76 0.993236 \n", "78 0.994805 \n", "94 1.000000 \n", "\n", " cumulative_replied_to_users \\\n", " \n", "users_replying_to_count \n", "1 21420 \n", "2 24036 \n", "3 25081 \n", "4 25548 \n", "5 25896 \n", "6 26102 \n", "7 26235 \n", "8 26344 \n", "9 26432 \n", "10 26513 \n", "11 26585 \n", "12 26643 \n", "13 26674 \n", "14 26711 \n", "15 26743 \n", "16 26768 \n", "17 26797 \n", "18 26823 \n", "19 26848 \n", "20 26867 \n", "21 26882 \n", "22 26901 \n", "23 26908 \n", "24 26919 \n", "25 26926 \n", "26 26934 \n", "27 26942 \n", "28 26948 \n", "29 26958 \n", "30 26961 \n", "... ... 
\n", "38 26998 \n", "39 27001 \n", "40 27002 \n", "41 27003 \n", "43 27006 \n", "44 27007 \n", "45 27009 \n", "46 27010 \n", "47 27011 \n", "48 27013 \n", "50 27017 \n", "51 27020 \n", "53 27022 \n", "54 27023 \n", "55 27024 \n", "56 27025 \n", "57 27026 \n", "58 27027 \n", "59 27029 \n", "60 27030 \n", "62 27031 \n", "63 27032 \n", "64 27033 \n", "65 27034 \n", "67 27035 \n", "71 27037 \n", "72 27038 \n", "76 27039 \n", "78 27040 \n", "94 27041 \n", "\n", " cumulative_replied_to_users_percentage \n", " \n", "users_replying_to_count \n", "1 0.792130 \n", "2 0.888872 \n", "3 0.927517 \n", "4 0.944788 \n", "5 0.957657 \n", "6 0.965275 \n", "7 0.970193 \n", "8 0.974224 \n", "9 0.977479 \n", "10 0.980474 \n", "11 0.983137 \n", "12 0.985282 \n", "13 0.986428 \n", "14 0.987796 \n", "15 0.988980 \n", "16 0.989904 \n", "17 0.990977 \n", "18 0.991938 \n", "19 0.992863 \n", "20 0.993565 \n", "21 0.994120 \n", "22 0.994823 \n", "23 0.995082 \n", "24 0.995488 \n", "25 0.995747 \n", "26 0.996043 \n", "27 0.996339 \n", "28 0.996561 \n", "29 0.996931 \n", "30 0.997042 \n", "... ... 
\n", "38 0.998410 \n", "39 0.998521 \n", "40 0.998558 \n", "41 0.998595 \n", "43 0.998706 \n", "44 0.998743 \n", "45 0.998817 \n", "46 0.998854 \n", "47 0.998891 \n", "48 0.998965 \n", "50 0.999112 \n", "51 0.999223 \n", "53 0.999297 \n", "54 0.999334 \n", "55 0.999371 \n", "56 0.999408 \n", "57 0.999445 \n", "58 0.999482 \n", "59 0.999556 \n", "60 0.999593 \n", "62 0.999630 \n", "63 0.999667 \n", "64 0.999704 \n", "65 0.999741 \n", "67 0.999778 \n", "71 0.999852 \n", "72 0.999889 \n", "76 0.999926 \n", "78 0.999963 \n", "94 1.000000 \n", "\n", "[67 rows x 6 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Tail analysis: bucket replied-to users by how many distinct users replied to\n",
 "# them. Per bucket, 'sum' is the total number of replies and 'size' is the\n",
 "# number of replied-to users in the bucket.\n",
 "# String aggregator names are used instead of np.sum / np.size, which recent\n",
 "# pandas deprecates as agg arguments; the column labels stay 'sum' and 'size'.\n",
 "replies_grouped_by_users_replying_to_df = (\n",
 "    reply_to_summary_df[['reply_to_count', 'users_replying_to_count']]\n",
 "    .groupby(by='users_replying_to_count')\n",
 "    .agg(['sum', 'size'])\n",
 ")\n",
 "# Running totals over the buckets. The '_percentage' columns are fractions of\n",
 "# the overall total (the last row is 1.0), not 0-100 percentages.\n",
 "replies_grouped_by_users_replying_to_df['cumulative_reply_to_count_sum'] = replies_grouped_by_users_replying_to_df['reply_to_count', 'sum'].cumsum()\n",
 "replies_grouped_by_users_replying_to_df['cumulative_reply_to_count_sum_percentage'] = replies_grouped_by_users_replying_to_df['cumulative_reply_to_count_sum'] / replies_grouped_by_users_replying_to_df['reply_to_count', 'sum'].sum()\n",
 "replies_grouped_by_users_replying_to_df['cumulative_replied_to_users'] = replies_grouped_by_users_replying_to_df['reply_to_count', 'size'].cumsum()\n",
 "replies_grouped_by_users_replying_to_df['cumulative_replied_to_users_percentage'] = replies_grouped_by_users_replying_to_df['cumulative_replied_to_users'] / replies_grouped_by_users_replying_to_df['reply_to_count', 'size'].sum()\n",
 "replies_grouped_by_users_replying_to_df"
 ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAELCAYAAAAiIMZEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xd4VGX2wPHvSYMEUujSA9IMaZCQUFRApKgsoNhAQdRd\nXBVF1oIFu666utgbKmBBQFGRn+KiKIqgUkKXGnqoIY0UUmby/v64k3EIKQMkTMr5PM88M7efGS4n\nd95573nFGINSSqmaxcvTASillKp4mtyVUqoG0uSulFI1kCZ3pZSqgTS5K6VUDaTJXSmlaiBN7kop\nVQNpcldKqRpIk7tSStVAPp46cOPGjU1oaKinDq+UUtVSQkLCMWNMk/LW81hyDw0NZfXq1Z46vFJK\nVUsisted9bRZRimlaiBN7kopVQNpcldKqRpIk7tSStVAmtyVUqoGKje5i8h0ETkqIptKWS4i8pqI\nJIrIBhHpXvFhKqWUOh3uXLnPBIaUsfwyoKPjMR54++zDUkopdTbK7edujFkqIqFlrDIc+MhY4/X9\nISIhItLcGHOogmJUCoyxHhiX6UJrumi+O88nzaOM9Yr2XegyzcnTxR+n7JdSjlFsXtH7KS0e5/Ki\n45a3nstxi+/7pH2VtP8yht10Z99lzit2zHKPUeIKbhy3IlTgvlziMoC90Jz0sBUabIWFJcxzTNsN\ndmOw2Q32ovPQDRVxE1NLYL/LdJJj3inJXUTGY13d06ZNmwo4dC1kDNgLwJYLtjzr2Z5vzSsssJ7t\nBY55+ae+Llqn0O6Yl+fYTx4U2lwSl/2vfRUWWMsKbdZ2hXbH6wKX145lxn5yAiy0c1LSKNp3oesx\n8v/aR9E2RclSqRpEsJLuubh79JzeoWqMmQZMA4iNja09/3ONgdwMOJEKOWlwIs16nZsBecchL9N6\n5Gdbj4Ic6zm3aNnxvxJ0oa1yYvT2Ay9fEC/HQ8Db1zHfp9jDy1rXy8dax8sHfOqClzeIt+PZsQ/x\nAsR6XfTsXMcbvH3+OraX67aOB1jbwcn7OGXanWevU/dR4n68ir0u4b0UvQ/X5a77cN3vScco4/2U\nOL/Y8uKxOTcrYT/OfRXft+v6xfcvJ29/0jal7buM9YrNKzRwLLuAnHw7BYUGm72QfHshBfZCbHZD\nQdGjsJACW6Fj2lqWX1hIbr6dzDw72Xk2MnNtZObZyMy1k5lXQHa+nXybId9eWCHXBcblvfr6CH7e\nXvh6e+HrLfj6eOHrZb32K3rt89dyP29r2s/bCx9vwdfb29qHl2MdHy/8fLzw8XJs7120rpdjf4Kf\nz1/b+Tq28/MReNK9C+OKSO4HgNYu060c82quQjvkpEDmYcg+6kjYqVbSzkmB7GTIPmY956RY88tK\nyl4+UCcQ/OqDXz3wDbCeG7az5tcJtBKgt58jmfqCb10roXr7gU+dv+YXJWTnwwe867i8LkrWjmRa\ntA8v7TilKk6BvZA9x7LZfiSLHUczSTyaxa7kbHYfy+ZEgf2M9+slEOTvS1BdX4L86xJU15fgIF9a\n+/tQr44Pfj5e1PEuSoR/PfsVPfu4LhPqFFvH19vrpHlFyVqK/yGrBioiuS8AJojIHCAeyKjW7e35\nOZCyA44fshJ31hHIOgrHD/71yD76VxtscXVDoF4T69G4IwT0hIBG4N8QAhq6PDew1q0TaCXnanjy\nKFU8ie84ksX2I5nsPpaNrdC6fBaBVg38Ob9JfXq2b0S7JvUIquvjTLi+3n9dlfoVXal6exVbLs4E\nXB0TrSeUm9xFZDbQD2gsIknA44AvgDHmHWAhcDmQCOQAN1dWsBUu+xgcSLAehzfC0c2QtpdTvtPV\nCYagFtajWRgEtoD6TaF+MyuJBzSyEnbdEOvqWKkaJLfATsaJAlKz89mVnF1mEm/TMICOTetzaVgz\nOjWrT8emgZzfpD7+ft4efhe1jzu9ZUaVs9wAd1ZYRJWlsBCOb
YN9v8Pe32H/H5C+z1omXtC4E7To\nBlGjoUlnCG4N9ZtAvaZWE4hSNUBhoWFfag4p2fmkZeeTllP0KHBOp+cUkHGigPScAtJP5JNbcPK3\n1L+SeKAm8Sqs5l5mGgMpO2H3L47Hr1a7OFhX3G16Qtx4aBkDzaOsNm6larCVu1N55tvNbEjKOGWZ\nr7cQEuBHgwBfQgL8aNMwgMhW1utgf19CAnwJ8fejbaMAOjStT11fTeJVXc1K7oWFVhPLlgWw5f8g\nbbc1P6gldBoCoX2gTS9o2F7buFWNV1hoyLXZOZiey9QftrFw42GaB9flyWFdadsogAYBftajni/1\n6/hoW3YNUzOSe+ouWPsJrJsNmQetniDtLobeE6B9f03mqsbZl5LDit0p7EnJZk9KDklpJ8jOs3Ei\n386JAjs5+baTmlP8fb2ZdGknxl/cXptOaonqndy3fw+/vw67l1rt5h0uhYFPQsdB4B/i6eiUqhSb\nDmRw7bu/k5Nvx9tLaN3An9YNA2gZUhd/Xx/8/bwI8POhrq83AX7e1PPzZmDYeZwXrL8d1SbVM7mn\n7YHvHoTt30FIG+g/BaJHQ3BLT0emVKU6kH6CW2auokGAH1/eEcv5Terj6633KKhTVa/kbrfBsqnw\n63+tuwMHPg09b7du3FGqhjueW8AtM1ZxIt/OJ3fE06lZoKdDUlVY9Unudht8+Q/480sIGwGD/61X\n6qrWyMm3cccna9iZnMWHt8RpYlflqh7JvdAO82+3EvvAp6HP3Z6OSKkKYYzh+AkbRzJzOXI8l2NZ\neaRk5ZOSnU9KVh77UnPYm5LDoYxcAF68OpI+HRp7OGpVHVT95F5YCF9PgI2fwYDHNLGrassYQ3a+\nnY1JGazYncIfu1JYvz+jxForPl5Cw3p+tGrgT6/zGxHaqB7d2zTgwo6a2JV7qn5y//ZfsP5T6Pcw\nXHSvp6NR6hRJaTnsTM4mPce66zM1p8B67XhOddwNmpKdT57N6p4oAmHNg7iuR2taNfCnaVBdmgXW\noXFgHRrXq0OQv/Y7V2enaif3dbMhYQb0mQj9Jns6GqUAq1jWgnUH+W2ndfV9IP3EKesE+/s67/Zs\nGliHLucF0ai+ddNQx6b16dGuIcH+2hFAVZ6qm9yPJcK390LbC2HA456ORikATuTbuWNWAku2JdOw\nnh/x7Rryj4va0bVlsOOOT1+C/X3x0e6JysOqZnK35cG8m8HHD66aZtUdV8rD0nPyufXD1azdl8Yz\nI8IZHdcGLy9tOlFVU9VM7j88Doc3wKg52t1RVQmHM3IZO30Fe47l8Obo7lwW0dzTISlVpqqX3Hf+\nBCvehvh/QufLPB2NUuxKzmLMBytJz8ln5s096K1dEVU1UPWS+6oPILA5DHzK05EoxcakDG6asRIB\n5ozvRUSrYE+HpJRbqtavPnlZkLgYLhhmDT2nlAf9lniM66f9jr+vN5//UxO7ql6q1pX7ju/Blgth\nwz0diaqFMnIK2HL4OJsPHufPg8f5v/UHade4Hh/dGkezIK2oqKqXqpXctyywxiRt09PTkaga6Hhu\nAYlHs0jJyic1O4/U7AIOpOew82g2iclZJGfmOddtXL8Og7o249kREQQHaH90Vf24ldxFZAjwKuAN\nvG+Meb7Y8rbAdKAJkArcaIxJOq1I8nOs+uxR12nXR3XajucWcCDtBAfSTnA0M4+cfBtZeTay82zs\nSclhy6HjJKWderNRYF0fOjStT79OTTi/aX26nBdIWIsgmgbqlbqq3spN7iLiDbwJDASSgFUissAY\ns9lltZeAj4wxH4rIJcBzwJjTimTnj1CQbbW3K1WMMYYjx/PYcTSTxKNZ7E3JISnNGoHoQPoJMnNt\nJW5X19eLliH+RLcOYVRcG7qcF0jTwLo0rO9HwwA/HZVI1VjuXLnHAYnGmF0AIjIHGA64Jvcw4F+O\n10uA+acdyeYF4N8QQi887
U1VzXTkeC4/bzvKT1uP8tvOlJMSeGAdH1o28KdliD9x7RrSMsTfOX1e\ncF3q1/EhwM8Hb73JSNVS7iT3lsB+l+kkIL7YOuuBq7Cabq4EAkWkkTEmxa0obHmw7TvoOkIH3qjl\nMnIKWLD+APMSkliflAFA8+C6XBHRnK4tgujQNJAOTevTuL6fFtZSqgwV9YPqfcAbIjIOWAocAE6p\nYyoi44HxAG3atPlrwc4lkJ+pvWRqsU0HMnjnl518v/kI+bZCLmgexANDOnNJl6Z0bhaoiVyp0+RO\ncj8AtHaZbuWY52SMOYh15Y6I1AdGGmPSi+/IGDMNmAYQGxtrnAs2fw11gqFd39ONX1Vzadn5vPj9\nNmav3EdQXV9G9WjNNbGtCW+pfcqVOhvuJPdVQEcRaYeV1K8HRruuICKNgVRjTCHwEFbPGffYC2Db\nQqvUgI+f25up6i0n38bnq5OY+sN2svJsjOsdyj2XdtIyuEpVkHKTuzHGJiITgEVYXSGnG2P+FJGn\ngNXGmAVAP+A5ETFYzTJ3uh1B2l7ITYf2etVeG2w+eJzZK/cxf+0BMvNs9GrfiCeGdaXzeTomqFIV\nya02d2PMQmBhsXmPubyeB8w7owjS9ljPDULPaHNVtRhj2Jeaw7r96azbn86WQ8fJOGEjK6+AzFwb\n6TkF+Pl4MTSiOdfHtaFHaANtT1eqEnj+DtX0PdazJvdqb+vh49z16Vp2HM0CrD7mFzQPomWIP4F1\nA6lfx4fzm9RjRLeWhARoE5xSlcnzyT1tD3jXgfrneToSdYaMMXy+OolHv95EkL8vT48Ip3ubEDo3\nC9QRiZTykCqQ3PdCSBvw0iRQHaVl5/PMt1v4Yk0SfTo04pXrutEkUCt6KuVpnk/u6XuhQVtPR6FO\nw6GMEyzadJhFfx5h5Z5UCo1h4oCO3D2go94RqlQV4fnknrYHWsZ6OgpVjjybnR82H2Huqv0sSzyG\nMdChaX3+2bc9QyNbcEHzIE+HqJRy4dnkfiINcjP0x9QqLDvPxrtLd/HJH3tJzc6nZYg/d1/SkWHR\nLTi/SX1Ph6eUKoVnk3vaXutZm2WqHHuh4YuEJF78fhvJmXkMDGvGjT3bcmGHxtr0olQ14Nnknl6U\n3EM9GoayujGu25fOkeN5HD6ey5q9aWw7kkm3NiG8c2MMMW0beDpEpdRpqBpX7iF65e4p+bZCXlm8\nnXd+2Umho9pP4/p+tGwQwOujujE0srneZKRUNeTh5L4H6oaAf4hHw6itdhzJ5J656/jz4HGui23N\nhEs60CyoLn4+2i1VqerO880y2t5+zmXn2Xj/19289XMi9er48O6YGAZ31ZvIlKpJPH/l3jTMoyHU\nJnk2O7NX7OP1nxJJyc7n8ojzeHJYuN50pFQN5OEr933Q+XKPhlBb/L4zhclfbGBfag692jfigSGd\n6dZGfyRVqqbyXHK3F4A9X5tlKtmJfDsv/G8rM3/bQ7vG9fjwljgu7thYfyRVqobzYHLPs561G2Sl\nyLPZWbr9GM8t3MKuY9mM6x3K5CFd8Pfz9nRoSqlzwHPJ3ZZvPYeEeiyEmibfVsgPm4/wvz8Ps2Tr\nUbLybLQM8efTv8fTu0NjT4enlDqHPHjlng8IhLQud1VVttwCO5+v3s87v+ziQPoJGtXz429RzRnc\n9Tx6n99YuzYqVQt5tlkmqAX4aE+NszF31T6m/rCdI8fziGnbgGdGhHNxpyZaIkCpWs6zzTJ6Z+pZ\neeeXnTz/3VZ6hDbg5Wuj6XV+I/2hVCkFeLpZRn9MPWPTllqJfVhUC16+Llqv1JVSJ3GrMVZEhojI\nNhFJFJEHS1jeRkSWiMhaEdkgIuV3XrcXaDfIM/T+r7v498KtDI1sztRrozSxK6VOUe6Vu4h4A28C\nA4EkYJWILDDGbHZZbQrwmTHmbREJAxYCoWXv2eiVu5vybHY2JGXwx84U/tidwvLEFK6IaM4
r10Xr\nGKVKqRK50ywTByQaY3YBiMgcYDjgmtwNUDQUTzBw0K2ja5t7ubYdzmT0e3+Qkm11He1yXiAT+ndg\n4qUdNbErpUrlTnJvCex3mU4C4out8wTwvYjcBdQDLi1pRyIyHhgPENPcS5tlynHkeC43z1iJt5fw\nzo0xxLdrSIN6fp4OSylVDVTUpd8oYKYxphVwOfCxiJyyb2PMNGNMrDEmFvGC+lqJsDRZeTZunrGK\njBMFTB/XgyHh52liV0q5zZ3kfgBwvdOolWOeq1uBzwCMMb8DdYGyb4n09gMvbVYoic1eyJ2z1rDt\nSCZv3tCd8JbBng5JKVXNuJNdVwEdRaSdiPgB1wMLiq2zDxgAICIXYCX35DL3GtTytIOtDQrshdw/\nbwO/bE/mmRHh9Ovc1NMhKaWqoXLb3I0xNhGZACwCvIHpxpg/ReQpYLUxZgFwL/CeiEzC+nF1nDHG\nlLnjukFlLq6NsvJs3P5JAr/uOMb9gzszKq6Np0NSSlVTbt3EZIxZiNW90XXeYy6vNwN9Kja02uXo\n8VxunrmKrYcz+c/ISK7toTV3lFJnzrODdSgAEo9mctP0VaTl5PP+TbH016YYpdRZ0uTuYb/vTOG2\nj1fj5+PFnPE9iWylg4Urpc6eJncP+nJNEpO/2EDbRvWYMa4HrRsGeDokpVQNocndA4wxvPZjIi8v\n3k7P9g1598ZYggN8PR2WUqoG0eR+jtnshUyZv4k5q/ZzVbeWPD8yUgfTUEpVOE3u51BOvo07Z61h\nybZkJvTvwL2DOmn9daVUpdDkfo4cy8rj1pmr2Hggg2evDOeGeK2ro5SqPJrcz4FdyVmMm7GKo5m5\nvDsmloFhzTwdklKqhtPkXslW70nl7x+txluE2f/oSbc2DTwdklKqFtDkXokWbjzEPXPX0TLEn5k3\n96Bto3qeDkkpVUtocq8kS7YdZcKna+jWpgHvjY2loZbrVUqdQ5rcK0Hi0Uzu/nQtXc4L4uNb4wjw\n049ZKXVuaQfrCpaWnc+tH66mjq83790Uq4ldKeURmtwrUIG9kDs/XcOh9FzeHRNDyxB/T4eklKql\n9LKygtgLDY98tZHfdqbw0jVRxLTVXjFKKc/R5F4BbPZC7vt8PfPXHeTuSzpwdUwrT4eklKrlNLmf\npXxbIRPnrOW7TYe5f3Bn7uzfwdMhKaWUJvezkVtg5/ZPEliyLZlHh4Zx64XtPB2SUkoBmtzPmL3Q\nMHHOWpZsS+bfV0YwOl7HO1VKVR1u9ZYRkSEisk1EEkXkwRKWvywi6xyP7SKSXvGhVh3GGKbM38ii\nP4/w+N/CNLErpaqccq/cRcQbeBMYCCQBq0RkgWNQbACMMZNc1r8L6FYJsVYZU3/YzuyV+7mz//nc\n3EebYpRSVY87V+5xQKIxZpcxJh+YAwwvY/1RwOyKCK4qmr5sN6//lMj1PVpz36DOng5HKaVK5E6b\ne0tgv8t0EhBf0ooi0hZoB/x09qFVLTZ7Ic8u3MKM5XsYFNaMZ0aE60AbSqkqq6J/UL0emGeMsZe0\nUETGA+MB2rSpPu3Uadn53PnpGn7bmcItfdrx8OVd8PHWm3uVUlWXO8n9ANDaZbqVY15JrgfuLG1H\nxphpwDSA2NhY42aMHrV+fzoTZq/hyPE8XromSm9QUkpVC+4k91VARxFph5XUrwdGF19JRLoADYDf\nKzRCD8nKs/HSom189PsemgbWZe54HWhDKVV9lJvcjTE2EZkALAK8genGmD9F5ClgtTFmgWPV64E5\nxphqcUVelh82H+HR+Zs4kpnLmJ5tuW9wZ4Lq+no6LKWUcptbbe7GmIXAwmLzHis2/UTFheU5P245\nwj8+Wk2X8wJ568budNerdaVUNaR3qLo4kH6Cf322nq4tgvji9t7U9fX2dEhKKXVGtMuHQ76tkAmf\nrsFeaHhzdHdN7Eqpak2v3B1eXLSVtfvSeXN0d0Ib60D
WSqnqTa/csX5Afe/X3Yzt1ZYrIpt7Ohyl\nlDprtT65bz+SyaS564hoGcwjV1zg6XCUUqpC1OrknpKVx60frsLfz5tpY2Oo46Pt7EqpmqHWJvc8\nm51/fpLA0eN5vDc2lubBOpi1UqrmqJU/qBpjmPLVJlbtSeP1Ud2Ibh3i6ZCUUqpC1cor9283HuLz\nhCTuHtCRv0W18HQ4SilV4WpdcrcXGl7+YTudmtXnngEdPR2OUkpVilqX3L9ed4CdydlMurQTXl5a\nj10pVTPVquReYC/k1R93ENY8iMFdz/N0OEopVWlqVXL/ck0Se1Ny+NdAvWpXStVstSa559sKee3H\nRKJahzDggqaeDkcppSpVrUnuc1fvt6o+DuykY58qpWq8WpHccwvsvPlTIrFtG3Bxx8aeDkcppSpd\nrUjus1fu4/DxXP41SK/alVK1Q41P7ify7bz18056tm9I7/P1ql0pVTvU+OT+yR97Sc7M418DO3s6\nFKWUOmdqdHLPzrPxzi87uahjY+LaNfR0OEopdc64ldxFZIiIbBORRBF5sJR1rhWRzSLyp4h8WrFh\nnpkPf99DSnY+kwZ28nQoSil1TpVbFVJEvIE3gYFAErBKRBYYYza7rNMReAjoY4xJExGPdyTPzC1g\n2tJd9O/chO5tGng6HKWUOqfcuXKPAxKNMbuMMfnAHGB4sXX+AbxpjEkDMMYcrdgwT98bPyWSnlOg\nbe1KqVrJneTeEtjvMp3kmOeqE9BJRJaLyB8iMqSkHYnIeBFZLSKrk5OTzyxiN/y+M4Vpv+5iVFxr\nIloFV9pxlFKqqqqoH1R9gI5AP2AU8J6InDIChjFmmjEm1hgT26RJkwo69Mkycgr412fraNeoHo8O\nDauUYyilVFXnTnI/ALR2mW7lmOcqCVhgjCkwxuwGtmMl+3PKGMPD8zeSnJnHK9dHE+BXKweaUkop\nt5L7KqCjiLQTET/gemBBsXXmY121IyKNsZppdlVgnG75cs0Bvt1wiH8N6kRkKx06TylVe5Wb3I0x\nNmACsAjYAnxmjPlTRJ4SkWGO1RYBKSKyGVgC3G+MSamsoEtyLCuPxxf8SVy7htx28fnn8tBKKVXl\nuNVuYYxZCCwsNu8xl9cG+Jfj4RHTl+0mO9/Gv6+MwFtrtSularkacYdqxokCPv59L5eHN6dD0/qe\nDkcppTyuRiT3T/7YS2aejdv7aXOMUkpBDUjuJ/LtfLBsN/06NyG8pfZpV0opqAHJfc6qfaRm53Nn\n/w6eDkUppaqMap3c822FTFu6i7jQhvQI1aqPSilVpFon9/nrDnAoI5c7+mtbu1JKuaq2yb2w0PDO\nLzsJax5E306VU8pAKaWqq2qb3H/cepRdydnc1re9jouqlFLFVNvkPm3pTlqG+HNFRHNPh6KUUlVO\ntUzua/alsWpPGrde2A4f72r5FpRSqlJVy8w47ZddBPv7cl2P1uWvrJRStVC1S+67j2WzaPNhbuzZ\nhnp1tKSvUkqVpNol9/d/3YWvlxc39Q71dChKKVVlVavknpKVx7yEJK7q3pKmgXU9HY5SSlVZ1Sq5\nf7nmAHm2Qm69sJ2nQ1FKqSqt2iR3YwzzEpKIbh1Cx2aBng5HKaWqtGqT3P88eJxtRzK5OqaVp0NR\nSqkqr9ok93kJSfj5ePG3yBaeDkUppaq8apHc822FLFh/kIEXNCM4wNfT4SilVJXnVnIXkSEisk1E\nEkXkwRKWjxORZBFZ53j8vSKDXLLtKKnZ+doko5RSbir3LiAR8QbeBAYCScAqEVlgjNlcbNW5xpgJ\nlRAjXyQk0SSwDhd1bFwZu1dKqRrHnSv3OCDRGLPLGJMPzAGGV25Yf0nJyuOnrUcZEd1C68gopZSb\n3MmWLYH9LtNJjnnFjRSRDSIyT0QqrOjLgvUHsRUaRmqTjFJKua2iLoX/Dwg1xkQCPwAflrSSiIwX\nkdUisjo5OdmtHX+
55gDhLYPocl5QBYWqlFI1nzvJ/QDgeiXeyjHPyRiTYozJc0y+D8SUtCNjzDRj\nTKwxJrZJk/JHT9qbks3GAxkMjyrpi4JSSqnSuJPcVwEdRaSdiPgB1wMLXFcQEdcRM4YBWyoiuO82\nHQbgsojzKmJ3SilVa5TbW8YYYxORCcAiwBuYboz5U0SeAlYbYxYAd4vIMMAGpALjKiK47zYeIqpV\nMK0aBFTE7pRSqtZwqyC6MWYhsLDYvMdcXj8EPFSRgSWl5bA+KYMHL+tSkbtVSqlaocr2LfxfUZNM\nuDbJKKXU6aqyyX3hxkN0bRFE20b1PB2KUkpVO1UyuR/KOMGafelcHtG8/JWVUkqdokomd22SUUqp\ns1Mlk/t3Gw/T5bxA2jep7+lQlFKqWnKrt8y5dPR4Lqv2pnLPgE6eDqVCFBQUkJSURG5urqdDUUpV\nI3Xr1qVVq1b4+p5ZmfMql9wXbT6CMXB5DblxKSkpicDAQEJDQxERT4ejlKoGjDGkpKSQlJREu3Zn\nNmZ0lWuWWb0nlWZBdWrMOKm5ubk0atRIE7tSym0iQqNGjc7qG3+VS+5r96XTrXUDT4dRoTSxK6VO\n19nmjSqV3FOy8tiXmkO3NiGeDkXVYiLCvffe65x+6aWXeOKJJyr1mKGhoYwcOdI5PW/ePMaNG1ep\nx1Q1W5VK7uv2pwMQ3VqTu/KcOnXq8OWXX3Ls2LFzetyEhAQ2by4+wJlSZ6bKJXdvLyGiVbCnQ1G1\nmI+PD+PHj+fll18+ZdmePXu45JJLiIyMZMCAAezbtw+AcePGcffdd9O7d2/at2/PvHnznNu8+OKL\n9OjRg8jISB5//PFSj3vvvffy7LPPnjI/NTWVESNGEBkZSc+ePdmwYQMATzzxBLfccgv9+vWjffv2\nvPbaa85tPvnkE+Li4oiOjua2227Dbref8eehqqcqldzX7kunc7NAAvyqXCceVcvceeedzJo1i4yM\njJPm33XXXdx0001s2LCBG264gbvvvtu57NChQyxbtoxvvvmGBx+0xpH//vvv2bFjBytXrmTdunUk\nJCSwdOnSEo957bXXsmbNGhITE0+a//jjj9OtWzc2bNjAv//9b8aOHetctnXrVhYtWsTKlSt58skn\nKSgoYMuWLcydO5fly5ezbt06vL29mTVrVkV9NKqaqDJZtLDQsH5/OsOiW3g6FKUICgpi7NixvPba\na/j7+zvn//7773z55ZcAjBkzhgceeMC5bMSIEXh5eREWFsaRI0cAK7l///33dOvWDYCsrCx27NjB\nxRdffMo1rDGEAAAgAElEQVQxvb29uf/++3nuuee47LLLnPOXLVvGF198AcAll1xCSkoKx48fB+CK\nK66gTp061KlTh6ZNm3LkyBF+/PFHEhIS6NGjBwAnTpygadOmFfnxqGqgyiT3XceyyMyzaXu7qjLu\nueceunfvzs033+zW+nXq1HG+NsY4nx966CFuu+02t/YxZswYnnvuOcLDw0/7mN7e3thsNowx3HTT\nTTz33HNu7UPVTFWmWWbNPuvH1G5talY3SFV9NWzYkGuvvZYPPvjAOa93797MmTMHgFmzZnHRRReV\nuY/Bgwczffp0srKyADhw4ABHjx4FYMCAARw4cNKIlfj6+jJp0qST2vsvuugiZ7PKzz//TOPGjQkK\nKn1M4QEDBjBv3jzncVJTU9m7d6+7b1vVEFUmua/bn05gXR/aN9YSv6rquPfee0/qNfP6668zY8YM\nIiMj+fjjj3n11VfL3H7QoEGMHj2aXr16ERERwdVXX01mZiaFhYUkJibSsGHDU7a59dZbsdlszukn\nnniChIQEIiMjefDBB/nwwxLHn3cKCwvjmWeeYdCgQURGRjJw4EAOHTp0mu9cVXdS9PXxXIuNjTWr\nV692Tl/26q80ru/Hx7fGeySeyrJlyxYuuOACT4ehqphNmzYxffp0pk6d6ulQVBVWU
v4QkQRjTGx5\n21aJK/ecfBvbDh+nm7a3q1oiPDxcE7uqVFUiuW9IyqDQaHu7UkpVFLeSu4gMEZFtIpIoIg+Wsd5I\nETEiUu5XBldFd6ZG6ZW7UkpViHKTu4h4A28ClwFhwCgRCSthvUBgIrDidINYuy+N0EYBNKznd7qb\nKqWUKoE7V+5xQKIxZpcxJh+YAwwvYb2ngReA06pRaYxh7b507d+ulFIVyJ3k3hLY7zKd5JjnJCLd\ngdbGmG/L2pGIjBeR1SKyOjk5GYBjWfkczcwjspUmd6WUqihn/YOqiHgBU4F7y1vXGDPNGBNrjIlt\n0qQJAHtTsgFo10T7tyulVEVxJ7kfAFq7TLdyzCsSCIQDP4vIHqAnsMDdH1X3puQA0LZhgDurqzNw\n4sQJ+vbt65HKgD///DNDhw4tc51169axcOFC5/SCBQt4/vnnKzu001K//ukP1l78fVUle/bs4dNP\nP/V0GFXO/Pnzz3nZ5euvv54dO3ZU+H7dSe6rgI4i0k5E/IDrgQVFC40xGcaYxsaYUGNMKPAHMMwY\ns7rk3Z1sX2oOItCqgSb3yjJ9+nSuuuoqvL29PR1KiYonwWHDhjmrKlY01zs/K5sm93OjIv9NPZHc\nb7/9dv7zn/9U/I6NMeU+gMuB7cBO4BHHvKewknjxdX8GYsvbZ0xMjDHGmHvmrDW9n/vR1FSbN2/2\ndAimV69eZvfu3c7p559/3oSHh5vIyEgzefJkY4wxffv2NatWrTLGGJOcnGzatm1rjDFmxowZZvjw\n4ebSSy81bdu2Na+//rr573//a6Kjo018fLxJSUkpc/slS5aYK664whhjzIoVK0zPnj1NdHS06dWr\nl9m6davJy8szrVu3No0bNzZRUVFmzpw5ZsaMGebOO+806enppk2bNsZutxtjjMnKyjKtWrUy+fn5\nJjEx0QwePNh0797dXHjhhWbLli2lvv+bbrrJ3HbbbSYuLs5MmjTJZGVlmZtvvtn06NHDREdHm/nz\n5zvf67Bhw0zfvn1Nhw4dzBNPPOHcR7169YwxxowZM8Z89dVXzvmjR492bu+qpPeVkpJihg8fbiIi\nIkx8fLxZv359qTFnZmaacePGmfDwcBMREWHmzZtnjDHm008/NeHh4aZr167mgQceOCU+Y4z5/PPP\nzU033eR873fddZfp1auXadeunfn888+NMcbEx8eboKAgExUVZaZOnVpiDJs2bTI9evQwUVFRJiIi\nwmzfvt3s3r3bdO3a1bnOiy++aB5//HFjjHUO3HPPPSYmJsZ06dLFrFy50lx55ZWmQ4cO5pFHHin1\nve7evdt07tzZjB492nTp0sWMHDnSZGdnG2OMWb16tbn44otN9+7dzaBBg8zBgwedx5o4caKJiYkx\nL730kjl8+LAZMWKEiYyMNJGRkWb58uXGGGM+/vhj53sYP368sdlszs/r4YcfNpGRkSY+Pt4cPnzY\nLF++3DRo0MCEhoaaqKgok5iYaKZNm2ZiY2NNZGSkueqqq5xxJSYmmvj4eBMeHm4eeeSRkz7///zn\nPyY2NtZERESYxx57rNT3XcRut5vQ0FBTUFBwyrKS8gew2riRt92qCmmMWQgsLDbvsVLW7Xc6f1z2\npmTTppY0yTz5f3+y+eDxCt1nWIsgHv9b11KX5+fns2vXLkJDQwH47rvv+Prrr1mxYgUBAQGkpqaW\ne4xNmzaxdu1acnNz6dChAy+88AJr165l0qRJfPTRR9xzzz1uxdqlSxd+/fVXfHx8WLx4MQ8//DBf\nfPEFTz31FKtXr+aNN94AYObMmQAEBwcTHR3NL7/8Qv/+/fnmm28YPHgwvr6+jB8/nnfeeYeOHTuy\nYsUK7rjjDn766adSj52UlMRvv/2Gt7c3Dz/8MJdccgnTp08nPT2duLg4Lr30UgBWrlzJpk2bCAgI\noEePHlxxxRXExv7Vwnjrrbfy8ssvM2LECDIyM
vjtt99KrPXi5+d3yvu666676NatG/Pnz+enn35i\n7NixrFu3rsR4n376aYKDg9m4cSMAaWlpHDx4kMmTJ5OQkECDBg0YNGgQ8+fPZ8SIEWV+7kV15rdu\n3cqwYcO4+uqref7553nppZf45ptvSt3unXfeYeLEidxwww3k5+djt9udpYxL4+fnx+rVq3n11VcZ\nPnw4CQkJNGzYkPPPP59JkybRqFGjErfbtm0bH3zwAX369OGWW27hrbfeYuLEidx11118/fXXNGnS\nhLlz5/LII48wffp0wDq3i0qYXHfddfTt25evvvoKu91OVlbWSXXtfX19ueOOO5g1axZjx44lOzub\nnj178uyzz/LAAw/w3nvvMWXKFIYNG8bQoUO5+uqrAQgJCeEf//gHAFOmTOGDDz7grrvuYuLEiUyc\nOJFRo0bxzjvvON+Ha/1+YwzDhg1j6dKlJZZ4LuLl5UWHDh1Yv349MTExZX6+p8PjJX/3pZ7g0gu0\n1nRlOXbsGCEhf/VEWrx4MTfffDMBAdYf1JIKVxXXv39/AgMDCQwMJDg4mL/97W8AREREOEcFckdG\nRgY33XQTO3bsQEQoKCgod5vrrruOuXPn0r9/f+bMmcMdd9xBVlYWv/32G9dcc41zvby8vDL3c801\n1zibpb7//nsWLFjASy+9BEBubq5zRKWBAwc6E9BVV13FsmXLTkruffv25Y477iA5OZkvvviCkSNH\n4uPj3n+j0uqyl1ThcfHixc7qkwANGjRg6dKl9OvXj6LOCDfccANLly4tN7mXVGfeHb169eLZZ58l\nKSmJq666io4dO5a7zbBhwwDr3OjatSvNmzcHoH379uzfv7/U5N66dWv69OkDwI033shrr73GkCFD\n2LRpEwMHDgTAbrc79wfWuVHkp59+4qOPPgKs0sfBwcF8/PHHpda19/Pzc/4WFBMTww8//FBiXJs2\nbWLKlCmkp6eTlZXF4MGDAauu//z58wEYPXo09913H3B69ftdNW3alIMHD9ac5J6dZ+NYVh6ta8mV\ne1lX2JXF39+f3Nzybz3w8fGhsLAQ4JT1XWuGe3l5Oae9vLyc7Z1lbV/k0UcfpX///nz11Vfs2bOH\nfv36lRvXsGHDePjhh0lNTSUhIYFLLrmE7OxsQkJCSr3qLUm9en/1xjLG8MUXX9C5c+eT1lmxYsUp\nI86XNAL92LFj+eSTT5gzZw4zZsxwO4bK5BpnWf9+5jQKBY4ePZr4+Hi+/fZbLr/8ct599106derk\n/Hcu61iu50nRdFlt4yV97sYYunbtyu+//17iNq7/piUxZdS19/X1dR6zqA5+ScaNG8f8+fOJiopi\n5syZ/Pzzz+Ue83Tq9xfJzc09aVCYiuDR2jL7Uh09ZRrVjuTuCQ0aNMButzv/Ew4cOJAZM2aQk2N9\n9kXNMqGhoSQkJACcNP6nu9zZPiMjg5YtrVskippeAAIDA8nMzCxxm/r169OjRw8mTpzI0KFD8fb2\nJigoiHbt2vH5558D1n+o9evXux3r4MGDef31152Jbu3atc5lP/zwA6mpqZw4cYL58+c7ryZdjRs3\njldeeQWwyuuWpvj7Op267AMHDuTNN990TqelpREXF8cvv/zCsWPHsNvtzJ49m759+wLQrFkztmzZ\nQmFhIV999VW5n0FZn3mRXbt20b59e+6++26GDx/Ohg0baNasGUePHiUlJYW8vLwym3VOx759+5xJ\n/NNPP+XCCy+kc+fOJCcnO+cXFBTw559/lrj9gAEDePvttwHrCj8jI+OM6toX/1wyMzNp3rw5BQUF\nJw1V2LNnT+e3MNdvWKdbv7/I9u3b3R6gxV1VIrnXljZ3Txk0aBDLli0DYMiQIQwbNozY2Fiio6Od\nTRP33Xcfb7/9Nt26dTupfrm73Nn+gQce4KGHHqJbt24nXSn179+fzZs3Ex0dzdy5c0/Z7rrrruOT\nTz456Wv4r
Fmz+OCDD4iKiqJr1658/fXXbsf66KOPUlBQQGRkJF27duXRRx91LouLi2PkyJFERkYy\ncuTIk5pkijRr1owLLrig3BGair+v06nLPmXKFNLS0ggPDycqKoolS5bQvHlznn/+efr3709UVBQx\nMTEMH27dLP78888zdOhQevfufVLTRWkiIyPx9vYmKiqqxIHAAT777DPCw8OJjo5m06ZNjB07Fl9f\nXx577DHi4uIYOHAgXbp0KfdY7ujcuTNvvvkmF1xwAWlpadx+++34+fkxb948Jk+eTFRUFNHR0fz2\n228lbv/qq6+yZMkSIiIiiImJYfPmzWdU1/7666/nxRdfpFu3buzcuZOnn36a+Ph4+vTpc9J7feWV\nV5g6dSqRkZEkJiYSHBwMnFn9/iNHjuDv78955513Fp9gCdz51bUyHjExMWbaLztN28nfmPTs/HJ/\nUa6uqkJvmYSEBHPjjTd6Oowqr6iXTnmys7NN+/btTXp6+jmIquYr3gOnOsjOzjaFhYXGGGNmz55t\nhg0bVub6GzduNJMmTSpx2dSpU837779f4rJK7y1TWfamZhPs70twgK8nw6jxunfvTv/+/bHb7VW2\nr3t1sXjxYm699VYmTZrkvFpTtU9CQgITJkzAGENISIizB09pyqrfHxISwpgxYyo8Ro+OxBR2x1uk\nZefzf3dd6JEYzgUdiencefbZZ53t8EWuueYaHnnkkUo97qJFi5g8efJJ89q1a+dW2/eMGTNOGaqv\nT58+J7W3V7azif90pKSkMGDAgFPm//jjj6X2oqntzmYkJo8m9/rXvUjXlsG8Obq7R2I4FzS5K6XO\nVLUcZs8ASWkntKaMUkpVAo8l9wJbIbZCoz1llFKqEngsuefbrRsh2mgfd6WUqnCeS+42K7m3baR1\n3JVSqqJ5NLn7egvnBdX1VAi1htZzP3tF9dwPHjzoLCrlrnHjxpV51+8rr7zivGNYnRlPlFd+4403\nyu0C6UkebZZp3SAAb69Ta3eoiqX13P9ytrW/W7RocUblGcpSlZL7uax3X5HH8kRyv+WWW3j99dfP\n6TFPh8eSe56tsNYUDHP67kGYcUXFPr4rPwnOmjXLeZs6wAsvvEBERARRUVHOJNqvXz9n+dRjx445\nSwTPnDmTESNGMHDgQEJDQ3njjTeYOnUq3bp1o2fPns7aNKVt72rlypX06tWLbt260bt3b7Zt20Z+\nfj6PPfYYc+fOdd6mP3PmTCZMmEBGRgZt27Z1FqrKzs6mdevWFBQUsHPnToYMGUJMTAwXXXQRW7du\nLfX9jxs3jn/+85/Ex8fzwAMPkJ2dzS233EJcXBzdunVzli6YOXMmw4cPp1+/fnTs2JEnn3zylH3t\n2bPHWQPEbrdz//3306NHDyIjI3n33XcB667vCRMm0LlzZy699FJnbZGSvPbaaxw8eJD+/fvTv39/\nAGbPnk1ERATh4eGn9D8vznWEqHnz5jFu3DgAPv/8c2fpgqKKhKXF+/PPP3PRRRcxbNgwwsLCyM7O\n5oorriAqKorw8PASS0IUCQ0N5YEHHiAiIoK4uDgSExMBSE5OZuTIkfTo0YMePXqwfPlyAJ544gnG\njBlDnz59GDNmDHa7nfvuu4/w8HAiIyOdyTIhIYG+ffsSExPD4MGDnWUD+vXrx+TJk4mLi6NTp078\n+uuvJZ5DJZ1rADk5OVx77bWEhYVx5ZVXEh8f7zxvv//+e3r16kX37t255pprnPVhShMQEEBoaCgr\nV64scz1P8dgdqvm2Qi0Ydg5oPXdLRdVzd/XBBx8QHBzMqlWryMvLo0+fPgwaNIi1a9eybds2Nm/e\nzJEjRwgLC+OWW24pcR933303U6dOZcmSJTRu3PiMa7YX99RTT7Fo0SJatmxJenp6mfECrFmzhk2b\nNtGuXTu++OILWrRowbffWuPdZ2RklHmsorrzRefCN998w8SJE5k0aRIXXng
h+/btY/DgwWzZsgWA\nzZs3s2zZMvz9/Xn77bfZs2cP69atw8fHh9TUVAoKCsqs426z2Vi5ciULFy7kySefZPHixaecQ8eP\nHy/xXHvrrbdo0KABmzdvZtOmTURHRwPWBckzzzzD4sWLqVevHi+88AJTp07lscdKHLbCKTY2ll9/\n/ZW4uLjT+vc5FzyW3AtNLewGedm5b0fWeu6Wiqrn7ur7779nw4YNzmaajIwMduzYwdKlSxk1ahTe\n3t60aNGCSy65pNz3WWTVqlVnVLO9uD59+jBu3DiuvfZarrrqqjLj9fPzIy4ujnbt2gHWv+u9997L\n5MmTGTp0KBdddFGZxxo1apTzedKkSYB1nrkOV3f8+HHnlfCwYcOc5W0XL17MP//5T2dN/IYNG7Jp\n06Yy67gXvZ+YmBj27NlTYkylnWvLli1j4sSJAM5vCwB//PEHmzdvdlYBzc/Pp1evXmW+b7DqsJf1\nrdGTPFpbptYldw/Qeu6Wiqzn7rqf119/3TmAQ5Fz2fZbWh33d955hxUrVvDtt98SExNDQkJCqfH+\n/PPPJ30+nTp1Ys2aNSxcuJApU6YwYMCAMq9gXWMoel1YWMgff/xB3bqndphwpw57WXXci86/suqw\nn+65Zoxh4MCBzJ49u8z1iquMOuwVxaMlf7UbZOXTeu6nOtt67q77efvtt51Xhdu3byc7O5uLL76Y\nuXPnYrfbOXToEEuWLCkzHtf3X1bN9pKUVsd9586dxMfH89RTT9GkSRP2799farzFHTx4kICAAG68\n8Ubuv/9+1qxZU2b8RW3yc+fOdV7tDho06KQfG0v7Qzxw4EDeffddZ5JOTU09rTruRYqfQ6Wda336\n9OGzzz4DrOahomEMe/bsyfLly52/GWRnZ7N9+3YAHnrooVLr7FRGHfaK4lZyF5EhIrJNRBJF5JRf\n8ETknyKyUUTWicgyESl9BAMXeuV+bmg995OdbT33In//+98JCwuje/fuhIeHc9ttt2Gz2bjyyivp\n2LEjYWFhjB07ttyv9+PHj2fIkCH079+/zJrtJSmtjvv999/v/FG2d+/eREVFlRpvcRs3biQuLo7o\n6GiefPJJpkyZUmb8aWlpREZG8uqrrzprw7/22musXr2ayMhIwsLCThpntPhn2KZNGyIjI4mKiuLT\nTz89rTruRYqfQ6Wda0VDJIaFhTFlyhS6du1KcHAwTZo0YebMmYwaNYrIyEh69erlbG7ZuHFjqbXW\nly9f7mw+qnLKqwkMeAM7gfaAH7AeCCu2TpDL62HA/8rbb0CLjmXWP64ptJ579eFuPXf1l7Zt25rk\n5GRPh+E2m81mTpw4YYwxJjEx0YSGhpq8vLwytxk0aFCJ89esWVPp/68qu557HJBojNkFICJzgOGA\n89cSY8xxl/XrYdUFK1P9uh4fm7vW0HruSllycnLo378/BQUFGGN466238PPzK3ObRYsWlTj/2LFj\nPP3005URZoVwJ8O2BPa7TCcB8cVXEpE7gX9hXd2X2z2gdQNtkjmXSuuKV5OcbT33cePGOfuJV4Yr\nr7yS3bt3nzTvhRdeOOUHzpLEx8ef0iPo448/JiIiokJjLE1psZfWW6WqCgwMdPZrP1tVtjnGodx6\n7iJyNTDEGPN3x/QYIN4YM6GU9UcDg40xN5WwbDwwHqBNmzYx5Q1WWxNoPXel1Jmq7HruB4DWLtOt\nHPNKMwcosVOuMWaaMSbWGBNb1I+3NijvD6hSShV3tnnDneS+CugoIu1ExA+4HljguoKIdHSZvALY\ncVZR1SB169YlJSVFE7xSym3GGFJSUkq8T8Bd5ba5G2NsIjIBWITVc2a6MeZPEXkK61fbBcAEEbkU\nKADSgFOaZGqrVq1akZSURHJysqdDUUpVI3Xr1qVVq1ZnvL1Hx1CtqB82lFKqtqjyY6gqpZSqPJrc\nlVKqBtLkrpRSNZDH2txFJBmo+R3d3dc
YOP2iLrWHfj5l08+nbDXp82lrjCm3L7nHkrs6mYisdudH\nktpKP5+y6edTttr4+WizjFJK1UCa3JVSqgbS5F51TPN0AFWcfj5l08+nbLXu89E2d6WUqoH0yl0p\npWogTe4eICKtRWSJiGwWkT9FZKJjfkMR+UFEdjieG3g6Vk8SEW8RWSsi3zim24nICsdwj3Mdhexq\nJREJEZF5IrJVRLaISC89f/4iIpMc/7c2ichsEalb284fTe6eYQPuNcaEAT2BOx3jzj4I/GiM6Qj8\n6JiuzSYCW1ymXwBeNsZ0wCpQd6tHoqoaXsUazrILEIX1Oen5A4hIS+BuINYYE45V8PB6atn5o8nd\nA4wxh4wxaxyvM7H+Y7bEGr7wQ8dqH1JKXfzaQERaYZWPft8xLVgjfM1zrFJrPx8RCQYuBj4AMMbk\nG2PS0fPHlQ/gLyI+QABwiFp2/mhy9zARCQW6ASuAZsaYQ45Fh4FmHgqrKngFeAAodEw3AtKNMUVD\n2Sdh/UGsjdoBycAMR7PV+yJSDz1/ADDGHABeAvZhJfUMIIFadv5ocvcgEakPfAHcU2yQcRyjnNfK\nrkwiMhQ4aoxJ8HQsVZQP0B142xjTDcimWBNMLT9/GmB9i2kHtADqAUM8GpQHaHL3EBHxxUrss4wx\nXzpmHxGR5o7lzYGjnorPw/oAw0RkD9awjZdgtTGHOL5mQ/nDPdZkSUCSMWaFY3oeVrLX88dyKbDb\nGJNsjCkAvsQ6p2rV+aPJ3QMc7ccfAFuMMVNdFi3gr1GsbgK+PtexVQXGmIeMMa2MMaFYP4T9ZIy5\nAVgCXO1YrTZ/PoeB/SLS2TFrALAZPX+K7AN6ikiA4/9a0edTq84fvYnJA0TkQuBXYCN/tSk/jNXu\n/hnQBqti5rXGmFSPBFlFiEg/4D5jzFARaY91Jd8QWAvcaIzJ82R8niIi0Vg/NvsBu4CbsS7W9PwB\nRORJ4Dqsnmlrgb9jtbHXmvNHk7tSStVA2iyjlFI1kCZ3pZSqgTS5K6VUDaTJXSmlaiBN7kopVQNp\ncldKqRpIk7uqFURknIi8Uc46T4nIpRV4zBARuaOi9lfRRORhT8egKo8md+URLreBn8m23hUZSxFj\nzGPGmMUVuMsQoMomd6wb51QNpclduUVEQkVkk8v0fSLyhIjc7Rh0ZIOIzHEsqyci00VkpaNq4XDH\n/HEiskBEfgJ+FJHmIrJURNY5BlW4qIzjZ4nIf0VkPdBLRGJE5BcRSRCRRS41VX4WkVdd9hlXbD+B\nIrLbUdsHEQkqmhaRmSJytWP+HhF5UkTWiMhGEenimN/EMRDGn45qjHtFpHEpYT8PnO+I5UWxvOiI\na6OIXFfOZz7Zsd56EXneMS9aRP5wfN5fFQ3I4XjfsY7XjR11eYo+8y9F5H9iDeLxH8f857FK4q4T\nkVllxaGqKWOMPvRR7gMIBTa5TN8HPAEcBOo45oU4nv+NdWs3WFev27Eq843DKnrV0LHsXuARx2tv\nILCM4xus2+kBfIHfgCaO6euA6Y7XPwPvOV5fXBSz49hvOF7PAEY4Xo8H/ut4PRO42vF6D3CX4/Ud\nwPuO128ADzleD3HE1djNz2wk8IPjvTbDqoHSvJRtL3O8xwDHdNFntgHo63j9FPCKy/uOdbxuDOxx\ned+7gGCgLlZZgtaOZVmePq/0UXkPvXJXZ2sDMEtEbsSq4wEwCHhQRNZhJZ26WPVOAH4wf9U7WQXc\nLCJPABHGGrikNHasKpoAnYFw4AfHMaZgVfkrMhvAGLMUCBKRkGL7eh+rFguO5xmlHLOoWmcCVqIG\nuBCrPgnGmP9hjejjrguB2cYYuzHmCPAL0KOUdS8FZhhjchzHShVrkI4QY8wvjnU+xPoDVp4fjTEZ\nxphcrAJabU8jZlVNaXJX7rJx8vlS1/F8BfAmVsnZVY62dAFGGmOiHY82xpii4fKyi3bgSL4XY5Ve\nnSk
iY8s4fq4xxu54LcCfLvuPMMYMclm3eMGkk6aNMcuBUEdRMm9jzCZKVlRUyo5VQ70qc/33qVts\nmWtxrOrwXlQF0OSu3HUEaCoijUSkDjAU6/xpbYxZAkzG+upfH1gE3OUot4qIdCtphyLSFjhijHkP\n62q6u5uxbAOaiEgvx358RaSry/LrHPMvBDKMMRkl7OMj4FNKv2ovzXLgWsf+BwFlDUKdCQS6TP8K\nXCfWwN9NsP6wrSxl2x+wvtUEOI7V0PE+0lx+mxiDdfUPVjNSjOP11binoOi3B1Xz6F9w5RZjTIGI\nPIWVjA4AW7Hajj9xNBcI8JoxJl1EnsYaJm+DiHgBu7H+GBTXD7hfRAqALKCsK3fXWPIdP3y+5ji2\nj+N4fzpWyRWRtVht87eUsptZwDM4mnBOw5PAbBEZA/yONZxdic1JxpgUEVnu+CH6O6xhA3sB67G+\nTTxgrNrsJW37P7HK+q4WkXxgIVbvlpuAdxxJv6jUL1jDyn0mIuOBb918L9Ow/o3WGKtevqpBtOSv\nqts0dyIAAACfSURBVFFE5Ges+u+ry1nvamC4MWbMae6/DmA3xtgc3xzeNsZEn3HASlUSvXJXtY6I\nvI7VG+XyM9i8DdYVsheQD/yjImNTqqLolbuqUkRkBVCn2OwxxpiNnojHHSLSCPixhEUDjDEp5Wwb\nAXxcbHaeMSa+ouJTtZMmd6WUqoG0t4xSStVAmtyVUqoG0uSulFI1kCZ3pZSqgTS5K6VUDfT/xaCK\n3f46V0UAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "replies_grouped_by_users_replying_to_df[['cumulative_reply_to_count_sum_percentage', 'cumulative_replied_to_users_percentage']].plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cut off the tail.\n", "Removes users that were only replied to by 1 user." ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5621" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_to_summary_df.drop(reply_to_summary_df[reply_to_summary_df.users_replying_to_count == 1].index, inplace=True)\n", "reply_to_summary_df['reply_to_screen_name'].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Approach 1: By reply to count" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Top accounts (by reply to count)\n", "Unknown for type indicates that it is not matched with an known Twitter account." ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_countreply_to_screen_nametypeusers_replying_to_countpercent_of_users_replying_to
user_id
228915642373chrisgeidnerjournalists500.033113
38174011516ericgellerjournalists590.039073
118130765881dylanlscottjournalists710.047020
46557945848StevenTDennisjournalists630.041722
275207082734AlexParkerDCjournalists240.015894
17466186707tomlobiancojournalists170.011258
906734342694KimberlyRobinsnjournalists70.004636
19847765647sahilkapurjournalists340.022517
398088661616MEPFullerjournalists940.062252
20176845535heathdwilliamsother30.001987
493756786512amir_anasrjournalists220.014570
46555511494Alex_Panettajournalists20.001325
26559241491fordmjournalists270.017881
317980134462CraigCaplanjournalists90.005960
17907987451timkmakjournalists290.019205
103016675435AaronMehtajournalists400.026490
14597239370TonyRommjournalists550.036424
47758416359marissaaevansjournalists20.001325
21696279347brianbeutlerjournalists370.024503
52392666341ZoeTillmanjournalists90.005960
225265639333ddale8journalists160.010596
16285830330philewingjournalists100.006623
227790723314RichardRubinDCjournalists390.025828
15146659305JSwiftTWSjournalists340.022517
23332846299mattzapjournalists40.002649
90478926290MikeSacksEsqjournalists210.013907
16061946283kelmejjournalists210.013907
29771100276lawrencehurleyjournalists250.016556
63717541263phillyrich1journalists20.001325
1337271255darthother320.021192
46955476243GrahamDavidAjournalists410.027152
14362404242bradheathjournalists100.006623
158072303240ValerieInsinnajournalists190.012583
16459325237ryanbeckwithjournalists470.031126
19186003230seungminkimjournalists720.047682
950531230pbumpjournalists280.018543
22429979222nycsouthpawpundit310.020530
12245632222jackshaferjournalists380.025166
23664429221dnvolzjournalists220.014570
46213956218JamilSmithjournalists70.004636
80111587217JeffYoungjournalists310.020530
407013776214burgessevjournalists600.039735
11771512213OKnoxjournalists580.038411
16244449212jbarrojournalists670.044371
16125224209ByronTaujournalists590.039073
36607254207Oriana0214journalists200.013245
437019753206TimothyNoah1journalists120.007947
269911034206YAppelbaumjournalists120.007947
14529929193jaketapperjournalists390.025828
48120914186SopanDebjournalists780.051656
\n", "
" ], "text/plain": [ " reply_to_count reply_to_screen_name type \\\n", "user_id \n", "22891564 2373 chrisgeidner journalists \n", "3817401 1516 ericgeller journalists \n", "118130765 881 dylanlscott journalists \n", "46557945 848 StevenTDennis journalists \n", "275207082 734 AlexParkerDC journalists \n", "17466186 707 tomlobianco journalists \n", "906734342 694 KimberlyRobinsn journalists \n", "19847765 647 sahilkapur journalists \n", "398088661 616 MEPFuller journalists \n", "20176845 535 heathdwilliams other \n", "493756786 512 amir_anasr journalists \n", "46555511 494 Alex_Panetta journalists \n", "26559241 491 fordm journalists \n", "317980134 462 CraigCaplan journalists \n", "17907987 451 timkmak journalists \n", "103016675 435 AaronMehta journalists \n", "14597239 370 TonyRomm journalists \n", "47758416 359 marissaaevans journalists \n", "21696279 347 brianbeutler journalists \n", "52392666 341 ZoeTillman journalists \n", "225265639 333 ddale8 journalists \n", "16285830 330 philewing journalists \n", "227790723 314 RichardRubinDC journalists \n", "15146659 305 JSwiftTWS journalists \n", "23332846 299 mattzap journalists \n", "90478926 290 MikeSacksEsq journalists \n", "16061946 283 kelmej journalists \n", "29771100 276 lawrencehurley journalists \n", "63717541 263 phillyrich1 journalists \n", "1337271 255 darth other \n", "46955476 243 GrahamDavidA journalists \n", "14362404 242 bradheath journalists \n", "158072303 240 ValerieInsinna journalists \n", "16459325 237 ryanbeckwith journalists \n", "19186003 230 seungminkim journalists \n", "950531 230 pbump journalists \n", "22429979 222 nycsouthpaw pundit \n", "12245632 222 jackshafer journalists \n", "23664429 221 dnvolz journalists \n", "46213956 218 JamilSmith journalists \n", "80111587 217 JeffYoung journalists \n", "407013776 214 burgessev journalists \n", "11771512 213 OKnox journalists \n", "16244449 212 jbarro journalists \n", "16125224 209 ByronTau journalists \n", "36607254 207 Oriana0214 journalists 
\n", "437019753 206 TimothyNoah1 journalists \n", "269911034 206 YAppelbaum journalists \n", "14529929 193 jaketapper journalists \n", "48120914 186 SopanDeb journalists \n", "\n", " users_replying_to_count percent_of_users_replying_to \n", "user_id \n", "22891564 50 0.033113 \n", "3817401 59 0.039073 \n", "118130765 71 0.047020 \n", "46557945 63 0.041722 \n", "275207082 24 0.015894 \n", "17466186 17 0.011258 \n", "906734342 7 0.004636 \n", "19847765 34 0.022517 \n", "398088661 94 0.062252 \n", "20176845 3 0.001987 \n", "493756786 22 0.014570 \n", "46555511 2 0.001325 \n", "26559241 27 0.017881 \n", "317980134 9 0.005960 \n", "17907987 29 0.019205 \n", "103016675 40 0.026490 \n", "14597239 55 0.036424 \n", "47758416 2 0.001325 \n", "21696279 37 0.024503 \n", "52392666 9 0.005960 \n", "225265639 16 0.010596 \n", "16285830 10 0.006623 \n", "227790723 39 0.025828 \n", "15146659 34 0.022517 \n", "23332846 4 0.002649 \n", "90478926 21 0.013907 \n", "16061946 21 0.013907 \n", "29771100 25 0.016556 \n", "63717541 2 0.001325 \n", "1337271 32 0.021192 \n", "46955476 41 0.027152 \n", "14362404 10 0.006623 \n", "158072303 19 0.012583 \n", "16459325 47 0.031126 \n", "19186003 72 0.047682 \n", "950531 28 0.018543 \n", "22429979 31 0.020530 \n", "12245632 38 0.025166 \n", "23664429 22 0.014570 \n", "46213956 7 0.004636 \n", "80111587 31 0.020530 \n", "407013776 60 0.039735 \n", "11771512 58 0.038411 \n", "16244449 67 0.044371 \n", "16125224 59 0.039073 \n", "36607254 20 0.013245 \n", "437019753 12 0.007947 \n", "269911034 12 0.007947 \n", "14529929 39 0.025828 \n", "48120914 78 0.051656 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_to_summary_df.sort_values('reply_to_count', ascending=False).head(50)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Account types (by reply to count)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_counttype_percentage
type
journalists590320.717880
unknown128900.156754
pundit32760.039839
other_political16870.020515
other15770.019178
academic11950.014532
ngo11310.013754
media6050.007357
politicians3330.004050
business2650.003223
cultural1270.001544
government1130.001374
\n", "
" ], "text/plain": [ " reply_to_count type_percentage\n", "type \n", "journalists 59032 0.717880\n", "unknown 12890 0.156754\n", "pundit 3276 0.039839\n", "other_political 1687 0.020515\n", "other 1577 0.019178\n", "academic 1195 0.014532\n", "ngo 1131 0.013754\n", "media 605 0.007357\n", "politicians 333 0.004050\n", "business 265 0.003223\n", "cultural 127 0.001544\n", "government 113 0.001374" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "types_by_reply_to_count_df = reply_to_summary_df[['type', 'reply_to_count']].groupby('type').sum()\n", "types_by_reply_to_count_df['type_percentage']= types_by_reply_to_count_df['reply_to_count'] / types_by_reply_to_count_df['reply_to_count'].sum()\n", "types_by_reply_to_count_df.sort_values('reply_to_count', ascending=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Approach 2: Per user\n", "Replies by type per user." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add type by merging screen name lookup" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_iduser_idscreen_namereply_to_user_idreply_to_screen_namereply_to_tweet_idtweet_created_attype
0847428582821449730780221130loren_duggan140286364nielslesniewski8474245770093690942017-03-30 12:41:33+00:00journalists
184647217990255001729607664adamliptak106729916espinsegall8464716747699363842017-03-27 21:21:09+00:00academic
284635729001809920029607664adamliptak147586500EdWhelanEPPC8463565763992125442017-03-27 13:44:37+00:00academic
38477898856920186909484732amacker26117379scottpllc8470462842970316812017-03-31 12:37:14+00:00unknown
48474864917270855689484732amacker9484732amacker8474862111742197762017-03-30 16:31:39+00:00journalists
\n", "
" ], "text/plain": [ " tweet_id user_id screen_name reply_to_user_id \\\n", "0 847428582821449730 780221130 loren_duggan 140286364 \n", "1 846472179902550017 29607664 adamliptak 106729916 \n", "2 846357290018099200 29607664 adamliptak 147586500 \n", "3 847789885692018690 9484732 amacker 26117379 \n", "4 847486491727085568 9484732 amacker 9484732 \n", "\n", " reply_to_screen_name reply_to_tweet_id tweet_created_at \\\n", "0 nielslesniewski 847424577009369094 2017-03-30 12:41:33+00:00 \n", "1 espinsegall 846471674769936384 2017-03-27 21:21:09+00:00 \n", "2 EdWhelanEPPC 846356576399212544 2017-03-27 13:44:37+00:00 \n", "3 scottpllc 847046284297031681 2017-03-31 12:37:14+00:00 \n", "4 amacker 847486211174219776 2017-03-30 16:31:39+00:00 \n", "\n", " type \n", "0 journalists \n", "1 academic \n", "2 academic \n", "3 unknown \n", "4 journalists " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_all_join_df = pd.merge(reply_df, user_type_lookup_df[['type']], how='left', left_on='reply_to_user_id', right_index=True)\n", "reply_all_join_df['type'].fillna('unknown', inplace=True)\n", "# Drop tail\n", "reply_all_join_df = reply_all_join_df[reply_all_join_df.reply_to_user_id.isin(reply_to_summary_df.index)]\n", "reply_all_join_df.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typeacademicbusinessculturalgovernmentjournalistsmediangootherother_politicalpoliticians...cultural_percentgovernment_percentjournalists_percentmedia_percentngo_percentother_percentother_political_percentpoliticians_percentpundit_percentunknown_percent
user_id
1001653780.00.013.00.04.00.00.00.01.01.0...0.520.0000000.1600000.00.0000000.00.040.040.0000000.240000
10019918650.00.00.00.00.00.00.00.00.00.0...0.000.0000000.0000000.00.0000000.00.000.000.0000001.000000
10022298620.00.00.00.03.00.00.00.00.00.0...0.000.0000000.7500000.00.0000000.00.000.000.0000000.250000
1008020890.00.00.00.01.00.00.00.00.00.0...0.000.0000000.3333330.00.0000000.00.000.000.0000000.666667
1008607900.00.00.00.04.00.00.00.00.00.0...0.000.0000000.5000000.00.0000000.00.000.000.0000000.500000
10097492292.00.00.02.073.00.04.00.00.00.0...0.000.0240960.8795180.00.0481930.00.000.000.0240960.000000
10137852200.00.00.00.01.00.00.00.00.00.0...0.000.0000001.0000000.00.0000000.00.000.000.0000000.000000
1021716910.00.00.01.05.00.00.00.00.00.0...0.000.1428570.7142860.00.0000000.00.000.000.0000000.142857
1022389970.00.00.00.01.00.00.01.00.00.0...0.000.0000000.5000000.00.0000000.50.000.000.0000000.000000
1029947400.00.00.00.01.00.00.00.00.00.0...0.000.0000001.0000000.00.0000000.00.000.000.0000000.000000
\n", "

10 rows × 25 columns

\n", "
" ], "text/plain": [ "type academic business cultural government journalists media ngo \\\n", "user_id \n", "100165378 0.0 0.0 13.0 0.0 4.0 0.0 0.0 \n", "1001991865 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1002229862 0.0 0.0 0.0 0.0 3.0 0.0 0.0 \n", "100802089 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", "100860790 0.0 0.0 0.0 0.0 4.0 0.0 0.0 \n", "1009749229 2.0 0.0 0.0 2.0 73.0 0.0 4.0 \n", "1013785220 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", "102171691 0.0 0.0 0.0 1.0 5.0 0.0 0.0 \n", "102238997 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", "102994740 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", "\n", "type other other_political politicians ... \\\n", "user_id ... \n", "100165378 0.0 1.0 1.0 ... \n", "1001991865 0.0 0.0 0.0 ... \n", "1002229862 0.0 0.0 0.0 ... \n", "100802089 0.0 0.0 0.0 ... \n", "100860790 0.0 0.0 0.0 ... \n", "1009749229 0.0 0.0 0.0 ... \n", "1013785220 0.0 0.0 0.0 ... \n", "102171691 0.0 0.0 0.0 ... \n", "102238997 1.0 0.0 0.0 ... \n", "102994740 0.0 0.0 0.0 ... \n", "\n", "type cultural_percent government_percent journalists_percent \\\n", "user_id \n", "100165378 0.52 0.000000 0.160000 \n", "1001991865 0.00 0.000000 0.000000 \n", "1002229862 0.00 0.000000 0.750000 \n", "100802089 0.00 0.000000 0.333333 \n", "100860790 0.00 0.000000 0.500000 \n", "1009749229 0.00 0.024096 0.879518 \n", "1013785220 0.00 0.000000 1.000000 \n", "102171691 0.00 0.142857 0.714286 \n", "102238997 0.00 0.000000 0.500000 \n", "102994740 0.00 0.000000 1.000000 \n", "\n", "type media_percent ngo_percent other_percent \\\n", "user_id \n", "100165378 0.0 0.000000 0.0 \n", "1001991865 0.0 0.000000 0.0 \n", "1002229862 0.0 0.000000 0.0 \n", "100802089 0.0 0.000000 0.0 \n", "100860790 0.0 0.000000 0.0 \n", "1009749229 0.0 0.048193 0.0 \n", "1013785220 0.0 0.000000 0.0 \n", "102171691 0.0 0.000000 0.0 \n", "102238997 0.0 0.000000 0.5 \n", "102994740 0.0 0.000000 0.0 \n", "\n", "type other_political_percent politicians_percent pundit_percent \\\n", "user_id \n", "100165378 0.04 0.04 0.000000 \n", "1001991865 0.00 0.00 
0.000000 \n", "1002229862 0.00 0.00 0.000000 \n", "100802089 0.00 0.00 0.000000 \n", "100860790 0.00 0.00 0.000000 \n", "1009749229 0.00 0.00 0.024096 \n", "1013785220 0.00 0.00 0.000000 \n", "102171691 0.00 0.00 0.000000 \n", "102238997 0.00 0.00 0.000000 \n", "102994740 0.00 0.00 0.000000 \n", "\n", "type unknown_percent \n", "user_id \n", "100165378 0.240000 \n", "1001991865 1.000000 \n", "1002229862 0.250000 \n", "100802089 0.666667 \n", "100860790 0.500000 \n", "1009749229 0.000000 \n", "1013785220 0.000000 \n", "102171691 0.142857 \n", "102238997 0.000000 \n", "102994740 0.000000 \n", "\n", "[10 rows x 25 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_summary_by_user_df = reply_all_join_df.groupby([reply_all_join_df.user_id, reply_all_join_df.type]).size().unstack().fillna(0)\n", "# Add a total column\n", "reply_summary_by_user_df['total'] = reply_summary_by_user_df.sum(axis=1)\n", "for col_name in reply_summary_by_user_df.columns[:-1]:\n", " reply_summary_by_user_df['{}_percent'.format(col_name)] = reply_summary_by_user_df[col_name] / reply_summary_by_user_df.total\n", "reply_summary_by_user_df.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Average of percent of replies by type for each user\n", "That is, for each user determine the percent of replies by type. Then take the average of each type.\n", "\n", "Thus, this reply analysis is on a per-user basis, accounting for how prolific a tweeter a user is. 
(That is, users who tweet more aren't weighted more heavily.)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "type\n", "academic_percent 0.018718\n", "business_percent 0.007318\n", "cultural_percent 0.004393\n", "government_percent 0.004183\n", "journalists_percent 0.620576\n", "media_percent 0.023570\n", "ngo_percent 0.016683\n", "other_percent 0.010210\n", "other_political_percent 0.019884\n", "politicians_percent 0.006863\n", "pundit_percent 0.019340\n", "unknown_percent 0.248261\n", "dtype: float64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_summary_by_user_df.filter(axis=1, regex=\"_percent$\").mean()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Approach 3: By count of users replying to\n", "The number of users that replied to an account. Thus, each user counts as 1, even if that user made multiple replies to the account.\n", "\n", "This weights an account that is replied to by 100 users more heavily than an account that is replied to 100 times by a single user." ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_countreply_to_screen_nametypeusers_replying_to_countpercent_of_users_replying_to
user_id
398088661616MEPFullerjournalists940.062252
48120914186SopanDebjournalists780.051656
93069110147maggieNYTjournalists760.050331
19186003230seungminkimjournalists720.047682
118130765881dylanlscottjournalists710.047020
13524182116daveweigeljournalists710.047020
16244449212jbarrojournalists670.044371
14412533113CillizzaCNNjournalists650.043046
19107878115GlennThrushjournalists640.042384
46557945848StevenTDennisjournalists630.041722
218325695123Bencjacobsjournalists620.041060
407013776214burgessevjournalists600.039735
16125224209ByronTaujournalists590.039073
38174011516ericgellerjournalists590.039073
11771512213OKnoxjournalists580.038411
51462013124lizzieohreallyjournalists570.037748
217550862150BresPoliticojournalists560.037086
14597239370TonyRommjournalists550.036424
4207961121chrislhayesjournalists540.035762
32625526789KFILEjournalists530.035099
\n", "
" ], "text/plain": [ " reply_to_count reply_to_screen_name type \\\n", "user_id \n", "398088661 616 MEPFuller journalists \n", "48120914 186 SopanDeb journalists \n", "93069110 147 maggieNYT journalists \n", "19186003 230 seungminkim journalists \n", "118130765 881 dylanlscott journalists \n", "13524182 116 daveweigel journalists \n", "16244449 212 jbarro journalists \n", "14412533 113 CillizzaCNN journalists \n", "19107878 115 GlennThrush journalists \n", "46557945 848 StevenTDennis journalists \n", "218325695 123 Bencjacobs journalists \n", "407013776 214 burgessev journalists \n", "16125224 209 ByronTau journalists \n", "3817401 1516 ericgeller journalists \n", "11771512 213 OKnox journalists \n", "51462013 124 lizzieohreally journalists \n", "217550862 150 BresPolitico journalists \n", "14597239 370 TonyRomm journalists \n", "4207961 121 chrislhayes journalists \n", "326255267 89 KFILE journalists \n", "\n", " users_replying_to_count percent_of_users_replying_to \n", "user_id \n", "398088661 94 0.062252 \n", "48120914 78 0.051656 \n", "93069110 76 0.050331 \n", "19186003 72 0.047682 \n", "118130765 71 0.047020 \n", "13524182 71 0.047020 \n", "16244449 67 0.044371 \n", "14412533 65 0.043046 \n", "19107878 64 0.042384 \n", "46557945 63 0.041722 \n", "218325695 62 0.041060 \n", "407013776 60 0.039735 \n", "16125224 59 0.039073 \n", "3817401 59 0.039073 \n", "11771512 58 0.038411 \n", "51462013 57 0.037748 \n", "217550862 56 0.037086 \n", "14597239 55 0.036424 \n", "4207961 54 0.035762 \n", "326255267 53 0.035099 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reply_to_summary_df.sort_values('users_replying_to_count', ascending=False).head(20)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "### Account types (by count of users replying to)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
users_replying_to_counttype_percentage
type
journalists159130.575682
unknown76440.276536
pundit9980.036104
other_political8830.031944
academic5100.018450
media4530.016388
ngo4510.016316
other3010.010889
politicians2030.007344
business1330.004812
cultural790.002858
government740.002677
\n", "
" ], "text/plain": [ " users_replying_to_count type_percentage\n", "type \n", "journalists 15913 0.575682\n", "unknown 7644 0.276536\n", "pundit 998 0.036104\n", "other_political 883 0.031944\n", "academic 510 0.018450\n", "media 453 0.016388\n", "ngo 451 0.016316\n", "other 301 0.010889\n", "politicians 203 0.007344\n", "business 133 0.004812\n", "cultural 79 0.002858\n", "government 74 0.002677" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "types_by_users_replying_to_df = reply_to_summary_df[['type', 'users_replying_to_count']].groupby('type').sum()\n", "types_by_users_replying_to_df['type_percentage']= types_by_users_replying_to_df['users_replying_to_count'] / types_by_users_replying_to_df['users_replying_to_count'].sum()\n", "types_by_users_replying_to_df.sort_values('users_replying_to_count', ascending=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Unknown accounts\n", "Remember, the tail has been cut off" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Number of unknown accounts" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "reply_to_count 3120\n", "reply_to_screen_name 3120\n", "type 3120\n", "users_replying_to_count 3120\n", "percent_of_users_replying_to 3120\n", "dtype: int64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ " reply_to_summary_df[reply_to_summary_df.type == 'unknown'].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Number of known accounts" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "reply_to_count 2501\n", "reply_to_screen_name 2501\n", "type 2501\n", "users_replying_to_count 2501\n", "percent_of_users_replying_to 2501\n", "dtype: int64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ " 
reply_to_summary_df[reply_to_summary_df.type != 'unknown'].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Top unknown by reply to count that are replied to by at least 5 users" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reply_to_screen_namereply_to_countusers_replying_to_count
user_id
1173121356xenocryptsite175
18111042michaelpfreeman115
39100192Southfive115
415794979AndStrats105
166207886BrianLaslie105
4440118883luke_j_obrien106
48585729cam_mason106
14372270mcbyrne95
338164741bsdtectr95
55038792ELSchillinger95
152145921jason_howerton95
15111062thomaswright0895
14668111BGrueskin95
8790702dhm96
80669530bungdan96
593909348mis212797
2936965923RobertKYarbro95
6576292jhaverly97
6931262rachsyme95
21700839BharatKrishnan86
19087309AlexKoppelman85
1922583464SpectatrCitizen85
26377458pcdunham86
241280143CommsDirector85
20097201eorden86
34643610EricBoehlert87
29090846_Drew_McCoy_86
23141473calvinstowell85
343063239Carter_PE87
21093964TiffanyHaverly86
392705809econwonk86
32071013DeanClancy85
166782000henrycobb87
56701775DavidRutz85
59133139keithcrc87
108338399lukeoneil4786
24972610K_Schallhorn85
15826886CarolBlymire87
71569841JoshSchwerin87
4120521028scoejarborough85
14364006jrosenbaum86
278175882leximccammond75
8475532sdkstl75
61664932jasonahart75
40804509matthewjsinger76
296513648jdubya6576
51639553sdjacksondc75
475957325DamonLinker75
47020338VincentMorris75
384841636hash_said76
\n", "
" ], "text/plain": [ " reply_to_screen_name reply_to_count users_replying_to_count\n", "user_id \n", "1173121356 xenocryptsite 17 5\n", "18111042 michaelpfreeman 11 5\n", "39100192 Southfive 11 5\n", "415794979 AndStrats 10 5\n", "166207886 BrianLaslie 10 5\n", "4440118883 luke_j_obrien 10 6\n", "48585729 cam_mason 10 6\n", "14372270 mcbyrne 9 5\n", "338164741 bsdtectr 9 5\n", "55038792 ELSchillinger 9 5\n", "152145921 jason_howerton 9 5\n", "15111062 thomaswright08 9 5\n", "14668111 BGrueskin 9 5\n", "8790702 dhm 9 6\n", "80669530 bungdan 9 6\n", "593909348 mis2127 9 7\n", "2936965923 RobertKYarbro 9 5\n", "6576292 jhaverly 9 7\n", "6931262 rachsyme 9 5\n", "21700839 BharatKrishnan 8 6\n", "19087309 AlexKoppelman 8 5\n", "1922583464 SpectatrCitizen 8 5\n", "26377458 pcdunham 8 6\n", "241280143 CommsDirector 8 5\n", "20097201 eorden 8 6\n", "34643610 EricBoehlert 8 7\n", "29090846 _Drew_McCoy_ 8 6\n", "23141473 calvinstowell 8 5\n", "343063239 Carter_PE 8 7\n", "21093964 TiffanyHaverly 8 6\n", "392705809 econwonk 8 6\n", "32071013 DeanClancy 8 5\n", "166782000 henrycobb 8 7\n", "56701775 DavidRutz 8 5\n", "59133139 keithcrc 8 7\n", "108338399 lukeoneil47 8 6\n", "24972610 K_Schallhorn 8 5\n", "15826886 CarolBlymire 8 7\n", "71569841 JoshSchwerin 8 7\n", "4120521028 scoejarborough 8 5\n", "14364006 jrosenbaum 8 6\n", "278175882 leximccammond 7 5\n", "8475532 sdkstl 7 5\n", "61664932 jasonahart 7 5\n", "40804509 matthewjsinger 7 6\n", "296513648 jdubya65 7 6\n", "51639553 sdjacksondc 7 5\n", "475957325 DamonLinker 7 5\n", "47020338 VincentMorris 7 5\n", "384841636 hash_said 7 6" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_not_known_reply_to_df = reply_to_summary_df[(reply_to_summary_df.type == 'unknown') & (reply_to_summary_df.users_replying_to_count >= 5)].sort_values('reply_to_count', ascending=False)[['reply_to_screen_name', 'reply_to_count', 'users_replying_to_count']]\n", "top_not_known_reply_to_df.head(50)" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Write top accounts to file" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": true }, "outputs": [], "source": [ "top_not_known_reply_to_df.to_csv('unknown_replies.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }