{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Tweet summary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Prepare the tweet data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load the tweets" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:root:Loading from tweets/6b6a0be4f70640648b56447b387f17a2_001.json.gz\n", "DEBUG:root:Loaded 50000\n", "DEBUG:root:Loaded 100000\n", "DEBUG:root:Loaded 150000\n", "DEBUG:root:Loaded 200000\n", "DEBUG:root:Loaded 250000\n", "INFO:root:Loading from tweets/6b6a0be4f70640648b56447b387f17a2_002.json.gz\n", "INFO:root:Loading from tweets/7bff8603fb4a49d5953197361d548346_001.json.gz\n", "DEBUG:root:Loaded 300000\n", "DEBUG:root:Loaded 350000\n", "DEBUG:root:Loaded 400000\n", "DEBUG:root:Loaded 450000\n", "INFO:root:Loading from tweets/b3f330f5b6cc4572b6d7dabc3752b2b9_001.json.gz\n", "DEBUG:root:Loaded 500000\n", "DEBUG:root:Loaded 550000\n", "DEBUG:root:Loaded 600000\n", "DEBUG:root:Loaded 650000\n" ] }, { "data": { "text/plain": [ "tweet_id 650350\n", "user_id 650350\n", "screen_name 650350\n", "tweet_created_at 650350\n", "user_created_at 650350\n", "tweets_to_date 650350\n", "tweet_type 650350\n", "dtype: int64" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%matplotlib inline\n", "import pandas as pd\n", "import numpy as np\n", "import logging\n", "from dateutil.parser import parse as date_parse\n", "from utils import load_tweet_df, tweet_type\n", "import matplotlib.pyplot as plt\n", "\n", "\n", "logger = logging.getLogger()\n", "logger.setLevel(logging.DEBUG)\n", "\n", "# Set float format so doesn't display scientific notation\n", "pd.options.display.float_format = '{:20,.2f}'.format\n", "\n", "def tweet_transform(tweet):\n", " return {\n", " 'tweet_id': tweet['id_str'], \n", " 'tweet_created_at': date_parse(tweet['created_at']),\n", " 'user_id': tweet['user']['id_str'],\n", " 'screen_name': tweet['user']['screen_name'],\n", " 'user_created_at': date_parse(tweet['user']['created_at']),\n", " 'tweets_to_date': tweet['user']['statuses_count'],\n", " 'tweet_type': tweet_type(tweet)\n", " }\n", "\n", "tweet_df = load_tweet_df(tweet_transform, ['tweet_id', 'user_id', 'screen_name', 'tweet_created_at', \n", " 'user_created_at', 'tweets_to_date', 'tweet_type'])\n", "tweet_df.count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### View the top of the data." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_iduser_idscreen_nametweet_created_atuser_created_attweets_to_datetweet_type
08594633820423782402343897943AmberCStrong2017-05-02 17:43:32+00:002014-02-14 17:33:36+00:001701original
1859803200152588288307982591JaxAlemany2017-05-03 16:13:51+00:002011-05-30 16:43:13+00:006328original
2859788527705493504307982591JaxAlemany2017-05-03 15:15:33+00:002011-05-30 16:43:13+00:006328quote
3859788479076732930307982591JaxAlemany2017-05-03 15:15:22+00:002011-05-30 16:43:13+00:006328original
4859781841955500032307982591JaxAlemany2017-05-03 14:48:59+00:002011-05-30 16:43:13+00:006328retweet
\n", "
" ], "text/plain": [ " tweet_id user_id screen_name tweet_created_at \\\n", "0 859463382042378240 2343897943 AmberCStrong 2017-05-02 17:43:32+00:00 \n", "1 859803200152588288 307982591 JaxAlemany 2017-05-03 16:13:51+00:00 \n", "2 859788527705493504 307982591 JaxAlemany 2017-05-03 15:15:33+00:00 \n", "3 859788479076732930 307982591 JaxAlemany 2017-05-03 15:15:22+00:00 \n", "4 859781841955500032 307982591 JaxAlemany 2017-05-03 14:48:59+00:00 \n", "\n", " user_created_at tweets_to_date tweet_type \n", "0 2014-02-14 17:33:36+00:00 1701 original \n", "1 2011-05-30 16:43:13+00:00 6328 original \n", "2 2011-05-30 16:43:13+00:00 6328 quote \n", "3 2011-05-30 16:43:13+00:00 6328 original \n", "4 2011-05-30 16:43:13+00:00 6328 retweet " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tweet_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Prepare the user data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Tweets in dataset for each user" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_typeoriginalquotereplyretweettweets_in_datasettweets_in_dataset_bin
user_id
100199186512.001.003.0035.0051.00Bottom 90%
100222986235.005.002.0099.00141.00Bottom 90%
1008020894.003.005.0012.0024.00Bottom 90%
100860790117.0019.009.00215.00360.00Bottom 90%
100974922979.0085.0034.00156.00354.00Bottom 90%
\n", "
" ], "text/plain": [ "tweet_type original quote reply \\\n", "user_id \n", "1001991865 12.00 1.00 3.00 \n", "1002229862 35.00 5.00 2.00 \n", "100802089 4.00 3.00 5.00 \n", "100860790 117.00 19.00 9.00 \n", "1009749229 79.00 85.00 34.00 \n", "\n", "tweet_type retweet tweets_in_dataset tweets_in_dataset_bin \n", "user_id \n", "1001991865 35.00 51.00 Bottom 90% \n", "1002229862 99.00 141.00 Bottom 90% \n", "100802089 12.00 24.00 Bottom 90% \n", "100860790 215.00 360.00 Bottom 90% \n", "1009749229 156.00 354.00 Bottom 90% " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_tweet_count_df = tweet_df[['user_id', 'tweet_type']].groupby(['user_id', 'tweet_type']).size().unstack()\n", "user_tweet_count_df.fillna(0, inplace=True)\n", "user_tweet_count_df['tweets_in_dataset'] = user_tweet_count_df.original + user_tweet_count_df.quote + user_tweet_count_df.reply + user_tweet_count_df.retweet\n", "user_tweet_count_df['tweets_in_dataset_bin'] = pd.qcut(user_tweet_count_df.tweets_in_dataset, [0, .9, .99, 1.], labels=['Bottom 90%', 'Middle 9%', 'Top 1%'])\n", "user_tweet_count_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load and join user info\n", "This is information that was coded in the spreadsheet or looked up for each user via API." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "screen_name 2484\n", "name 2484\n", "organization 2455\n", "position 2481\n", "gender 2483\n", "followers_count 2484\n", "following_count 2484\n", "tweet_count 2484\n", "user_created_at 2484\n", "verified 2484\n", "protected 2484\n", "dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_info_df = pd.read_csv('source_data/user_info_lookup.csv', names=['screen_name', 'user_id', 'name', 'organization', 'position',\n", " 'gender', 'followers_count', 'following_count', 'tweet_count',\n", " 'user_created_at', 'verified', 'protected'],\n", " dtype={'user_id': str}).set_index(['user_id'])\n", "user_info_df.count()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
screen_namenameorganizationpositiongenderfollowers_countfollowing_counttweet_countuser_created_atverifiedprotected
user_id
20711445ninglinGlinski, NinaNaNFreelance ReporterF968507909Thu Feb 12 20:00:53 +0000 2009FalseFalse
258917371davidjendersEnders, DavidNaNJournalistM14514806299Mon Feb 28 19:52:03 +0000 2011TrueFalse
297046834mattbarakatBarakat, MatthewAssociated PressNorthern Virginia CorrespondentM754349620Wed May 11 20:55:24 +0000 2011TrueFalse
455585786kimberlyeatkinsAtkins, KimberlyBoston HeraldChief Washington Reporter/ColumnistF239926615846Thu Jan 05 08:26:46 +0000 2012TrueFalse
42584840toulavlahouVlahou, ToulaCQ Roll CallEditor & Podcast ProducerF27131986325Tue May 26 07:41:38 +0000 2009FalseFalse
\n", "
" ], "text/plain": [ " screen_name name organization \\\n", "user_id \n", "20711445 ninglin Glinski, Nina NaN \n", "258917371 davidjenders Enders, David NaN \n", "297046834 mattbarakat Barakat, Matthew Associated Press \n", "455585786 kimberlyeatkins Atkins, Kimberly Boston Herald \n", "42584840 toulavlahou Vlahou, Toula CQ Roll Call \n", "\n", " position gender followers_count \\\n", "user_id \n", "20711445 Freelance Reporter F 968 \n", "258917371 Journalist M 1451 \n", "297046834 Northern Virginia Correspondent M 754 \n", "455585786 Chief Washington Reporter/Columnist F 2399 \n", "42584840 Editor & Podcast Producer F 2713 \n", "\n", " following_count tweet_count user_created_at \\\n", "user_id \n", "20711445 507 909 Thu Feb 12 20:00:53 +0000 2009 \n", "258917371 480 6299 Mon Feb 28 19:52:03 +0000 2011 \n", "297046834 349 620 Wed May 11 20:55:24 +0000 2011 \n", "455585786 2661 5846 Thu Jan 05 08:26:46 +0000 2012 \n", "42584840 198 6325 Tue May 26 07:41:38 +0000 2009 \n", "\n", " verified protected \n", "user_id \n", "20711445 False False \n", "258917371 True False \n", "297046834 True False \n", "455585786 True False \n", "42584840 False False " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_info_df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "screen_name 2484\n", "name 2484\n", "organization 2484\n", "position 2481\n", "gender 2483\n", "followers_count 2484\n", "following_count 2484\n", "tweet_count 2484\n", "user_created_at 2484\n", "verified 2484\n", "protected 2484\n", "original 2484\n", "quote 2484\n", "reply 2484\n", "retweet 2484\n", "tweets_in_dataset 2484\n", "tweets_in_dataset_bin 2272\n", "dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join\n", "user_summary_df = user_info_df.join(user_tweet_count_df, how='left')\n", "# Fill Nans\n", "user_summary_df['organization'].fillna('', inplace=True)\n", "user_summary_df['original'].fillna(0, inplace=True)\n", "user_summary_df['quote'].fillna(0, inplace=True)\n", "user_summary_df['reply'].fillna(0, inplace=True)\n", "user_summary_df['retweet'].fillna(0, inplace=True)\n", "user_summary_df['tweets_in_dataset'].fillna(0, inplace=True)\n", "user_summary_df.count()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
screen_namenameorganizationpositiongenderfollowers_countfollowing_counttweet_countuser_created_atverifiedprotectedoriginalquotereplyretweettweets_in_datasettweets_in_dataset_bin
user_id
20711445ninglinGlinski, NinaFreelance ReporterF968507909Thu Feb 12 20:00:53 +0000 2009FalseFalse0.000.000.000.000.00NaN
258917371davidjendersEnders, DavidJournalistM14514806299Mon Feb 28 19:52:03 +0000 2011TrueFalse0.000.000.000.000.00NaN
297046834mattbarakatBarakat, MatthewAssociated PressNorthern Virginia CorrespondentM754349620Wed May 11 20:55:24 +0000 2011TrueFalse12.000.000.002.0014.00Bottom 90%
455585786kimberlyeatkinsAtkins, KimberlyBoston HeraldChief Washington Reporter/ColumnistF239926615846Thu Jan 05 08:26:46 +0000 2012TrueFalse228.00144.0039.00196.00607.00Bottom 90%
42584840toulavlahouVlahou, ToulaCQ Roll CallEditor & Podcast ProducerF27131986325Tue May 26 07:41:38 +0000 2009FalseFalse32.0025.000.0025.0082.00Bottom 90%
\n", "
" ], "text/plain": [ " screen_name name organization \\\n", "user_id \n", "20711445 ninglin Glinski, Nina \n", "258917371 davidjenders Enders, David \n", "297046834 mattbarakat Barakat, Matthew Associated Press \n", "455585786 kimberlyeatkins Atkins, Kimberly Boston Herald \n", "42584840 toulavlahou Vlahou, Toula CQ Roll Call \n", "\n", " position gender followers_count \\\n", "user_id \n", "20711445 Freelance Reporter F 968 \n", "258917371 Journalist M 1451 \n", "297046834 Northern Virginia Correspondent M 754 \n", "455585786 Chief Washington Reporter/Columnist F 2399 \n", "42584840 Editor & Podcast Producer F 2713 \n", "\n", " following_count tweet_count user_created_at \\\n", "user_id \n", "20711445 507 909 Thu Feb 12 20:00:53 +0000 2009 \n", "258917371 480 6299 Mon Feb 28 19:52:03 +0000 2011 \n", "297046834 349 620 Wed May 11 20:55:24 +0000 2011 \n", "455585786 2661 5846 Thu Jan 05 08:26:46 +0000 2012 \n", "42584840 198 6325 Tue May 26 07:41:38 +0000 2009 \n", "\n", " verified protected original quote \\\n", "user_id \n", "20711445 False False 0.00 0.00 \n", "258917371 True False 0.00 0.00 \n", "297046834 True False 12.00 0.00 \n", "455585786 True False 228.00 144.00 \n", "42584840 False False 32.00 25.00 \n", "\n", " reply retweet tweets_in_dataset \\\n", "user_id \n", "20711445 0.00 0.00 0.00 \n", "258917371 0.00 0.00 0.00 \n", "297046834 0.00 2.00 14.00 \n", "455585786 39.00 196.00 607.00 \n", "42584840 0.00 25.00 82.00 \n", "\n", " tweets_in_dataset_bin \n", "user_id \n", "20711445 NaN \n", "258917371 NaN \n", "297046834 Bottom 90% \n", "455585786 Bottom 90% \n", "42584840 Bottom 90% " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_summary_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Write to file as output/user_summary.csv" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "user_summary_df.to_csv('output/user_summary.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Prepare the organization data\n", "This is for users that are members of each organization." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "followers_count sum 347\n", " size 347\n", " average 347\n", "following_count sum 347\n", " size 347\n", " average 347\n", "tweet_count sum 347\n", " size 347\n", " average 347\n", "tweets_in_dataset sum 347\n", " size 347\n", " average 347\n", "dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "org_summary_df = user_summary_df[['organization', 'followers_count', 'following_count', 'tweet_count', 'tweets_in_dataset']].groupby('organization').agg([np.sum, np.size, np.average])\n", "org_summary_df.count()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
followers_countfollowing_counttweet_counttweets_in_dataset
sumsizeaveragesumsizeaveragesumsizeaveragesumsizeaverage
organization
57347291,977.4830788291,061.66151441295,222.102,767.0029.0095.41
ABC 78891889.00109211,092.00194611,946.00464.001.00464.00
ABC News6027905211,592.1272154521,387.58372200527,157.698,629.0052.00165.94
AP–Broadcast530515353.67797415531.6016794151,119.60527.0015.0035.13
Afro American Newspapers1891189.002021202.005961596.0014.001.0014.00
\n", "
" ], "text/plain": [ " followers_count \\\n", " sum size average \n", "organization \n", " 57347 29 1,977.48 \n", "ABC 7 889 1 889.00 \n", "ABC News 602790 52 11,592.12 \n", "AP–Broadcast 5305 15 353.67 \n", "Afro American Newspapers 189 1 189.00 \n", "\n", " following_count \\\n", " sum size average \n", "organization \n", " 30788 29 1,061.66 \n", "ABC 7 1092 1 1,092.00 \n", "ABC News 72154 52 1,387.58 \n", "AP–Broadcast 7974 15 531.60 \n", "Afro American Newspapers 202 1 202.00 \n", "\n", " tweet_count \\\n", " sum size average \n", "organization \n", " 151441 29 5,222.10 \n", "ABC 7 1946 1 1,946.00 \n", "ABC News 372200 52 7,157.69 \n", "AP–Broadcast 16794 15 1,119.60 \n", "Afro American Newspapers 596 1 596.00 \n", "\n", " tweets_in_dataset \\\n", " sum size \n", "organization \n", " 2,767.00 29.00 \n", "ABC 7 464.00 1.00 \n", "ABC News 8,629.00 52.00 \n", "AP–Broadcast 527.00 15.00 \n", "Afro American Newspapers 14.00 1.00 \n", "\n", " \n", " average \n", "organization \n", " 95.41 \n", "ABC 7 464.00 \n", "ABC News 165.94 \n", "AP–Broadcast 35.13 \n", "Afro American Newspapers 14.00 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "org_summary_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Write to file as output/organization_summary.csv" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "org_summary_df.to_csv('output/organization_summary.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### List of organizations <--- This probably requires some cleanup" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['',\n", " 'ABC 7',\n", " 'ABC News',\n", " 'AP–Broadcast',\n", " 'Afro American Newspapers',\n", " 'Agence France Presse (AFP–TV)',\n", " 'Agence France-Presse',\n", " 'Agri-Pulse',\n", " 'Air Force Magazine',\n", " 'Alaska Dispatch News',\n", " 'Alaska Public Radio Network',\n", " 'Albuquerque Journal',\n", " 'Aljazeera America',\n", " 'Aljazeera English',\n", " 'Allentown Morning Call',\n", " 'American Banker',\n", " 'American Gaming Association',\n", " 'American Prospect',\n", " 'Argus Media',\n", " 'Army Times',\n", " 'Associated Press',\n", " 'Atlanta Journal-Consitution',\n", " 'Austin American-Statesman',\n", " 'Axios',\n", " 'BBC',\n", " 'Baltimore Sun',\n", " 'Bankrate',\n", " 'Bloomberg BNA',\n", " 'Bloomberg Government',\n", " 'Bloomberg News',\n", " 'Bloomberg TV',\n", " 'Bond Buyer',\n", " 'Boston Globe',\n", " 'Boston Herald',\n", " 'Breitbart News',\n", " 'Broadcasting & Cable',\n", " 'Buffalo News',\n", " 'BuzzFeed',\n", " 'Buzzfeed',\n", " 'CBN News',\n", " 'CBS News',\n", " 'CDC Gaming Reports',\n", " 'CEO Update',\n", " 'CNBC',\n", " 'CNN',\n", " 'CNN International',\n", " 'CNSNews.com',\n", " 'CQ Researcher',\n", " 'CQ Roll Call',\n", " 'CRTV',\n", " 'CTV–Community TV of PG County',\n", " 'Canadian Press',\n", " 'Carroll County Times',\n", " 'Center for Public Integrity',\n", " 'Charleston Post and Courier',\n", " 'Chicago Sun-Times',\n", " 'Chicago Tribune',\n", " 'Christian Science Monitor',\n", " 'Chronicle of Higher Education',\n", " 'Chronicle of Philanthropy',\n", " 'Circa',\n", " 'CityLab',\n", " 'Cleveland Plain Dealer',\n", " 'Colorado Public Radio',\n", " 'Columbus Dispatch',\n", " 'Communications Daily',\n", " 'Consumer Reports',\n", " 'Cook Political Report',\n", " 'Corporate Crime Reporter',\n", " 'Cosmopolitan',\n", " 'Court House News',\n", " 'Cox Broadcasting',\n", " 'Crain Communications',\n", " 'Cronkite News Service',\n", " 'Crux: Catholic News Agency',\n", " 'C–SPAN',\n", " 'DC Spotlight Newspaper',\n", " 'DCist',\n", " 'Daily Beast',\n", " 'Daily Caller',\n", " 'Daily Deal',\n", " 'Daily Mail',\n", " 'Daily Mail (UK)',\n", " 'Dallas Morning News',\n", " 'Defense Daily',\n", " 'Defense News',\n", " 'Defense One',\n", " 'Denver Post',\n", " 'Detroit News',\n", " 'Diverse: Issues in Higher Education',\n", " 'E! Networks',\n", " 'E&E News',\n", " 'EWTN',\n", " 'Eater',\n", " 'Economist',\n", " 'Education Week',\n", " 'Energy Daily',\n", " 'Energy Intelligence',\n", " 'Environment & Energy Publishing, LLC',\n", " \"FERN's Ag Insider\",\n", " 'FTC Watch',\n", " 'Fairchild Publications',\n", " 'Falls Church News Press',\n", " 'Famous DC',\n", " 'Feature Story News',\n", " 'FedNet',\n", " 'Federal Computer Week',\n", " 'Federal News Radio 1500 AM',\n", " 'Financial Times',\n", " 'Financial Times ',\n", " 'Fiscal Times',\n", " 'FiveThirtyEight',\n", " 'Foreign Policy',\n", " 'Fortune Magazine',\n", " 'Fox Business Network',\n", " 'Fox News',\n", " 'Fox News Radio',\n", " 'France24',\n", " 'Freelance',\n", " 'Freelance ',\n", " 'Frontline Medical Communications',\n", " 'Fusion',\n", " 'Gannett Government Media Corp',\n", " 'Gannett Washington Bureau',\n", " 'Glamour Magazine',\n", " 'Global Competition Review',\n", " 'Globe and Mail',\n", " 'Governing',\n", " 'Government Executive',\n", " 'Gray Television',\n", " 'Guardian US',\n", " 'Haddad Media',\n", " 'Hearst Newspapers',\n", " 'Hearst Television Inc.',\n", " 'Hispanic Outlook',\n", " 'Honolulu Civil Beat',\n", " 'Houston Chronicle',\n", " 'Huffington Post',\n", " 'IDG Communications',\n", " 'IDG News Service',\n", " 'Independent Journal Review',\n", " 'Independent Television News (ITN)',\n", " 'Industry Dive',\n", " 'Informavore Media, LLC',\n", " 'Inside Elections',\n", " 'InsideClimate News',\n", " 'InsidePolitics',\n", " 'Internews Network',\n", " 'Investor’s Business Daily',\n", " 'Irish Times',\n", " 'Jewish Journal',\n", " 'Jewish Telegraphic Agency',\n", " 'Journal Media Group',\n", " 'KATU News',\n", " 'KCETLink',\n", " 'KFI',\n", " 'KNTV',\n", " 'KTWO TV',\n", " 'Kaiser Health News',\n", " 'Kansas City Star',\n", " 'LRP Publications',\n", " 'Laslo Congressional Bureau',\n", " 'Lilly Broadcasting',\n", " 'LocalNews Now',\n", " 'Los Angeles Times',\n", " 'MLEX US',\n", " 'MRCTV',\n", " 'MSNBC',\n", " 'MTV News',\n", " 'Manifest',\n", " 'MapLight',\n", " 'Market News International',\n", " 'MarketWatch',\n", " 'Marketplace Radio',\n", " 'McClatchy',\n", " 'McClatchy Newspapers',\n", " 'MedPage Today',\n", " 'MedTech Insight',\n", " 'Media General',\n", " 'Merger Market of Financial Times',\n", " 'Metro Weekly',\n", " 'Mic',\n", " 'Military.com',\n", " 'MinnPost',\n", " 'Minneapolis Star Tribune',\n", " 'Montgomery County Sentinel',\n", " 'Morning Consult',\n", " 'Morning Edition',\n", " 'Mother Jones',\n", " 'NBC',\n", " 'NBC News',\n", " 'NBC Newschannel',\n", " 'NJ Advance Media',\n", " 'Nation',\n", " 'National Catholic Reporter',\n", " 'National Geographic Magazine',\n", " 'National Journal',\n", " 'National Law Journal',\n", " 'National Mortgage News',\n", " 'National Public Radio',\n", " 'National Review',\n", " 'Nature',\n", " 'NerdWallet',\n", " 'New Republic',\n", " 'New York ',\n", " 'New York Daily News',\n", " 'New York Post',\n", " 'New York Times',\n", " 'New York Times Magazine',\n", " 'New Yorker',\n", " 'NewsMax',\n", " 'Newsday',\n", " 'Newsweek',\n", " 'Nexstar Media Group',\n", " 'Omaha World-Herald',\n", " 'Ozy',\n", " 'PBS',\n", " 'PBS NewsHour',\n", " 'People Magazine',\n", " 'Pew Charitable Trusts',\n", " 'Philadelphia Inquirer',\n", " 'Pittsburgh Post-Gazette',\n", " 'Politico',\n", " 'Power Markets Today',\n", " 'Praetorian Digital',\n", " 'ProPublica',\n", " 'RTTV America',\n", " 'Radio Free Asia',\n", " 'Radio One',\n", " 'Real Clear Politics',\n", " 'Real News Network',\n", " 'RealClearPolitics',\n", " 'Record (Bergen County, NJ)',\n", " 'Religion & Ethics Newsweekly',\n", " 'Religion News Service',\n", " 'Religious News Service',\n", " 'Reuters Radio & TV',\n", " 'Rural TV News',\n", " 'S&P Global Market Intelligence',\n", " 'S&P Global Platts',\n", " 'SAGE Business Researcher',\n", " 'SB Nation',\n", " 'SRN News (Salem)',\n", " 'Salt Lake Tribune',\n", " 'San Francisco Chronicle',\n", " 'Scientific American',\n", " 'Scoop News',\n", " 'Scripps Howard News Service',\n", " 'Scripps News',\n", " 'Scudder Publishing',\n", " 'Senate Democrats',\n", " 'Sightline Media Group',\n", " 'Sinclair Broadcast Group',\n", " 'Sirius XM',\n", " 'Sirius XM Satellite Radio',\n", " 'Sky News',\n", " 'Slate',\n", " 'Smithsonian Magazine',\n", " 'Snapchat',\n", " 'Space News',\n", " 'St. Louis Post-Dispatch',\n", " 'St. Louis Public Radio',\n", " 'Standard - Examiner',\n", " 'Stars and Stripes',\n", " 'Stat News',\n", " 'Stateline.org',\n", " 'Stephens Media Group',\n", " 'SurveyMonkey',\n", " 'Syracuse Post-Standard',\n", " 'TEGNA',\n", " 'Talk Radio News Service',\n", " 'Talking Points Memo',\n", " 'Tampa Bay Times',\n", " 'Telemundo Network',\n", " 'Texas Tribune',\n", " 'The 74 Media',\n", " 'The Atlantic',\n", " 'The Cipher Brief',\n", " 'The Hill',\n", " 'The Hotline',\n", " 'The New York Times On The Web',\n", " 'The Root',\n", " 'The Voyage Report',\n", " 'The atlantic',\n", " 'TheStreet',\n", " 'ThinkProgress',\n", " 'This Is America with Dennis Wholey',\n", " 'Thom Hartmann Program',\n", " 'Thomsen Reuters',\n", " 'Thomson Reuters',\n", " 'Time Magazine',\n", " 'Time Warner Cable',\n", " 'Times of London',\n", " 'To The Contrary (Persephone Productions)',\n", " 'Toronto Star',\n", " 'TownHall',\n", " 'Townhall',\n", " 'Transport Topics',\n", " 'Trinity Broadcast Network',\n", " 'U.S. News & World Report',\n", " 'UCG',\n", " 'USA Today',\n", " 'Univision',\n", " 'Vanity Fair',\n", " 'Variety',\n", " 'Vice News',\n", " 'Voice of America',\n", " 'Voterama in Congress',\n", " 'Vox ',\n", " 'Vox Media',\n", " 'WBAL-TV',\n", " 'WBALL TV 11',\n", " 'WFDC–TV Univision',\n", " 'WJLA–TV / Newschannel 8',\n", " 'WMAL Radio',\n", " 'WMDT',\n", " 'WNEW / CBS DC',\n", " 'WNYC',\n", " 'WPFW–FM',\n", " 'WRC–TV / NBC–4',\n", " 'WTOP',\n", " 'WTOP Radio',\n", " 'WTTG-TV',\n", " 'WTTG–Fox Television',\n", " 'WUSA–TV',\n", " 'Wall Street Journal',\n", " 'Wall Street Journal / Dow Jones',\n", " 'Washington Blade',\n", " 'Washington Bureau News Service',\n", " 'Washington Business Journal',\n", " 'Washington City Paper',\n", " 'Washington Examiner',\n", " 'Washington Free Beacon',\n", " 'Washington Post',\n", " 'Washington Radio & Press Service',\n", " 'Washington Times',\n", " 'Washingtonian',\n", " 'Washingtonpost.com',\n", " 'Weekly Standard',\n", " 'West Wing Writers',\n", " 'Westwood One',\n", " 'White House Dossier',\n", " 'Wired',\n", " 'Wisconsin NewsHour',\n", " 'World Magazine',\n", " 'Yahoo News',\n", " 'ZDNet']" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "org_summary_df.index.tolist()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tweet summary\n", "For tweets in dataset." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Types of tweets" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "retweet 273412\n", "original 199949\n", "reply 93184\n", "quote 83805\n", "Name: tweet_type, dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tweet_df['tweet_type'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## User tweet summary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Types of tweets in dataset for each user" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
originalquotereplyretweet
count2,484.002,484.002,484.002,484.00
mean79.8333.5437.22109.56
std135.8490.07186.34341.02
min0.000.000.000.00
25%5.000.000.003.00
50%29.005.003.0024.00
75%99.0028.0018.0094.25
max1,579.001,440.007,328.008,855.00
\n", "
" ], "text/plain": [ " original quote reply \\\n", "count 2,484.00 2,484.00 2,484.00 \n", "mean 79.83 33.54 37.22 \n", "std 135.84 90.07 186.34 \n", "min 0.00 0.00 0.00 \n", "25% 5.00 0.00 0.00 \n", "50% 29.00 5.00 3.00 \n", "75% 99.00 28.00 18.00 \n", "max 1,579.00 1,440.00 7,328.00 \n", "\n", " retweet \n", "count 2,484.00 \n", "mean 109.56 \n", "std 341.02 \n", "min 0.00 \n", "25% 3.00 \n", "50% 24.00 \n", "75% 94.25 \n", "max 8,855.00 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_summary_df[['original', 'quote', 'reply', 'retweet']].describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1/9/90 rule\n", "For top 1%, 9%, 90% of tweeters, the number of tweets and types of tweets they account for." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
screen_namenameorganizationpositiongenderfollowers_countfollowing_counttweet_countuser_created_atverifiedprotectedoriginalquotereplyretweettweets_in_datasettweets_in_dataset_bin
user_id
456994513maria_e_recioRecio, MariaAustin American-StatesmanPolitical ReporterF103953038464Fri Jan 06 22:22:40 +0000 2012FalseFalse261.00291.00108.003,204.003,864.00Top 1%
22891564chrisgeidnerGeidner, ChrisBuzzFeedLegal Editor & Supreme Court CorrespondentM786314767201131Thu Mar 05 06:48:00 +0000 2009TrueFalse592.00475.002,850.00750.004,667.00Top 1%
21810329sdonnanDonnan, ShawnFinancial TimesWolrd Trade EditorM11693542875733Tue Feb 24 23:10:17 +0000 2009TrueFalse203.00374.00152.002,792.003,521.00Top 1%
19545932kampeasKampeas, RonJewish Telegraphic AgencyWashington Bureau ChiefM6901195250954Mon Jan 26 17:37:58 +0000 2009FalseFalse506.00349.00202.002,027.003,084.00Top 1%
47408060jonathanlandayLanday, JonathanMcClatchy NewspapersNational Security CorrespondentM11126109378318Mon Jun 15 18:42:47 +0000 2009TrueFalse418.0041.0070.002,352.002,881.00Top 1%
3817401ericgellerGeller, EricPoliticoCybersecurity ReporterM52569732201279Sun Apr 08 20:27:11 +0000 2007TrueFalse820.001,435.007,328.000.009,583.00Top 1%
593813785donnayoungdcYoung, DonnaS&P Global Market IntelligenceSenior ReporterF5654162146571Tue May 29 15:45:45 +0000 2012FalseFalse1,095.00885.009.001,169.003,158.00Top 1%
104299137davidmdruckerDrucker, DavidWashington ExaminerSenior Political CorrespondentM329662475101229Tue Jan 12 22:56:50 +0000 2010TrueFalse611.001,122.00517.00934.003,184.00Top 1%
61734492fahrentholdFahrenthold, DavidWashington PostPolitical ReporterM419647334125457Fri Jul 31 09:29:37 +0000 2009TrueFalse115.00142.0063.002,333.002,653.00Top 1%
13524182daveweigelWeigel, DavidWashington PostPolitical ReporterM31891510169166821Fri Feb 15 17:58:23 +0000 2008TrueFalse712.00784.00242.002,155.003,893.00Top 1%
25702314ericmgarciaGarcia, Eric M.CQ Roll CallReporterM2960374842198Sat Mar 21 17:44:40 +0000 2009FalseFalse441.001,188.00575.00405.002,609.00Top 1%
18825339cahnemilyCahn, EmilyMicSenior Politics WriterF16181211895033Sat Jan 10 03:19:50 +0000 2009TrueFalse1,205.001,440.00279.003,459.006,383.00Top 1%
21612122hotlinejoshKraushaar, Josh P.National JournalPolitics EditorM491511456152116Sun Feb 22 23:45:46 +0000 2009TrueFalse395.00643.00338.004,302.005,678.00Top 1%
21696279brianbeutlerBeutler, Brian AlfredNew RepublicSenior EditorM7158672296050Mon Feb 23 21:31:16 +0000 2009TrueFalse475.00546.00714.002,122.003,857.00Top 1%
16459325ryanbeckwithBeckwith, Ryan TeagueTime MagazinePolitics EditorM20241682688797Thu Sep 25 22:43:36 +0000 2008TrueFalse843.00529.00753.001,778.003,903.00Top 1%
42352386rschlesSchlesinger, RobertU.S. News & World ReportManaging Editor, OpinionM4426191034044Mon May 25 04:52:44 +0000 2009TrueFalse122.00590.0056.002,206.002,974.00Top 1%
304988603neilwmccabeMcCabe, NeilBreitbart NewsPolitical CorrespondentM18991769957983Wed May 25 13:09:32 +0000 2011FalseFalse682.00616.00227.004,444.005,969.00Top 1%
191964162samlitzingerLitzinger, SamCBS NewsCorrespondent, CBS RadioM2302216490023Fri Sep 17 20:37:31 +0000 2010FalseFalse759.00206.00430.005,331.006,726.00Top 1%
259395895johnjharwoodHarwood, JohnCNBCChief Washington CorrespondentM139370115773724Tue Mar 01 20:49:40 +0000 2011TrueFalse825.00487.0083.003,307.004,702.00Top 1%
14529929jaketapperTapper, JakeCNNAnchor & Chief Washington CorrespondentM12383175664144300Fri Apr 25 17:23:28 +0000 2008TrueFalse1,162.00266.00645.001,295.003,368.00Top 1%
15486163simonmarksfsnMarks, SimonFeature Story NewsPresident & Chief CorrespondentM7622363239421Fri Jul 18 20:45:38 +0000 2008FalseFalse1,191.00613.00189.001,017.003,010.00Top 1%
19576571jaredrizziRizzi, JaredSirius XM Satellite RadioWhite House Correspondent, SXMPOTUSM12277592438049Tue Jan 27 04:09:53 +0000 2009TrueFalse645.00858.001,393.002,050.004,946.00Top 1%
2453025128gloriaminottMinott, GloriaWPFW–FMJournalist and Radio HostF46823245438Sat Apr 19 12:03:52 +0000 2014FalseFalse0.000.001.008,855.008,856.00Top 1%
\n", "
" ], "text/plain": [ " screen_name name \\\n", "user_id \n", "456994513 maria_e_recio Recio, Maria \n", "22891564 chrisgeidner Geidner, Chris \n", "21810329 sdonnan Donnan, Shawn \n", "19545932 kampeas Kampeas, Ron \n", "47408060 jonathanlanday Landay, Jonathan \n", "3817401 ericgeller Geller, Eric \n", "593813785 donnayoungdc Young, Donna \n", "104299137 davidmdrucker Drucker, David \n", "61734492 fahrenthold Fahrenthold, David \n", "13524182 daveweigel Weigel, David \n", "25702314 ericmgarcia Garcia, Eric M. \n", "18825339 cahnemily Cahn, Emily \n", "21612122 hotlinejosh Kraushaar, Josh P. \n", "21696279 brianbeutler Beutler, Brian Alfred \n", "16459325 ryanbeckwith Beckwith, Ryan Teague \n", "42352386 rschles Schlesinger, Robert \n", "304988603 neilwmccabe McCabe, Neil \n", "191964162 samlitzinger Litzinger, Sam \n", "259395895 johnjharwood Harwood, John \n", "14529929 jaketapper Tapper, Jake \n", "15486163 simonmarksfsn Marks, Simon \n", "19576571 jaredrizzi Rizzi, Jared \n", "2453025128 gloriaminott Minott, Gloria \n", "\n", " organization \\\n", "user_id \n", "456994513 Austin American-Statesman \n", "22891564 BuzzFeed \n", "21810329 Financial Times \n", "19545932 Jewish Telegraphic Agency \n", "47408060 McClatchy Newspapers \n", "3817401 Politico \n", "593813785 S&P Global Market Intelligence \n", "104299137 Washington Examiner \n", "61734492 Washington Post \n", "13524182 Washington Post \n", "25702314 CQ Roll Call \n", "18825339 Mic \n", "21612122 National Journal \n", "21696279 New Republic \n", "16459325 Time Magazine \n", "42352386 U.S. News & World Report \n", "304988603 Breitbart News \n", "191964162 CBS News \n", "259395895 CNBC \n", "14529929 CNN \n", "15486163 Feature Story News \n", "19576571 Sirius XM Satellite Radio \n", "2453025128 WPFW–FM \n", "\n", " position gender \\\n", "user_id \n", "456994513 Political Reporter F \n", "22891564 Legal Editor & Supreme Court Correspondent M \n", "21810329 Wolrd Trade Editor M \n", "19545932 Washington Bureau Chief M \n", "47408060 National Security Correspondent M \n", "3817401 Cybersecurity Reporter M \n", "593813785 Senior Reporter F \n", "104299137 Senior Political Correspondent M \n", "61734492 Political Reporter M \n", "13524182 Political Reporter M \n", "25702314 Reporter M \n", "18825339 Senior Politics Writer F \n", "21612122 Politics Editor M \n", "21696279 Senior Editor M \n", "16459325 Politics Editor M \n", "42352386 Managing Editor, Opinion M \n", "304988603 Political Correspondent M \n", "191964162 Correspondent, CBS Radio M \n", "259395895 Chief Washington Correspondent M \n", "14529929 Anchor & Chief Washington Correspondent M \n", "15486163 President & Chief Correspondent M \n", "19576571 White House Correspondent, SXMPOTUS M \n", "2453025128 Journalist and Radio Host F \n", "\n", " followers_count following_count tweet_count \\\n", "user_id \n", "456994513 1039 530 38464 \n", "22891564 78631 4767 201131 \n", "21810329 11693 5428 75733 \n", "19545932 6901 1952 50954 \n", "47408060 11126 1093 78318 \n", "3817401 52569 732 201279 \n", "593813785 5654 1621 46571 \n", "104299137 32966 2475 101229 \n", "61734492 419647 3341 25457 \n", "13524182 318915 10169 166821 \n", "25702314 2960 3748 42198 \n", "18825339 16181 2118 95033 \n", "21612122 49151 1456 152116 \n", "21696279 71586 722 96050 \n", "16459325 20241 6826 88797 \n", "42352386 4426 1910 34044 \n", "304988603 18991 7699 57983 \n", "191964162 2302 2164 90023 \n", "259395895 139370 1157 73724 \n", "14529929 1238317 5664 144300 \n", "15486163 7622 3632 39421 \n", "19576571 12277 5924 38049 \n", "2453025128 468 232 45438 \n", "\n", " user_created_at verified protected \\\n", "user_id \n", "456994513 Fri Jan 06 22:22:40 +0000 2012 False False \n", "22891564 Thu Mar 05 06:48:00 +0000 2009 True False \n", "21810329 Tue Feb 24 23:10:17 +0000 2009 True False \n", "19545932 Mon Jan 26 17:37:58 +0000 2009 False False \n", "47408060 Mon Jun 15 18:42:47 +0000 2009 True False \n", "3817401 Sun Apr 08 20:27:11 +0000 2007 True False \n", "593813785 Tue May 29 15:45:45 +0000 2012 False False \n", "104299137 Tue Jan 12 22:56:50 +0000 2010 True False \n", "61734492 Fri Jul 31 09:29:37 +0000 2009 True False \n", "13524182 Fri Feb 15 17:58:23 +0000 2008 True False \n", "25702314 Sat Mar 21 17:44:40 +0000 2009 False False \n", "18825339 Sat Jan 10 03:19:50 +0000 2009 True False \n", "21612122 Sun Feb 22 23:45:46 +0000 2009 True False \n", "21696279 Mon Feb 23 21:31:16 +0000 2009 True False \n", "16459325 Thu Sep 25 22:43:36 +0000 2008 True False \n", "42352386 Mon May 25 04:52:44 +0000 2009 True False \n", "304988603 Wed May 25 13:09:32 +0000 2011 False False \n", "191964162 Fri Sep 17 20:37:31 +0000 2010 False False \n", "259395895 Tue Mar 01 20:49:40 +0000 2011 True False \n", "14529929 Fri Apr 25 17:23:28 +0000 2008 True False \n", "15486163 Fri Jul 18 20:45:38 +0000 2008 False False \n", "19576571 Tue Jan 27 04:09:53 +0000 2009 True False \n", "2453025128 Sat Apr 19 12:03:52 +0000 2014 False False \n", "\n", " original quote reply \\\n", "user_id \n", "456994513 261.00 291.00 108.00 \n", "22891564 592.00 475.00 2,850.00 \n", "21810329 203.00 374.00 152.00 \n", "19545932 506.00 349.00 202.00 \n", "47408060 418.00 41.00 70.00 \n", "3817401 820.00 1,435.00 7,328.00 \n", "593813785 1,095.00 885.00 9.00 \n", "104299137 611.00 1,122.00 517.00 \n", "61734492 115.00 142.00 63.00 \n", "13524182 712.00 784.00 242.00 \n", "25702314 441.00 1,188.00 575.00 \n", "18825339 1,205.00 1,440.00 279.00 \n", "21612122 395.00 643.00 338.00 \n", "21696279 475.00 546.00 714.00 \n", "16459325 843.00 529.00 753.00 \n", "42352386 122.00 590.00 56.00 \n", "304988603 682.00 616.00 227.00 \n", "191964162 759.00 206.00 430.00 \n", "259395895 825.00 487.00 83.00 \n", "14529929 1,162.00 266.00 645.00 \n", "15486163 1,191.00 613.00 189.00 \n", "19576571 645.00 858.00 1,393.00 \n", "2453025128 0.00 0.00 1.00 \n", "\n", " retweet tweets_in_dataset tweets_in_dataset_bin \n", "user_id \n", "456994513 3,204.00 3,864.00 Top 1% \n", "22891564 750.00 4,667.00 Top 1% \n", "21810329 2,792.00 3,521.00 Top 1% \n", "19545932 2,027.00 3,084.00 Top 1% \n", "47408060 2,352.00 2,881.00 Top 1% \n", "3817401 0.00 9,583.00 Top 1% \n", "593813785 1,169.00 3,158.00 Top 1% \n", "104299137 934.00 3,184.00 Top 1% \n", "61734492 2,333.00 2,653.00 Top 1% \n", "13524182 2,155.00 3,893.00 Top 1% \n", "25702314 405.00 2,609.00 Top 1% \n", "18825339 3,459.00 6,383.00 Top 1% \n", "21612122 4,302.00 5,678.00 Top 1% \n", "21696279 2,122.00 3,857.00 Top 1% \n", "16459325 1,778.00 3,903.00 Top 1% \n", "42352386 2,206.00 2,974.00 Top 1% \n", "304988603 4,444.00 5,969.00 Top 1% \n", "191964162 5,331.00 6,726.00 Top 1% \n", "259395895 3,307.00 4,702.00 Top 1% \n", "14529929 1,295.00 3,368.00 Top 1% \n", "15486163 1,017.00 3,010.00 Top 1% \n", "19576571 2,050.00 4,946.00 Top 1% \n", "2453025128 8,855.00 8,856.00 Top 1% " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_summary_df[user_summary_df.tweets_in_dataset_bin == 'Top 1%']" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
originalquotereplyretweettweets_in_datasetpercent_of_originalpercent_of_quotepercent_of_replypercent_of_retweetspercent_of_tweets_in_datasetusers_in_bin
tweets_in_dataset_bin
Bottom 90%118,274.0036,419.0031,546.00116,400.00302,639.000.600.440.340.430.472043
Middle 9%65,947.0033,018.0043,692.0097,456.00240,113.000.330.400.470.360.37206
Top 1%14,078.0013,880.0017,224.0058,287.00103,469.000.070.170.190.210.1623
\n", "
" ], "text/plain": [ " original quote \\\n", "tweets_in_dataset_bin \n", "Bottom 90% 118,274.00 36,419.00 \n", "Middle 9% 65,947.00 33,018.00 \n", "Top 1% 14,078.00 13,880.00 \n", "\n", " reply retweet \\\n", "tweets_in_dataset_bin \n", "Bottom 90% 31,546.00 116,400.00 \n", "Middle 9% 43,692.00 97,456.00 \n", "Top 1% 17,224.00 58,287.00 \n", "\n", " tweets_in_dataset percent_of_original \\\n", "tweets_in_dataset_bin \n", "Bottom 90% 302,639.00 0.60 \n", "Middle 9% 240,113.00 0.33 \n", "Top 1% 103,469.00 0.07 \n", "\n", " percent_of_quote percent_of_reply \\\n", "tweets_in_dataset_bin \n", "Bottom 90% 0.44 0.34 \n", "Middle 9% 0.40 0.47 \n", "Top 1% 0.17 0.19 \n", "\n", " percent_of_retweets percent_of_tweets_in_dataset \\\n", "tweets_in_dataset_bin \n", "Bottom 90% 0.43 0.47 \n", "Middle 9% 0.36 0.37 \n", "Top 1% 0.21 0.16 \n", "\n", " users_in_bin \n", "tweets_in_dataset_bin \n", "Bottom 90% 2043 \n", "Middle 9% 206 \n", "Top 1% 23 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tweets_in_dataset_bin_summary_df = user_summary_df[['original', 'quote', 'reply', 'retweet', 'tweets_in_dataset', 'tweets_in_dataset_bin']].groupby('tweets_in_dataset_bin').sum()\n", "tweets_in_dataset_bin_summary_df['percent_of_original'] = tweets_in_dataset_bin_summary_df.original / tweets_in_dataset_bin_summary_df.original.sum()\n", "tweets_in_dataset_bin_summary_df['percent_of_quote'] = tweets_in_dataset_bin_summary_df.quote / tweets_in_dataset_bin_summary_df.quote.sum()\n", "tweets_in_dataset_bin_summary_df['percent_of_reply'] = tweets_in_dataset_bin_summary_df.reply / tweets_in_dataset_bin_summary_df.reply.sum()\n", "tweets_in_dataset_bin_summary_df['percent_of_retweets'] = tweets_in_dataset_bin_summary_df.retweet / tweets_in_dataset_bin_summary_df.retweet.sum()\n", "tweets_in_dataset_bin_summary_df['percent_of_tweets_in_dataset'] = tweets_in_dataset_bin_summary_df.tweets_in_dataset / tweets_in_dataset_bin_summary_df.tweets_in_dataset.sum()\n", "tweets_in_dataset_bin_summary_df['users_in_bin'] = user_summary_df[['tweets_in_dataset_bin', 'tweets_in_dataset']].groupby('tweets_in_dataset_bin').count()\n", "tweets_in_dataset_bin_summary_df\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## User summary" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
followers_countfollowing_counttweet_count
count2,484.002,484.002,484.00
mean14,644.391,344.528,760.62
std84,477.362,805.2115,836.17
min0.000.000.00
25%659.00428.001,001.25
50%2,114.00933.003,578.00
75%6,611.001,621.509,572.00
max2,133,806.0094,689.00201,279.00
\n", "
" ], "text/plain": [ " followers_count following_count tweet_count\n", "count 2,484.00 2,484.00 2,484.00\n", "mean 14,644.39 1,344.52 8,760.62\n", "std 84,477.36 2,805.21 15,836.17\n", "min 0.00 0.00 0.00\n", "25% 659.00 428.00 1,001.25\n", "50% 2,114.00 933.00 3,578.00\n", "75% 6,611.00 1,621.50 9,572.00\n", "max 2,133,806.00 94,689.00 201,279.00" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_summary_df[['followers_count', 'following_count', 'tweet_count']].describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Gender" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "M 1398\n", "F 1085\n", "Name: gender, dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_summary_df['gender'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Organization" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Top by average followers" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
followers_count
sumsizeaverage
organization
MSNBC17329927247,570.29
Toronto Star1650561165,056.00
New York1257541125,754.00
New Yorker1251801125,180.00
MTV News1014731101,473.00
\n", "
" ], "text/plain": [ " followers_count \n", " sum size average\n", "organization \n", "MSNBC 1732992 7 247,570.29\n", "Toronto Star 165056 1 165,056.00\n", "New York 125754 1 125,754.00\n", "New Yorker 125180 1 125,180.00\n", "MTV News 101473 1 101,473.00" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "org_summary_df[['followers_count']].sort_values([('followers_count', 'average')], ascending=False).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Top by average following" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
following_count
sumsizeaverage
organization
White House Dossier744117,441.00
Snapchat601916,019.00
Bankrate585315,853.00
New York Daily News428814,288.00
Texas Tribune393513,935.00
\n", "
" ], "text/plain": [ " following_count \n", " sum size average\n", "organization \n", "White House Dossier 7441 1 7,441.00\n", "Snapchat 6019 1 6,019.00\n", "Bankrate 5853 1 5,853.00\n", "New York Daily News 4288 1 4,288.00\n", "Texas Tribune 3935 1 3,935.00" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "org_summary_df[['following_count']].sort_values([('following_count', 'average')], ascending=False).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Top by average tweet count" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_count
sumsizeaverage
organization
New Republic96050196,050.00
Mic95033195,033.00
Yahoo News93714193,714.00
MTV News80962180,962.00
ProPublica78207178,207.00
\n", "
" ], "text/plain": [ " tweet_count \n", " sum size average\n", "organization \n", "New Republic 96050 1 96,050.00\n", "Mic 95033 1 95,033.00\n", "Yahoo News 93714 1 93,714.00\n", "MTV News 80962 1 80,962.00\n", "ProPublica 78207 1 78,207.00" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "org_summary_df[['tweet_count']].sort_values([('tweet_count', 'average')], ascending=False).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Top by number of tweets in dataset" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweets_in_dataset
sumsizeaverage
organization
Politico43,669.00103.00423.97
CNN33,868.00149.00227.30
Washington Post22,621.0060.00377.02
Bloomberg News17,558.0075.00234.11
CBS News17,036.0061.00279.28
\n", "
" ], "text/plain": [ " tweets_in_dataset \n", " sum size average\n", "organization \n", "Politico 43,669.00 103.00 423.97\n", "CNN 33,868.00 149.00 227.30\n", "Washington Post 22,621.00 60.00 377.02\n", "Bloomberg News 17,558.00 75.00 234.11\n", "CBS News 17,036.00 61.00 279.28" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "org_summary_df[['tweets_in_dataset']].sort_values([('tweets_in_dataset', 'sum')], ascending=False).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## First tweet for each user" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tweet_id 2293\n", "screen_name 2293\n", "tweet_created_at 2293\n", "user_created_at 2293\n", "tweets_to_date 2293\n", "tweet_type 2293\n", "dtype: int64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Get the first tweet for each user\n", "first_tweet_df = tweet_df.loc[tweet_df.groupby('user_id')['tweet_created_at'].idxmin()].set_index(['user_id'])\n", "first_tweet_df.count()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweet_idscreen_nametweet_created_atuser_created_attweets_to_datetweet_type
user_id
16338087876092563563958272AbbyDanzig2017-06-17 15:01:58+00:002008-09-17 22:10:27+00:001542retweet
3901972468875730040750604288jchamseddine102017-06-16 15:01:26+00:002015-10-08 18:44:17+00:00605original
198935531875477217895231488CarrieStevenson2017-06-15 22:16:48+00:002010-10-05 16:30:31+00:00438original
267210696875005803283050496PeteBehrEENews2017-06-14 15:03:34+00:002011-03-16 14:28:09+00:0024original
425112739874967586085244930jzieglerWTOP2017-06-14 12:31:43+00:002011-11-30 15:37:28+00:00815retweet
\n", "
" ], "text/plain": [ " tweet_id screen_name tweet_created_at \\\n", "user_id \n", "16338087 876092563563958272 AbbyDanzig 2017-06-17 15:01:58+00:00 \n", "3901972468 875730040750604288 jchamseddine10 2017-06-16 15:01:26+00:00 \n", "198935531 875477217895231488 CarrieStevenson 2017-06-15 22:16:48+00:00 \n", "267210696 875005803283050496 PeteBehrEENews 2017-06-14 15:03:34+00:00 \n", "425112739 874967586085244930 jzieglerWTOP 2017-06-14 12:31:43+00:00 \n", "\n", " user_created_at tweets_to_date tweet_type \n", "user_id \n", "16338087 2008-09-17 22:10:27+00:00 1542 retweet \n", "3901972468 2015-10-08 18:44:17+00:00 605 original \n", "198935531 2010-10-05 16:30:31+00:00 438 original \n", "267210696 2011-03-16 14:28:09+00:00 24 original \n", "425112739 2011-11-30 15:37:28+00:00 815 retweet " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first_tweet_df.sort_values('tweet_created_at', ascending=False).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Most recent first tweet" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Timestamp('2017-06-17 15:01:58+0000', tz='UTC')" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first_tweet_df['tweet_created_at'].max()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }