{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# What is Trump yelling about?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import twitter" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Define your api_key/secret and access_token_key/secret here\n", "\n", "api = twitter.Api(consumer_key=api_key,\n", " consumer_secret=api_secret,\n", " access_token_key=access_token_key,\n", " access_token_secret=access_token_secret)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "User(ID=1961548171, ScreenName=kmarwahaha)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "api.VerifyCredentials()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Scrape last ~3200 tweets" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def get_statuses(prev_statuses = None, screen_name = \"realDonaldTrump\", count=200):\n", " max_id = prev_statuses[-1].id-1 if prev_statuses else None\n", " return api.GetUserTimeline(screen_name=screen_name, max_id=max_id, count=count)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "n = 16\n", "temp = get_statuses()\n", "all_statuses = temp\n", "for _ in range(n):\n", " temp = get_statuses(temp)\n", " all_statuses += temp" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Status(ID=888478670502932480, ScreenName=realDonaldTrump, Created=Fri Jul 21 19:19:56 +0000 2017, Text='Manufacturers’ record-high optimism reported in the 1st qtr has carried into the 2nd qtr of 2017 via @ShopFloorNAM:… https://t.co/G6QSAqjUeY')" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Earliest tweet scraped\n", "all_statuses[-1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Parse date" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#https://stackoverflow.com/questions/3682748/converting-unix-timestamp-string-to-readable-date-in-python\n", "import datetime\n", "def readable_date(unix_time):\n", " return datetime.datetime.fromtimestamp(int(unix_time)).strftime('%Y.%m.%d')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "text_and_date = [(status.text, readable_date(status.created_at_in_seconds)) for status in all_statuses]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "('Congratulations to Josh Hawley on your big Senate Primary win in Missouri. I look forward to working with you towar… https://t.co/gGWY572ROS',\n", " '2018.08.07')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Most recent tweet\n", "text_and_date[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Analyze" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def trim_results(text_and_date, filterx):\n", " return list(filter(\n", " lambda y: len(y[1]) > 0,\n", " [(date, \n", " \" \".join(filter(filterx, \n", " map(lambda x: x.replace('-', ' ').replace(u'\\u2026', '').strip(), \n", " text.strip().split())\n", " ))\n", " ) for (text, date) in text_and_date]\n", " ))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Approach 1: Find words that are uppercase, capitalized, hashtag'd, or end in \"!\"" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "filter1 = lambda x: x != u'&' and (len(x) > 0 and x[0].upper() == x[0] \n", " or x.upper() == x or u'#' == x[0] or u'!' == x[-1])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "trimmed1 = trim_results(text_and_date, filter1)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[('2018.08.07', 'Congratulations Josh Hawley Senate Primary Missouri. I'),\n", " ('2018.08.07',\n", " 'Congratulations Bill Schuette. You Big November Governor Great St'),\n", " ('2018.08.07',\n", " 'Congratulations STAR Republican Party, Senator John James. A'),\n", " ('2018.08.07', '.....Congratulations Troy Balderson Ohio. A race!'),\n", " ('2018.08.07', 'When I Ohio Troy Balderson, 64 36. That After'),\n", " ('2018.08.07', 'Today, 236th Purple Heart, Armed Forces'),\n", " ('2018.08.07',\n", " 'RT @EricTrump: Ohio today! We Troy Balderson Congress. Visit'),\n", " ('2018.08.07', 'Ohio, Troy Balderson Congress. His Nancy Pelosi, Crime, B'),\n", " ('2018.08.07', 'The Iran These November'),\n", " ('2018.08.06', 'California'),\n", " ('2018.08.06', 'Democrats Open Borders ICE, Country'),\n", " ('2018.08.06', 'RT @realDonaldTrump: Presidential Approval Better'),\n", " ('2018.08.06', 'John James Republican Star Senate Michigan. If'),\n", " ('2018.08.06', 'Governor Jerry Brown Free Flow North'),\n", " ('2018.08.06', 'Kris Kobach, Governor Great State Kansas. He'),\n", " ('2018.08.06', 'Great Economy'),\n", " ('2018.08.06', '....a Kremlin Donald Trump. Collusion R'),\n", " ('2018.08.06', '“Collusion Russia Hillary Clinton 100% Russians, Adam'),\n", " ('2018.08.05',\n", " 'RT @realDonaldTrump: ...Danny O’Connor Nancy Pelosi Maxine Waters – Danny'),\n", " ('2018.08.05',\n", " 'RT @realDonaldTrump: A Ohio’s 12th Congressional District Troy Balderson! Troy Ohio, O')]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trimmed1[:20]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hmm, this isn't that insightful. Let's see what he's really yelling about." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Approach 2: Just look for YELLING" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#https://stackoverflow.com/questions/1265665/python-check-if-a-string-represents-an-int-without-using-try-except\n", "def RepresentsInt(s):\n", " try: \n", " int(s)\n", " return True\n", " except ValueError:\n", " return False\n", "def IsASCII(u):\n", " try: \n", " u.encode()\n", " return True\n", " except UnicodeEncodeError:\n", " return False" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "filter2 = lambda x: x.upper() == x and not RepresentsInt(x) and u'-' != x and x != u'RT' and len(x) > 3 and IsASCII(x)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [], "source": [ "trimmed2 = trim_results(text_and_date, filter2)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[('2018.08.07', 'STAR'),\n", " ('2018.08.06', 'ICE,'),\n", " ('2018.08.06', '100%'),\n", " ('2018.08.05', 'U.S.,'),\n", " ('2018.08.05', 'TRUE.'),\n", " ('2018.08.04', 'P.R.'),\n", " ('2018.08.04', 'U.S.,'),\n", " ('2018.08.04', 'HAPPY BIRTHDAY @USCG!'),\n", " ('2018.08.03', '@WWE'),\n", " ('2018.08.03', '@DRUDGE_REPORT:'),\n", " ('2018.08.03', '@DRUDGE_REPORT: +37,000...'),\n", " ('2018.08.03', '@DRUDGE_REPORT: RECORD 155,965,000 EMPLOYED'),\n", " ('2018.08.03', '@DRUDGE_REPORT:'),\n", " ('2018.08.03', '219,000 185,000 ADP”'),\n", " ('2018.08.03', '✅”US'),\n", " ('2018.08.03', '.@POTUS'),\n", " ('2018.08.03', '“THE RUSSIA HOAX, ILLICIT SCHEME CLEAR HILLARY CLINTON'),\n", " ('2018.08.03', 'MAKE AMERICA GREAT AGAIN!'),\n", " ('2018.08.03', '500,000'),\n", " ('2018.08.03', 'NASA,')]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trimmed2[:20]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "That's a bit better." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### What's his yelling vocabulary like?" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from collections import Counter" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [], "source": [ "trump_yelling = Counter([x[1] for x in trimmed2])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[('U.S.', 76),\n", " ('#MAGA', 21),\n", " ('GREAT', 19),\n", " ('DACA', 19),\n", " ('MAKE AMERICA GREAT AGAIN!', 13),\n", " ('@FLOTUS', 12),\n", " ('NATO', 11),\n", " ('MAKING AMERICA GREAT AGAIN!', 10),\n", " ('MS 13', 10),\n", " ('DACA.', 10),\n", " ('@POTUS', 9),\n", " ('A.G.', 7),\n", " ('🇺🇸🇺🇸🇺🇸', 7),\n", " ('U.S.,', 6),\n", " ('.@POTUS', 6),\n", " ('FISA', 6),\n", " ('2018,', 6),\n", " ('ISIS', 6),\n", " ('FEMA', 6),\n", " ('#UNGA', 6),\n", " ('ICE,', 5),\n", " ('FBI.', 5),\n", " ('CUTS', 5),\n", " ('@FLOTUS:', 5),\n", " ('THANK', 5),\n", " ('D.C.', 5),\n", " ('2017,', 5),\n", " ('NEVER', 5),\n", " ('100%', 4),\n", " ('VERY', 4),\n", " ('JOBS, JOBS, JOBS!', 4),\n", " ('OPEC', 4),\n", " ('U.S.A.', 4),\n", " ('CNN.', 4),\n", " ('@FEMA', 4),\n", " ('ZERO', 4),\n", " ('T.V.', 4),\n", " ('HAPPY BIRTHDAY', 4),\n", " ('15 0', 4),\n", " ('#USA🇺🇸', 4),\n", " ('#SCOTUS', 3),\n", " ('AMERICA OPEN BUSINESS!', 3),\n", " ('VOTE', 3),\n", " ('WITCH HUNT!', 3),\n", " ('HEROES', 3),\n", " ('D.C.,', 3),\n", " ('LIVE #MAGA', 3),\n", " ('.@FLOTUS', 3),\n", " ('50%,', 3),\n", " ('ISIS,', 3),\n", " ('BORDER WALL', 3),\n", " ('#WEF18', 3),\n", " ('V.A.', 3)]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[i for i in trump_yelling.most_common() if i[1] > 2]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Makes sense. His whole YELLING vocab is below." ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['\"20,000📈21,000📈22,000📈 #MAGA',\n", " '\"46%',\n", " '\"@POTUS',\n", " '\"DEPLORABLES.\"',\n", " '\"NO\"',\n", " '#AMERICA FIRST',\n", " '#AMERICA FIRST!',\n", " '#APEC',\n", " '#APEC2017',\n", " '#APEC2017 @FLOTUS',\n", " '#ASEAN50 FAIR TRADE DEALS,',\n", " '#AZ08.',\n", " '#BYE BYE',\n", " '#CHANGETHELAWS',\n", " '#FAKE NEWS!',\n", " '#FEMA',\n", " '#GES2017',\n", " '#GES2017.',\n", " '#HELSINKI2018',\n", " '#ISIS',\n", " '#LESM',\n", " '#MAGA',\n", " '#MAGA @CPAC',\n", " '#MAGA WINNING,',\n", " '#MAGA!',\n", " '#MAGA🇺🇸',\n", " '#MS13 #MS13',\n", " '#NATO2018!',\n", " '#NATOS',\n", " '#S2155,',\n", " '#SCOTUS',\n", " '#SOTU',\n", " '#UNGA',\n", " '#USA',\n", " '#USAF B 1B',\n", " '#USA🇺🇸',\n", " '#USMC242',\n", " '#USVI.',\n", " '#USVI. @FEMA',\n", " '#WEF18',\n", " '$1.4',\n", " '$1.6',\n", " '$1.6 $700',\n", " '$1.6B U.S.A.',\n", " '$1.7 CASH',\n", " '$16.3+ 9.2%',\n", " '$20,000,000',\n", " '$500 DOWN,',\n", " '$700 $716',\n", " '$700,000',\n", " '$729M',\n", " '$800',\n", " '$972,000 GPS. $12,400,000 DNC.',\n", " \"'16. C.M.\",\n", " \"'18.\",\n", " '(#HR873.)',\n", " '($700,000)',\n", " '(110',\n", " '(5 10',\n", " '(@FLOTUS.)',\n", " '(AAA).',\n", " '(D VA),',\n", " '(D.C.) 4:00 P.M.',\n", " '(DACA)',\n", " '(DMD),',\n", " '(H.R. 2142)',\n", " '(HMX 1)',\n", " '(SEAL)',\n", " '(U.S.',\n", " '(USA)',\n", " '*NOT*',\n", " '.....',\n", " '.....I',\n", " '....8 U.S.',\n", " '....@NASCAR',\n", " '....“$17',\n", " '...9',\n", " '...I',\n", " '...NFL DOWN. U.S.',\n", " '...U.S.A.',\n", " '.@CNN FLAG, ANTHEM, COUNTRY.',\n", " '.@FLOTUS',\n", " '.@G7',\n", " '.@NFL:',\n", " '.@POTUS',\n", " '.@POTUS #FOSTA #SESTA',\n", " '.@POTUS #TRUMP🇺🇸 @FLOTUS🌺 HOPELESS...YOU HOPE! INSPIRE ALL! #MAGA #USA',\n", " '.@POTUS #USA',\n", " '.@POTUS @FLOTUS',\n", " '.@POTUS @FLOTUS @LVMPD',\n", " '.@POTUS @FLOTUS @UMCSN',\n", " '.@POTUS U.S.',\n", " '.@POTUS:',\n", " '.@POTUS’',\n", " '.@USCG',\n", " '.@VP',\n", " '1/2%.',\n", " '1/20 35,000 ISIS 17,500',\n", " '10,000',\n", " '100%',\n", " '100,000 MIGHT SECOND',\n", " '10:00',\n", " '10:00 A.M.',\n", " '10:00 A.M. ENJOY!',\n", " '10:00.',\n", " '116%',\n", " '11:00 A.M.',\n", " '11:30 A.M.',\n", " '12,000 10,000',\n", " '15 0',\n", " '15%.',\n", " '15,000',\n", " '18,589 2016, 25,075',\n", " '19,000 RESPECTING',\n", " '1944,',\n", " '1960, 20,000 185,000',\n", " '1969”',\n", " '1973.',\n", " '1:00 P.M.',\n", " '2,000',\n", " '2,000,000',\n", " '2,500',\n", " '20,295 @POTUS',\n", " '200%',\n", " '2000\"',\n", " '2000....A',\n", " '2000”',\n", " '2004\"',\n", " '2008”',\n", " '2014,',\n", " '2016/2017',\n", " '2017,',\n", " '2017.',\n", " '2018!',\n", " '2018,',\n", " '2018. #MAGA',\n", " '2020.',\n", " '2020?',\n", " '219,000 185,000 ADP”',\n", " '22,000 18,000',\n", " '223,000',\n", " '227 205.',\n", " '23,000',\n", " '24,000',\n", " '24/7 CNN, ABC, NBC, CBS, NYTIMES WAPO,',\n", " '25%, U.S.',\n", " '25,000',\n", " '25,000.',\n", " '2:00 A.M.',\n", " '3.9%',\n", " '3/4:',\n", " '300,000',\n", " '31,174',\n", " '33,000',\n", " '33,000 $145,000,000',\n", " '3:00 P.M. N.J.',\n", " '3:30 P.M.',\n", " '4.1%,',\n", " '4.1%, $5.4',\n", " '4.1%. 2.6%,',\n", " '4/9/18,',\n", " '400,000 GREAT WOMEN D.C.',\n", " '45%,',\n", " '45.6',\n", " '49%,',\n", " '50%,',\n", " '50,000',\n", " '500,000',\n", " '55,000 6,000,000',\n", " '59%,',\n", " '6.8%,',\n", " '60...',\n", " '69 30,',\n", " '6:00 A.M.',\n", " '6:15 A.M. NOW!',\n", " '7:00 P.M.',\n", " '7:00 P.M. #MAGA🇺🇸',\n", " '7:15.',\n", " '7:45',\n", " '8:18 A.M.',\n", " '8:30',\n", " '8:30.',\n", " '9,000',\n", " '93 5 5/31/99.',\n", " '95.1%',\n", " '9:00 A.M.',\n", " '9:00 P.M.',\n", " '@ABC',\n", " '@ABC @CNN.',\n", " '@ABC:',\n", " '@CBS',\n", " '@CIA!',\n", " '@CNBC',\n", " '@CNN',\n", " '@CNN @MSNBC',\n", " '@CNN @NBC.',\n", " '@CNN!',\n", " '@CNN, FAKE,',\n", " '@CNN.',\n", " '@DRUDGE_REPORT:',\n", " '@DRUDGE_REPORT: +37,000...',\n", " '@DRUDGE_REPORT: RECORD 155,965,000 EMPLOYED',\n", " '@FEMA',\n", " '@FEMA,',\n", " '@FLOTUS',\n", " '@FLOTUS @JBA_NAFW.',\n", " '@FLOTUS THANK',\n", " '@FLOTUS THANK GREAT WORK!',\n", " '@FLOTUS 🇺🇸🇺🇸🇺🇸',\n", " '@FLOTUS!',\n", " '@FLOTUS:',\n", " '@FLOTUS: @JBA_NAFW',\n", " '@GOP #MAGA 🇺🇸🇺🇸🇺🇸',\n", " '@GOP:',\n", " '@GOP: .@POTUS:',\n", " '@JBA_NAFW HISTORIC CUTS',\n", " '@JPN_PMO',\n", " '@LOUDOBBS TRUE!',\n", " '@NASA',\n", " '@NASCAR',\n", " '@NFIB:',\n", " '@NRA.',\n", " '@NVGOP! #MAGA',\n", " '@POT',\n", " '@POTUS',\n", " '@POTUS #MAGA',\n", " '@POTUS 863,000',\n", " '@POTUS @FLOTUS @MELANIATRUMP',\n", " '@POTUS @FLOTUS @USCG',\n", " '@POTUS @GES2017.',\n", " '@POTUS POTUS',\n", " '@POTUS THANK',\n", " '@POTUS U.S. #DPRK',\n", " \"@POTUS'\",\n", " \"@POTUS' #MAGA\",\n", " '@POTUS.',\n", " '@TBN.',\n", " '@UN...',\n", " '@USCG 24/7/365. THANK',\n", " '@USCG,',\n", " '@USCIS U.S.',\n", " '@USUN GREAT #USA🇺🇸',\n", " '@VFWHQ',\n", " '@VP:',\n", " '@VP: .@POTUS',\n", " '@VP: @POTUS',\n", " '@VP: TRUMP CUTS,',\n", " '@VSPPIO',\n", " '@WEF!',\n", " '@WSJ @CFPB',\n", " '@WWE',\n", " 'A.G.',\n", " 'A.G. 33,000',\n", " 'A.G. FISA',\n", " 'A.G.,',\n", " 'A.P.',\n", " 'ABC, NBC,',\n", " 'ADDITIONAL',\n", " 'AGENDA,',\n", " 'ALL TIME',\n", " 'ALL TIME RECORD OPTIMISM!',\n", " 'ALWAYS',\n", " 'AMERICA AMERICA DETERMINED',\n", " \"AMERICA DON'T WORSHIP GOVERNMENT WORSHIP\",\n", " 'AMERICA FIRST!',\n", " 'AMERICA NATION',\n", " 'AMERICA OPEN BUSINESS!',\n", " 'AMERICA!',\n", " 'AMERICA, AMERICAN AMERICA',\n", " 'AMERICAN CITIZENS FIRST!',\n", " 'AMERICAN PEOPLE FIRST!',\n", " 'AMERICAN SAILOR BEST',\n", " 'AMERICAN VICTIMS ILLEGAL IMMIGRATION.',\n", " 'AMERICANS FIRST.',\n", " 'AMVETS',\n", " 'ANARCHY',\n", " 'ANGER UNITY $12,000,000?),....',\n", " 'ASAP EVER',\n", " 'ASAP.',\n", " 'AUSTIN BOMBING SUSPECT DEAD.',\n", " 'AWOL',\n", " 'BACK! BILLIONS',\n", " 'BADLY DACA',\n", " 'BAILOUTS BAILOUTS',\n", " 'BASE',\n", " 'BEAT',\n", " 'BEFORE',\n", " 'BENCH',\n", " 'BEST MAKE AMERICA GREAT AGAIN',\n", " 'BETTER',\n", " 'BEWARE!',\n", " 'BILLION',\n", " 'BILLION DOLLARS',\n", " 'BILLION U.S.A., BILL',\n", " 'BOOK MUST READ!',\n", " 'BOOM!',\n", " 'BOOM!!',\n", " 'BOOMING',\n", " 'BOOMING,',\n", " 'BORDER WALL',\n", " 'BOYCOTT YOU! #NFL #MAGA',\n", " 'BRAVE ONCE MORE',\n", " 'BREAKING:',\n", " 'BUILD WALL,',\n", " 'BUST.',\n", " 'CFPB,',\n", " 'CFPB.',\n", " 'CHAIN MIGRATION',\n", " 'CHANGE LAWS!',\n", " 'CHAOS',\n", " 'CHEMICAL',\n", " 'CHIP',\n", " 'CIA!',\n", " 'CIA,',\n", " 'CIA.',\n", " 'CLASSIFIED',\n", " 'CNN,',\n", " 'CNN, ABC, CBS?',\n", " 'CNN.',\n", " 'CNN’S CNN, MOST TRUSTED NAME NEWS.',\n", " 'CNN” CNN.',\n", " 'COLDEST',\n", " 'COLLUSION',\n", " 'COLLUSION OBSTRUCTION.',\n", " 'COLLUSION!',\n", " 'COMMANDER IN CHIEF’S TROPHY!',\n", " 'CONGRATULATIONS U.S. GOLD!',\n", " 'CONGRATULATIONS!',\n", " 'COSTLY',\n", " 'COUNTRY, COMMUNITIES, GREAT AMERICAN WORKERS!',\n", " 'CPAC',\n", " 'CPAC STRAW POLL RESULTS: APPROVE PRESIDENT TRUMP DOING',\n", " 'CRAZY!',\n", " 'CUTS',\n", " 'CUTS JOBS ACT!',\n", " 'CUTS JOBS ACT:',\n", " 'CUTS REFORM!',\n", " 'CUTS U.S.',\n", " 'CUTS,',\n", " 'CUTS, MILLION',\n", " 'D.C.',\n", " 'D.C. @POTUS',\n", " 'D.C. THAT GREAT STATE',\n", " 'D.C.,',\n", " 'DACA',\n", " 'DACA DACA',\n", " 'DACA STRONG WALL',\n", " 'DACA WALL',\n", " 'DACA!',\n", " 'DACA! DACA',\n", " 'DACA)',\n", " 'DACA,',\n", " 'DACA.',\n", " 'DACA.”',\n", " 'DACA: DACA',\n", " 'DEAD',\n", " 'DEATH PENALTY!',\n", " 'DELIVER',\n", " 'DEMS!',\n", " 'DHS, @CBP,',\n", " 'DIGENOVA, U.S.',\n", " 'DNC.',\n", " 'DOJ,',\n", " 'DON’T HAVE STEEL, DON’T HAVE COUNTRY!',\n", " 'DOW, NASDAQ #MAGA',\n", " 'DOWN',\n", " 'DOWN.',\n", " 'DREAMERS INNOVATORS',\n", " 'E.U. U.S.',\n", " 'ENFORCE PROTECT SUPPORT #LESM',\n", " 'EPA.',\n", " 'ESPN',\n", " 'ESPN RECORD',\n", " 'EVER @POTUS',\n", " 'EVER WSJ.',\n", " 'EVER, S.C.:',\n", " 'EVERY SINGLE AFTER,',\n", " 'EVERYONE',\n", " 'EVIDENCE COLLUSION....I',\n", " 'F 35 U.S.',\n", " 'FACE',\n", " 'FAIR FAMILIES STAY AMERICA, GROW AMERI',\n", " 'FAIR STAY GROW',\n", " 'FAIR TRADE!',\n", " 'FAKE',\n", " 'FAKE NEWS',\n", " 'FAKE NEWS!',\n", " 'FAKE!',\n", " 'FANTASTIC USA!',\n", " 'FAST',\n", " 'FBI)',\n", " 'FBI,',\n", " 'FBI.',\n", " 'FEMA',\n", " 'FEMA GREAT',\n", " 'FEMA,',\n", " 'FEMA, P.R.',\n", " 'FEMA.',\n", " 'FIFTH 2017! #DOW24K #MAGA',\n", " 'FILES',\n", " 'FINDINGS:',\n", " 'FIRED,',\n", " 'FISA',\n", " 'FLAG GREAT COUNTRY!',\n", " 'FLORIDA',\n", " 'FLORIDA EVERY SINGLE AFTER,',\n", " 'FLOTUS POTUS HEROES',\n", " 'FOUNDERS FREEDOM FREEDOM GIFT GOD.',\n", " 'FREEDOM',\n", " 'FRONT PAGE',\n", " 'FUTURE',\n", " 'GOAL U.S.',\n", " 'GOOD',\n", " 'GOODLATTE',\n", " 'GOP”',\n", " 'GPS,',\n", " 'GRATEFUL NATION, THANK (HEROES)',\n", " 'GREAT',\n", " 'GREAT (3.0',\n", " 'GREAT 100%! \"ISIS CJTF–OIR',\n", " 'GREAT AMERICAN FLAG.',\n", " 'GREAT BLESS TEXAS BLESS USA🇺🇸',\n", " 'GREAT CUTS!',\n", " 'GREAT EVENING',\n", " 'GREAT HONOR BRAVE HEROES @USMC THANK',\n", " 'GREAT MILITARY.',\n", " 'GREAT NATION! #USA🇺🇸',\n", " 'GREAT NYPD,',\n", " 'GREAT U.S.',\n", " 'GREAT U.S. FEMA',\n", " 'GREAT VETERANS',\n", " 'GREAT VETS,',\n", " 'GREAT VETS.',\n", " 'GREAT!',\n", " 'GREAT. WINNING AGAIN!',\n", " 'GUILT SOMETHING!',\n", " 'H.R. 267,',\n", " 'HANDED',\n", " 'HAPPY 100TH BIRTHDAY',\n", " 'HAPPY BIRTHDAY',\n", " 'HAPPY BIRTHDAY @USCG!',\n", " 'HAPPY EASTER!',\n", " 'HAPPY THANKSGIVING!',\n", " 'HAPPY THANKSGIVING, EVER,',\n", " 'HAPPY YEAR! MAKING AMERICA GREAT AGAIN,',\n", " 'HATE',\n", " 'HAVE GREAT LIFE!',\n", " 'HEARS YOUR VOICE YOUR BACK.',\n", " 'HERO',\n", " 'HEROES',\n", " 'HEROES.',\n", " 'HISTORIC',\n", " 'HISTORIC $15,000,000,000',\n", " 'HISTORIC CUTS',\n", " 'HISTORIC RELIEF',\n", " 'HISTORY EVER',\n", " 'HISTORY.',\n", " 'HOME.',\n", " 'HONOR MEDAL VALOR',\n", " 'HOUSE INTELLIGENCE COMMITTEE HAS, AFTER MONTH LONG IN DEPTH INVESTIGATION, FOUND EVIDENCE COLLUSION',\n", " 'HOUSE REPUBLICANS SHOULD PASS STRONG FAIR IMMIGRATION BILL, KNOWN GOODLATTE THEIR AFTERNOON VOTE',\n", " 'I.T.',\n", " 'ICE,',\n", " 'ICE.',\n", " 'IDEA.',\n", " 'IMMEDIATELY',\n", " 'IMPROVE INCREASE LOWER COSTS HEALTHCARE!',\n", " 'INCREDIBLE U.S. 🇺🇸🇰🇷',\n", " 'INSANE IMMIGRATION LAWS NOW!',\n", " 'INTELLIGENCE LEAK A.G.',\n", " 'ISIS',\n", " 'ISIS .....',\n", " 'ISIS ISIS 26,000 13,200',\n", " 'ISIS SHOULD DEATH PENALTY!',\n", " 'ISIS,',\n", " 'JINPING',\n", " 'JINPING JONG',\n", " 'JOBS!',\n", " 'JOBS! E.U. U.S. STOP!',\n", " 'JOBS, JOBS, JOBS',\n", " 'JOBS, JOBS, JOBS!',\n", " 'JOBS, JOBS, JOBS! #MAGA',\n", " 'JUST OUT: 3.9% WITCH HUNT!',\n", " 'KKK,',\n", " 'KOREAN END! GREAT',\n", " 'KORUS!',\n", " 'L.G.',\n", " 'LAST',\n", " 'LAYER',\n", " 'LEAKER LIAR.',\n", " 'LEAKS A.G.',\n", " 'LEAKS NEWS',\n", " 'LEFT',\n", " 'LIED! LIED! LIED!',\n", " 'LIVE',\n", " 'LIVE #MAGA',\n", " 'LIVE #USA🇺🇸',\n", " 'LOST',\n", " 'LOVE',\n", " 'LOVE ALL!',\n", " 'LOVE VEGAS!',\n", " 'LOWER RIPOFF DRUG PRICES!',\n", " 'LOWEST RATE EVER RECORDED!',\n", " 'MAGA',\n", " 'MAGA!',\n", " 'MAGA.',\n", " 'MAGNIFICENT',\n", " 'MAKE AMERICA',\n", " 'MAKE AMERICA GREAT AGAIN',\n", " 'MAKE AMERICA GREAT AGAIN #MAGA🇺🇸',\n", " 'MAKE AMERICA GREAT AGAIN RALLY!',\n", " 'MAKE AMERICA GREAT AGAIN!',\n", " 'MAKE CHANGE!',\n", " 'MAKING AMERICA GREAT AGAIN!',\n", " 'MAKING AMERICA SAFE GREAT AGAIN! #MAGA',\n", " 'MANY TIMES.”',\n", " 'MANY TIMES”',\n", " 'MASSIVE',\n", " 'MASSIVE WALL!',\n", " 'MASTERS',\n", " 'MATCH BLESSINGS',\n", " 'MEETING,',\n", " 'MERIT BASED',\n", " 'MERRY CHRISTMAS!!',\n", " 'MERRY CHRISTMAS!!!',\n", " 'MILLION',\n", " 'MISSION',\n", " 'MORE',\n", " 'MOST DISHONEST CORRUPT MEDIA AWARDS YEAR 5:00',\n", " 'MS 13',\n", " 'MS 13 ASAP!',\n", " 'MS 13 GANGS ICE!',\n", " 'MS 13 MAKE AMERICA SAFE AGAIN!',\n", " 'MS 13!',\n", " 'MS 13.',\n", " 'MSNBC',\n", " 'MUCH',\n", " 'MUCH CNN, U.S.,',\n", " 'MUST MAKE AMERICA GREAT AGAIN!',\n", " 'N.J.,',\n", " 'N.K.',\n", " 'N.Y.',\n", " 'NAFTA',\n", " 'NAFTA,',\n", " 'NASA,',\n", " 'NASCAR',\n", " 'NASCAR 500.',\n", " 'NATION',\n", " 'NATION, GREAT AMERICAN WORKERS!',\n", " 'NATIONAL PUBLIC HEALTH EMERGENCY',\n", " 'NATO',\n", " 'NATO MORE, LESS.',\n", " 'NATO!',\n", " 'NATO,',\n", " 'NATO, MUCH GDP.',\n", " 'NATO.',\n", " 'NATO. U.S.',\n", " 'NCAA',\n", " 'NEVER',\n", " 'NEVER HEROES',\n", " 'NEVER NOW.',\n", " 'NEVER U.S.',\n", " 'NEVER, EVER THREATEN UNITED STATES AGAIN WILL SUFFER CONSEQUENCES LIKE',\n", " 'NEWS',\n", " 'NEWS ALERT:',\n", " 'NEWS ALERT: U.S.',\n", " 'NEWS EXCLUSIVE:',\n", " 'NEWS MANY',\n", " 'NFL!',\n", " 'NORTHCOM SOUTHCOM.',\n", " 'NOT!',\n", " 'NOTHING',\n", " 'NOTHING,',\n", " 'NOW!',\n", " 'NOW! JOKE!',\n", " 'NRA!',\n", " 'NRA,',\n", " 'NYC, U.S.A.!',\n", " 'NYC.',\n", " 'OPEC',\n", " 'OPEN BUSINESS U.S.',\n", " 'OPIOID CRISIS:',\n", " 'OTHER',\n", " 'OWNED',\n", " 'P.M.',\n", " 'P.M.,',\n", " 'P.R.',\n", " 'PARDON',\n", " 'PATRIOT ☑️LOVE',\n", " 'PEOPLE',\n", " 'PEOPLE BELIEVE MAJOR NATIONAL NEWS ORGS FABRICATE STORIES ABOUT FAKE NEWS,',\n", " 'POLITICO',\n", " \"POTUS' 9/29/17\",\n", " \"POTUS' ☑️NASDAQ\",\n", " 'PROBABLY',\n", " 'PROMISES KEPT!',\n", " 'PROSPERITY, OPPORTUNITY, DOMINANCE,',\n", " 'PROTECT COMRADES,',\n", " 'PROUD',\n", " 'PROUD FARMING LEGACY.',\n", " 'PROUDLY',\n", " 'READ:',\n", " 'REAL PRIDE COUNTRY #USA🇺🇸',\n", " 'REALLY DON’T CARE,',\n", " 'REBUILD',\n", " 'RECORD HIGH 500!',\n", " 'RECORD HIGHS!',\n", " 'REFUS',\n", " 'REGISTER. P.O.',\n", " 'RELATED',\n", " 'REMEMBER PEARL',\n", " 'REPORT BOMBSHELL:',\n", " 'REPUBLICAN LEADERSHIP, WINNING AGAIN RESPECTED',\n", " 'REQUESTED',\n", " 'RESIST.',\n", " 'RESPECT',\n", " 'REST PEACE BILLY GRAHAM!',\n", " 'RESTORE AMERICAN PROSPERITY RECLAIM AMERICA’S DESTINY.',\n", " 'RICK SACCONE,',\n", " 'RIGGED H....',\n", " 'RIGHT DODD FRANK.',\n", " 'RIGHT TIME. TOGETHER',\n", " 'RISES POINTS YEAR FIRST TIME EVER MAKE AMERICA GREAT AGAIN!',\n", " 'RUSH LIMBAUGH',\n", " 'S.C., MS 13',\n", " 'SAFE SAFE',\n", " 'SAFE!',\n", " 'SECOND AMENDMENT WILL NEVER REPEALED!',\n", " 'SECRETLY',\n", " 'SECURITY BASED! AMERICA SAFE',\n", " 'SHUTDOWN',\n", " 'SICK!',\n", " 'SNL,',\n", " 'SOVEREIGN INDEPENDENT',\n", " 'SPECTACULAR!',\n", " 'SPIRIT HEREOS',\n", " 'SPYGATE',\n", " 'SPYING',\n", " 'STAND',\n", " 'STAND COUNTRY',\n", " 'STAR',\n", " 'STEEL BACK VERY',\n", " 'STOP',\n", " 'STRONGER',\n", " 'SUPREME COURT UPHOLDS TRUMP TRAVEL BAN.',\n", " 'T.V.',\n", " 'TAPE!',\n", " 'TAXES AMERICA FIRST. WORKERS, COMMUNIT',\n", " 'TEAMWORK!',\n", " 'TERMINATE',\n", " 'TEXAS: EVERY SINGLE AFTER,',\n", " 'TEXTS BOMBSHELLS!',\n", " 'THANK',\n", " 'THANK 24/7/365',\n", " 'THANK @NFIB! #NFIB75',\n", " 'THANK ASIA! #USA🇺🇸',\n", " 'THANK GREA',\n", " 'THANK GREAT',\n", " 'THANK HEROES',\n", " 'THANK INSPIRE',\n", " 'THANK MAKING AMERICA GREAT AGAIN!',\n", " 'THANK RULE LAW! LESS SAFE!',\n", " 'THANK U.S.',\n", " 'THANK YOU!',\n", " 'THANKS.\"',\n", " 'THEY LOSE FORTUNE,',\n", " 'TOGETHER, #MAGA🇺🇸',\n", " 'TONIGHT',\n", " 'TONIGHT:',\n", " 'TOTAL BOMB',\n", " 'TOTAL HOAX.',\n", " 'TOTAL WITCH HUNT!!!',\n", " 'TOTALLY UNTRUE',\n", " 'TPP,',\n", " 'TRADE',\n", " 'TREMENDOUS',\n", " 'TRILLION',\n", " 'TRUE.',\n", " 'TRUMP!!',\n", " 'TRUST.',\n", " 'TUNE',\n", " 'TUNE TONIGHT',\n", " 'U.N.',\n", " 'U.S.',\n", " 'U.S. #FEMA GREAT',\n", " 'U.S. #GES2017',\n", " 'U.S. #USA🇺🇸',\n", " 'U.S. $1.50',\n", " 'U.S. 15,000',\n", " 'U.S. 1973.',\n", " 'U.S. 2002. 2004.',\n", " 'U.S. 2004”',\n", " 'U.S. 2015: 2016: 2017: 2018:',\n", " 'U.S. 209,000 4.3%',\n", " 'U.S. 270%',\n", " 'U.S. 9:00 A.M.',\n", " 'U.S. C 130',\n", " 'U.S. CEO,',\n", " 'U.S. CNN!',\n", " 'U.S. COAL PRODUCTION',\n", " 'U.S. CRAZY!',\n", " 'U.S. FEMA',\n", " 'U.S. JOBS',\n", " 'U.S. LOWER PRICES!',\n", " 'U.S. MARKETS FROM ELECTION 11/8/2016}',\n", " 'U.S. STRONG',\n", " 'U.S. U.S. NATO,',\n", " 'U.S. WEAK WEAK',\n", " 'U.S. WIN!',\n", " 'U.S. [@USCG]',\n", " 'U.S.,',\n", " 'U.S.A.',\n", " 'U.S.A. BEST',\n", " 'U.S.A.,',\n", " 'UCLA',\n", " 'UNCONSTITUTIONAL!',\n", " 'UNITY',\n", " 'UNNECESSARY',\n", " 'USA!',\n", " 'USA,',\n", " 'V.A.',\n", " 'VERY',\n", " 'VERY DANGEROUS SOUTHERN BORDER, WALL',\n", " 'VERY EXPENSIVE',\n", " 'VETO 800,000 DACA',\n", " 'VIDEO: @FEMA #USVI',\n", " 'VOTE',\n", " 'W.H.',\n", " 'WALL!',\n", " 'WALL,',\n", " 'WALL.',\n", " 'WATCH LIVE: WBZ)',\n", " 'WEAK',\n", " 'WEEKLY ADDRESS🇺🇸',\n", " 'WELCOME HOME JOSH!',\n", " 'WELCOME HOME!',\n", " 'WHY? #MAGA!',\n", " 'WILL PROTECT SOUTHERN BORDER!',\n", " 'WIN!',\n", " \"WIN. IT'S TIME!\",\n", " 'WINNING!',\n", " 'WITCH HUNT',\n", " 'WITCH HUNT!',\n", " 'WITH FLORIDA! 1 800 342 3557 1 800 FL HELP 1',\n", " 'WOMEN BLUE.',\n", " 'WONDERFUL MAGNIFICENT',\n", " 'WORKERS FAMILIES',\n", " 'WORKING TOGETHER,',\n", " 'WORST',\n", " 'WOUNDED WARRIORS',\n", " 'WOW!',\n", " 'WOW, CANNOT',\n", " 'YEAR LONG.',\n", " 'YOU!',\n", " \"YOU'RE FIRED.\",\n", " 'YOU:',\n", " 'ZERO',\n", " 'ZTE,',\n", " 'ZTE, U.S.',\n", " '“1000',\n", " '“90%',\n", " '“ABC',\n", " '“ANTI TRUMP AGENT CLINTON EMAIL PROBE”',\n", " '“BET',\n", " '“FBI',\n", " '“FBI STOP ELECTI',\n", " '“FX” @FLOTUS THANK GREAT',\n", " '“GOP',\n", " '“ICE $43M',\n", " '“MS 13',\n", " '“OBAMA KEPT THEM CAGES, WRAPPED THEM FOIL”',\n", " '“OUT,”',\n", " '“SHADOW BANNING”',\n", " '“SPIED TRUMP CAMPAIGN WITH EMBEDDED INFORMANT.”',\n", " '“SPYGATE.”',\n", " '“THE RUSSIA HOAX, ILLICIT SCHEME CLEAR HILLARY CLINTON',\n", " '“U.S.',\n", " '“WHAT HAPPENED” KNOW!',\n", " '“WHERE WORLD BARACK OBAMA?”',\n", " '”DEPLORABLES” MASSIVE (304 227)',\n", " '✅”US',\n", " 'ありがとうございます',\n", " 'トランプ大統領による、初の、歴史的な日本訪問は、間違いなく、日米同盟の揺るぎない絆を世界に示すことができました。 本当にありがとう、ドナルド。そして、アジア歴訪の大成功をお祈りしています。',\n", " 'フロリダに到着し、早速トランプ大統領との首脳会談に臨みました。今日は、大半を北朝鮮問題に費やし、非常に重要な点で認識を一致させることができました。 「日本のために最善となるようベストを尽くす」 トランプ大統領は、来る米朝首脳会談で拉致問題を取り上げ',\n", " '🇺🇸🇬🇧',\n", " '🇺🇸🇮🇹',\n", " '🇺🇸🇰🇷#UNGA',\n", " '🇺🇸🇵🇷',\n", " '🇺🇸🇺🇸🇺🇸',\n", " '🔥@TPUSA']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted(trump_yelling.keys())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Saving and cleanup" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing 'all_statuses' (list) to file 'all_statuses.txt'.\n", "Writing 'text_and_date' (list) to file 'text_and_date.txt'.\n", "Writing 'trimmed1' (list) to file 'trimmed1.txt'.\n", "Writing 'trimmed2' (list) to file 'trimmed2.txt'.\n", "Writing 'trump_yelling' (Counter) to file 'trump_yelling.txt'.\n" ] } ], "source": [ "%store all_statuses > all_statuses.txt\n", "%store text_and_date > text_and_date.txt\n", "%store trimmed1 > trimmed1.txt\n", "%store trimmed2 > trimmed2.txt\n", "%store trump_yelling > trump_yelling.txt" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%store -r" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }