{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" }, "colab": { "name": "Live tweeting.ipynb", "provenance": [], "collapsed_sections": [], "toc_visible": true, "include_colab_link": true } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "metadata": { "id": "3lyBiqUTGq-Y", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "d43b0f44-2a44-4ffc-d703-543739b340e8" }, "source": [ "#@title @Author and building @date\n", "!pip install watermark\n", "%load_ext watermark\n", "from IPython.display import clear_output;clear_output()\n", "%watermark -a \"Romell D.Z.\" -u -d -p tweepy,scipy,nltk,gensim,sklearn,networkx,textblob,spacy" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Romell D.Z. \n", "last updated: 2020-10-09 \n", "\n", "tweepy 3.6.0\n", "scipy 1.4.1\n", "nltk 3.2.5\n", "gensim 3.6.0\n", "sklearn 0.0\n", "networkx 2.5\n", "textblob 0.15.3\n", "spacy 2.2.4\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "aQe_NEVKHmlN", "cellView": "form" }, "source": [ "#@title Parameters\n", "KEYWORD='podemos' #@param {type: \"string\"}\n", "#@title \n", "NUMBER_OF_TWEETS = 250 #@param {type: \"slider\", min: 250, max: 5000, step:250}\n", "language = \"es\" #@param [\"es\", \"en\"] {allow-input: false}\n", "GEO_LOCATION_BOUNDING_BOX = [-82.1,-18.4,-68.3,-3.1] #@param {type: \"string\"}" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "_wIAMCKgl2al", "cellView": "form" }, "source": [ "#@title Insert your gist which contains .netrc\n", "%%capture\n", "import os, netrc\n", "gist = '' #@param {type: \"string\"}\n", "!wget https://gist.githubusercontent.com/romellfudi/{gist}/raw/.tweet.netrc -O .netrc\n", "file = \".netrc\" if os.path.isfile(\".netrc\") else None\n", "auth = netrc.netrc(file)\n", "ckey,_,csecret=auth.authenticators('tweet_api')\n", "atoken,_,asecret=auth.authenticators('tweet_secret')" ], "execution_count": 5, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "7HkqhZMUGq-j" }, "source": [ "#@title Load libraries\n", "%%capture\n", "!pip install unidecode pyLDAvis pyprind networkx advertools\n", "!pip install deplacy\n", "if language == 'en':\n", " !python -m spacy download en_core_web_sm\n", "else:\n", " !python -m spacy download es_core_news_sm\n", "!mkdir snapshot/\n", "import os\n", "import tweepy\n", "from tweepy import Stream\n", "from unidecode import unidecode\n", "import re\n", "from tweepy import StreamListener\n", "from pprint import pprint\n", "import pyprind\n", "import pandas as pd\n", "import random\n", "from scipy.stats import beta as beta_distribution\n", "import numpy as np\n", "from textblob import TextBlob\n", "from nltk.corpus import stopwords \n", "from nltk import word_tokenize\n", "from nltk.data import load\n", "from nltk.stem import SnowballStemmer\n", "from nltk.stem.wordnet import WordNetLemmatizer\n", "import string\n", "import gensim\n", "from gensim import corpora\n", "\n", "import pyLDAvis\n", "import pyLDAvis.gensim # don't skip this\n", "\n", "from gensim.models.ldamodel import LdaModel\n", "from string import punctuation\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.svm import LinearSVC\n", "from sklearn.model_selection import GridSearchCV\n", "from textblob import Word\n", "from textblob import TextBlob\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "\n", "from sklearn.manifold import TSNE\n", "import matplotlib.pyplot as plt\n", "import advertools as adv\n", "import networkx as nx\n", "import warnings\n", "warnings.simplefilter('ignore')\n", "import pkg_resources,imp\n", "imp.reload(pkg_resources)\n", "import spacy\n", "from spacy import displacy\n", "if language == 'en':\n", " nlp = spacy.load('en_core_web_sm') \n", "else:\n", " nlp = spacy.load('es_core_news_sm')\n", "nltk.download('punkt')\n", "%matplotlib inline\n", "# from textblob import TextBlob\n", "# TextBlob(\"bonjour esto es una 我 它\").detect_language()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "NQwcnzYmGq-p" }, "source": [ "# [BoundingBox](https://boundingbox.klokantech.com/) CSV RAW" ] }, { "cell_type": "code", "metadata": { "id": "WdhD7uzcGq-p" }, "source": [ "auth = tweepy.OAuthHandler(ckey, csecret)\n", "auth.set_access_token(atoken, asecret)\n", "api = tweepy.API(auth)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "4YW2y9MKGq-4" }, "source": [ "class FiniteStreamListener(StreamListener):\n", " \n", " def __init__(self, number_of_tweets):\n", " self.number_of_tweets = number_of_tweets\n", " self.tweets = []\n", " self.tweets_dict = []\n", " pbar = pyprind.ProgBar(number_of_tweets)\n", " super(FiniteStreamListener,self).__init__()\n", " \n", " def on_status(self, status):\n", " if len(self.tweets) < self.number_of_tweets:\n", " # if TextBlob(status.text).detect_language() == 'es':\n", " self.tweets_dict.append(status._json)\n", " place = status._json['place']['name'] if(status._json['place']) else \"\"\n", " self.tweets.append({'date':status.created_at,\n", " 'text':status.text,\n", " 'location':place,\n", " 'followers':status._json['user']['followers_count']})\n", " pbar.update()\n", " else:\n", " return False" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "B4Jo4Pw6Gq-7" }, "source": [ "finite_stream_listener = FiniteStreamListener(number_of_tweets=NUMBER_OF_TWEETS)\n", "streaming_api = Stream(auth=auth, listener=finite_stream_listener,timeout=5) #60" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "RVqBos_wGq-_" }, "source": [ "### The streaming API doesn't allow to filter by location AND keyword simultaneously." ] }, { "cell_type": "code", "metadata": { "id": "QK5z4Ou3Gq-_" }, "source": [ "finite_stream_listener.tweets = []\n", "EMOTICONS = \">:] :-) :) :o) :] :3 :c) :> =] 8) =) :} :^) \"\n", "EMOTICONS = EMOTICONS.strip().split(' ')\n", "# streaming_api.filter(track=EMOTICONS,languages=[language],async=True)\n", "# streaming_api.filter(locations=GEO_LOCATION_BOUNDING_BOX,languages=[language],async=True)\n", "streaming_api.filter(locations=GEO_LOCATION_BOUNDING_BOX,track=EMOTICONS, languages=[language],async=True)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "-Kuo8f7eMycB", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "93576c5d-e218-4cca-e3cc-71bed9c83060" }, "source": [ "print(len(finite_stream_listener.tweets))\n", "# 0% [##############################] 100% | ETA: 00:00:00\n", "# Total time elapsed: 01:05:42 \n", "random.sample([t['text'] for t in finite_stream_listener.tweets],2)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "200\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/plain": [ "['RT @ghwtss__: en fin, orgullosa de que esto pasó en Perú :) https://t.co/vSguM7Znti',\n", " 'RT @sheila_Corzo: Quiero amigos de twitter :) digan hola.\\U0001f97a']" ] }, "metadata": { "tags": [] }, "execution_count": 65 } ] }, { "cell_type": "code", "metadata": { "id": "kVQU0KViGq_N" }, "source": [ "np.save('tweets_dict',finite_stream_listener.tweets_dict)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "HkstNvrtGq_Q" }, "source": [ "def make_lowercase(tweet):\n", " return tweet.lower()\n", "\n", "def remove_diacritics(tweet):\n", " return unidecode(tweet)\n", "\n", "def remove_non_alpha_characters(tweet):\n", " return ''.join(character for character in tweet if character.isalpha() or character == ' ')\n", "\n", "def remove_web_site(tweet):\n", " return re.sub(r'http\\w+', '', tweet)#, flags=re.MULTILINE)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "t-8gwW6SGq_T" }, "source": [ "tweets_df = pd.DataFrame.from_dict(finite_stream_listener.tweets)\n", "tweets_df.rename(columns={'text':'Tweets'},inplace=True)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "CcQmFTC7Gq_V" }, "source": [ "tweets_df['word_count'] = tweets_df['Tweets'].apply(lambda x: len(str(x).split(\" \")))\n", "tweets_df['char_count'] = tweets_df['Tweets'].str.len()\n", "\n", "def avg_word(sentence):\n", " words = sentence.split()\n", " return (sum(len(word) for word in words)/len(words))\n", "\n", "tweets_df['avg_word'] = tweets_df['Tweets'].apply(lambda x: avg_word(x))\n", "tweets_df['hastags'] = tweets_df['Tweets'].apply(lambda x: len([x for x in x.split() if x.startswith('#')]))\n", "tweets_df['numerics'] = tweets_df['Tweets'].apply(lambda x: len([x for x in x.split() if x.isdigit()]))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "DZHeGLcgGq_a" }, "source": [ "cleaned_tweets = list(tweets_df['Tweets'])\n", "for cleaning_function in \\\n", " [make_lowercase, \n", " remove_diacritics,\n", " remove_non_alpha_characters,\n", " remove_web_site]:\n", " cleaned_tweets = [cleaning_function(tweet) for tweet in cleaned_tweets]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "jRBvHlfyGq_d", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "aa29d573-cb53-4341-fdd7-20d23807913d" }, "source": [ "random.sample(cleaned_tweets,3)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['lovelymink hola remi nos ensenaras a cocinar ',\n", " 'el pedo de acomodar eso es que esta termina siendo la cena ',\n", " 'tejadaguadalup pa mi sos la o ']" ] }, "metadata": { "tags": [] }, "execution_count": 289 } ] }, { "cell_type": "code", "metadata": { "id": "DbdB1ODOkuEO", "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "outputId": "e0ce4822-89b0-4fca-a9f5-58d5747125b0" }, "source": [ "KEYWORD = make_lowercase(remove_diacritics(remove_non_alpha_characters(remove_web_site(KEYWORD))))\n", "KEYWORD" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'podemos'" ] }, "metadata": { "tags": [] }, "execution_count": 108 } ] }, { "cell_type": "code", "metadata": { "id": "zisW-O4bGq_g", "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "outputId": "1cd26ed3-a5ce-4cbc-a87e-c8a13dfd1f11" }, "source": [ "number_of_occurences = sum('peru' in tweet for tweet in cleaned_tweets)\n", "print('Nuestra palabra clave aparece: {} veces'.format(number_of_occurences))\n", "print('Nuestra palabra clave apareció en: {}% de los tweets'.format(100 * number_of_occurences/NUMBER_OF_TWEETS))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Nuestra palabra clave aparece: 5 veces\n", "Nuestra palabra clave apareció en: 2.5% de los tweets\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "qo2z0PJ0Gq_j", "colab": { "base_uri": "https://localhost:8080/", "height": 119 }, "outputId": "144fca34-ca4c-4fac-b848-640dcc8566b2" }, "source": [ "pprint([tweet for tweet in cleaned_tweets if KEYWORD in tweet][:number_of_occurences//2+1])" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "['rt romerotroll vamos amigoooos si podemos logra ese numero canciones para '\n", " 'mi ex rt ',\n", " 'rt romerotroll vamos amigoooos si podemos logra ese numero canciones para '\n", " 'mi ex rt ',\n", " 'rt romerotroll vamos amigoooos si podemos logra ese numero canciones para '\n", " 'mi ex rt ']\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "5r_LrvUBGq_5", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "4ea63b9f-ef5d-466a-96f9-bd428927e569" }, "source": [ "indices_of_tweets_containing_keyword=[index for index, tweet in enumerate(cleaned_tweets) if KEYWORD in tweet]\n", "print('index de los 10 tweets:%s'%(indices_of_tweets_containing_keyword))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "index de los 10 tweets:[0, 2, 20, 22, 24, 30, 41, 65, 86, 116, 121, 130, 139, 148]\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "6TZPwnPhGq__" }, "source": [ "distances_between_indices_of_tweets_containing_keyword = [\n", " second_index - first_index for first_index, second_index in \\\n", " zip(indices_of_tweets_containing_keyword[:-1], indices_of_tweets_containing_keyword[1:])\n", "]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "PYv77BARGrAG", "colab": { "base_uri": "https://localhost:8080/", "height": 265 }, "outputId": "fb678f3b-9a12-447e-8ecf-506a6fc69714" }, "source": [ "pd.Series(distances_between_indices_of_tweets_containing_keyword).hist()\n", "plt.savefig('snapshot/lima_tweets_hist.png')" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAARCUlEQVR4nO3dbYxcZ3mH8evGNmB5wQE5GiLHdNMStUJxG/AooQJVsyAqExBppRQlSgNGoEWIqKE1UgwfAkRCDVUNAgUlcpsoCaJZEAmtm0SiEWQb8oGX3dRk47i0BhnVxnVIAg4DbtCSux/2uNqsZndmZ8/u7Dy9ftLI5+WZM/ftZ/zP7Nk5J5GZSJKG34sGXYAkqR4GuiQVwkCXpEIY6JJUCANdkgqxcVAvvG3bthwdHR3Uy6/YL3/5S7Zs2TLoMmpXal9Qbm/2NXxW0tv09PRTmXlup30DC/TR0VGmpqYG9fIrNjk5SavVGnQZtSu1Lyi3N/saPivpLSJ+vNg+T7lIUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQnQN9Ih4aUR8NyK+HxGHI+KTHca8JCK+HBFHI+I7ETG6GsVKkhbXyyf054A3Z+YfABcDuyPiDQvGvA/4WWa+Bvgs8Ol6y5QkddM10HNOu1rdVD0W3kT9cuDOavmrwFsiImqrUpLUVfTyP7iIiA3ANPAa4AuZef2C/Y8DuzPzeLX+Q+DSzHxqwbhxYByg0WjsmpiY6KvomROn+3peHXZu3wpAu91mZGRkzV53rXpubIZTZ1647WzPw26t52yt2NfwWUlvY2Nj05nZ7LSvp0v/M/M3wMURcQ7wtYi4KDMfX24hmXkAOADQbDaz30tf9+y7v6/n1eHY1S1g7S9LXque9+6cZf/MC98WZ3sedqVeSm5fw2e1elvWt1wy8+fAQ8DuBbtOADsAImIjsBV4uo4CJUm96eVbLudWn8yJiM3AW4F/XzDsIPCeavkK4Jvp/6xUktZUL6dczgPurM6jvwj4SmbeFxE3AlOZeRC4DfhiRBwFngGuXLWKJUkddQ30zHwMeF2H7TfMW/4f4M/qLU2StBxeKSpJhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5Jhega6BGxIyIeiognIuJwRFzXYUwrIk5HxKHqccPqlCtJWszGHsbMAnsz89GIeBkwHREPZuYTC8Z9KzPfUX+JkqRedP2EnpknM/PRavkXwBFg+2oXJklansjM3gdHjAIPAxdl5rPztreAe4DjwE+Aj2Tm4Q7PHwfGARqNxq6JiYm+ip45cbqv59Vh5/atALTbbUZGRtbsddeq58ZmOHXmhdvO9jzs1nrO1op9DZ+V9DY2Njadmc1O+3oO9IgYAf4V+FRm3rtg38uB5zOzHRGXAZ/LzAuXOl6z2cypqameXnuh0X339/W8Ohy76e0ATE5O0mq11ux116rnvTtn2T/zwjNxZ3sedms9Z2vFvobPSnqLiEUDvadvuUTEJuY+gX9pYZgDZOazmdmulh8ANkXEtr6qlST1pZdvuQRwG3AkMz+zyJhXVeOIiEuq4z5dZ6GSpKX18i2XNwLXADMRcaja9jHg1QCZeStwBfDBiJgFzgBX5nJOzkuSVqxroGfmI0B0GXMzcHNdRUmSls8rRSWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBWia6BHxI6IeCginoiIwxFxXYcxERGfj4ijEfFYRLx+dcqVJC1mYw9jZoG9mfloRLwMmI6IBzPziXlj3gZcWD0uBW6p/pQkrZGun9Az82RmPlot/wI4AmxfMOxy4K6c823gnIg4r/ZqJUmLiszsfXDEKPAwcFFmPjtv+33ATZn5SLX+DeD6zJxa8PxxYByg0WjsmpiY6KvomROn+3penRqb4dSZQVdRv0597dy+dTDF1KzdbjMyMjLoMmpnX8NnJb2NjY1NZ2az075eTrkAEBEjwD3Ah+eH+XJk5gHgAECz2cxWq9XPYdiz7/6+nlenvTtn2T/T81/f0OjU17GrW4MppmaTk5P0+55bz+xr+KxWbz19yyUiNjEX5l/KzHs7DDkB7Ji3fn61TZK0Rnr5lksAtwFHMvMziww7CLy7+rbLG4DTmXmyxjolSV30cs7gjcA1wExEHKq2fQx4NUBm3go8AFwGHAV+Bby3/lIlSUvpGujVLzqjy5gEPlRXUZKk5fNKUUkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqRNdAj4jbI+LJiHh8kf2tiDgdEYeqxw31lylJ6mZjD2PuAG4G7lpizLcy8x21VCRJ6kvXT+iZ+TDwzBrUIklagcjM7oMiRoH7MvOiDvtawD3AceAnwEcy8/AixxkHxgEajcauiYmJvoqeOXG6r+fVqbEZTp0ZdBX169TXzu1bB1NMzdrtNiMjI4Muo3b2NXxW0tvY2Nh0ZjY77asj0F8OPJ+Z7Yi4DPhcZl7Y7ZjNZjOnpqa6vnYno/vu7+t5ddq7c5b9M72csRounfo6dtPbB1RNvSYnJ2m1WoMuo3b2NXxW0ltELBroK/6WS2Y+m5ntavkBYFNEbFvpcSVJy7PiQI+IV0VEVMuXVMd8eqXHlSQtT9dzBhFxN9ACtkXEceDjwCaAzLwVuAL4YETMAmeAK7OX8ziSpFp1DfTMvKrL/puZ+1qjJGmAvFJUkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIboGekTcHhFPRsTji+yPiPh8RByNiMci4vX1lylJ6qaXT+h3ALuX2P824MLqMQ7csvKyJEnL1TXQM/Nh4JklhlwO3JVzvg2cExHn1VWgJKk3kZndB0WMAvdl5kUd9t0H3JSZj1Tr3wCuz8ypDmPHmfsUT6PR2DUxMdFX0TMnTvf1vDo1NsOpM4Ouon7rqa+d27fWerx2u83IyEhPY9fDe6xX62nO+rHYPC9nvvoxyDm+YOuGvnsbGxubzsxmp30bV1TVMmXmAeAAQLPZzFar1ddx9uy7v8aq+rN35yz7Z9b0r29NrKe+jl3dqvV4k5OT9PqeWw/vsV6tpznrx2LzvJz56scg5/iO3VtWpbc6vuVyAtgxb/38apskaQ3VEegHgXdX33Z5A3A6M0/WcFxJ0jJ0/TktIu4GWsC2iDgOfBzYBJCZtwIPAJcBR4FfAe9drWIlSYvrGuiZeVWX/Ql8qLaKJEl98UpRSSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSpET4EeEbsj4gcRcTQi9nXYvycifhoRh6rH++svVZK0lI3dBkTEBuALwFuB48D3IuJgZj6xYOiXM/PaVahRktSDXj6hXwIczcwfZeavgQng8tUtS5K0XJGZSw+IuALYnZnvr9avAS6d/2k8IvYAfw38FPgP4C8z8786HGscGAdoNBq7JiYm+ip65sTpvp5Xp8ZmOHVm0FXUbz31tXP71lqP1263GRkZ6WnseniP9Wo9zVk/Fpvn5cxXPwY5xxds3dB3b2NjY9OZ2ey0r+splx79M3B3Zj4XER8A7gTevHBQZh4ADgA0m81stVp9vdiefff3X2lN9u6cZf9MXX9968d66uvY1a1ajzc5OUmv77n18B7r1Xqas34sNs/Lma9+DHKO79i9ZVV66+WUywlgx7z186tt/yczn87M56rVvwd21VOeJKlXvQT694ALI+KCiHgxcCVwcP6AiDhv3uo7gSP1lShJ6kXXn9MyczYirgW+DmwAbs/MwxFxIzCVmQeBv4iIdwKzwDPAnlWsWZLUQU8n3jLzAeCBBdtumLf8UeCj9ZYmSVoOrxSVpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUiJ4CPSJ2R8QPIuJoROzrsP8lEfHlav93ImK07kIlSUvrGugRsQH4AvA24LXAVRHx2gXD3gf8LDNfA3wW+HTdhUqSltbLJ/RLgKOZ+aPM/DUwAVy+YMzlwJ3V8leBt0RE1FemJKmbyMylB0RcAezOzPdX69cAl2bmtfPGPF6NOV6t/7Aa89SCY40D49Xq7wI/qKuRAdgGPNV11PAptS8otzf7Gj4r6e23MvPcTjs29l/P8mXmAeDAWr7maomIqcxsDrqOupXaF5Tbm30Nn9XqrZdTLieAHfPWz6+2dRwTERuBrcDTdRQoSepNL4H+PeDCiLggIl4MXAkcXDDmIPCeavkK4JvZ7VyOJKlWXU+5ZOZsRFwLfB3YANyemYcj4kZgKjMPArcBX4yIo8AzzIV+6Yo4ddRBqX1Bub3Z1/BZld66/lJUkjQcvFJUkgphoEtSIQz0PkTEsYiYiYhDETE16Hr6FRG3R8ST1XUEZ7e9MiIejIj/rP58xSBr7McifX0iIk5Uc3YoIi4bZI39iogdEfFQRDwREYcj4rpq+1DP2xJ9DfW8RcRLI+K7EfH9qq9PVtsvqG6TcrS6bcqLa3k9z6EvX0QcA5oLL5waNhHxR0AbuCszL6q2/Q3wTGbeVN235xWZef0g61yuRfr6BNDOzL8dZG0rFRHnAedl5qMR8TJgGvgTYA9DPG9L9PUuhnjeqivmt2RmOyI2AY8A1wF/BdybmRMRcSvw/cy8ZaWv5yf0/8cy82HmvpU03/zbONzJ3D+qobJIX0XIzJOZ+Wi1/AvgCLCdIZ+3JfoaajmnXa1uqh4JvJm526RAjfNloPcngX+JiOnqdgYlaWTmyWr5v4HGIIup2bUR8Vh1SmaoTkl0Ut3V9HXAdyho3hb0BUM+bxGxISIOAU8CDwI/BH6embPVkOPU9B8vA70/b8rM1zN3B8oPVT/iF6e6OKyUc3K3AL8DXAycBPYPtpyViYgR4B7gw5n57Px9wzxvHfoa+nnLzN9k5sXMXWV/CfB7q/VaBnofMvNE9eeTwNeYm6RSnKrOZ549r/nkgOupRWaeqv5hPQ/8HUM8Z9W52HuAL2XmvdXmoZ+3Tn2VNG+Z+XPgIeAPgXOq26RA59up9MVAX6aI2FL90oaI2AL8MfD40s8aKvNv4/Ae4J8GWEttzoZd5U8Z0jmrfsl2G3AkMz8zb9dQz9tifQ37vEXEuRFxTrW8GXgrc78feIi526RAjfPlt1yWKSJ+m7lP5TB364R/yMxPDbCkvkXE3UCLuVt5ngI+Dvwj8BXg1cCPgXdl5lD9gnGRvlrM/diewDHgA/POOQ+NiHgT8C1gBni+2vwx5s43D+28LdHXVQzxvEXE7zP3S88NzH2A/kpm3ljlyATwSuDfgD/PzOdW/HoGuiSVwVMuklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQV4n8B3kmM61foqgsAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "9NE14naYGrAM" }, "source": [ "alpha = 1 + number_of_occurences\n", "beta = 1 + (NUMBER_OF_TWEETS - number_of_occurences)\n", "\n", "x_values = np.linspace(0, 1, 1002)[1:-1]\n", "pdf_y_values = beta_distribution(alpha, beta).pdf(x_values)\n", "cdf_y_values = np.cumsum(pdf_y_values) / np.sum(pdf_y_values)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "pwon85MZGrAS", "colab": { "base_uri": "https://localhost:8080/", "height": 404 }, "outputId": "1d2ce622-bad8-424d-e2a9-ef1aa3dd1fce" }, "source": [ "plt.figure(figsize=(18, 6))\n", "plt.subplot(121)\n", "plt.plot(x_values, pdf_y_values, label=(r'$\\alpha=%.1f,\\ \\beta=%.1f$' % (alpha, beta)))\n", "\n", "plt.xlim(0, 1)\n", "plt.xlabel('Probability of tweet containing keyword')\n", "plt.ylabel('Probability density')\n", "plt.title('Beta Distribution PDF')\n", "plt.legend(loc=1)\n", "\n", "plt.subplot(122)\n", "plt.plot(x_values, cdf_y_values)\n", "plt.xlim(0, 1)\n", "plt.ylim(0, 1.005)\n", "plt.yticks(np.linspace(0, 1, 21))\n", "plt.xlabel('Probability of tweet containing keyword')\n", "plt.ylabel('Cumulative probability')\n", "plt.title('Beta Distribution CDF')\n", "plt.savefig('snapshot/Beta Distribution CDF.png');" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "17S1ekByGrAX" }, "source": [ "ix = [n for n,b in enumerate((cdf_y_values>.5)&(cdf_y_values<.95)) if b]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "_TrWznl_GrAb" }, "source": [ "range_ =cdf_y_values[ix]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "gjTYPbyoGrAh" }, "source": [ "a=np.c_[x_values[ix],cdf_y_values[ix]]\n", "max_ix,_ = np.unravel_index(a.argmax(), a.shape)\n", "min_ix,_ = np.unravel_index(a.argmin(), a.shape)\n", "fifth_percentile, ninety_fifth_percentile = x_values[max_ix],x_values[min_ix]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "fXPKRlrgGrAr", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "ad9a4588-fc26-4487-8b2c-00822faf73e4" }, "source": [ "print('Con 90% de certeza digamos que la verdadera probabilidad se encuentra entre: {} y {}'.format(\n", " round(fifth_percentile, 10), round(ninety_fifth_percentile, 10)))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Con 90% de certeza digamos que la verdadera probabilidad se encuentra entre: 0.023976024 y 0.000999001\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "dCK4CLU9GrAw", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "18877da0-99a0-40f5-dcde-e55b29c62e07" }, "source": [ "def compute_total_probability_that_probability_less_than_p(p):\n", " return max(cumulative_prob for cumulative_prob, x_value in zip(cdf_y_values, x_values) if x_value < p)\n", "\n", "print('La probabilidad que la verdadera probabilidad es > .1 es: {}'.format(\n", " 1 - compute_total_probability_that_probability_less_than_p(.1)))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "La probabilidad que la verdadera probabilidad es > .1 es: 3.328883511244385e-05\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "NQwp8QP1pPpb" }, "source": [ "tweets_has_location_df = tweets_df[tweets_df['location']!='']" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "DQ9ECRsdGrA1", "colab": { "base_uri": "https://localhost:8080/", "height": 297 }, "outputId": "97bce4f1-8b45-4b42-c3a4-ac39c8416293" }, "source": [ "tweets_has_location_df.value_counts(subset=['location'],ascending=True).plot(kind='barh')\n", "plt.title('Most Frequent locations')\n", "plt.xlabel('Cantidad')\n", "plt.tight_layout()\n", "plt.savefig('snapshot/most Frequent locations.png');" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "7uftv9GkGrBK" }, "source": [ "tweets_df.to_csv('lima_tweets.csv',index=None)\n", "tweets_has_location_df.to_csv('lima_tweets_location.csv',index=None)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "EFeMp4-OGrBM" }, "source": [ "tweets_df = pd.read_csv('lima_tweets.csv')\n", "tweets_has_location_df = pd.read_csv('lima_tweets_location.csv')" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "qwmkja1aGrBP", "colab": { "base_uri": "https://localhost:8080/", "height": 85 }, "outputId": "02bf116c-f439-4ab3-e570-82caf367109d" }, "source": [ "import nltk\n", "nltk.download('stopwords')\n", "nltk.download('wordnet')\n", "stop = stopwords.words('spanish') " ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", "[nltk_data] Unzipping corpora/stopwords.zip.\n", "[nltk_data] Downloading package wordnet to /root/nltk_data...\n", "[nltk_data] Unzipping corpora/wordnet.zip.\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "JedRxzniGrBS" }, "source": [ "doc_complete = tweets_df.Tweets.values\n", "exclude = set(string.punctuation) \n", "lemma = WordNetLemmatizer()\n", "def clean(doc):\n", " stop_free = \" \".join([i for i in doc.lower().split() if i not in stop])\n", " punc_free = ''.join(ch for ch in stop_free if ch not in exclude)\n", " normalized = \" \".join(lemma.lemmatize(word) for word in punc_free.split())\n", " return normalized\n", "\n", "doc_clean = [clean(doc).split() for doc in doc_complete] \n", "tweets_df.dropna(inplace=True)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "rtatm1ZEGrBU" }, "source": [ "tweets_df['Tweets_clean'] = pd.Series(doc_clean).apply(lambda x:' '.join(x))\n", "tweets_df['word_count'] = tweets_df['Tweets_clean'].apply(lambda x: len(str(x).split(\" \")))\n", "tweets_df['char_count'] = tweets_df['Tweets_clean'].str.len()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "HXsnnCa6GrBW", "colab": { "base_uri": "https://localhost:8080/", "height": 261 }, "outputId": "92409e67-eb49-4038-e5fd-dea5d1d8406f" }, "source": [ "def avg_word(sentence):\n", " words = sentence.split()\n", " if len(words)==0:\n", " return 0\n", " return (sum(len(word) for word in words)/len(words))\n", "\n", "tweets_df['avg_word'] = tweets_df['Tweets_clean'].apply(lambda x: avg_word(x))\n", "tweets_df['hastags'] = tweets_df['Tweets'].apply(lambda x: len([x for x in x.split() if x.startswith('#')]))\n", "tweets_df['numerics'] = tweets_df['Tweets'].apply(lambda x: len([x for x in x.split() if x.isdigit()]))\n", "tweets_df.head(3)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateTweetslocationfollowersword_countchar_countavg_wordhastagsnumericsTweets_clean
42020-10-09 04:11:20Poco a poco todo se va acomodandoSanta Anita1702136.00000000va acomodando
112020-10-09 04:11:25Hay gente que han perdido la vergüenza, preten...Jesús Maria517516.42857100gente perdido vergüenza pretender ser candidat...
122020-10-09 04:11:25Éste Viernes de Sankirtana, nos visita Hanumat...Lurigancho401141167.35714300éste viernes sankirtana visita hanumat prana d...
\n", "
" ], "text/plain": [ " date ... Tweets_clean\n", "4 2020-10-09 04:11:20 ... va acomodando\n", "11 2020-10-09 04:11:25 ... gente perdido vergüenza pretender ser candidat...\n", "12 2020-10-09 04:11:25 ... éste viernes sankirtana visita hanumat prana d...\n", "\n", "[3 rows x 10 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 167 } ] }, { "cell_type": "code", "metadata": { "id": "JbKK0bseGrBa", "colab": { "base_uri": "https://localhost:8080/", "height": 297 }, "outputId": "c31bee2b-84fd-46ad-869b-abc785a6d0a3" }, "source": [ "freq = pd.Series(' '.join(tweets_df['Tweets_clean']).split()).value_counts()[:5]\n", "freq.plot(kind='barh')\n", "plt.title('Most Frequent words')\n", "plt.xlabel('Count')\n", "plt.tight_layout()\n", "plt.savefig('snapshot/most Frequent words.png');" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "xGq7C0LVGrBd", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "54d2b5cf-e01d-43b3-c7f3-babbd193edd5" }, "source": [ "pprint(tweets_df['Tweets_clean'][:2])" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "4 va acomodando\n", "11 gente perdido vergüenza pretender ser candidat...\n", "Name: Tweets_clean, dtype: object\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "Fr2Zd6ReGrBf" }, "source": [ "dictionary = corpora.Dictionary(tweets_df['Tweets_clean'].apply(lambda x:x.split()))\n", "# Converting list of documents (corpus) into Document Term Matrix using dictionary prepared above.\n", "doc_term_matrix = [dictionary.doc2bow(doc) for doc in doc_clean]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Cwvv6PmRGrBi", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "91f75786-bc53-461c-f57b-becad4394419" }, "source": [ "pprint(doc_term_matrix[:2])" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "[[], [(86, 1)]]\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "hoxzAYYmGrBk" }, "source": [ "Lda = gensim.models.ldamodel.LdaModel\n", "ldamodel = Lda(doc_term_matrix, num_topics=3, id2word = dictionary, passes=50)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "yK9YfL5XGrBn", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "8767584f-98a2-40fb-b180-fd5b041097f0" }, "source": [ "from pprint import pprint\n", "pprint(ldamodel.print_topics(num_topics=3, num_words=3))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "[(0, '0.056*\"va\" + 0.056*\"da\" + 0.035*\"ser\"'),\n", " (1, '0.061*\"gusta\" + 0.028*\"haciendo\" + 0.028*\"hizo\"'),\n", " (2, '0.077*\"buenas\" + 0.056*\"noches\" + 0.056*\"amigo\"')]\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "UpkZb_smGrBq" }, "source": [ "# from gensim.test.utils import datapath\n", "# fname = datapath(\"lda_lima_tweet_model\")\n", "ldamodel.save(\"lda_lima_tweet_model\")" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "we2vTlmGGrBx" }, "source": [ "from gensim.models.ldamodel import LdaModel\n", "ldamodel = LdaModel.load(\"lda_lima_tweet_model\")" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "yfMGAm44GrB0" }, "source": [ "doc_lda = ldamodel[doc_term_matrix]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "38jMYB9jGrB3", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "133b0f1c-333e-408a-dab0-4bca0108093a" }, "source": [ "print('Perplexity: ', ldamodel.log_perplexity(doc_term_matrix)) # a measure of how good the model is. lower the better" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Perplexity: -5.379126834037385\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "zwVZoVuoGrB7", "colab": { "base_uri": "https://localhost:8080/", "height": 881 }, "outputId": "f8398a5a-203c-40e5-b24b-e137cfe4414a" }, "source": [ "pyLDAvis.enable_notebook()\n", "vis = pyLDAvis.gensim.prepare(ldamodel, doc_term_matrix, dictionary)\n", "vis" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "\n", "\n", "\n", "\n", "
\n", "" ], "text/plain": [ "PreparedData(topic_coordinates= x y topics cluster Freq\n", "topic \n", "1 0.100282 -0.075889 1 1 41.999370\n", "2 -0.124327 -0.038993 2 1 29.245487\n", "0 0.024046 0.114882 3 1 28.755144, topic_info= Term Freq Total Category logprob loglift\n", "119 buenas 4.000000 4.000000 Default 30.0000 30.0000\n", "122 noches 3.000000 3.000000 Default 29.0000 29.0000\n", "1 va 3.000000 3.000000 Default 28.0000 28.0000\n", "54 amigo 3.000000 3.000000 Default 27.0000 27.0000\n", "86 gusta 5.000000 5.000000 Default 26.0000 26.0000\n", ".. ... ... ... ... ... ...\n", "42 dio 0.732745 1.134113 Topic3 -4.2741 0.8095\n", "41 cólera 0.732745 1.134113 Topic3 -4.2741 0.8095\n", "31 muchas 1.802959 2.873987 Topic3 -3.3737 0.7801\n", "3 gente 0.764516 2.263886 Topic3 -4.2316 0.1608\n", "43 hizo 0.735490 3.044914 Topic3 -4.2704 -0.1743\n", "\n", "[127 rows x 6 columns], token_table= Topic Freq Term\n", "term \n", "77 3 0.881755 alexis\n", "54 2 0.879114 amigo\n", "23 1 0.818875 anja\n", "78 3 0.881755 aria\n", "55 2 0.870030 aziraphale\n", "... ... ... ...\n", "123 2 0.870027 🌛\n", "124 2 0.870027 👏👏👏\n", "48 2 0.870018 😍\n", "125 2 0.870027 🙏\n", "76 2 0.870024 🤦🏻‍♂️xd\n", "\n", "[97 rows x 3 columns], R=30, lambda_step=0.01, plot_opts={'xlab': 'PC1', 'ylab': 'PC2'}, topic_order=[2, 3, 1])" ] }, "metadata": { "tags": [] }, "execution_count": 184 } ] }, { "cell_type": "code", "metadata": { "id": "2CZ_1VveGrB-", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "f1def4b8-f9bd-49d1-8410-93388f57f2f4" }, "source": [ "def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):\n", " texts_out = []\n", " for sent in texts:\n", " doc_ = nlp(sent)\n", " texts_out.append(list(set([str(c.head) for c in doc_ if c.head.tag_.startswith(tuple(allowed_postags))])))\n", " return texts_out\n", "\n", "lemmatization(tweets_df['Tweets_clean'][:5],['VERB'])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[['acomodando'], ['pretender'], ['da'], [], ['cuídate']]" ] }, "metadata": { "tags": [] }, "execution_count": 185 } ] }, { "cell_type": "code", "metadata": { "id": "IUKd4S9VGrCA" }, "source": [ "def join_comma(row_list):\n", " if row_list == []:\n", " return np.NaN\n", " else:\n", " return ', '.join(row_list)\n", "\n", "tweets_df['ACTIONS']=pd.Series(lemmatization(tweets_df['Tweets_clean'],['VERB'])).apply(join_comma)\n", "tweets_df['NOUNS']=pd.Series(lemmatization(tweets_df['Tweets_clean'],['NOUN'])).apply(join_comma)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "ctE8O8UkGrCD", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "82fac4e9-c861-48a9-abff-9c28bf801273" }, "source": [ "tweets_df[['Tweets_clean','NOUNS','ACTIONS']].head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Tweets_cleanNOUNSACTIONS
4va acomodandodesgraciacuídate
11gente perdido vergüenza pretender ser candidat...NaNNaN
12éste viernes sankirtana visita hanumat prana d...piqueos, basuracomer
14untkcqr emilia14614784 luvwaifus cuídate anja ...NaNhagan
33ohohuh1 cuídate mucho desgracia todoNaNNaN
\n", "
" ], "text/plain": [ " Tweets_clean NOUNS ACTIONS\n", "4 va acomodando desgracia cuídate\n", "11 gente perdido vergüenza pretender ser candidat... NaN NaN\n", "12 éste viernes sankirtana visita hanumat prana d... piqueos, basura comer\n", "14 untkcqr emilia14614784 luvwaifus cuídate anja ... NaN hagan\n", "33 ohohuh1 cuídate mucho desgracia todo NaN NaN" ] }, "metadata": { "tags": [] }, "execution_count": 187 } ] }, { "cell_type": "code", "metadata": { "id": "VOUx5UBjGrCF", "colab": { "base_uri": "https://localhost:8080/", "height": 425 }, "outputId": "7f711ad6-2cb6-48b6-ef85-4793b7796bfa" }, "source": [ "tweets_df.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateTweetslocationfollowersword_countchar_countavg_wordhastagsnumericsTweets_cleanACTIONSNOUNS
42020-10-09 04:11:20Poco a poco todo se va acomodandoSanta Anita1702136.00000000va acomodandocuídatedesgracia
112020-10-09 04:11:25Hay gente que han perdido la vergüenza, preten...Jesús Maria517516.42857100gente perdido vergüenza pretender ser candidat...NaNNaN
122020-10-09 04:11:25Éste Viernes de Sankirtana, nos visita Hanumat...Lurigancho401141167.35714300éste viernes sankirtana visita hanumat prana d...comerpiqueos, basura
142020-10-09 04:11:27@UnTkCqr @Emilia14614784 @LuvWaifus Cuídate mu...Villa el Salvador371131077.30769200untkcqr emilia14614784 luvwaifus cuídate anja ...haganNaN
332020-10-09 04:11:43@ohohuh1 :( cuídate mucho.\\n\\nQué desgracia, t...Magdalena del Mar1965366.40000000ohohuh1 cuídate mucho desgracia todoNaNNaN
\n", "
" ], "text/plain": [ " date ... NOUNS\n", "4 2020-10-09 04:11:20 ... desgracia\n", "11 2020-10-09 04:11:25 ... NaN\n", "12 2020-10-09 04:11:25 ... piqueos, basura\n", "14 2020-10-09 04:11:27 ... NaN\n", "33 2020-10-09 04:11:43 ... NaN\n", "\n", "[5 rows x 12 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 188 } ] }, { "cell_type": "code", "metadata": { "id": "FlBB6cxMGrCH" }, "source": [ "tweets_df.to_csv('tweets_solutions.csv',index=None)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "VrTbtR9EGrCL" }, "source": [ "tweets_df = pd.read_csv('tweets_solutions.csv')\n", "tweets_df.dropna(inplace=True)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "h5_y9mqtGrCP", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "ea701c0c-977c-435a-c860-92c9eb376909" }, "source": [ "tweets_df['Tweets_clean']" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 va acomodando\n", "2 éste viernes sankirtana visita hanumat prana d...\n", "Name: Tweets_clean, dtype: object" ] }, "metadata": { "tags": [] }, "execution_count": 191 } ] }, { "cell_type": "code", "metadata": { "id": "uXm8kPtqGrCT", "colab": { "base_uri": "https://localhost:8080/", "height": 85 }, "outputId": "fa4e715b-1b75-410b-fb57-3e5c48dafbde" }, "source": [ "tweets_df['Tweets_clean'] = tweets_df['Tweets_clean'].apply(lambda x: \" \".join([Word(word).lemmatize() for word in x.split()]))\n", "print(tweets_df.shape)\n", "tweets_df['Tweets_clean'].head()" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "(2, 12)\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/plain": [ "0 va acomodando\n", "2 éste viernes sankirtana visita hanumat prana d...\n", "Name: Tweets_clean, dtype: object" ] }, "metadata": { "tags": [] }, "execution_count": 192 } ] }, { "cell_type": "code", "metadata": { "id": "SsJLbnTeGrCW" }, "source": [ "# displacy.serve(doc, style=\"dep\")" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "bOSJ2BGSGrCd", "colab": { "base_uri": "https://localhost:8080/", "height": 378 }, "outputId": "4d49e8f1-2c95-4f97-9698-dd90d0f1c1b5" }, "source": [ "doc = nlp(tweets_df['Tweets_clean'].iloc[-1])\n", "from IPython.display import HTML, Image, display\n", "displacy.render(doc, style=\"dep\",jupyter=True,options={'distance':100})" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", " éste\n", " PRON\n", "\n", "\n", "\n", " viernes\n", " NOUN\n", "\n", "\n", "\n", " sankirtana\n", " ADJ\n", "\n", "\n", "\n", " visita\n", " NOUN\n", "\n", "\n", "\n", " hanumat\n", " PROPN\n", "\n", "\n", "\n", " prana\n", " PROPN\n", "\n", "\n", "\n", " da\n", " VERB\n", "\n", "\n", "\n", " gran\n", " ADJ\n", "\n", "\n", "\n", " entusiasta\n", " ADJ\n", "\n", "\n", "\n", " sankirtanero\n", " PROPN\n", "\n", "\n", "\n", " perú\n", " PROPN\n", "\n", "\n", "\n", " méxico\n", " PROPN\n", "\n", "\n", "\n", " colombi…\n", " PROPN\n", "\n", "\n", "\n", " httpstco4hqv9gupmk\n", " PROPN\n", "\n", "\n", "\n", " \n", " \n", " nsubj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " obl\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " amod\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " nsubj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " amod\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " flat\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " amod\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " obj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " obj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " obj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " flat\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " flat\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " flat\n", " \n", " \n", "\n", "" ], "text/plain": [ "" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "BB6t0TIcGrCg", "colab": { "base_uri": "https://localhost:8080/", "height": 52 }, "outputId": "010a3c75-05c6-45a7-b07e-8a72608bb5b6" }, "source": [ "displacy.render(doc, style=\"ent\",jupyter=True,)" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "
éste viernes sankirtana visita hanumat prana da gran entusiasta sankirtanero perú méxico colombi\n", "\n", " … httpstco4hqv9gupmk\n", " MISC\n", "\n", "
" ], "text/plain": [ "" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "dsjGrCmMGrCi", "colab": { "base_uri": "https://localhost:8080/", "height": 238 }, "outputId": "c05b27ad-32e6-4f18-95b1-cf3da79665d6" }, "source": [ "TextBlob(tweets_df['Tweets_clean'].iloc[-1]).ngrams(2)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[WordList(['éste', 'viernes']),\n", " WordList(['viernes', 'sankirtana']),\n", " WordList(['sankirtana', 'visita']),\n", " WordList(['visita', 'hanumat']),\n", " WordList(['hanumat', 'prana']),\n", " WordList(['prana', 'da']),\n", " WordList(['da', 'gran']),\n", " WordList(['gran', 'entusiasta']),\n", " WordList(['entusiasta', 'sankirtanero']),\n", " WordList(['sankirtanero', 'perú']),\n", " WordList(['perú', 'méxico']),\n", " WordList(['méxico', 'colombi…']),\n", " WordList(['colombi…', 'httpstco4hqv9gupmk'])]" ] }, "metadata": { "tags": [] }, "execution_count": 205 } ] }, { "cell_type": "code", "metadata": { "id": "b7RrEC-IGrCk", "colab": { "base_uri": "https://localhost:8080/", "height": 376 }, "outputId": "fcfe9870-2515-4f57-d00b-755f68033409" }, "source": [ "tf1 = (tweets_df['Tweets_clean']).apply(lambda x: pd.value_counts(x.split(\" \"))).sum(axis = 0).reset_index()\n", "tf1.columns = ['words','tf']\n", "for i,word in enumerate(tf1['words']):\n", " tf1.loc[i, 'idf'] = np.log(tweets_df.shape[0]/(len(tweets_df[tweets_df['Tweets_clean'].str.contains(word)])))\n", "\n", "tf1['tfidf'] = tf1['tf'] * tf1['idf']\n", "print(tf1.shape)\n", "tf1.head(10)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "(16, 4)\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordstfidftfidf
0acomodando1.00.6931470.693147
1va1.00.6931470.693147
2perú1.00.6931470.693147
3gran1.00.6931470.693147
4sankirtanero1.00.6931470.693147
5da1.00.0000000.000000
6méxico1.00.6931470.693147
7hanumat1.00.6931470.693147
8éste1.00.6931470.693147
9colombi…1.00.6931470.693147
\n", "
" ], "text/plain": [ " words tf idf tfidf\n", "0 acomodando 1.0 0.693147 0.693147\n", "1 va 1.0 0.693147 0.693147\n", "2 perú 1.0 0.693147 0.693147\n", "3 gran 1.0 0.693147 0.693147\n", "4 sankirtanero 1.0 0.693147 0.693147\n", "5 da 1.0 0.000000 0.000000\n", "6 méxico 1.0 0.693147 0.693147\n", "7 hanumat 1.0 0.693147 0.693147\n", "8 éste 1.0 0.693147 0.693147\n", "9 colombi… 1.0 0.693147 0.693147" ] }, "metadata": { "tags": [] }, "execution_count": 206 } ] }, { "cell_type": "code", "metadata": { "id": "104JtA-sGrCn" }, "source": [ "stop = set(stopwords.words('spanish'))\n", "stop |= set(['lima','si','ser'])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "OOXx4IfXGrCo", "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "outputId": "2224b188-a14e-4713-eae9-06eb25599032" }, "source": [ "tfidf = TfidfVectorizer(max_features=1000, lowercase=True, analyzer='word', stop_words= stop,ngram_range=(1,1))\n", "train_vect = tfidf.fit_transform(tweets_df['Tweets_clean'])\n", "train_vect" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "<2x16 sparse matrix of type ''\n", "\twith 16 stored elements in Compressed Sparse Row format>" ] }, "metadata": { "tags": [] }, "execution_count": 208 } ] }, { "cell_type": "code", "metadata": { "id": "wRUM6oA4GrCt", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "38b634c0-f8f0-41a3-90e0-d13fee1fd4dd" }, "source": [ "from sklearn.feature_extraction.text import CountVectorizer\n", "bow = CountVectorizer(max_features=1000, lowercase=True, ngram_range=(1,1),analyzer = \"word\")\n", "train_bow = bow.fit_transform(tweets_df['Tweets_clean'])\n", "print(train_bow.shape)\n", "train_bow" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "(2, 16)\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/plain": [ "<2x16 sparse matrix of type ''\n", "\twith 16 stored elements in Compressed Sparse Row format>" ] }, "metadata": { "tags": [] }, "execution_count": 209 } ] }, { "cell_type": "code", "metadata": { "id": "P236la94GrCv" }, "source": [ "from sklearn.metrics.pairwise import linear_kernel\n", "\n", "def find_similar(tfidf_matrix, index, top_n = 5):\n", " cosine_similarities = linear_kernel(tfidf_matrix[index:index+1], tfidf_matrix).flatten()\n", " related_docs_indices = [i for i in cosine_similarities.argsort()[::-1] if i != index]\n", " return [(index, cosine_similarities[index]) for index in related_docs_indices][0:top_n]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "aaSpWD5oGrCy", "colab": { "base_uri": "https://localhost:8080/", "height": 131 }, "outputId": "f005cd42-0fe5-442b-afec-d7f6fe6db560" }, "source": [ "tweet = tweets_df.sample(1)\n", "tweet" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateTweetslocationfollowersword_countchar_countavg_wordhastagsnumericsTweets_cleanACTIONSNOUNS
22020-10-09 04:11:25Éste Viernes de Sankirtana, nos visita Hanumat...Lurigancho401141167.35714300éste viernes sankirtana visita hanumat prana d...comerpiqueos, basura
\n", "
" ], "text/plain": [ " date ... NOUNS\n", "2 2020-10-09 04:11:25 ... piqueos, basura\n", "\n", "[1 rows x 12 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 211 } ] }, { "cell_type": "code", "metadata": { "id": "l0xeCmaEGrC1", "colab": { "base_uri": "https://localhost:8080/", "height": 54 }, "outputId": "39a9dea1-a252-449a-f316-8b670ba86b05" }, "source": [ "print(tweet['Tweets'].values)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "['Éste Viernes de Sankirtana, nos visita Hanumat Prana Das, un gran y entusiasta sankirtanero. \\nPerú /México /Colombi… https://t.co/4hqv9gupmK']\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "KbwXoco7GrC3" }, "source": [ "tweet.reset_index(drop=True,inplace=True)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "5reTsQtQGrC6", "colab": { "base_uri": "https://localhost:8080/", "height": 111 }, "outputId": "1f03f6a6-3d32-4e87-8173-2c2d03990476" }, "source": [ "pd.options.display.max_colwidth = 120\n", "vals = pd.DataFrame()\n", "for index, score in find_similar(train_vect, tweet.index[0],top_n = 5):\n", " vals = vals.append(tweets_df.iloc[index:index+1,:])\n", " vals.loc[index,'score'] = score\n", " \n", "vals[['Tweets','score']].head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Tweetsscore
2Éste Viernes de Sankirtana, nos visita Hanumat Prana Das, un gran y entusiasta sankirtanero. \\nPerú /México /Colombi...NaN
1NaN0.0
\n", "
" ], "text/plain": [ " Tweets score\n", "2 Éste Viernes de Sankirtana, nos visita Hanumat Prana Das, un gran y entusiasta sankirtanero. \\nPerú /México /Colombi... NaN\n", "1 NaN 0.0" ] }, "metadata": { "tags": [] }, "execution_count": 214 } ] }, { "cell_type": "code", "metadata": { "id": "d2F9cn2eGrC-" }, "source": [ "corpus = nlp('\\n'.join(tweets_df['NOUNS'].dropna()))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "DcV2upsFGrDD", "colab": { "base_uri": "https://localhost:8080/", "height": 142 }, "outputId": "808101e6-e786-4f86-e24a-66a786d78350" }, "source": [ "visited = {}\n", "nouns = []\n", "for word in corpus:\n", " if word.pos_.startswith('N') and len(word.string) < 15 and len(word.string) > 2:\n", " token = word.string.strip().lower()\n", " if token in visited:\n", " visited[token] += 1\n", " continue\n", " else:\n", " visited[token] = 1\n", " nouns.append(word)\n", "nouns = sorted(nouns, key=lambda w: -visited[w.string.strip().lower()])[:150]\n", "pd.DataFrame([[w.text, visited[w.string.strip().lower()]] for w in nouns], columns=['Noun', 'Freq'])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NounFreq
0desgracia1
1piqueos1
2basura1
\n", "
" ], "text/plain": [ " Noun Freq\n", "0 desgracia 1\n", "1 piqueos 1\n", "2 basura 1" ] }, "metadata": { "tags": [] }, "execution_count": 216 } ] }, { "cell_type": "code", "metadata": { "id": "IG1DDB3XGrDG", "colab": { "base_uri": "https://localhost:8080/", "height": 296 }, "outputId": "a05546d1-c8eb-4ba5-d184-fce555496cde" }, "source": [ "def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):\n", " assert low_dim_embs.shape[0] >= len(labels), \"More labels than embeddings\"\n", " plt.figure() # in inches\n", " for i, label in enumerate(labels):\n", " x, y = low_dim_embs[i, :]\n", " plt.scatter(x, y, s=2.0)\n", " plt.annotate(label, xy=(x, y), xytext=(5, 2),\n", " textcoords='offset points',ha='right',va='bottom')\n", " plt.tight_layout()\n", " plt.savefig('snapshot/lima_words_TSNE.png')\n", " plt.show()\n", "\n", "# Creating the tsne plot [Warning: will take time]\n", "tsne = TSNE(perplexity=50.0, n_components=2, init='pca', n_iter=10000)\n", "\n", "low_dim_embedding = tsne.fit_transform(np.array([word.vector for word in nouns]))\n", "\n", "# Finally plotting and saving the fig \n", "plot_with_labels(low_dim_embedding, [word.text for word in nouns])" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEXCAYAAAD4LtBgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAVP0lEQVR4nO3de5BW5Z3g8e8PUBOJCZOBVVYJOilEuxu0SQMaYWPKbMTLjlFzIZFSo2IUrcxWJU60tIyxKlXeslbceFlrhvIWwwrjhVxcvMTxVmpoCCJohE6aiOgIUUPSICjw2z/62NMgt9jdvA/d30/VW336Oaff9zlPCV/f8x4gMhNJkkrTr9YTkCRpawyUJKlIBkqSVCQDJUkqkoGSJBXJQEmSitStgYqIKyLiu935nB9WRPzXiJhV63lI0q4UEf8SEXW1nkd3iO78c1ARcQXQlpnXdduTAoMHD87hw4cTEd35tJKkAsybN+9PmTnkAzsys0sP4FJgCfAU8DPgu8Cngf8HzAOeBA6pjv0KsAh4HniiGtsbuAd4EbgPeA5oqva1AT/66Ec/mk8++WT+4Ac/yKampqyvr8+pU6fmpk2bMjNz6dKlecwxx+To0aOzsbExW1pasrW1Nevr6zMzs7W1NSdMmJCNjY3Z2NiYTz/9dErS7qy1tTVHjhyZ3/jGN/KQQw7JU089NdesWZOf+9zncu7cuZmZOX369BwxYkSOHTs2zznnnLzgggsyM/OMM87ImTNndjzXwIEDO7avueaabGpqylGjRuXll1/eMf6jH/0o6+vrs76+Pq+//vrMzGxra8vjjz8+R48enfX19TljxowPdS5Ac26tL1sb3NkH8BnghSoyHwdaqkA9CoyojhkP/LrafgHYv9oeVH39LvB/qu0GYEOnQCXw1c985jOZmfnmm292nNCUKVNy9uzZmZk5bty4vPfeezMz85133sk1a9ZsFqg1a9bkO++8k5mZS5YsyfefT5J2V62trQnkU089lZmZ3/zmN/Paa6/tCNRrr72Ww4YNy5UrV+b69evzs5/97A4DNWfOnI7/+d+4cWOecMIJ+fjjj2dzc3M2NDRkW1tb/vWvf826urqcP39+zpo1K88555yO5/nzn//8oc5lW4Ea0IV3ZQATgfsycy1ARMwGPgJ8FpjZ6ZLcXtXXp4HbIuIe4N5qbALwY4DMXBQRCzs9/0bg397/5rHHHuOaa65h7dq1vPXWW9TX13P00UezYsUKTj75ZAA+8pGPfGCS7733HhdeeCELFiygf//+LFmypIunLUm1N2zYMI466igApkyZwg033NCx77nnnuPoo49myJD2K2df+9rXdvh730MPPcRDDz1EY2MjAG1tbSxdupS2tjZOPvlkBg4cCMApp5zCk08+yaRJk/jOd77D9773PU488UQmTpzYrefXE3fx9QP+nJmHd3ocCpCZ5wGXAcOAeRHx9zt4rnWZuRFg3bp1TJs2jVmzZvHCCy8wdepU1q1bt1MTuv7669l33315/vnnaW5u5t133/3wZydJhdjyc/md/Zx+wIABbNq0CYBNmzZ1/J6YmVxyySUsWLCABQsW0NLSwtlnn73N5zn44IOZP38+o0aN4rLLLuPKK6/8kGeydV0N1BPAlyLioxGxD/A/gLVAa0R8BSDaHVZtfzozn8vMy4FVtIfqaeCr1f46YNTWXuj9GA0ePJi2tjZmzWq/QW+fffbhgAMO4P777wdg/fr1rF27drOfXb16NUOHDqVfv37ceeedbNy4sYunLUm198orr/DMM88AcPfddzNhwoSOfePHj+fxxx/nzTff5L333mPmzJkd+w488EDmzZsHwOzZs3nvvfcAOPbYY5k+fTptbW0ArFixgpUrVzJx4kTuv/9+1q5dy5o1a7jvvvuYOHEir732GnvvvTdTpkzhoosuYv78+d16fl26xJeZ8yPi/9J+08NKYG616zTg5oi4DNgDmFEdc21EjACC9s+pngeWArdHxIvA74DFwOotX2vQoEFMnTqVhoYG9ttvP8aOHdux78477+Rb3/oWl19+OXvssQczZ86kX7//bO+0adM49dRTueOOO5g0aVLH21RJ2p2NHDmSG2+8kbPOOou6ujrOP/98fv7znwMwdOhQrrjiCo488kgGDRrE4Ycf3vFzU6dO5aSTTuKwww7b7PfEL37xi7z00ksceeSRAHzsYx/jrrvuYsyYMZx55pmMGzcOgHPOOYfGxkbmzJnDRRddRL9+/dhjjz24+eabu/X8uvU28w81gYj+wB6ZuS4iPg08AozMzI7rcE1NTdnc3FyzOUpSaZYtW8aJJ57IokWLdur42267jebmZn7yk5/08Mz+dhExLzObthzv6k0S3WFv4LGI2IP2d1bTOsdJktQ31fwd1M7wHZQk9V7begfVZ/4uvnl/fJvT//U55v3x7VpPRZJ6h+W/gTtPaf/aA/pMoH78yBKeWPonfvyIfwZKkrrFv18Fv3+0/WsPKOEzqF3in75w8GZfJUlddPTFm3/tZn4GJUmqqT7/GZQkafdioCRJRTJQkqQiGShJUpEMlCSpSAZKklQkAyVJKpKBkiQVyUBJkopkoCRJRTJQkqQiGShJUpEMlCSpSAZKklSkbglUREyPiJURsajT2Ccj4uGIWFp9/btqPCLihohoiYiFETGmO+YgSepduusd1G3ApC3GLgYezcwRwKPV9wDHASOqx7nAzd00B0lSL9ItgcrMJ4C3thg+Cbi92r4d+FKn8Tuy3bPAoIgY2h3zkCT1Hj35GdS+mfl6tf0fwL7V9v7A8k7HvVqNbSYizo2I5ohoXrVqVQ9OU5JUol1yk0S2/7vyf9O/LZ+Zt2ZmU2Y2DRkypIdmJkkqVU8G6o33L91VX1dW4yuAYZ2OO6AakySpQ08GajZwRrV9BvBAp/HTq7v5jgBWd7oUKEkSAAO640ki4mfA0cDgiHgV+D5wFXBPRJwN/BH4anX4r4DjgRZgLfDN7piDJKl36ZZAZebXt7HrmK0cm8AF3fG6kqTey79JQpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUWqWaAiYlJEvBwRLRFxca3mIUkqU00CFRH9gRuB44A64OsRUVeLuUiSylSrd1DjgJbM/ENmvgvMAE6q0VwkSQWqVaD2B5Z3+v7VakySJKDgmyQi4tyIaI6I5lWrVtV6OpKkXaxWgVoBDOv0/QHVWIfMvDUzmzKzaciQIbt0cpKk2qtVoOYCIyLioIjYE5gMzK7RXCRJBRpQixfNzA0RcSEwB+gPTM/MxbWYiySpTDUJFEBm/gr4Va1eX5JUtmJvkpAk9W0GSpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUUyUJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUXqUqAi4isRsTgiNkVE0xb7LomIloh4OSKO7TQ+qRpriYiLu/L6kqTeq6vvoBYBpwBPdB6MiDpgMlAPTAJuioj+EdEfuBE4DqgDvl4dK0nSZgZ05Ycz8yWAiNhy10nAjMxcD7RGRAswrtrXkpl/qH5uRnXsi12ZhySp9+mpz6D2B5Z3+v7Vamxb4x8QEedGRHNENK9ataqHpilJKtUO30FFxCPAflvZdWlmPtD9U2qXmbcCtwI0NTVlT72OJKlMOwxUZn7hQzzvCmBYp+8PqMbYzrgkSR166hLfbGByROwVEQcBI4DfAHOBERFxUETsSfuNFLN7aA6SpN1Yl26SiIiTgf8NDAF+GRELMvPYzFwcEffQfvPDBuCCzNxY/cyFwBygPzA9Mxd36QwkSb1SZJb/8U5TU1M2NzfXehqSpB4QEfMys2nLcf8mCUlSkQyUJKlIBkqSVCQDJUkqkoGSJBXJQEmSimSgJElFMlCSpCIZKElSkQyUJKlIBkqSVCQDJUkqkoGSJBXJQEmSimSgJElFMlCSpCIZKElSkQyUJKlIBkqSVCQDJUkqkoGSJBXJQEmSimSgJElFMlCSpCIZKElSkQyUJKlIBkqSVCQDJUkqkoGSJBXJQEmSimSgJElFMlCSpCIZKElSkQyUJKlIBkqSVCQDJUkqkoGSJBXJQEmSimSgJElFMlCSpCIZKElSkQyUJKlIBkqSVCQDJUkqkoGSJBWpS4GKiGsj4ncRsTAi7ouIQZ32XRIRLRHxckQc22l8UjXWEhEXd+X1JUm9V1ffQT0MNGTmaGAJcAlARNQBk4F6YBJwU0T0j4j+wI3AcUAd8PXqWEmSNtOlQGXmQ5m5ofr2WeCAavskYEZmrs/MVqAFGFc9WjLzD5n5LjCjOlaSpM1052dQZwEPVtv7A8s77Xu1GtvWuCRJmxmwowMi4hFgv63sujQzH6iOuRTYAPy0uyYWEecC5wJ86lOf6q6nlSTtJnYYqMz8wvb2R8SZwInAMZmZ1fAKYFinww6oxtjO+JaveytwK0BTU1Nu7RhJUu/V1bv4JgH/DPxjZq7ttGs2MDki9oqIg4ARwG+AucCIiDgoIvak/UaK2V2ZgySpd9rhO6gd+AmwF/BwRAA8m5nnZebiiLgHeJH2S38XZOZGgIi4EJgD9AemZ+biLs5BktQLxX9elStXU1NTNjc313oakqQeEBHzMrNpy3H/JglJUpEMlCSpSAZKklQkAyVJKpKBkiQVyUBJkopkoCRJRTJQkqQiGShJUpEMlCSpSAZKklQkAyVJKpKBkiQVyUBJkopkoCRJRTJQkqQiGShJUpEMlCSpSAZKklQkAyVJKpKBkiQVyUBJkopkoCRJRTJQkqQiGShpB5YtW0ZDQ0OtpyH1OQZKKkRmsmnTplpPQyqGgZJ2woYNGzjttNM49NBD+fKXv8zatWu58sorGTt2LA0NDZx77rlkJgA33HADdXV1jB49msmTJwNwxRVXcN1113U8X0NDA8uWLWPZsmWMHDmS008/nYaGBpYvX875559PU1MT9fX1fP/736/J+UolMFDSTnj55ZeZNm0aL730Eh//+Me56aabuPDCC5k7dy6LFi3inXfe4Re/+AUAV111Fb/97W9ZuHAht9xyyw6fe+nSpUybNo3FixczfPhwfvjDH9Lc3MzChQt5/PHHWbhwYU+fnlQkAyXthGHDhnHUUUcBMGXKFJ566ikee+wxxo8fz6hRo/j1r3/N4sWLARg9ejSnnXYad911FwMGDNjhcw8fPpwjjjii4/t77rmHMWPG0NjYyOLFi3nxxRd75qSkwhkoaSdExAe+nzZtGrNmzeKFF15g6tSprFu3DoBf/vKXXHDBBcyfP5+xY8eyYcMGBgwYsNnnS+8fCzBw4MCO7dbWVq677joeffRRFi5cyAknnLDZsVJfYqCknfDKK6/wzDPPAHD33XczYcIEAAYPHkxbWxuzZs0CYNOmTSxfvpzPf/7zXH311axevZq2tjYOPPBA5s+fD8D8+fNpbW3d6uv85S9/YeDAgXziE5/gjTfe4MEHH9wFZyeVacfXHyQxcuRIbrzxRs466yzq6uo4//zzefvtt2loaGC//fZj7NixAGzcuJEpU6awevVqMpNvf/vbDBo0iFNPPZU77riD+vp6xo8fz8EHH7zV1znssMNobGzkkEMO2eyyotQXxft3HpWsqakpm5ubaz0NSVIPiIh5mdm05biX+CRJRTJQkqQiGSipMAtWLuC8h89jwcoFtZ6KVFMGSirMLc/fwtOvPc0tz+/4D/lKvZl38UmFOe+w8zb7KvVVBkoqzOH/5XBu+e++e5K8xCdJKpKBkiQVyUBJkopkoCRJRTJQkqQiGShJUpEMlCSpSAZKklSk3eKf24iIVcAfaz2Pv9Fg4E+1nkTBXJ/tc322z/XZtt1xbYZn5pAtB3eLQO2OIqJ5a/++idq5Ptvn+myf67NtvWltvMQnSSqSgZIkFclA9Zxbaz2Bwrk+2+f6bJ/rs229Zm38DEqSVCTfQUmSimSguigiro2I30XEwoi4LyIGddp3SUS0RMTLEXFsp/FJ1VhLRFxcm5nvGhHxlYhYHBGbIqJpi319fn221JfP/X0RMT0iVkbEok5jn4yIhyNiafX176rxiIgbqvVaGBFjajfzXSMihkXEYxHxYvVr65+q8d63RpnpowsP4IvAgGr7auDqarsOeB7YCzgI+D3Qv3r8HvgHYM/qmLpan0cPrs+hwEjg34GmTuOuzwfXqs+e+xbr8N+AMcCiTmPXABdX2xd3+nV2PPAgEMARwHO1nv8uWJ+hwJhqex9gSfXrqdetke+guigzH8rMDdW3zwIHVNsnATMyc31mtgItwLjq0ZKZf8jMd4EZ1bG9Uma+lJkvb2WX6/NBffncO2TmE8BbWwyfBNxebd8OfKnT+B3Z7llgUEQM3TUzrY3MfD0z51fbfwVeAvanF66RgepeZ9H+fyrQ/h/M8k77Xq3GtjXe17g+H9SXz31H9s3M16vt/wD2rbb79JpFxIFAI/AcvXCNBtR6AruDiHgE2G8ruy7NzAeqYy4FNgA/3ZVzK8HOrI/UXTIzI6LP334cER8D/g34n5n5l4jo2Ndb1shA7YTM/ML29kfEmcCJwDFZXfQFVgDDOh12QDXGdsZ3Sztan23oM+vzN9jemvR1b0TE0Mx8vbo8tbIa75NrFhF70B6nn2bmvdVwr1sjL/F1UURMAv4Z+MfMXNtp12xgckTsFREHASOA3wBzgRERcVBE7AlMro7ta1yfD+rL574js4Ezqu0zgAc6jZ9e3al2BLC602WuXina3yr9K/BSZv6vTrt63xrV+i6N3f1B+4f7y4EF1eOWTvsupf2urJeB4zqNH0/7nTe/p/0yWM3PowfX52Tar3mvB94A5rg+212vPnvundbgZ8DrwHvVfztnA38PPAosBR4BPlkdG8CN1Xq9QKc7RXvrA5gAJLCw0+87x/fGNfJvkpAkFclLfJKkIhkoSVKRDJQkqUgGSpJUJAMlSSqSgZIkFclASZKKZKAkSUX6/12LvbyFy0ZCAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "EvmqBsLMGrDJ", "colab": { "base_uri": "https://localhost:8080/", "height": 97 }, "outputId": "bb9681d9-8ab6-4779-8065-fd41f14051cf" }, "source": [ "tweets_df = pd.read_csv('tweets_solutions.csv')\n", "tweets_df.head(1)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateTweetslocationfollowersword_countchar_countavg_wordhastagsnumericsTweets_cleanACTIONSNOUNS
02020-10-09 04:11:20Poco a poco todo se va acomodandoSanta Anita1702136.000va acomodandocuídatedesgracia
\n", "
" ], "text/plain": [ " date Tweets ... ACTIONS NOUNS\n", "0 2020-10-09 04:11:20 Poco a poco todo se va acomodando ... cuídate desgracia\n", "\n", "[1 rows x 12 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 219 } ] }, { "cell_type": "code", "metadata": { "id": "QsjXRKTGGrDP", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "a842c15b-3558-4ed7-c451-a73f9e3eca4d" }, "source": [ "hashtag_summary = adv.extract_hashtags(tweets_df['Tweets'])\n", "hashtag_summary.keys()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "dict_keys(['hashtags', 'hashtags_flat', 'hashtag_counts', 'hashtag_freq', 'top_hashtags', 'overview'])" ] }, "metadata": { "tags": [] }, "execution_count": 220 } ] }, { "cell_type": "code", "metadata": { "id": "lDnYMAfZGrDR", "colab": { "base_uri": "https://localhost:8080/", "height": 85 }, "outputId": "8715a1a1-29d1-4810-bc50-b9b71d0ba0ff" }, "source": [ "hashtag_summary['overview']" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'hashtags_per_post': 0.0,\n", " 'num_hashtags': 0,\n", " 'num_posts': 20,\n", " 'unique_hashtags': 0}" ] }, "metadata": { "tags": [] }, "execution_count": 221 } ] }, { "cell_type": "code", "metadata": { "id": "gU45Lg8tGrDX", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "c94f7da2-282a-426f-eaca-86dc1da07dc5" }, "source": [ "hashtag_summary['hashtags'][:5]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[[], [], [], [], []]" ] }, "metadata": { "tags": [] }, "execution_count": 224 } ] }, { "cell_type": "code", "metadata": { "id": "jt6G_xdIGrDa", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "f50f66f4-ac95-4dac-96c7-fe52737635d9" }, "source": [ "hashtag_summary['hashtag_counts'][:20]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]" ] }, "metadata": { "tags": [] }, "execution_count": 225 } ] }, { "cell_type": "code", "metadata": { "id": "4IfQjn5SGrDc", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "f9b34c34-60f6-4019-8a31-567af409b67c" }, "source": [ "hashtag_summary['hashtag_freq'][:20]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[(0, 20)]" ] }, "metadata": { "tags": [] }, "execution_count": 226 } ] }, { "cell_type": "code", "metadata": { "id": "0xHt2ZTTGrDf", "colab": { "base_uri": "https://localhost:8080/", "height": 513 }, "outputId": "a4503634-52f5-4c8b-def6-0c4f977c33ae" }, "source": [ "plt.figure(facecolor='#ebebeb', figsize=(11, 8))\n", "plt.bar([x[0] for x in hashtag_summary['hashtag_freq'][:15]],\n", " [x[1] for x in hashtag_summary['hashtag_freq'][:15]])\n", "plt.title('Hashtag frequency')\n", "plt.xlabel('Hashtags per tweet')\n", "plt.ylabel('Number of tweets')\n", "plt.yscale('log')\n", "plt.grid(alpha=0.5)\n", "plt.gca().set_frame_on(False)" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAApsAAAHwCAYAAADpSaRgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXjM997/8dekESmpQ0sFRYij9rXIhliH2oIUtyVdtLnRVt22dFFVeqpVqrZSPW2diIqqxnKUoRRtbK2UtneLWk5ECHHUElSI+f3h6vyaY2Kidz/fMenzcV3nujJbvm9vnD59ZyZjy87OdgoAAAAwwM/bAwAAAKDoIjYBAABgDLEJAAAAY4hNAAAAGENsAgAAwBhiEwAAAMYQmwD+tMqVK6dDhw555diXLl3SgAEDVL16dT322GNemQEArEBsAritNWnSRJs3b8533eLFi9WlSxejxzV9jFWrVik7O1v79+/X+++/b+w4AOBtxCYAeEFGRoZCQ0Pl7+/v9varV69aPBEAmEFsAvB5M2bMULNmzRQSEqLIyEitXr3adduhQ4fUvXt3Va9eXffff78ef/zxfI/dsmWLmjdvrtDQUI0dO1ZOp1P79+/XmDFj9PXXX6tq1aoKDQ2VJK1bt05t2rRRtWrV1LBhQ02ZMiXf91qyZIkaN26smjVratq0aW7PykrS66+/rmnTpmn58uWqWrWqkpKStHjxYj344IMaN26catasqSlTpujy5ct66aWX1KhRI9WpU0ejR4/WpUuXXN9n9uzZqlu3rurVq6dFixble1lAjx49tHDhQtd9//NM7U8//aTY2Fj99a9/VVhYmJYvX+667amnntLYsWP1X//1XwoJCZHdbtfhw4ddt+/du9f12Dp16mj69Ok6ceKEqlSpotOnT7vut2fPHtWqVUtXrlwp3G8kgCKJ2ATg80JCQrRq1SodOnRIo0eP1rBhw5SVlSVJeu211xQdHa0DBw5oz549N8TmunXrtH79em3evFkrV67Uxo0bVbNmTb3xxht64IEHlJ6eroMHD0qSSpYsqTlz5ujgwYP68MMPtWDBAn366aeSpH379mns2LGaO3euvv/+e507d07Hjx93O29CQoJGjBihmJgYpaena+DAgZKktLQ0Va1aVT/88IP+53/+R5MmTdLBgwf1+eefa+fOnTp+/LimTp0qSdqwYYPefvttffzxx9qxY4e2bNlS6H1duHBBsbGx6tWrl3788UfNnz9fCQkJ2rdvn+s+y5cv15gxY3TgwAFVq1ZNr776qiQpJydHvXv3Vtu2bfXdd99p586datWqlcqXL6+IiAitWLHC9T2WLl2qmJgYFStWrNCzASh6iE0At724uDiFhoa6/peQkJDv9h49eig4OFh+fn7q2bOnqlWrpm+++UaSVKxYMR09elRZWVkKDAxUWFhYvscOHz5cf/nLX3TfffcpMjJS33//fYFzREZGqk6dOvLz81PdunXVs2dPbd26VdL112Da7XaFhYUpICBACQkJstlst/TrDA4O1hNPPCF/f38FBgZq4cKFmjRpksqUKaOgoCCNGDHCdQZyxYoV6tevn2rXrq2SJUtq7NixhT7OunXrVLlyZfXv31/+/v5q0KCBunbtqpUrV7ru8+CDD6pJkyby9/dX7969XXtZt26d7r33Xg0bNkyBgYEKCgpS06ZNJUl9+/bV0qVLJUl5eXn65JNP1KdPn1vaAYCix/2LhQDgNpKYmKjWrVu7Li9evFhJSUmuy0uWLNHcuXOVkZEh6fqZu3//+9+SpPHjx+u1115Tx44dVbp0aQ0dOlQDBgxwPfbee+91fX3nnXfqwoULBc6xa9cuTZo0SXv37lVubq5yc3PVvXt3SVJWVpYqVarkum+JEiVUpkyZW/p1VqxY0fX1qVOndPHiRbVv3951ndPpVF5enut4DRs2dN123333Ffo4R48eVVpamuvlAdL114j+Ngx/u5cSJUq49pKZmamQkBC337dz584aM2aM0tPTdeDAAZUqVUpNmjQp9FwAiiZiE4BPy8jI0MiRI7Vs2TI1a9ZMd9xxh6Kjo+V0OiVJ5cuX1/Tp0yVJ27dvV2xsrMLDw1W9evWbfl93ZyWHDBmiwYMHKzk5WYGBgXrhhRdcr1EsX768Dhw44LrvpUuX9PPPP9/Sr+W3x7znnnt055136ssvv1SFChVuuG/58uWVmZnpunz06NF8t5coUSLf6ztPnjzp+rpSpUqKiIjQxx9/fEvz/frYlJQUt7cFBgaqR48e+vjjj/XTTz/poYceuuXvD6Do4Wl0AD7t4sWLstlsKlu2rCTpww8/1N69e123r1ixQseOHZMklS5dWjabTX5+nv+vr1y5cjp+/Lhyc3Nd1+Xk5Kh06dIKDAxUWlqaPvnkE9dt3bp1k8Ph0M6dO5Wbm6s33njDFby/h5+fnwYOHKhx48YpOztbknT8+HFt3LhR0vWXDiQnJ2vfvn26ePGi67Wcv6pXr55Wr16tixcv6tChQ1q0aJHrto4dO+rgwYP66KOPdOXKFV25ckXffPON9u/f73Gujh076sSJE5o3b54uX76snJwc7dq1y3V7nz59lJycrLVr1/IUOgBJxCYAH3f//fdr6NCh6ty5s+rUqaMff/xRzZs3d92+e/du2e12Va1aVYMGDdLf/va3Ap8G/q2WLVvq/vvvV926dXX//fdLuv4u8tdff10hISGaOnWq6yl0SapVq5YmT56s+Ph41atXTyVLllTZsmUVEBDwu39t48ePV7Vq1dS5c2dVq1ZNvXv3dp09bd++vf77v/9bPXv2VPPmzRUVFZXvsUOGDFFAQIDq1Kmjp556SrGxsa7bgoKCtHTpUqWkpKh+/fqqW7euJk6cqMuXL3ucKSgoSB9//LHWrVununXrqnnz5vryyy9dt7do0UI2m00NGjRQ5cqVf/evHUDRYcvOzv79//QGALiVk5OjGjVqaMeOHapataolxyxXrpx27Njh8SUCpvXs2VO9evXSoEGDvDoHgNsDZzYB4A/icDh08eJFXbhwQRMmTFDt2rVVpUoVb49lqW+++Ubffvutevbs6e1RANwmiE0A+IOsWbNG9evXV/369XXo0CG9++67t/zjj3zZk08+qd69e+uVV15RUFCQt8cBcJvgaXQAAAAYw5lNAAAAGENsAgAAwBhi8xb89oci4zp24h57cY+9uMdebsRO3GMv7rEX926XvRCbt+DatWveHuG2w07cYy/usRf32MuN2Il77MU99uLe7bIXYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGP8vT3Arz799FOtX79e58+f14ABA9SmTRtvjwQAAID/I6NnNocPH67atWurZcuW+a7fsGGDwsLC1KxZM82YMUOS9OCDD2r69OmaOnWqli9fbnIsAAAAWMRobPbr10/Jycn5rsvLy9Ozzz6r5ORkpaamKiUlRfv27XPd/uabb+qxxx4zORYAAAAsYjQ2IyIiVKZMmXzXpaWlKSQkRCEhIQoICFBMTIzWrFkjp9OpiRMnql27dmrYsKHJsQAAAGARy1+zefz4cVWqVMl1uWLFitq1a5feffddbd68WefOndPhw4f1yCOPuH18YmKiEhMTJUkxMTGKjY21YmxJ0rlz5yw7lq9gJ+6xF/fYi3vs5UbsxD324h57cc/qvQQHB7u9/rZ5g1B8fLzi4+M93i8uLk5xcXEWTOReQYv8M2Mn7rEX99iLe+zlRuzEPfbiHntx73bYi+U/+qhChQrKzMx0XT527JgqVKhg9RgAAACwgOWx2bhxYx0+fFjp6enKzc3V8uXL1alTJ6vHAAAAgAWMxmZ8fLw6d+6sAwcOqEGDBkpKSpK/v78mT56sPn36KDIyUt27d1etWrVMjgEAAAAvMfqazfnz57u9vkOHDurQoYPJQwMAAOA2wMdVAgAAwBhiEwAAAMYQmwAAADCG2AQAAIAxxCYAAACMITYBAABgDLEJAAAAY4hNAAAAGENsAgAAwBhiEwAAAMYQmwAAADCG2AQAAIAxPhmbDodDI0eOlMPh8PYoAAAAuAl/bw/we9jtdtntdm+PAQAAAA988swmAAAAfAOxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGN8MjYdDodGjhwph8Ph7VEAAABwE/7eHuD3sNvtstvt3h4DAAAAHvjkmU0AAAD4BmITAAAAxhCbAAAAMIbYBAAAgDHEJgAAAIwhNgEAAGAMsQkAAABjiE0AAAAYQ2wCAADAGGITAAAAxhCbAAAAMIbYBAAAgDHEJgAAAIwhNgEAAGAMsQkAAABjiE0AAAAYQ2wCAADAGGITAAAAxhCbAAAAMIbYBAAAgDHEJgAAAIzxydh0OBwaOXKkHA6Ht0cBAADATfh7e4Dfw263y263e3sMAAAAeOCTZzYBAADgG4hNAAAAGENsAgAAwBhiEwAAAMYQmwAAADCG2AQAAIAxxCYAAACMITYBAABgDLEJAAAAY4hNAAAAGENsAgAAwBhiEwAAAMYQmwAAADCG2AQAAIAxxCYAAACMITYBAABgDLEJAAAAY4hNAAAAGENsAgAAwBhiEwAAAMYQmwAAADCG2AQAAIAxxCYAAACMITYBAABgjE/GpsPh0MiRI+VwOLw9CgAAAG7C39sD/B52u112u93bYwAAAMADnzyzCQAAAN9AbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjPHJ2HQ4HBo5cqQcDoe3RwEAAMBN+Ht7gN/DbrfLbrd7ewwAAAB44JNnNgEAAOAbiE0AAAAYQ2wCAADAGGITAAAAxhCbAAAAMIbYBAAAgDHEJgAAAIwhNgEAAGAMsQkAAABjiE0AAAAYQ2wCAADAGGITAAAAxhCbAAAAMIbYBAAAgDHEJgAAAIwhNgEAAGAMsQkAAABjiE0AAAAYQ2wCAADAGGITAAAAxhCbAAAAMIbYBAAAgDHEJgAAAIwhNgEAAGAMsQkAAABjiE0AAAAYQ2wCAADAGGITAAAAxhCbAAAAMIbYBAAAgDHEJgAAAIwhNgEAAGAMsQkAAABjPMbmihUrlJOTI0maNm2aHnnkEe3Zs8f4YAAAAPB9HmPzzTffVFBQkLZv364tW7aof//+Gjt2rBWzAQAAwMd5jE0/v+t3Wb9+veLi4tSxY0fl5uYaHwwAAAC+z2NsVqhQQaNGjdKKFSvUvn17Xb58WU6n04rZCuRwODRy5Eg5HA6vzgEAAICb8/d0h7///e/auHGjhg0bpr/85S/KysrSSy+9ZMVsBbLb7bLb7V6dAQAAAJ55PLM5evRode3aVaGhoZKk4OBgLV261PhgAAAA8H0eY3Pv3r35Lufl5fFudAAAABRKgU+jv/XWW3rrrbf0yy+/qFq1aq7XaQYEBGjQoEGWDQgAAADfVWBsjhgxQiNGjNCkSZP04osvWjkTAAAAigiPT6O/8MILWrp0qaZNmyZJyszMVFpamvHBAAAA4Ps8xmZCQoK++uorLVu2TJJUsmRJJSQkGB8MAAAAvs9jbO7atUtTpkxR8eLFJUmlS5fWlStXjA8GAAAA3+cxNosVK6a8vDzZbDZJ0qlTp1xfAwAAADfjMTafeOIJPfzww8rOztbf/vY3de3aVSNGjLBiNgAAAPg4j58gFBsbq4YNG2rLli1yOp1KTExUzZo1rZgNAAAAPs7jmU1J+ve//60777xTjz/+uO6++26lp6ebngsAAABFgMfYfOONNzRr1izNmDFDknT16lUNHTrU+GAAAADwfR5jc/Xq1UpKSlKJEiUkXf9s9AsXLhgfDAAAAL7PY2wGBATIZrO53oFOaAIAAKCwPMZmjx49NGrUKJ07d04LFy5UbGysBg4caMVsAAAA8HEe343+5JNPatOmTbrrrrt04MABJSQkKDo62oLRAAAA4Os8xmZSUpLCw8M1YcIEC8YBAABAUeIxNjMzMzV69GhlZGSoQYMGCg8PV1hYmOrXr2/FfAAAAPBhHmMzISFBknTp0iUtXLhQc+bM0bhx43TixAnjwwEAAMC3eYzNadOmaefOnbpw4YLq16+vCRMmKCwszIrZAAAA4OM8xubq1avl7++vDh06KCIiQg888ICKFy9uxWwAAADwcR5/9NHGjRu1bNkyNW7cWJs2bVKrVq3UpUsXK2YDAACAj/N4ZvPHH3/U9u3btXXrVu3evVuVKlXiaXQAAAAUisfYnDRpksLDw/XEE0+ocePGKlasmBVzAQAAoAjw+DR6q1at9PTTT6t58+au0HznnXeMDwYAAADf5zE2P/rooxuuS05ONjIMAAAAipYCn0b/5JNPtGzZMh05ciTfZ6Hn5OSoTJkylgwHAAAA31ZgbDZr1kzly5fX6dOnNXToUNf1QUFBqlu3riXDAQAAwLcVGJuVK1dW5cqVtWbNGivnAQAAQBHi8TWbAAAAwO9FbAIAAMCYAmOzV69ekqSJEydaNgwAAACKlgJfs3nixAnt3LlTa9euVUxMjJxOZ77bGzZsaHw4AAAA+LYCYzMhIUHTpk3TsWPHNH78+Hy32Ww2paSkGB8OAAAAvq3A2Ozevbu6d++uadOmadSoUVbOBAAAgCLC42ejjxo1SmvXrtW2bdskSZGRkerYsaPxwQAAAOD7PL4bfdKkSZo/f75q1qypmjVrav78+XrllVesmA0AAAA+zuOZzc8++0yff/65/Pyud2m/fv3Utm1bjRs3zvhwAAAA8G2F+jmbZ8+edX197tw5Y8MAAACgaPF4ZvOZZ55R27ZtFRkZKafTqe3bt3NWEwAAAIXiMTZ79eqliIgI7d69W5I0fvx4lS9f3vhgAAAA8H0eY1OSgoOD1alTJ9OzAAAAoIjhs9EBAABgDLEJAAAAY24am3l5eQoPD7dqFgAAABQxN43NO+64QzVq1NDRo0etmgcAAABFiMc3CJ05c0ZRUVFq0qSJSpQo4bo+KSnJ6GAAAADwfR5j89lnn7VijlvicDjkcDhkt9tlt9u9PQ4AAAAK4DE2IyMjlZGRoUOHDql169a6ePGirl27ZsVsBSIyAQAAfIPHd6MvXLhQjz32mEaPHi1JOn78uOLi4owPBgAAAN/nMTbfe+89rV69WkFBQZKk0NBQnTp1yvhgAAAA8H0eY7N48eIKCAhwXb569apsNpvRoQAAAFA0eHzNZkREhKZPn65ffvlFmzZt0gcffKCOHTtaMRsAAAB8nMczmy+++KLKli2r2rVr6x//+Ifat2+v559/3orZAAAA4OM8ntn08/NT37591aRJE9lsNtWoUYOn0QEAAFAoHmNz3bp1GjNmjEJCQuR0OnXkyBFNnTpV7du3t2I+AAAA+DCPsfnSSy8pJSVF1atXlyQdPnxY/fv3JzYBAADgkcfXbAYFBblCU5JCQkJcPwYJAAAAuJkCz2z+85//lCQ1atRI/fr1U48ePWSz2bRy5Uo1btzYsgEBAADguwqMTYfD4fq6XLly2rp1qyTpnnvu0aVLl8xPBgAAAJ9XYGzOmjXLyjkAAABQBHl8g1B6err+/ve/KyMjQ1evXnVdn5SUZHQwAAAA+D6Psfnwww+rf//+stvt/HxNAAAA3BKPsVm8eHHFx8dbMQsAAACKGI+xGR8frzfeeEPR0dEKCAhwXd+wYUOjgwEAAMD3eYzNH374QUuXLtUXX3whP7/rP5bTZrMpJSXF+HAAAADwbR5jc+XKlfr666/zndUEAAAACsPjJwjVrl1bZ8+etWIWAAAAFDEez2yePXtWERERatSokYoXL+66nh99BAAAAE88xubYsWOtmAMAAABFkMfYjIyMtGIOAAAAFEEeYzMkJMT1w9yvXLmiK1euqESJEjp8+LDx4QAAAODbPMbmv/71L9fXTqdTa9as0ddff21yJgAAABQRHt+N/ls2m00PPvigPv/8c1PzAAAAoAjxeGbzn//8p+vra9euaffu3QoMDDQ6FAAAAIoGj7HpcDj+/539/VW5cmUlJiYaHQoAAABFg8fYnDVrlhVzAAAAoAgqMDanTp1a4INsNptGjRplZCAAAAAUHQXGZokSJW647uLFi1q0aJFOnz5NbAIAAMCjAmNz2LBhrq9zcnL0zjvvaPHixerZs6eGDh1qyXAAAADwbTd9zebPP/+suXPnatmyZerbt682bNig0qVLWzUbAAAAfFyBsTlhwgStXr1agwYN0ubNmxUUFGTlXAAAACgCCozNt99+W8WLF9ebb76p6dOnu653Op2y2Wx8XCUAAAA8KjA2T548aeUcAAAAKIJu6eMqAQAAgFtBbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhjy87Odnp7iFvlcDjkcDhkt9tlt9stO+7Tidu07SR9/lvh915jJ26wF/fYi3vs5UbsxD324h57ce+fA6sqODjY22PI39sD/B5WRyYAAAB+H/4ZAAAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGOITQAAABhDbAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCQAAAGNum9j817/+pWeeeUaPPvqot0cBAADAH8RobA4fPly1a9dWy5Yt812/YcMGhYWFqVmzZpoxY4YkKSQkxPU1AAAAigajsdmvXz8lJyfnuy4vL0/PPvuskpOTlZqaqpSUFO3bt8/kGAAAAPASo7EZERGhMmXK5LsuLS1NISEhCgkJUUBAgGJiYrRmzRqTYwAAAMBL/K0+4PHjx1WpUiXX5YoVK2rXrl06ffq0Xn31VX3//fd66623NGLECLePT0xMVGJioiQpJiZGsbGxlswtSaF3OSVds+x4voCduMde3GMv7rGXG7ET99iLe+zFvXPnzll6vODgYLfXWx6bBbn77rs1depUj/eLi4tTXFycBRPd6OD5w9p28rZ5T9Vt4ho7cYu9uMde3GMvN2In7rEX99iLOy+UKlVgAFrJ8t+ZChUqKDMz03X52LFjqlChgtVjAAAAwAKWx2bjxo11+PBhpaenKzc3V8uXL1enTp2sHgMAAAAWMBqb8fHx6ty5sw4cOKAGDRooKSlJ/v7+mjx5svr06aPIyEh1795dtWrVMjkGAAAAvMToazbnz5/v9voOHTqoQ4cOJg8NAACA2wCvpgUAAIAxxCYAAACMITYBAABgDLEJAAAAY4hNAAAAGENsAgAAwBhiEwAAAMYQmwAAADCG2AQAAIAxxCYAAACMITYBAABgDLEJAAAAY3wyNh0Oh0aOHCmHw+HtUQAAAHAT/t4e4Pew2+2y2+3eHgMAAAAe2LKzs53eHsJXJCYmKi4uzttj3FbYiXvsxT324h57uRE7cY+9uMde3Ltd9uKTT6N7S2JiordHuO2wE/fYi3vsxT32ciN24h57cY+9uHe77IXYBAAAgDHEJgAAAIy5Y+zYsRO8PYQvadiwobdHuO2wE/fYi3vsxT32ciN24h57cY+9uHc77IU3CAEAAMAYnkYHAACAMcTmTfz888+KjY1V8+bNFRsbqzNnzhR43/Pnz6tBgwZKSEiwcELvKMxeMjIy1LZtW0VHRysqKkoLFiywflALFWYn3333nTp37qyoqCi1bt1aKSkpXpjUWoX9O9SnTx+Fhoaqf//+Fk9onQ0bNigT9PsAAAyaSURBVCgsLEzNmjXTjBkzbrj98uXLevzxx9WsWTPZ7XYdOXLEC1Naz9Netm7dqrZt2yo4OFgrV670woTe4Wkvc+fOVWRkpFq3bq1evXopIyPDC1Naz9NeFixYoFatWik6OlpdunTRvn37vDCltTzt5FerVq1SuXLltHv3bgunu47YvImZM2eqZcuW2rlzp1q2bKmZM2cWeN/JkycrPDzcwum8pzB7KV++vNasWaNNmzZp7dq1mjlzprKysrwwrTUKs5MSJUpo9uzZ+vLLL7VkyRKNGzdOZ8+e9cK01ins36GnnnpKb7/9tsXTWScvL0/PPvuskpOTlZqaqpSUlBv+I7ho0SKVLl1aX331lYYMGaKJEyd6aVrrFGYv9913n2bNmqXevXt7aUrrFWYv9evX1/r167V582Z169ZNL7/8spemtU5h9tK7d29t2bJFmzZt0tNPP60XX3zRS9NaozA7kaScnBzNnz9fTZs29cKUxOZNrVmzRn379pUk9e3bV59++qnb++3Zs0fZ2dmKjo62cDrvKcxeAgICVLx4cUlSbm6url27ZumMVivMTkJDQxUaGipJCg4OVrly5XTq1ClL57RaYf8OtWrVSkFBQVaOZqm0tDSFhIQoJCREAQEBiomJ0Zo1a/Ld57e76tatm7744gs5nUX7JfWF2UuVKlVUt25d2Ww2L01pvcLsJSoqSiVKlJAkNW3aVMeOHfPGqJYqzF7uuusu19cXL14s8n9uCrMT6foJsaefftr132WrEZs3kZ2dreDgYEnXz9RlZ2ffcJ9r165p/Pjxf4p/Vf6qMHuRpMzMTLVu3VqNGjXS008/7XpMUVTYnfwqLS1Nubm5qlatmhXjec2t7qWoOn78uCpVquS6XLFiRR0/fjzffbKyslz38ff3V6lSpXT69GlL57RaYfbyZ3Sre1m0aJHatWtnxWheVdi9vPfee2rWrJlefvllvfrqq1aOaLnC7GTPnj3KzMxUx44drR7PxSc/G/2P1Lt3b508efKG659//vl8l202m9t/Ib3//vtq3769KlasaGxGb/i/7kWSKlWqpM2bNysrK0txcXHq1q2b7r33XiPzWuGP2Il0PSqGDRum2bNny8/P9/+990ftBcCtW7p0qfbs2aMVK1Z4e5TbxuDBgzV48GAtW7ZMb775pubMmePtkbzm1xNis2bN8uocf/rYXLZsWYG3lStXTllZWQoODlZWVpbKli17w32+/vprbd++XR988IEuXLig3NxclSxZUuPHjzc5tnH/1738VnBwsGrVqqXt27ere/fuf/SolvkjdnL+/Hn1799fzz//vB544AFTo1rqj/yzUlRVqFBBmZmZrsvHjh1ThQoV8t0nODhYmZmZqlixoq5evapz587p7rvvtnpUSxVmL39Ghd3L5s2bNX36dK1YscJrT49a6Vb/vPTs2VNjxoyxYjSv8bSTnJwc7d27VzExMZKkkydPauDAgUpKSlKjRo0sm9P3T6sY1KlTJy1ZskSStGTJEnXu3PmG+8ybN0+7d+9WWlqaJkyYoD59+vh8aHpSmL0cO3ZMly5dkiSdOXNGO3bsUI0aNSyd00qF2Ulubq4efvhh9enTx6ej+1YUZi9/Bo0bN9bhw4eVnp6u3NxcLV++XJ06dcp3n9/uatWqVYqKiiryZ4ILs5c/o8Ls5dtvv9Xo0aO1cOFClStXzkuTWqswezl48KDr6/Xr16t69epWj2kpTzspVaqU9u3bp7S0NKWlpalp06aWh6ZEbN7U8OHDtXnzZjVv3lxbtmzR8OHDJUm7d+/WiBEjvDyd9xRmL/v371enTp0UHR2tHj166Mknn1SdOnW8ObZRhdnJihUrtG3bNiUnJys6OlrR0dH67rvvvDm2cYX9O9S1a1cNHjxYX3zxhRo0aKCNGzd6a2Qj/P39NXnyZPXp00eRkZHq3r27atWqpddee01r166VJA0YMECnT59Ws2bNNHfu3CL/LlqpcHv55ptv1KBBA61atUqjR49WVFSUl6c2rzB7efnll3XhwgUNHjxY0dHRGjhwoJenNq8we3nvvfcUFRWl6OhozZ07V7Nnz/by1GYVZie3Az5BCAAAAMZwZhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYAyxCcDnVa1aNd/lxYsXKyEh4Za/T2pqqvr37+/2tnnz5unixYu/az5vSE1N1c6dO419/yNHjtz0B/oDwK+ITQAohPnz57s+qOB2dPXq1XyXU1NT9dVXXxk7XkZGBrEJoFCITQBFmsPhkN1uV5s2bfJ9jntqaqrrh+u3adNGOTk5kqQLFy7o0UcfVXh4uIYMGSKn06n58+crKytLPXv2dH3s25gxY9S+fXtFRUXp9ddfdx1v/fr1Cg8PV7t27fTcc8+5zpQWdLxfHTlyxHXMiIgIPfroo64zqXv27FH37t3Vrl07PfTQQ8rKypIk9ejRQy+88ILat2+v+fPn5/teCxYs0Lx58xQdHa3U1FQ1bdpUTqdTZ8+eVfny5bV161ZJUrdu3XTw4EFduHBBw4cPV8eOHdWmTRutWbNGkpSXl6cJEyaoQ4cOat26tf7xj39IkiZNmqTt27crOjpa8+bN+2N/0wAUKX/6z0YH4Pt++eUXRUdHuy6fOXNGdrtdktSiRQutXbtWNptNCxcu1OzZszVx4kS9/fbbev3119WiRQvl5OQoMDBQkvTdd9/pyy+/VHBwsLp06aIdO3YoPj5e8+bNU0pKiu655x5J0vPPP68yZcooLy9PvXr10v/+7/8qNDRUo0eP1sqVK1W1alXFx8e7ZiroeL914MABvfXWW2rRooWGDx+uDz74QPHx8XruueeUmJiosmXLKiUlRa+++qpmzpwpSbpy5Yo+++yzfN+nSpUqeuSRR1SyZEk9+eSTkqTQ0FDt27dPR44cUYMGDbR9+3Y1bdpUx44dU2hoqF555RW1bNlSM2fO1NmzZ9WxY0e1atVKy5Yt01133aX169fr8uXL6tKli6Kjo/Xiiy9qzpw5+vDDD/+430gARRKxCcDnBQYGatOmTa7Lixcv1u7duyVJx44d0xNPPKETJ04oNzdXVapUkSQ1b95c48ePV+/evdW1a1cFBQVJkpo0aaKKFStKkurVq6eMjAyFhYXdcMwVK1YoMTFReXl5OnHihPbv369r166patWqrteQ9urVS4mJiTc93m9VqlRJLVq0kCQ99NBDevfdd9W2bVv9+OOPio2NlSRdu3ZN5cuXdz2mR48ehdpRWFiYtm3bpiNHjuiZZ57RwoULFRER4fqM5E2bNsnhcGjOnDmSpMuXLyszM1Off/65fvjhB61atUqSdP78eR06dEgBAQGFOi4AEJsAirTnnntOQ4cOVadOnZSamqopU6ZIkp555hl16NBBn332mbp06aKPPvpIkvJFlJ+f3w2vhZSk9PR0zZkzR+vXr1fp0qX11FNP6ZdffrnpHO6O99e//jXffWw22w2XnU6natWq5Xpa+z+VLFnS8xIkhYeHa8GCBcrKylJCQoLmzJmj1NRUV0g7nU598MEHqlGjRr7HOZ1OTZ48WW3bts13fWpqaqGOCwC8ZhNAkXbu3DlVqFBBkpScnOy6/vDhw6pTp46GDx+uRo0a6aeffrrp9wkKCnK9zvL8+fMqWbKkSpUqpZMnT2rjxo2SpBo1aig9PV1HjhyRJC1fvvyWjnf06FHXm3qWLVumFi1aqEaNGjp16pTr+itXrmjv3r0ef92/nVe6fsb2q6++kp+fnwIDA1WvXj0lJiYqPDxcktSmTRu9++67cjqdkqRvv/1WktS2bVstWLBAV65ckSTX6zv/8/sDQEGITQBF2tixYzV48GC1a9fO9XpLSXrnnXfUsmVLtW7dWsWKFVO7du1u+n0GDRqkvn37KiYmRvXq1VO9evVcb+hp3ry5JOnOO+/UlClT1LdvX7Vr105BQUEqVapUoY9Xo0YNvf/++4qIiNCZM2f0yCOPKCAgQO+//74mTpzoenNRYd5lbrfb9emnnyo6Olrbtm1T8eLFVbFiRTVt2lTS9afVc3JyVKdOHUnSqFGjdPXqVbVu3VpRUVF67bXXJEkDBw5UzZo11a5dO7Vs2VKjRo1SXl6e6tSpozvuuIM3CAHwyJadne309hAAUFTk5OQoKChITqdTCQkJql69uoYMGeLxcUeOHNGAAQP0xRdfWDAlAFiHM5sA8AdKSkpSdHS0oqKidO7cOcXFxXl7JADwKs5sAgAAwBjObAIAAMAYYhMAAADGEJsAAAAwhtgEAACAMcQmAAAAjCE2AQAAYMz/A1yR2074noLXAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "fDMLUKHJGrDh", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "b0026fd3-1f13-4299-8448-5e5d0cc4e1c8" }, "source": [ "hashtag_summary['top_hashtags'][:10]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[]" ] }, "metadata": { "tags": [] }, "execution_count": 228 } ] }, { "cell_type": "code", "metadata": { "id": "xcCIfwJfGrDm", "colab": { "base_uri": "https://localhost:8080/", "height": 716 }, "outputId": "16f4f1dc-12c0-4fee-f5f9-dc422dc12aee" }, "source": [ "plt.figure(facecolor='#ebebeb', figsize=(8, 12))\n", "plt.barh([x[0] for x in hashtag_summary['top_hashtags'][2:][:5]][::-1],\n", " [x[1] for x in hashtag_summary['top_hashtags'][2:][:5]][::-1])\n", "plt.title('Top Hashtags')\n", "# plt.xticks(range(3))\n", "plt.grid(alpha=0.5)\n", "plt.gca().set_frame_on(False)" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "5YUuqGkrGrDp", "colab": { "base_uri": "https://localhost:8080/", "height": 54 }, "outputId": "10229d7e-4e66-4859-dda1-b4468b180b81" }, "source": [ "emoji_summary = adv.extract_emoji(tweets_df['Tweets'])\n", "emoji_summary.keys()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "dict_keys(['emoji', 'emoji_text', 'emoji_flat', 'emoji_flat_text', 'emoji_counts', 'emoji_freq', 'top_emoji', 'top_emoji_text', 'top_emoji_groups', 'top_emoji_sub_groups', 'overview'])" ] }, "metadata": { "tags": [] }, "execution_count": 230 } ] }, { "cell_type": "code", "metadata": { "id": "K8tpiBK0GrDs", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "4bab7c31-3ea1-4f95-e5ec-b183fb076384" }, "source": [ "emoji_summary['overview']" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'emoji_per_post': 1.15, 'num_emoji': 23, 'num_posts': 20, 'unique_emoji': 14}" ] }, "metadata": { "tags": [] }, "execution_count": 231 } ] }, { "cell_type": "code", "metadata": { "id": "r_KbiZTfGrDw", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "aa550026-6fd6-4d98-c7ef-e9cc742be443" }, "source": [ "emoji_summary['emoji'][50:80]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[]" ] }, "metadata": { "tags": [] }, "execution_count": 232 } ] }, { "cell_type": "code", "metadata": { "id": "7vBQAxJeGrDz", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "517cb2e3-abb6-473c-9b1d-d9ff091dfafd" }, "source": [ "emoji_summary['emoji_text'][50:80]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[]" ] }, "metadata": { "tags": [] }, "execution_count": 233 } ] }, { "cell_type": "code", "metadata": { "id": "kny_YTO8GrD1", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "f7bacd24-27b5-4370-cae2-0198b52f4ca8" }, "source": [ "emoji_summary['emoji_flat'][:10]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['😍', '\\U0001f97a', '🙏', '🍔', '😋', '🤣', '🤣', '🤣', '🤣', '🤣']" ] }, "metadata": { "tags": [] }, "execution_count": 234 } ] }, { "cell_type": "code", "metadata": { "id": "u_0OcMZXGrD6", "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "outputId": "296fae10-b5a9-49b3-cce8-b76cc4c34dcd" }, "source": [ "emoji_summary['emoji_flat_text'][:10]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['smiling face with heart-eyes',\n", " 'pleading face',\n", " 'folded hands',\n", " 'hamburger',\n", " 'face savoring food',\n", " 'rolling on the floor laughing',\n", " 'rolling on the floor laughing',\n", " 'rolling on the floor laughing',\n", " 'rolling on the floor laughing',\n", " 'rolling on the floor laughing']" ] }, "metadata": { "tags": [] }, "execution_count": 235 } ] }, { "cell_type": "code", "metadata": { "id": "gw47wHBJGrD9", "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "outputId": "41d8045e-9be9-4915-a18e-896ee3b0626d" }, "source": [ "list(zip(emoji_summary['emoji_flat'][:10], emoji_summary['emoji_flat_text'][:10]))" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[('😍', 'smiling face with heart-eyes'),\n", " ('\\U0001f97a', 'pleading face'),\n", " ('🙏', 'folded hands'),\n", " ('🍔', 'hamburger'),\n", " ('😋', 'face savoring food'),\n", " ('🤣', 'rolling on the floor laughing'),\n", " ('🤣', 'rolling on the floor laughing'),\n", " ('🤣', 'rolling on the floor laughing'),\n", " ('🤣', 'rolling on the floor laughing'),\n", " ('🤣', 'rolling on the floor laughing')]" ] }, "metadata": { "tags": [] }, "execution_count": 236 } ] }, { "cell_type": "code", "metadata": { "id": "runlNSfHGrEB", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "30642476-a7f2-4bee-e3af-9444a33e8aac" }, "source": [ "emoji_summary['emoji_counts'][:15]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[0, 0, 0, 0, 0, 0, 1, 4, 0, 7, 1, 0, 1, 2, 0]" ] }, "metadata": { "tags": [] }, "execution_count": 237 } ] }, { "cell_type": "code", "metadata": { "id": "HGTxPQsFGrED", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "ade2ebed-590d-4f3f-9e61-f590ec13e782" }, "source": [ "emoji_summary['emoji_freq'][:15]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[(0, 12), (1, 3), (2, 2), (4, 1), (5, 1), (7, 1)]" ] }, "metadata": { "tags": [] }, "execution_count": 238 } ] }, { "cell_type": "code", "metadata": { "id": "5I8nB82wGrEG", "colab": { "base_uri": "https://localhost:8080/", "height": 513 }, "outputId": "70c5e85d-eb6e-4433-d8d8-b5971eee2ee5" }, "source": [ "plt.figure(facecolor='#ebebeb', figsize=(8, 8))\n", "plt.bar([x[0] for x in emoji_summary['emoji_freq'][:15]],\n", " [x[1] for x in emoji_summary['emoji_freq'][:15]])\n", "plt.title('Emoji frequency')\n", "plt.xlabel('Emoji per tweet')\n", "plt.ylabel('Number of tweets')\n", "plt.yscale('log')\n", "plt.grid(alpha=0.5)\n", "plt.gca().set_frame_on(False)" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfQAAAHwCAYAAABQR52cAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deViVdf7/8ReKWIhTViqgJo1lYrlAWCwuxxROmYkZLuMkSZNO5jrqmE1u0+ZlarabLVOhhWWGWo6eTMUa11wonTHLQlzA3VEhHeDA7w+/nl/MROcInHMfPj0f1+V1cRbu874le3Lf5z73HXDs2LEyAQCAGq2W1QMAAICqI+gAABiAoAMAYACCDgCAAQg6AAAGIOgAABiAoAOG+/DDD9W3b1/X7ebNm2vfvn0/+9yjR4/q7rvvVkREhKZMmeKjCQFUhwA+hw5YIzo6WseOHVOtWv//9+oBAwZoxowZls00e/Zs7dy5U2+99ZYCAgIsmwPApQu0egDg12zBggXq0qWL1WO4HDhwQC1btqww5iUlJQoM5H8bgD9ilzvghzIyMtSjRw9NmjRJLVq0UExMjLZs2aKMjAy1a9dOkZGRWrhwoev5Z86c0fDhw9WqVStFRUVp9uzZKi0tdS3rrrvucj23YcOG+uGHH/7nNUeMGKH3339fL730kpo3b65169bpmWeeUVpamoYNG6brrrtOGRkZOnPmjEaPHq2bbrpJbdq00dNPPy2n0ylJcjqdmjp1qm688UbFxMTozTffVMOGDVVSUiLpwl6JdevWuV7zmWee0bBhw1y3t27dqh49eqhFixay2Wxav36967Hk5GRNnz5dPXr0UEREhPr27asTJ064Ht+0aZPre9u1a6eMjAzt2LFDrVu3ds0nSZ988olsNltlfzSA3yLogJ/avn27WrdurW+//VZ9+vTR0KFDtWPHDm3ZskWvvPKKJk6cqIKCAknSo48+qjNnzmjr1q1aunSpPvjgA7333nuX9HovvfSS7r33Xo0YMUK5ubmuPQcrV67U3Xffre+//14pKSkaOXKkAgMDtWXLFq1Zs0ZZWVlasGCBJGn+/Pn69NNPtWbNGq1atUoff/yxx6+fn5+vgQMH6k9/+pO+++47TZs2TWlpaTp+/LjrOR999JFefPFF7d69W0VFRXr55ZclXdizMGDAAD344IP65ptvtHbtWt18882KiopSgwYNtHbtWtcyFi1apH79+l3S3w1QExB0wEKpqalq0aKF68/8+fNdj1177bUaOHCgateurd69e+vQoUMaP3686tatq65duyooKEg5OTlyOp3KzMzUpEmTFBISomuvvVYPP/ywFi1aVC0zxsTEqEePHqpVq5bOnj2rzz77TE8++aTq1aunhg0b6qGHHlJmZqYkaenSpfrjH/+oJk2aqEGDBho9erTHr7No0SJ1795diYmJqlWrlmw2m9q1a6fPPvvM9ZwBAwaoRYsWuvzyy5WcnKxdu3ZJkhYvXqwuXbqoT58+qlOnjq666iq1adNGktS/f399+OGHkqRTp05p7dq16tOnT7X83QD+hDfDAAulp6dX+B56w4YNXV9fdtllkqRGjRqVu6+wsFAnTpxQcXGxmjZt6nqsadOmys/Pr5YZmzRp4vr64MGDKi4u1s033+y6r7S01PWcw4cPKzw83PVYs2bNPH6dgwcPatmyZXI4HK77SkpK1LFjR9ftn67/5ZdfrsLCQklSXl6eIiIifna5ffv2VXx8vAoLC7V06VLddtttCg0N9XguoKYg6EANd/XVV6tOnTo6ePCgbrzxRknSoUOHFBYWVi3L/+kBcuHh4apbt6727NnzswfHNW7cWHl5ea7bBw8eLPd4cHCwzp0757p99OjRcsvu27ev5syZc8kzhoeHa8eOHT/7WFhYmGJiYrR8+XJ98MEHSktLu+TlAzUBu9yBGq527dpKTk7W008/rYKCAh04cEBz585VSkpKtb9WaGiobDabpkyZorNnz6q0tFQ5OTmug9eSk5P1+uuvKy8vT//+97/1wgsvlPv+m2++WZmZmSouLlZ2dna599j79u0rh8OhNWvWyOl06vz581q/fn25XxAqkpKSonXr1mnJkiUqKSnRyZMntXPnTtfj/fv310svvaTdu3eXO0AQMAlBByx03333qXnz5q4/999/f6WWM336dAUHBysmJkY9e/bUvffeq9///vfVPO0FL7/8soqLi5WQkKDrr79eDzzwgGtLe9CgQeratatsNpu6dev2P/GcOHGi9u3bp+uvv14zZswo9152kyZNNH/+fD333HNq1aqV2rdvr5deesl1tP4vadq0qTIyMjR37lzdcMMN6tq1q/75z3+6Hu/Ro4cOHDigHj16KDg4uJr+JgD/wollAMO9++67+vDDD10HrvnS/v37dcsttyg/P9/yz6936NBBs2bN8qvP/QPViS10wHB79uzRtddea/UYlrq4a79Tp04WTwJ4DwfFAQZLTU3VDz/8oDfffNPqUSyTnJysb7/9Vi+//HK50+wCpmGXOwAABuDXVQAADEDQAQAwAEH3sp+eOMMErI9/Y338G+vj32r6+hB0L/PkM7Q1Cevj31gf/8b6+Leavj4EHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMEGj1AP4kZtbmal9mXKNSbTyaW63L3Dr+tmpdHgCg5mMLHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAB+E/R9+/Zp9OjRSktLs3oUAABqHK8GfdSoUYqMjFSnTp3K3b969WrFxsaqQ4cOev755yVJERERrq8BAMCl8WrQBwwYoIULF5a7z+l0auLEiVq4cKHWr1+vzMxM7dmzx5tjAABgPK8GPT4+Xg0aNCh33/bt2xUREaGIiAgFBQWpd+/eWrFihTfHAADAeIG+fsH8/Hw1adLEdTs8PFzbtm3TyZMn9fTTT2vXrl167rnnNGbMmJ/9/vT0dKWnp0uSevfurZSUlGqbLa5RabUt66IW9cskVe9yDx8+XK3LuxRnzpyx7LW9gfXxb6yPf2N9fC80NLTCx3we9IpcddVVmjVrltvnpaamKjU11SszbDya64Wllmrj0erdEfJLP1BfsPr1qxvr499YH//G+vgPnx/lHhYWpkOHDrlu5+XlKSwszNdjAABgFJ8HPSoqSjk5OcrNzVVRUZGWLFmiO+64w9djAABgFK8GfejQobrzzju1d+9etW3bVgsWLFBgYKCmT5+ufv36KSEhQb169VKrVq28OQYAAMbz6nvor7322s/en5iYqMTERG++NAAAvyp+c6Y4AABQeQQdAAADEHQAAAxA0AEAMABBBwDAAAQdAAADEHQAAAxA0AEAMABBBwDAAAQdAAAD1MigOxwOjR07Vg6Hw+pRAADwC35zPfRLYbfbZbfbrR4DAAC/USO30AEAQHkEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAA1MugOh0Njx46Vw+GwehQAAPxCoNUDVIbdbpfdbrd6DAAA/EaN3EIHAADlEXQAAAxA0AEAMABBBwDAAAQdAAADEHQAAAxA0AEAMABBBwDAAAQdAAADEHQAAAxA0AEAMABBBwDAAAQdAAADEHQAAAxA0AEAMABBBwDAAAQdAAADEHQAAAxA0AEAMECNDLrD4dDYsWPlcDisHgUAAL8QaPUAlWG322W3260eAwAAv1Ejt9ABAEB5BB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADFAjg+5wODR27Fg5HA6rRwEAwC8EWj1AZdjtdtntdqvHAADAb9TILXQAAFAeQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAA9TIoDscDo0dO1YOh8PqUQAA8AuBVg9QGXa7XXa73eoxAADwGzVyCx0AAJRH0AEAMABBBwDAAAQdAAADEHQAAAxA0AEAMABBBwDAAAQdAAADEHQAAAxA0AEAMABBBwDAAAQdAAADEHQAAAzgNuhLly5VQUGBJGn27NkaPHiwvvrqK68PBgAAPOc26M8++6xCQkK0adMmff755xo4cKAmTJjgi9kAAICH3Aa9Vq0LT1m1apVSU1OVlJSkoqIirw8GAAA85zboYWFhGjdunJYuXaru3bvrP//5j8rKynwxGwAA8JDboL/xxhvq2rWr3n//fV1xxRU6deqUpk6d6ovZAACAh9wGffz48erZs6datGghSQoNDdWiRYu8PhgAAPCc26B/88035W47nU6OcgcAwM8EVvTAc889p+eee07nz5/Xdddd53rfPCgoSIMGDfLZgAAAwL0Kgz5mzBiNGTNGTzzxhCZPnuzLmQAAwCVyu8v9scce06JFizR79mxJ0qFDh7R9+3avDwYAADznNuiPPPKIvvzySy1evFiSVK9ePT3yyCNeHwwAAHjObdC3bdumZ555RnXr1pUkXXnllSouLvb6YAAAwHNug16nTh05nU4FBARIko4fP+76GgAA+Ae3QR8yZIjuv/9+HTt2TE899ZR69uypMWPG+GI2AADgoQqPcr8oJSVF7dq10+eff66ysjKlp6erZcuWvpgNAAB4yKProZ84cUKXX365HnzwQV111VXKzc319lwAAOASuA36zJkz9eKLL+r555+XJJWUlGjYsGFeHwwAAHjObdCXL1+uBQsWKDg4WNKFc7kXFhZ6fTAAAOA5t0EPCgpSQECA68h2Yg4AgP9xG/Tk5GSNGzdOZ86c0fz585WSkqL77rvPF7MBAAAPuT3Kffjw4crKylL9+vW1d+9ePfLII7LZbD4YrWIOh0MOh0N2u112u93SWQAA8Adug75gwQLFxcVp2rRpPhjHM4QcAIDy3Ab90KFDGj9+vA4cOKC2bdsqLi5OsbGxatOmjS/mAwAAHnAb9IsXYjl37pzmz5+vl19+WZMmTdKRI0e8PhwAAPCM26DPnj1bW7ZsUWFhodq0aaNp06YpNjbWF7MBAAAPuQ368uXLFRgYqMTERMXHxysmJsZ15TUAAOAf3H5sbc2aNVq8eLGioqKUlZWlzp0766677vLFbAAAwENut9B3796tTZs2acOGDcrOzlaTJk3Y5Q4AgJ9xG/QnnnhCcXFxGjJkiKKiolSnTh1fzAUAAC6B213unTt31siRI3Xrrbe6Yj5v3jyvDwYAADznNugffPDB/9y3cOFCrwwDAAAqp8Jd7h999JEWL16s/fv3lzt3e0FBgRo0aOCT4QAAgGcqDHqHDh3UuHFjnTx5stz1z0NCQnTTTTf5ZDgAAOCZCoPerFkzNWvWTCtWrPDlPAAAoBLcvocOAAD8H0EHAMAAFQa9T58+kqTHH3/cZ8MAAIDKqfA99CNHjmjLli1auXKlevfurbKysnKPt2vXzuvDAQAAz1QY9EceeUSzZ89WXl6epkyZUu6xgIAAZWZmen04AADgmQqD3qtXL/Xq1UuzZ8/WuHHjfDkTAAC4RG7P5T5u3DitXLlSGzdulCQlJCQoKSnJ64MBAADPuT3K/YknntBrr72mli1bqmXLlnrttdf05JNP+mI2AADgIbdb6J999pnWrl2rWrUutH/AgAG6/fbbNWnSJK8PBwAAPOPR59BPnz7t+vrMmTNeGwYAAFSO2y300aNH6/bbb1dCQoLKysq0adMmts4BAPAzboPep08fxcfHKzs7W5I0ZcoUNW7c2OuDAQAAz7kNuiSFhobqjjvu8PYsAACgkjiXOwAABiDoAAAY4BeD7nQ6FRcX56tZAABAJf1i0GvXrq3rr79eBw8e9NU8AACgEtweFPfvf/9bHTt2VHR0tIKDg133L1iwwKuDAQAAz7kN+sSJE30xBwAAqAK3QU9ISNCBAwf0ww8/qEuXLvrxxx9VWlrqi9kAAICH3B7lPn/+fD3wwAMaP368JCk/P1+pqaleHwwAAHjObdDffPNNLV++XCEhIZKkFi1a6Pjx414fDAAAeM5t0OvWraugoCDX7ZKSEgUEBHh1KAAAcGncvoceHx+vOXPm6Pz588rKytJbb72lpKQkX8wGAAA85HYLffLkybrmmmsUGRmpd955R927d9df/vIXX8wGAAA85HYLvVatWurfv7+io6MVEBCg66+/nl3uAAD4GbdB//TTT/XnP/9ZERERKisr0/79+zVr1ix1797dF/MBAAAPuA361KlTlZmZqd/+9reSpJycHA0cOJCgAwDgR9y+hx4SEuKKuSRFRES4PsIGAAD8Q4Vb6J988okkqX379howYICSk5MVEBCgZcuWKSoqymcDAgAA9yoMusPhcH3dsGFDbdiwQZJ09dVX69y5c96fDAAAeKzCoL/44ou+nAMAAFSB24PicnNz9cYbb+jAgQMqKSlx3W/l5VMdDoccDofsdrvsdrtlc/i7mFmbq32ZcY1KtfFobrUuc+v426p1eQDwa+Q26Pfff78GDhwou93uN58/J+QAAJTnNuh169bV0KFDfTELAACoJLdBHzp0qGbOnCmbzVbuIi3t2rXz6mAAAMBzboP+r3/9S4sWLdIXX3yhWrUufGw9ICBAmZmZXh8OAAB4xm3Qly1bpq1bt5bbOgcAAP7F7ZniIiMjdfr0aV/MAgAAKsntFvrp06cVHx+v9u3bq27duq77rfzYGgAAKM9t0CdMmOCLOQAAQBW4DXpCQoIv5gAAAFXgNugRERGuE8oUFxeruLhYwcHBysnJ8fpwAADAM26Dvm/fPtfXZWVlWrFihbZu3erNmQAAwCVye5T7TwUEBKhHjx5au3att+YBAACV4HYL/eJ10SWptLRU2dnZuuyyy7w6FAAAuDRug/7T66IHBgaqWbNmSk9P9+pQAADg0rgNOtdFBwDA/1UY9FmzZlX4TQEBARo3bpxXBgIAAJeuwqAHBwf/z30//vij3n33XZ08eZKgAwDgRyoM+sMPP+z6uqCgQPPmzVNGRobuueceDRs2zCfDAQAAz/zie+inTp3S3LlztXjxYvXv31+rV6/WlVde6avZAACAhyoM+rRp07R8+XINGjRI69atU0hIiC/nAgAAl6DCoL/yyiuqW7eunn32Wc2ZM8d1f1lZmQICAjj1KwAAfqTCoB89etSXcwAAgCq4pFO/AgAA/0TQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwAEEHAMAABB0AAAMQdAAADEDQAQAwQKDVA1SGw+GQw+GQ3W6X3W63ehz4SMyszdW+zLhGpdp4NLdal7l1/G3VujwA8ESNDDohBwCgPHa5AwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGICgAwBgAIIOAIABCDoAAAYg6AAAGCDQ6gEuKiws1IQJExQUFKSEhASlpKRYPRIAADWGV7fQR40apcjISHXq1Knc/atXr1ZsbKw6dOig559/XpK0fPly9erVS3PmzNHKlSu9ORYAAMbxatAHDBighQsXlrvP6XRq4sSJWrhwodavX6/MzEzt2bNHeXl5Cg8PlyTVrl3bm2MBAGAcrwY9Pj5eDRo0KHff9u3bFRERoYiICAUFBal3795asWKFwsPDlZ+fL0kqLS315lgAABjH5++h5+fnq0mTJq7b4eHh2rZtm4YMGaKJEydq1apVstvtFX5/enq60tPTJUm9e/eu1vfa4xpV/y8SLeqXSare5R4+fNij57E+7lm5Pt5w5swZy177qU9zqn2ZLeqX6fuz1bvcx5Ku8+h5pq2PN1j535tU/T+jmvDzCQ0NrfAxvzkorl69enrxxRfdPi81NVWpqalemWHj0VwvLLVUG49W746QX/qB/hTr4wnr1sdbrHp9034+pq2Pt1j5+tX/M6rZPx+ff2wtLCxMhw4dct3Oy8tTWFiYr8cAAMAoPg96VFSUcnJylJubq6KiIi1ZskR33HGHr8cAAMAoXg360KFDdeedd2rv3r1q27atFixYoMDAQE2fPl39+vVTQkKCevXqpVatWnlzDAAAjOfV99Bfe+21n70/MTFRiYmJ3nxpAAB+VTj1KwAABiDoAAAYgKADAGAAgg4AgAEIOgAABiDoAAAYgKADAGAAgg4AgAEIOgAABiDoAAAYgKADAGCAGhl0h8OhsWPHyuFwWD0KAAB+wasXZ/EWu90uu91u9RgAAPiNGrmFDgAAygs4duxYmdVDmCw9PV2pqalWj1FtWB//xvr4N9bHv9X09WEL3cvS09OtHqFasT7+jfXxb6yPf6vp60PQAQAwAEEHAMAAtSdMmDDN6iFM165dO6tHqFasj39jffwb6+PfavL6cFAcAAAGYJc7AAAGIOhetHr1asXGxqpDhw56/vnnrR6nSkaNGqXIyEh16tTJ6lGqxaFDh9S7d28lJCSoY8eOmjdvntUjVcn58+eVlJQkm82mjh07asaMGVaPVC2cTqe6du2qgQMHWj1KlUVHR6tz586y2Wzq3r271eNU2enTp5WWlqa4uDjFx8fryy+/tHqkStu7d69sNpvrz3XXXadXX33V6rEuGbvcvcTpdCo2NlaLFi1SeHi4kpKSNG/ePN14441Wj1YpGzZsUL169TRixAh98cUXVo9TZYcPH9aRI0fUrl07FRQUqFu3bkpPT6+xP5+ysjIVFhYqJCRExcXF6tmzp5566inFxMRYPVqVzJ07V9nZ2Tp79qzee+89q8epkujoaK1atUpXX3211aNUi+HDhys2NlaDBg1SUVGRzp07pyuuuMLqsarM6XSqTZs2cjgcatasmdXjXBK20L1k+/btioiIUEREhIKCgtS7d2+tWLHC6rEqLT4+Xg0aNLB6jGoTGhrqOvglJCRELVu2VH5+vsVTVV5AQIBCQkIkScXFxSouLlZAQIDFU1VNXl6eVq1apfvuu8/qUfBfzpw5o02bNrl+NkFBQUbEXJI+//xzRURE1LiYSwTda/Lz89WkSRPX7fDw8BodDJPt379fO3fu1C233GL1KFXidDpls9kUGRkpm81W49fnscce09SpU1Wrlhn/mwoICFDfvn1de4NqstzcXF199dUaOXKkunbtqjFjxqiwsNDqsapFZmam+vTpY/UYlWLGvxSgkgoKCpSWlqYnn3xS9evXt3qcKqldu7aysrL09ddfa/v27dq9e7fVI1Xap59+qoYNG9bojxD9t08++URr1qzRwoUL9be//U0bNmyweqRKczqd+vrrr5WWlqa1a9cqODhYL7zwgtVjVVlRUZEcDod69epl9SiVQtC9JCwsTIcOHXLdzsvLU1hYmIUT4b8VFxcrLS1NKSkp6tmzp9XjVJsrrrhCHTt21Jo1a6wepdI2b96slStXKjo6WkOGDNE//vEPDRs2zOqxquTiv/+GDRuqR48e2rFjh8UTVV5YWJjCw8Nde4Huvvtuff311xZPVXWrV69W27Zt1ahRI6tHqRSC7iVRUVHKyclRbm6uioqKtGTJEt1xxx1Wj4X/U1ZWpjFjxqhly5Y1PhSSdPz4cZ0+fVqSdO7cOWVlZemGG26weKrKmzx5smtPw+uvv66OHTtq7ty5Vo9VaYWFhSooKHB9nZWVpVatWlk8VeU1btxY4eHh2rt3ryTpiy++qLEHlP7URx99pHvuucfqMSqtRl4PvSYIDAzU9OnT1a9fP5WWlup3v/tdjf4HPHToUK1fv14nT55U27ZtNWHChBp9sNLmzZv1wQcfqHXr1rLZbJIuvGebmJho7WCVdOTIEY0YMUKlpb0mG6kAAAVOSURBVKUqLS1VcnKykpKSrB4L/+fYsWMaPHiwJKmkpER9+vRRt27drB2qiqZPn66HHnpIxcXFat68eY3f5V5YWKh169Zp9uzZVo9SaXxsDQAAA7DLHQAAAxB0AAAMQNABADAAQQcAwAAEHQAAAxB0wI81bty43FWgquOqfYcPH1ZaWpokKTs7W48++miVl+lORkaGDh8+7LXlr1+/Xlu2bPHa8oGagM+hA37ssssuU1ZWVrUuMzQ0VG+99ZYkqX379mrfvn21Ll+6cGrQ2rVru24vXLhQkZGRCg0NrfbXki4EvV69err11lu9snygJmALHaiBoqOj9cQTT7iurf3VV1+pb9++6tChg95++21JF86GN23aNHXq1EmdO3dWZmampAsXo7l4Xfv169f/7LXGMzIyNGjQICUnJ+vWW2/VzJkzXY8tWrTIde31cePGyel0SpKaN2+uKVOmyGazlbs29rJly5Sdna2HHnpINptNGzdudJ1kZcWKFWrWrJmKiop0/vx51+Vec3Jy1K9fP3Xr1k09e/bUd999J+nCGfEGDx6sxMREJSYmavPmzdq/f7/efvttvfrqq67lA79GbKEDfuz8+fOuM9lJ0ujRo12npmzatKmysrI0adIkjRo1SsuXL9f58+fVuXNnDR48WJ988ol27dqlrKwsnThxQklJSYqLi/P4tbdv364vvvhCl19+uZKSkpSYmKjg4GAtWbJEy5cvV506dTRhwgR9+OGH6t+/v3788UdFR0fr8ccfL7ecXr166c0339Rf//pXtW/fXiUlJRo5cqQkadOmTWrVqpV27NihkpISRUdHS5LGjRunmTNnqkWLFtq2bZsmTJigzMxMPfbYY3rooYcUGxurgwcPql+/ftqwYYMGDx6sevXqafjw4VX8GwdqLoIO+LFf2uV+8doAkZGRKiwsVEhIiEJCQhQUFKTTp09r8+bNuueee1S7dm01atRI8fHxys7OVuvWrT16bZvNpquuukqSdNddd2nTpk0KDAzUV1995TpF7vnz53XNNddIunC1t7vvvtvtcgMDAxUREaFvv/1W27dv17Bhw7Rx40Y5nU7FxsaqoKBAX375pf7whz+4vqeoqEjShWtV79mzx3X/2bNnXedIB37tCDpQQwUFBUmSatWq5fr64u2SkpJqf72AgACVlZWpf//+mjx58v88Xrdu3XLvm/+SuLg4rV69WnXq1FHnzp01cuRIOZ1OTZs2TWVlZfrNb37zs7/IlJaWauXKlbrsssuqujqAcXgPHTBUbGyslixZIqfTqePHj2vjxo2Kiory+PvXrVunU6dO6dy5c1qxYoVuu+02de7cWR9//LGOHTsmSTp16pQOHDjgdlkhISHltqRjY2M1b948xcTE6JprrtGpU6f0/fffKzIyUvXr11fz5s21dOlSSReOBdi1a5ekC3sN3njjDddydu7c+bPLB36NCDrgxy6+h37xz3+/P/1L7rrrLt10002y2Wzq06ePpkyZosaNG0u6sLXtTnR0tNLS0tSlSxf17NlT7du314033qhHH31Uffv2VZcuXZSSkqIjR464XdaAAQM0fvx42Ww2nTt3TtHR0Tp27JjrPf3WrVsrMjLSNdfcuXP17rvvymazqWPHjlq5cqUk6emnn1Z2dra6dOmihIQEvfPOO5Iku92uv//97xwUh181rrYG/Mp89dVXmjx5spYtW1bhczIyMpSdna0ZM2b4cDIAVcEWOvArkp2draFDh2ro0KFWjwKgmrGFDgCAAdhCBwDAAAQdAAADEHQAAAxA0AEAMABBBwDAAAQdAAAD/D9uUKQCTCVfMwAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "NfgaWrSjGrEJ", "colab": { "base_uri": "https://localhost:8080/", "height": 153 }, "outputId": "b398d457-31f8-4593-f3c2-4c4e05d00e5c" }, "source": [ "emoji_summary['top_emoji'][:8]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[('🤣', 7),\n", " ('👏', 3),\n", " ('🙏', 2),\n", " ('😍', 1),\n", " ('\\U0001f97a', 1),\n", " ('🍔', 1),\n", " ('😋', 1),\n", " ('🤦🏻\\u200d♂️', 1)]" ] }, "metadata": { "tags": [] }, "execution_count": 240 } ] }, { "cell_type": "code", "metadata": { "id": "vQXFyGqwGrEN", "colab": { "base_uri": "https://localhost:8080/", "height": 153 }, "outputId": "7d070c6b-3d81-4566-d166-6f624c06574d" }, "source": [ "emoji_summary['top_emoji_text'][:8]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[('rolling on the floor laughing', 7),\n", " ('clapping hands', 3),\n", " ('folded hands', 2),\n", " ('smiling face with heart-eyes', 1),\n", " ('pleading face', 1),\n", " ('hamburger', 1),\n", " ('face savoring food', 1),\n", " ('man facepalming: light skin tone', 1)]" ] }, "metadata": { "tags": [] }, "execution_count": 241 } ] }, { "cell_type": "code", "metadata": { "id": "SDAf-JyfGrEP", "colab": { "base_uri": "https://localhost:8080/", "height": 499 }, "outputId": "1bf667ac-15ff-44a0-b963-441ff11a28c8" }, "source": [ "plt.figure(facecolor='#ebebeb', figsize=(8, 8))\n", "plt.barh([x[0] for x in emoji_summary['top_emoji_text'][:8]][::-1],\n", " [x[1] for x in emoji_summary['top_emoji_text'][:8]][::-1])\n", "plt.title('Top Emoji')\n", "plt.grid(alpha=0.5)\n", "plt.gca().set_frame_on(False)" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "eBuvxltgGrET", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "f089602b-55d0-4898-80c2-48a84b3e5c67" }, "source": [ "mention_summary = adv.extract_mentions(tweets_df.Tweets)\n", "mention_summary.keys()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "dict_keys(['mentions', 'mentions_flat', 'mention_counts', 'mention_freq', 'top_mentions', 'overview'])" ] }, "metadata": { "tags": [] }, "execution_count": 243 } ] }, { "cell_type": "code", "metadata": { "id": "aibfhLKsGrEW", "colab": { "base_uri": "https://localhost:8080/", "height": 85 }, "outputId": "2995eee9-1276-40c2-cf7b-88da46dd9561" }, "source": [ "mention_summary['overview']" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'mentions_per_post': 0.85,\n", " 'num_mentions': 17,\n", " 'num_posts': 20,\n", " 'unique_mentions': 17}" ] }, "metadata": { "tags": [] }, "execution_count": 244 } ] }, { "cell_type": "code", "metadata": { "id": "WAVuQkuzGrEY", "colab": { "base_uri": "https://localhost:8080/", "height": 272 }, "outputId": "91b1a379-81c0-4f93-da5e-88659262ac3d" }, "source": [ "mention_summary['mentions'][:15]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[[],\n", " [],\n", " [],\n", " ['@untkcqr', '@emilia14614784', '@luvwaifus'],\n", " ['@ohohuh1'],\n", " ['@sobrinov_edu'],\n", " ['@alelegabi'],\n", " ['@gordoomx'],\n", " [],\n", " [],\n", " ['@melody260515'],\n", " ['@daniloruiz1924', '@universitario'],\n", " [],\n", " [],\n", " ['@cin_deli']]" ] }, "metadata": { "tags": [] }, "execution_count": 245 } ] }, { "cell_type": "code", "metadata": { "id": "gQhMYNGTGrEc", "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "outputId": "4c1a1c2b-af7e-4e1b-8473-2352d800177c" }, "source": [ "mention_summary['mentions_flat'][:10]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['@untkcqr',\n", " '@emilia14614784',\n", " '@luvwaifus',\n", " '@ohohuh1',\n", " '@sobrinov_edu',\n", " '@alelegabi',\n", " '@gordoomx',\n", " '@melody260515',\n", " '@daniloruiz1924',\n", " '@universitario']" ] }, "metadata": { "tags": [] }, "execution_count": 246 } ] }, { "cell_type": "code", "metadata": { "id": "NHlbSDfoGrEe", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "5a6b7c96-3b24-4493-fa21-776bb205ef22" }, "source": [ "mention_summary['mention_counts'][:20]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[0, 0, 0, 3, 1, 1, 1, 1, 0, 0, 1, 2, 0, 0, 1, 0, 2, 2, 1, 1]" ] }, "metadata": { "tags": [] }, "execution_count": 247 } ] }, { "cell_type": "code", "metadata": { "id": "c1cnepuYGrEg", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "6dd840cf-dddc-494f-b24f-e74052e355de" }, "source": [ "mention_summary['mention_freq'][:15]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[(0, 8), (1, 8), (2, 3), (3, 1)]" ] }, "metadata": { "tags": [] }, "execution_count": 248 } ] }, { "cell_type": "code", "metadata": { "id": "hbJlkp0tGrEj", "colab": { "base_uri": "https://localhost:8080/", "height": 513 }, "outputId": "e28dee7c-2f9e-49c5-f7f6-4804c9894d9d" }, "source": [ "plt.figure(facecolor='#ebebeb', figsize=(8, 8))\n", "plt.bar([x[0] for x in mention_summary['mention_freq'][:15]],\n", " [x[1] for x in mention_summary['mention_freq'][:15]])\n", "plt.title('Mention frequency')\n", "plt.xlabel('Mention per tweet')\n", "plt.ylabel('Number of tweets')\n", "plt.grid(alpha=0.5)\n", "plt.yscale('log')\n", "plt.gca().set_frame_on(False)\n", "plt.savefig('snapshot/Mention Frequency.png');" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "_Pw8z76HGrEn", "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "outputId": "8c986dd8-a85f-4e1e-ff4a-ef64bb9ab6e8" }, "source": [ "mention_summary['top_mentions'][:10]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[('@untkcqr', 1),\n", " ('@emilia14614784', 1),\n", " ('@luvwaifus', 1),\n", " ('@ohohuh1', 1),\n", " ('@sobrinov_edu', 1),\n", " ('@alelegabi', 1),\n", " ('@gordoomx', 1),\n", " ('@melody260515', 1),\n", " ('@daniloruiz1924', 1),\n", " ('@universitario', 1)]" ] }, "metadata": { "tags": [] }, "execution_count": 250 } ] }, { "cell_type": "code", "metadata": { "id": "i1vc7iY9GrEp", "colab": { "base_uri": "https://localhost:8080/", "height": 499 }, "outputId": "bde27274-6c93-4942-d074-5eb6f7fda164" }, "source": [ "plt.figure(facecolor='#ebebeb', figsize=(8, 8))\n", "plt.barh([x[0] for x in mention_summary['top_mentions'][:15]][::-1],\n", " [x[1] for x in mention_summary['top_mentions'][:15]][::-1])\n", "plt.title('Top Mentions')\n", "plt.grid(alpha=0.5)\n", "plt.gca().set_frame_on(False)\n", "plt.savefig('snapshot/Top Mentions.png');" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "FzAtCJKhGrEu", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "2629cec7-2ef5-4888-f466-b0ec0805d3c3" }, "source": [ "tweets_df.columns" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Index(['date', 'Tweets', 'location', 'followers', 'word_count', 'char_count',\n", " 'avg_word', 'hastags', 'numerics', 'Tweets_clean', 'ACTIONS', 'NOUNS'],\n", " dtype='object')" ] }, "metadata": { "tags": [] }, "execution_count": 252 } ] }, { "cell_type": "code", "metadata": { "id": "UwKUY9mYGrEx", "colab": { "base_uri": "https://localhost:8080/", "height": 306 }, "outputId": "4cc0f2e7-82ad-44ee-ccb7-784dfd683b37" }, "source": [ "extracted_tweets = (tweets_df[['Tweets', 'followers']]\n", " .assign(hashtags=hashtag_summary['hashtags'],\n", " hashcounts=hashtag_summary['hashtag_counts'],\n", " mentions=mention_summary['mentions'],\n", " mention_count=mention_summary['mention_counts'],\n", " emoji=emoji_summary['emoji'],\n", " emoji_text=emoji_summary['emoji_text'],\n", " emoji_count=emoji_summary['emoji_counts'],))\n", "extracted_tweets.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Tweetsfollowershashtagshashcountsmentionsmention_countemojiemoji_textemoji_count
0Poco a poco todo se va acomodando170[]0[]0[][]0
1Hay gente que han perdido la vergüenza, pretender ser candidato de UPP.51[]0[]0[][]0
2Éste Viernes de Sankirtana, nos visita Hanumat Prana Das, un gran y entusiasta sankirtanero. \\nPerú /México /Colombi...401[]0[]0[][]0
3@UnTkCqr @Emilia14614784 @LuvWaifus Cuídate mucho Anja. Recuerda que Emilia es la Best Girl de Re:Zero por muchas r…...371[]0[@untkcqr, @emilia14614784, @luvwaifus]3[][]0
4@ohohuh1 :( cuídate mucho.\\n\\nQué desgracia, todo...196[]0[@ohohuh1]1[][]0
\n", "
" ], "text/plain": [ " Tweets ... emoji_count\n", "0 Poco a poco todo se va acomodando ... 0\n", "1 Hay gente que han perdido la vergüenza, pretender ser candidato de UPP. ... 0\n", "2 Éste Viernes de Sankirtana, nos visita Hanumat Prana Das, un gran y entusiasta sankirtanero. \\nPerú /México /Colombi... ... 0\n", "3 @UnTkCqr @Emilia14614784 @LuvWaifus Cuídate mucho Anja. Recuerda que Emilia es la Best Girl de Re:Zero por muchas r…... ... 0\n", "4 @ohohuh1 :( cuídate mucho.\\n\\nQué desgracia, todo... ... 0\n", "\n", "[5 rows x 9 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 253 } ] }, { "cell_type": "code", "metadata": { "id": "qI_rHEfEGrEz", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "e20083ea-6557-4c62-a77b-7eadb2b8c244" }, "source": [ "extracted_tweets.columns" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Index(['Tweets', 'followers', 'hashtags', 'hashcounts', 'mentions',\n", " 'mention_count', 'emoji', 'emoji_text', 'emoji_count'],\n", " dtype='object')" ] }, "metadata": { "tags": [] }, "execution_count": 254 } ] }, { "cell_type": "code", "metadata": { "id": "C_GRXb6oGrE1", "colab": { "base_uri": "https://localhost:8080/", "height": 49 }, "outputId": "29674214-d82b-43e2-fb83-ee99bd05367c" }, "source": [ "word_freq_hash = adv.word_frequency(extracted_tweets['hashtags'].str.join(' '), \n", " extracted_tweets['followers'].fillna(0))#.sort_values(['abs_freq'], ascending=False).head(20)\n", "word_freq_hash.head(10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordabs_freqwtd_freqrel_value
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [word, abs_freq, wtd_freq, rel_value]\n", "Index: []" ] }, "metadata": { "tags": [] }, "execution_count": 255 } ] }, { "cell_type": "code", "metadata": { "id": "v2Yar4ZhGrE3", "colab": { "base_uri": "https://localhost:8080/", "height": 49 }, "outputId": "473d7456-a1db-4b1b-9244-7ca8e794f4d7" }, "source": [ "extracted_tweets[extracted_tweets['hashtags'].str.join(' ')\n", " .str.contains('lima',case=False)]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Tweetsfollowershashtagshashcountsmentionsmention_countemojiemoji_textemoji_count
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Tweets, followers, hashtags, hashcounts, mentions, mention_count, emoji, emoji_text, emoji_count]\n", "Index: []" ] }, "metadata": { "tags": [] }, "execution_count": 256 } ] }, { "cell_type": "code", "metadata": { "id": "DxiJ0t74GrE5", "colab": { "base_uri": "https://localhost:8080/", "height": 359 }, "outputId": "26d556f6-c43a-465b-bca4-68e82785668b" }, "source": [ "word_freq_mention = adv.word_frequency(extracted_tweets['mentions'].str.join(' '), \n", " extracted_tweets['followers'].fillna(0))\n", " #.sort_values(['abs_freq'], ascending=False).head(20)\n", "word_freq_mention.head(10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordabs_freqwtd_freqrel_value
0@daniloruiz1924160146014.0
1@universitario160146014.0
2@gordoomx121612161.0
3@manago72121612161.0
4@emilia146147841371371.0
5@untkcqr1371371.0
6@luvwaifus1371371.0
7@marlovilela1329329.0
8@seleccionperu1329329.0
9@ohohuh11196196.0
\n", "
" ], "text/plain": [ " word abs_freq wtd_freq rel_value\n", "0 @daniloruiz1924 1 6014 6014.0\n", "1 @universitario 1 6014 6014.0\n", "2 @gordoomx 1 2161 2161.0\n", "3 @manago72 1 2161 2161.0\n", "4 @emilia14614784 1 371 371.0\n", "5 @untkcqr 1 371 371.0\n", "6 @luvwaifus 1 371 371.0\n", "7 @marlovilela 1 329 329.0\n", "8 @seleccionperu 1 329 329.0\n", "9 @ohohuh1 1 196 196.0" ] }, "metadata": { "tags": [] }, "execution_count": 257 } ] }, { "cell_type": "code", "metadata": { "id": "vZ71uAioGrE8", "colab": { "base_uri": "https://localhost:8080/", "height": 359 }, "outputId": "5b929965-0714-442d-827c-c381022d2c8f" }, "source": [ "word_freq_emoji = adv.word_frequency(extracted_tweets['emoji'].str.join(' '), \n", " extracted_tweets['followers'].fillna(0))#.sort_values(['abs_freq'], ascending=False).head(20)\n", "word_freq_emoji.head(10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordabs_freqwtd_freqrel_value
0👏364832161.0
1🙏243222161.0
2🥺121612161.0
3🍔121612161.0
4😋121612161.0
5🌛121612161.0
6🤣71946278.0
7😍1155155.0
8🤦🏻‍♂️1136136.0
9😪1135135.0
\n", "
" ], "text/plain": [ " word abs_freq wtd_freq rel_value\n", "0 👏 3 6483 2161.0\n", "1 🙏 2 4322 2161.0\n", "2 🥺 1 2161 2161.0\n", "3 🍔 1 2161 2161.0\n", "4 😋 1 2161 2161.0\n", "5 🌛 1 2161 2161.0\n", "6 🤣 7 1946 278.0\n", "7 😍 1 155 155.0\n", "8 🤦🏻‍♂️ 1 136 136.0\n", "9 😪 1 135 135.0" ] }, "metadata": { "tags": [] }, "execution_count": 258 } ] }, { "cell_type": "code", "metadata": { "id": "iHIxbDOcGrFA", "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "outputId": "4d21763c-e07f-40b4-fffd-119dbb38212c" }, "source": [ "[adv.extract_emoji(k)['emoji_flat_text'][0] for k in word_freq_emoji['word'][:10]]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['clapping hands',\n", " 'folded hands',\n", " 'pleading face',\n", " 'hamburger',\n", " 'face savoring food',\n", " 'first quarter moon face',\n", " 'rolling on the floor laughing',\n", " 'smiling face with heart-eyes',\n", " 'person facepalming',\n", " 'sleepy face']" ] }, "metadata": { "tags": [] }, "execution_count": 272 } ] }, { "cell_type": "code", "metadata": { "id": "c2KTfLl9GrFC", "colab": { "base_uri": "https://localhost:8080/", "height": 359 }, "outputId": "622abc67-815e-4313-8e0c-26ac1086562b" }, "source": [ "word_freq_emoji[:10].assign(emoji_text=[adv.extract_emoji(k)['emoji_flat_text'][0] for k in word_freq_emoji['word'][:10]])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordabs_freqwtd_freqrel_valueemoji_text
0👏364832161.0clapping hands
1🙏243222161.0folded hands
2🥺121612161.0pleading face
3🍔121612161.0hamburger
4😋121612161.0face savoring food
5🌛121612161.0first quarter moon face
6🤣71946278.0rolling on the floor laughing
7😍1155155.0smiling face with heart-eyes
8🤦🏻‍♂️1136136.0person facepalming
9😪1135135.0sleepy face
\n", "
" ], "text/plain": [ " word abs_freq wtd_freq rel_value emoji_text\n", "0 👏 3 6483 2161.0 clapping hands\n", "1 🙏 2 4322 2161.0 folded hands\n", "2 🥺 1 2161 2161.0 pleading face\n", "3 🍔 1 2161 2161.0 hamburger\n", "4 😋 1 2161 2161.0 face savoring food\n", "5 🌛 1 2161 2161.0 first quarter moon face\n", "6 🤣 7 1946 278.0 rolling on the floor laughing\n", "7 😍 1 155 155.0 smiling face with heart-eyes\n", "8 🤦🏻‍♂️ 1 136 136.0 person facepalming\n", "9 😪 1 135 135.0 sleepy face" ] }, "metadata": { "tags": [] }, "execution_count": 273 } ] }, { "cell_type": "code", "metadata": { "id": "_L20I1tJGrFF" }, "source": [ "sotu_retweets = np.load('tweets_dict.npy',allow_pickle=True)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "vGohnLcbGrFJ", "colab": { "base_uri": "https://localhost:8080/", "height": 497 }, "outputId": "237508bb-54eb-409e-d6d4-edb044848b6e" }, "source": [ "def buildDataFrameFromDict(mapping):\n", " df=[]\n", " for f in mapping:\n", " f_n = {}\n", " for k,item in f.items():\n", " if isinstance(item,dict):\n", " for i,j in item.items():\n", " f_n[k+'-'+i] = j\n", " else:\n", " f_n[k] = f[k]\n", " df.append(f_n)\n", " \n", " df = pd.DataFrame(df)\n", " return df\n", "\n", "sotu = buildDataFrameFromDict(sotu_retweets)\n", "sotu.head(3)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
created_atidid_strtextsourcetruncatedin_reply_to_status_idin_reply_to_status_id_strin_reply_to_user_idin_reply_to_user_id_strin_reply_to_screen_nameuser-iduser-id_struser-nameuser-screen_nameuser-locationuser-urluser-descriptionuser-translator_typeuser-protecteduser-verifieduser-followers_countuser-friends_countuser-listed_countuser-favourites_countuser-statuses_countuser-created_atuser-utc_offsetuser-time_zoneuser-geo_enableduser-languser-contributors_enableduser-is_translatoruser-profile_background_coloruser-profile_background_image_urluser-profile_background_image_url_httpsuser-profile_background_tileuser-profile_link_coloruser-profile_sidebar_border_coloruser-profile_sidebar_fill_color...quoted_status-is_quote_statusquoted_status-quote_countquoted_status-reply_countquoted_status-retweet_countquoted_status-favorite_countquoted_status-entitiesquoted_status-favoritedquoted_status-retweetedquoted_status-filter_levelquoted_status-langquoted_status_permalink-urlquoted_status_permalink-expandedquoted_status_permalink-displayquoted_status-extended_tweetquoted_status-possibly_sensitivedisplay_text_rangeextended_tweet-full_textextended_tweet-display_text_rangeextended_tweet-entitiespossibly_sensitiveretweeted_status-extended_entitiesentities-mediaextended_entities-mediaquoted_status-extended_entitiesquoted_status-quoted_status_idquoted_status-quoted_status_id_strextended_tweet-extended_entitiesplace-idplace-urlplace-place_typeplace-nameplace-full_nameplace-country_codeplace-countryplace-bounding_boxplace-attributesgeo-typegeo-coordinatescoordinates-typecoordinates-coordinates
0Fri Oct 09 03:56:50 +0000 202013144144916485652481314414491648565248RT @lovtyholic: ‼️‼️ayuden a reportar esta cuenta NO INTERACTUAR NO CITAR NO RESPONDER solo reporten el tweet y la c...<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>FalseNaNNoneNaNNoneNone32768782383276878238HELLO JONGDAE 𐂅moonsootyongNonehttps://invisiblestring.carrd.co#도경수 🐧 -108 ۵she/her۵MoonDanSe|Tyongf♥️ #문태일 #태용 #정한 #เตนล์ ...noneFalseFalse14319839061220735Sun Jul 12 01:50:27 +0000 2015NoneNoneFalseNoneFalseFalse000000http://abs.twimg.com/images/themes/theme1/bg.pnghttps://abs.twimg.com/images/themes/theme1/bg.pngFalseF58EA8000000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1Fri Oct 09 03:56:50 +0000 202013144144940393144321314414494039314432¿qué hora es en su país cuando ven este tweet??? aquí son las 10:56<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>FalseNaNNoneNaNNoneNone12306976731051335701230697673105133570𝐋𝐢𝐬𝐚 ♡𝐀𝐑𝐌𝐘Winter_bear1997NoneNone𝑌𝑜𝑢’𝑣𝑒 𝑠𝘩𝑜𝑤𝑛 𝑚𝑒 𝐼 𝘩𝑎𝑣𝑒 𝑟𝑒𝑎𝑠𝑜𝑛𝑠 𝐼 𝑠𝘩𝑜𝑢𝑙𝑑 𝑙𝑜𝑣𝑒 𝑚𝑦𝑠𝑒𝑙f 𝐎𝐓𝟕 𝐒𝐭𝐚𝐧 𝐹𝑎𝑛 𝑎𝑐𝑐𝑜𝑢𝑛𝑡noneFalseFalse3503196935137848151Fri Feb 21 03:36:22 +0000 2020NoneNoneFalseNoneFalseFalseF5F8FAFalse1DA1F2C0DEEDDDEEF6...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2Fri Oct 09 03:56:51 +0000 202013144144960023224321314414496002322432RT @Juuandedios_p: Este tweet podra llegar a 500 comentarios con la siguiente frase:\\n\\nCANCIONES PARA MI EX\\n\\n15 c...<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>FalseNaNNoneNaNNoneNone12680483432020090881268048343202009088𝙚𝙙𝙖🦋edaloveskeniaNoneNone•𝐜𝐮𝐞𝐧𝐭𝐚 𝐩𝐚𝐫𝐚 𝐯𝐨𝐭𝐨𝐬•\\n𝐜𝐮𝐞𝐧𝐭𝐚 𝐩𝐫𝐢𝐧𝐜𝐢𝐩𝐚𝐥: @edapdc1\\n 𝐟𝐚𝐧 𝐚𝐜𝐜𝐨𝐮𝐧𝐭noneFalseFalse1071360803584Wed Jun 03 05:14:31 +0000 2020NoneNoneFalseNoneFalseFalseF5F8FAFalse1DA1F2C0DEEDDDEEF6...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

3 rows × 160 columns

\n", "
" ], "text/plain": [ " created_at ... coordinates-coordinates\n", "0 Fri Oct 09 03:56:50 +0000 2020 ... NaN\n", "1 Fri Oct 09 03:56:50 +0000 2020 ... NaN\n", "2 Fri Oct 09 03:56:51 +0000 2020 ... NaN\n", "\n", "[3 rows x 160 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 277 } ] }, { "cell_type": "code", "metadata": { "id": "atLOTuX4GrFO", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "e0b0f80f-ca77-4268-a05c-b29669cb05aa" }, "source": [ "pprint(sotu.columns.values)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "array(['created_at', 'id', 'id_str', 'text', 'source', 'truncated',\n", " 'in_reply_to_status_id', 'in_reply_to_status_id_str',\n", " 'in_reply_to_user_id', 'in_reply_to_user_id_str',\n", " 'in_reply_to_screen_name', 'user-id', 'user-id_str', 'user-name',\n", " 'user-screen_name', 'user-location', 'user-url',\n", " 'user-description', 'user-translator_type', 'user-protected',\n", " 'user-verified', 'user-followers_count', 'user-friends_count',\n", " 'user-listed_count', 'user-favourites_count',\n", " 'user-statuses_count', 'user-created_at', 'user-utc_offset',\n", " 'user-time_zone', 'user-geo_enabled', 'user-lang',\n", " 'user-contributors_enabled', 'user-is_translator',\n", " 'user-profile_background_color',\n", " 'user-profile_background_image_url',\n", " 'user-profile_background_image_url_https',\n", " 'user-profile_background_tile', 'user-profile_link_color',\n", " 'user-profile_sidebar_border_color',\n", " 'user-profile_sidebar_fill_color', 'user-profile_text_color',\n", " 'user-profile_use_background_image', 'user-profile_image_url',\n", " 'user-profile_image_url_https', 'user-profile_banner_url',\n", " 'user-default_profile', 'user-default_profile_image',\n", " 'user-following', 'user-follow_request_sent', 'user-notifications',\n", " 'geo', 'coordinates', 'place', 'contributors',\n", " 'retweeted_status-created_at', 'retweeted_status-id',\n", " 'retweeted_status-id_str', 'retweeted_status-text',\n", " 'retweeted_status-display_text_range', 'retweeted_status-source',\n", " 'retweeted_status-truncated',\n", " 'retweeted_status-in_reply_to_status_id',\n", " 'retweeted_status-in_reply_to_status_id_str',\n", " 'retweeted_status-in_reply_to_user_id',\n", " 'retweeted_status-in_reply_to_user_id_str',\n", " 'retweeted_status-in_reply_to_screen_name',\n", " 'retweeted_status-user', 'retweeted_status-geo',\n", " 'retweeted_status-coordinates', 'retweeted_status-place',\n", " 'retweeted_status-contributors',\n", " 'retweeted_status-is_quote_status',\n", " 'retweeted_status-extended_tweet', 'retweeted_status-quote_count',\n", " 'retweeted_status-reply_count', 'retweeted_status-retweet_count',\n", " 'retweeted_status-favorite_count', 'retweeted_status-entities',\n", " 'retweeted_status-favorited', 'retweeted_status-retweeted',\n", " 'retweeted_status-possibly_sensitive',\n", " 'retweeted_status-filter_level', 'retweeted_status-lang',\n", " 'is_quote_status', 'quote_count', 'reply_count', 'retweet_count',\n", " 'favorite_count', 'entities-hashtags', 'entities-urls',\n", " 'entities-user_mentions', 'entities-symbols', 'favorited',\n", " 'retweeted', 'filter_level', 'lang', 'timestamp_ms',\n", " 'retweeted_status-quoted_status_id',\n", " 'retweeted_status-quoted_status_id_str',\n", " 'retweeted_status-quoted_status',\n", " 'retweeted_status-quoted_status_permalink', 'quoted_status_id',\n", " 'quoted_status_id_str', 'quoted_status-created_at',\n", " 'quoted_status-id', 'quoted_status-id_str', 'quoted_status-text',\n", " 'quoted_status-display_text_range', 'quoted_status-source',\n", " 'quoted_status-truncated', 'quoted_status-in_reply_to_status_id',\n", " 'quoted_status-in_reply_to_status_id_str',\n", " 'quoted_status-in_reply_to_user_id',\n", " 'quoted_status-in_reply_to_user_id_str',\n", " 'quoted_status-in_reply_to_screen_name', 'quoted_status-user',\n", " 'quoted_status-geo', 'quoted_status-coordinates',\n", " 'quoted_status-place', 'quoted_status-contributors',\n", " 'quoted_status-is_quote_status', 'quoted_status-quote_count',\n", " 'quoted_status-reply_count', 'quoted_status-retweet_count',\n", " 'quoted_status-favorite_count', 'quoted_status-entities',\n", " 'quoted_status-favorited', 'quoted_status-retweeted',\n", " 'quoted_status-filter_level', 'quoted_status-lang',\n", " 'quoted_status_permalink-url', 'quoted_status_permalink-expanded',\n", " 'quoted_status_permalink-display', 'quoted_status-extended_tweet',\n", " 'quoted_status-possibly_sensitive', 'display_text_range',\n", " 'extended_tweet-full_text', 'extended_tweet-display_text_range',\n", " 'extended_tweet-entities', 'possibly_sensitive',\n", " 'retweeted_status-extended_entities', 'entities-media',\n", " 'extended_entities-media', 'quoted_status-extended_entities',\n", " 'quoted_status-quoted_status_id',\n", " 'quoted_status-quoted_status_id_str',\n", " 'extended_tweet-extended_entities', 'place-id', 'place-url',\n", " 'place-place_type', 'place-name', 'place-full_name',\n", " 'place-country_code', 'place-country', 'place-bounding_box',\n", " 'place-attributes', 'geo-type', 'geo-coordinates',\n", " 'coordinates-type', 'coordinates-coordinates',\n", " 'retweeted_status-user-screen_name'], dtype=object)\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "F2vISmP3GrFQ" }, "source": [ "data = sotu.sample(10)['user-screen_name']\n", "sotu['retweeted_status-user-screen_name'] = np.random.choice(data,len(sotu))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "kSEctOb2GrFS", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "628dff3e-74ff-47d2-8f53-df6c24e1ebbe" }, "source": [ "sotu[['user-screen_name','retweeted_status-user-screen_name']].head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user-screen_nameretweeted_status-user-screen_name
0moonsootyongwhotamelonsugar
1Winter_bear1997YoceOs
2edaloveskeniaConstanpr
3LeeBer37753434leonXD__
4FelipeCucalonMichi_Ca_Pin
\n", "
" ], "text/plain": [ " user-screen_name retweeted_status-user-screen_name\n", "0 moonsootyong whotamelonsugar\n", "1 Winter_bear1997 YoceOs\n", "2 edaloveskenia Constanpr\n", "3 LeeBer37753434 leonXD__\n", "4 FelipeCucalon Michi_Ca_Pin" ] }, "metadata": { "tags": [] }, "execution_count": 279 } ] }, { "cell_type": "code", "metadata": { "id": "gRs2rtruGrFW", "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "outputId": "125bab97-5989-4687-d3b8-7a4e39424579" }, "source": [ "G_rt = nx.from_pandas_edgelist(\n", " sotu,\n", " source = 'user-screen_name', \n", " target = 'retweeted_status-user-screen_name',\n", " create_using = nx.DiGraph())\n", " \n", "print('Nodes in RT network:', len(G_rt.nodes()))\n", "print('Edges in RT network:', len(G_rt.edges()))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Nodes in RT network: 589\n", "Edges in RT network: 609\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "D_rZumXjGrFX", "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "outputId": "d9a80ab8-0fbb-4738-eedb-2137ab3bfaf1" }, "source": [ "G_reply = nx.from_pandas_edgelist(\n", " sotu,\n", " source = 'user-screen_name', \n", " target = 'in_reply_to_screen_name',\n", " create_using = nx.DiGraph())\n", " \n", "print('Nodes in reply network:', len(G_reply.nodes()))\n", "\n", "print('Edges in reply network:', len(G_reply.edges()))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Nodes in reply network: 749\n", "Edges in reply network: 598\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "xkk0qnxRGrFZ", "colab": { "base_uri": "https://localhost:8080/", "height": 248 }, "outputId": "0fd3c035-22e6-4782-9b5e-40e9ee838f90" }, "source": [ "pos = nx.random_layout(G_rt)\n", "\n", "sizes = [x[1] for x in G_rt.degree()]\n", "\n", "nx.draw_networkx(G_rt, pos, \n", " with_labels = False, \n", " node_size = sizes,\n", " width = 0.1, alpha = 0.7,\n", " arrowsize = 2, linewidths = 0)\n", "\n", "plt.savefig('snapshot/lima_tweets_influencing_graph.png')\n", "plt.axis('off'); plt.show()" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "2MrzgNFfGrFb", "colab": { "base_uri": "https://localhost:8080/", "height": 248 }, "outputId": "9b8c7c7e-79b2-406b-d59c-d4a317baa626" }, "source": [ "pos = nx.random_layout(G_reply)\n", "sizes = [x[1] for x in G_reply.degree()]\n", "\n", "nx.draw_networkx(G_reply, pos, \n", " with_labels = False, \n", " node_size = sizes,\n", " width = 0.1, alpha = 0.7,\n", " arrowsize = 2, linewidths = 0)\n", "\n", "plt.axis('off'); plt.show()" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "iyJEPk8tGrFe", "colab": { "base_uri": "https://localhost:8080/", "height": 391 }, "outputId": "ebb15d1e-95d2-4ed2-ffb6-1fb256499ee4" }, "source": [ "column_names = ['screen_name', 'degree_centrality']\n", "rt_centrality = nx.in_degree_centrality(G_rt)\n", "\n", "reply_centrality = nx.in_degree_centrality(G_reply)\n", "\n", "rt = pd.DataFrame(list(rt_centrality.items()), columns = column_names)\n", "reply = pd.DataFrame(list(reply_centrality.items()), columns = column_names)\n", "\n", "display(rt.sort_values('degree_centrality', ascending = False).head())\n", "\n", "display(reply.sort_values('degree_centrality', ascending = False).head())" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
screen_namedegree_centrality
14corzin4430.134354
7leonXD__0.127551
1whotamelonsugar0.119048
5Constanpr0.103741
34Danna_ArdilaO0.102041
\n", "
" ], "text/plain": [ " screen_name degree_centrality\n", "14 corzin443 0.134354\n", "7 leonXD__ 0.127551\n", "1 whotamelonsugar 0.119048\n", "5 Constanpr 0.103741\n", "34 Danna_ArdilaO 0.102041" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
screen_namedegree_centrality
1None0.570856
111leoni708039950.002674
369lovely_min_k0.002674
546jmvalerapiedras0.002674
62jimenaajimenezr0.002674
\n", "
" ], "text/plain": [ " screen_name degree_centrality\n", "1 None 0.570856\n", "111 leoni70803995 0.002674\n", "369 lovely_min_k 0.002674\n", "546 jmvalerapiedras 0.002674\n", "62 jimenaajimenezr 0.002674" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "9fT6t9GOGrFf", "colab": { "base_uri": "https://localhost:8080/", "height": 391 }, "outputId": "cbb824d4-3f20-485e-f560-686e43584b40" }, "source": [ "column_names = ['screen_name', 'betweenness_centrality']\n", "# Generate betweenness centrality for retweets \n", "rt_centrality = nx.betweenness_centrality(G_rt)\n", "\n", "# Generate betweenness centrality for replies \n", "reply_centrality = nx.betweenness_centrality(G_reply)\n", "\n", "# Store centralities in data frames\n", "rt = pd.DataFrame(list(rt_centrality.items()), columns = column_names)\n", "reply = pd.DataFrame(list(reply_centrality.items()), columns = column_names)\n", "\n", "# Print first five results in descending order of centrality\n", "display(rt.sort_values('betweenness_centrality', ascending = False).head())\n", "\n", "# Print first five results in descending order of centrality\n", "display(reply.sort_values('betweenness_centrality', ascending = False).head())" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
screen_namebetweenness_centrality
14corzin4430.004016
51kmiilinn0.002161
9Michi_Ca_Pin0.001792
1whotamelonsugar0.001751
28Crackalaka20.001686
\n", "
" ], "text/plain": [ " screen_name betweenness_centrality\n", "14 corzin443 0.004016\n", "51 kmiilinn 0.002161\n", "9 Michi_Ca_Pin 0.001792\n", "1 whotamelonsugar 0.001751\n", "28 Crackalaka2 0.001686" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
screen_namebetweenness_centrality
261rosamt50.000002
509stigmxjk0.000002
76LuSKabeche0.000002
0moonsootyong0.000000
502Castelopio0.000000
\n", "
" ], "text/plain": [ " screen_name betweenness_centrality\n", "261 rosamt5 0.000002\n", "509 stigmxjk 0.000002\n", "76 LuSKabeche 0.000002\n", "0 moonsootyong 0.000000\n", "502 Castelopio 0.000000" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "Kq-0BfOAGrFh", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "f8dc3f76-b6a1-4078-c300-9dd914436054" }, "source": [ "column_names = ['screen_name', 'degree']\n", "\n", "degree_rt = pd.DataFrame(list(G_rt.in_degree()), columns = column_names)\n", "degree_reply = pd.DataFrame(list(G_reply.in_degree()), columns = column_names)\n", "\n", "ratio = degree_rt.merge(degree_reply, on = 'screen_name', suffixes = ('_rt', '_reply'))\n", "\n", "ratio['ratio'] = ratio['degree_reply'] / ratio['degree_rt']\n", "\n", "ratio = ratio[ratio['degree_rt'] >= 5]\n", "\n", "display(ratio.sort_values('ratio', ascending = False).head())" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
screen_namedegree_rtdegree_replyratio
14corzin4437910.012658
1whotamelonsugar7000.000000
3YoceOs5100.000000
5Constanpr6100.000000
7leonXD__7500.000000
\n", "
" ], "text/plain": [ " screen_name degree_rt degree_reply ratio\n", "14 corzin443 79 1 0.012658\n", "1 whotamelonsugar 70 0 0.000000\n", "3 YoceOs 51 0 0.000000\n", "5 Constanpr 61 0 0.000000\n", "7 leonXD__ 75 0 0.000000" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "MQXFda9k8ihF" }, "source": [ "%%capture\n", "!jupyter nbconvert --to html Live_tweeting.ipynb --output index.html\n", "with open('index.html', 'r') as file:\n", " content = file.read()\n", "content = content.replace(\"div.input_area {\",\"div.input_area {\\n\\tdisplay: none;\") \n", "content = content.replace(\".prompt {\",\".prompt {\\n\\tdisplay: none;\")\n", "f = open('index.html', 'w')\n", "f.write(content)\n", "f.close()" ], "execution_count": null, "outputs": [] } ] }