{ "cells": [ { "cell_type": "code", "execution_count": 124, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Import the libraries that we're going to use\n", "from nltk.corpus import twitter_samples\n", "from nltk import casual_tokenize, word_tokenize, sent_tokenize\n", "from nltk.stem import PorterStemmer\n", "from nltk.corpus import stopwords\n", "from nltk.tag import StanfordNERTagger\n", "\n", "from collections import Counter\n", "import string\n", "import re\n", "\n", "from gensim.models.ldamodel import LdaModel\n", "from gensim import corpora\n", "\n", "from pycorenlp import StanfordCoreNLP" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Lowercase all text\n", "#\n", "# @param strings\n", "# An array of sentences (not word tokenized)\n", "# @returns an array of lowercased sentences\n", "\n", "def lowercase(strings):\n", " return [i.lower() for i in strings]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Tokenize into words (using Tweet tokenizer, probably not suitable for non-Tweet text)\n", "#\n", "# @param strings\n", "# An array of sentences\n", "# @returns an array of tokenized sentences (each tokenized sentence is an array, so this returns an array of arrays)\n", "\n", "def tokenize_tweets(strings):\n", " return [casual_tokenize(i) for i in strings]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Tokenize into words (regular text, not Tweet text)\n", "#\n", "# @param strings\n", "# An array of sentences\n", "# @returns an array of tokenized sentences (each tokenized sentence is an array, so this returns an array of arrays)\n", "\n", "def tokenize_regular(strings):\n", " return [word_tokenize(i) for i in strings]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Tokenize into sentences (regular text, not Tweet text)\n", "#\n", "# @param text\n", "# A string of text (not broken into sentences)\n", "# @returns an array of sentences\n", "\n", "def tokenize_sentences(text):\n", " return sent_tokenize(text)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Remove all punctuation from sentences\n", "#\n", "# @param tokenized\n", "# An array of tokenized sentences\n", "# @returns an array of tokenized sentences with no punctuation\n", "\n", "def remove_punctuation(tokenized):\n", " stripped = [[''.join([letter for letter in word if letter not in string.punctuation]) for word in sentence]\\\n", " for sentence in tokenized]\n", " stripped = [[word for word in sentence if len(word) > 0] for sentence in stripped]\n", " return [i for i in stripped if len(i) > 0]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Either remove all digits from sentences or replace them with pound sign\n", "#\n", "# @param tokenized\n", "# An array of tokenized sentences\n", "# @ param replace\n", "# Whether to replace the digits with # or not (default = True)\n", "# @returns an array of sentences with digits removed or replaced\n", "\n", "def remove_digits(tokenized,replace=True):\n", " if replace:\n", " stripped = [[re.sub('[0123456789]','#',word) for word in sentence] for sentence in tokenized]\n", " else:\n", " stripped = [[re.sub('[0123456789]','',word) for word in sentence] for sentence in 
tokenized]\n", " stripped = [[word for word in sentence if len(word) > 0 ] for sentence in stripped]\n", " return [i for i in stripped if len(i) > 0] " ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Stem all words\n", "#\n", "# @param tokenized\n", "# An array of tokenized sentences\n", "# @returns an array of tokenized sentences with all of the words stemmed\n", "\n", "def stem_words(tokenized):\n", " stemmer = PorterStemmer()\n", " return [[stemmer.stem(word) for word in sentence] for sentence in tokenized]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Remove words that occur less than a certain number of times\n", "#\n", "# @param tokenized\n", "# An array of tokenized sentences\n", "# @param threshold\n", "# The minimum number of times a word has to occur before it is removed (default = 5)\n", "# @returns an array of tokenized sentences with rare words replaced with 'UNK'\n", "\n", "def remove_rare_words(tokenized,threshold=5):\n", " #count the number of times each word appears in all the sentences\n", " counter = Counter([word for sentence in tokenized for word in sentence])\n", " \n", " #remove words that appear less than the threshold number of times (replace with 'UNK')\n", " return [[word if counter[word] >= threshold else 'UNK' for word in sentence] for sentence in tokenized]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Remove links (text that begins with 'http://' or 'https://')\n", "#\n", "# @param tokenized\n", "# An array of tokenized sentences\n", "# @returns an array of tokenized sentences with links replaced with 'LINK'\n", "\n", "def remove_links(tokenized):\n", " return [[word if word[:7] != 'http://' and word[:8] != 'https://' else 'LINK' for word in sentence] \\\n", " for sentence in tokenized]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "# Remove all (English) stopwords\n", "#\n", "# @param tokenized\n", "# An array of tokenized sentences\n", "# @returns an array of tokenized sentences with stopwords removed\n", "\n", "def remove_stopwords(tokenized):\n", " stop = set(stopwords.words('english'))\n", " stripped = [[word for word in sentence if word not in stop] for sentence in tokenized]\n", " return [i for i in stripped if len(i) > 0] " ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Extract NER relationships from text\n", "# Make sure that you've downloaded the Stanford NER files\n", "# This only extracts persons, organizations, and locations\n", "#\n", "# @param tokenized\n", "# An array of tokenized sentences\n", "# @returns an array of sentences where each sentence is a list of tuples of (word, entity label)\n", "\n", "def extract_ner(tokenized):\n", " # Download these files from https://nlp.stanford.edu/software/\n", " # Make sure the paths are set correctly\n", " st = StanfordNERTagger('/Users/laura/software/stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',\\\n", " '/Users/laura/software/stanford-ner/stanford-ner.jar') \n", " return st.tag_sents(tokenized) #Batch processing is important - speeds it up tremendously!" 
] }, { "cell_type": "code", "execution_count": 113, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Calculate topics from text using LDA\n", "#\n", "# @param tokenized\n", "# An array of tokenized sentences\n", "# @param ignore\n", "# Words to ignore when creating topics\n", "# @param num_topics\n", "# The number of topics to calculate\n", "# @returns\n", "# The dictionary of the corpus in the correct format for the topic model\n", "# The corpus in the correct format for the topic model\n", "# A trained topic model\n", "\n", "def topic_modeling(tokenized,ignore=set(),num_topics = 10):\n", " dictionary = corpora.Dictionary([[word for word in sentence if word not in ignore] for sentence in tokenized])\n", " corpus = [dictionary.doc2bow(sentence) for sentence in tokenized]\n", " return (dictionary,corpus,LdaModel(corpus, num_topics=num_topics, id2word=dictionary))" ] }, { "cell_type": "code", "execution_count": 146, "metadata": {}, "outputs": [], "source": [ "# Calculate sentiment of tokenized sentences\n", "# NOTE: before you do this, you need to make sure that the Stanford CoreNLP server is up and running! To do that:\n", "# 1. Download it - wget http://nlp.stanford.edu/software/stanford-corenlp-full-2018-01-31.zip\n", "# 2. Unzip it - unzip stanford-corenlp-full-2018-01-31.zip\n", "# 3. Change to directory - cd stanford-corenlp-full-2018-01-31\n", "# 4. Run it - java -mx5g -cp \"*\" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -timeout 10000000\n", "#\n", "# @param tokenized\n", "# An array of tokenized sentences\n", "# @returns the result for the Stanford CoreNPL Sentiment Analysis tool\n", "\n", "def sentiment(tokenized):\n", " nlp = StanfordCoreNLP('http://localhost:9000')\n", " \n", " results = []\n", " for tokens in tokenized:\n", " results.append(nlp.annotate(' '.join(tokens),\n", " properties={\n", " 'annotators': 'sentiment',\n", " 'outputFormat': 'json',\n", " 'timeout': 10000000,\n", " }))\n", " return results" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Ok, let's get some sample tweets! (you can replace this with your own data)\n", "strings = twitter_samples.strings('positive_tweets.json') + \\\n", " twitter_samples.strings('negative_tweets.json')" ] }, { "cell_type": "code", "execution_count": 187, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['#FollowFriday @France_Inte @PKuchly57 @Milipol_Paris for being top engaged members in my community this week :)',\n", " '@Lamb2ja Hey James! How odd :/ Please call our Contact Centre on 02392441234 and we will be able to assist you :) Many thanks!',\n", " '@DespiteOfficial we had a listen last night :) As You Bleed is an amazing track. When are you in Scotland?!',\n", " '@97sides CONGRATS :)',\n", " 'yeaaaah yippppy!!! my accnt verified rqst has succeed got a blue tick mark on my fb profile :) in 15 days',\n", " '@BhaktisBanter @PallaviRuhail This one is irresistible :)\\n#FlipkartFashionFriday http://t.co/EbZ0L2VENM',\n", " \"We don't like to keep our lovely customers waiting for long! We hope you enjoy! Happy Friday! - LWWF :) https://t.co/smyYriipxI\",\n", " '@Impatientraider On second thought, there’s just not enough time for a DD :) But new shorts entering system. 
{ "cell_type": "code", "execution_count": 146, "metadata": {}, "outputs": [], "source": [ "# Calculate sentiment of tokenized sentences\n", "# NOTE: before you do this, you need to make sure that the Stanford CoreNLP server is up and running! To do that:\n", "# 1. Download it - wget http://nlp.stanford.edu/software/stanford-corenlp-full-2018-01-31.zip\n", "# 2. Unzip it - unzip stanford-corenlp-full-2018-01-31.zip\n", "# 3. Change to directory - cd stanford-corenlp-full-2018-01-31\n", "# 4. Run it - java -mx5g -cp \"*\" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -timeout 10000000\n", "#\n", "# @param tokenized\n", "#   An array of tokenized sentences\n", "# @returns the results from the Stanford CoreNLP Sentiment Analysis tool\n", "\n", "def sentiment(tokenized):\n", "    nlp = StanfordCoreNLP('http://localhost:9000')\n", "    \n", "    results = []\n", "    for tokens in tokenized:\n", "        results.append(nlp.annotate(' '.join(tokens),\n", "                       properties={\n", "                           'annotators': 'sentiment',\n", "                           'outputFormat': 'json',\n", "                           'timeout': 10000000,\n", "                       }))\n", "    return results" ] },
{ "cell_type": "code", "execution_count": 91, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Ok, let's get some sample tweets! (you can replace this with your own data)\n", "strings = twitter_samples.strings('positive_tweets.json') + \\\n", "          twitter_samples.strings('negative_tweets.json')" ] },
{ "cell_type": "code", "execution_count": 187, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['#FollowFriday @France_Inte @PKuchly57 @Milipol_Paris for being top engaged members in my community this week :)',\n", " '@Lamb2ja Hey James! How odd :/ Please call our Contact Centre on 02392441234 and we will be able to assist you :) Many thanks!',\n", " '@DespiteOfficial we had a listen last night :) As You Bleed is an amazing track. When are you in Scotland?!',\n", " '@97sides CONGRATS :)',\n", " 'yeaaaah yippppy!!! my accnt verified rqst has succeed got a blue tick mark on my fb profile :) in 15 days',\n", " '@BhaktisBanter @PallaviRuhail This one is irresistible :)\\n#FlipkartFashionFriday http://t.co/EbZ0L2VENM',\n", " \"We don't like to keep our lovely customers waiting for long! We hope you enjoy! Happy Friday! - LWWF :) https://t.co/smyYriipxI\",\n", " '@Impatientraider On second thought, there’s just not enough time for a DD :) But new shorts entering system. Sheep must be buying.',\n", " 'Jgh , but we have to go to Bayan :D bye',\n", " 'As an act of mischievousness, am calling the ETL layer of our in-house warehousing app Katamari.\\n\\nWell… as the name implies :p.']" ] }, "execution_count": 187, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strings[:10]" ] },
{ "cell_type": "code", "execution_count": 92, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Prepare data\n", "lowercased = lowercase(strings)\n", "tokenized = tokenize_tweets(lowercased)\n", "tokenized = remove_links(tokenized)" ] },
{ "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Named Entity Recognition\n", "# NOTE: this tagger is case-sensitive, so running it on lowercased tweets can hurt its accuracy\n", "ner = extract_ner(tokenized)" ] },
{ "cell_type": "code", "execution_count": 65, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PERSON\n", "jane 1\n", "miss kang 1\n", "michael woodford 1\n", "bush 1\n", "emma roberts 1\n", "chris gayle 2\n", "kath 2\n", "jonah 1\n", "jumma mubarak 1\n", "lewis 1\n", "miss dubai 1\n", "tom moore 1\n", "joe 1\n", "pete wentz 1\n", "john prescott 1\n", "tom felton 1\n", "anna 1\n", "don ’ 1\n", "miss hannah montana 1\n", "niamh fennell 1\n", "irene 1\n", "darcey connor 1\n", "selena gomez louis tomlinson rita liam payne 1\n", "miss pamela 1\n", "kevin clifton 1\n", "bea miller 1\n", "jessica 2\n", "donna 1\n", "fabian delph 1\n", "chris ellis 1\n", "christophe gans 1\n", "clarke 1\n", "betty miller 1\n", "danny 1\n", "karlie kloss 1\n", "sam smith 1\n", "delph 1\n", "phil 1\n", "orhan pamuk 1\n", "manny 1\n", "obama 1\n", "o sunnies 1\n", "john sheen 1\n", "erica 1\n", "michael jackson 1\n", "michael 2\n", "alex smith 1\n", "miss matt 1\n", "thurston collins 1\n", "tara barkin 1\n", "anna akana 1\n", "hillary clinton 1\n", "mrs wong 1\n", "steven william umboh 1\n", "ORGANIZATION\n", "expedia 1\n", "LOCATION\n" ] } ], "source": [ "# Let's look at some entities\n", "# (the 3-class NER model labels locations LOCATION, not PLACE)\n", "for entity in ['PERSON','ORGANIZATION','LOCATION']:\n", "    print(entity)\n", "    entities = set()\n", "    counter = Counter()\n", "    for sentence in ner:\n", "        entityStarted = False\n", "        fullEntity = ''\n", "        for (word,entityLabel) in sentence:\n", "            if entityLabel == entity:\n", "                if entityStarted:\n", "                    fullEntity += ' ' + word\n", "                else:\n", "                    fullEntity = word\n", "                    entityStarted = True\n", "            elif entityStarted:\n", "                entities.add(fullEntity)\n", "                counter[fullEntity] += 1\n", "                fullEntity = ''\n", "                entityStarted = False\n", "        if fullEntity != '':\n", "            entities.add(fullEntity)\n", "            counter[fullEntity] += 1\n", "    \n", "    for found in entities: #Print out what we found\n", "        print(found,counter[found])" ] },
{ "cell_type": "code", "execution_count": 66, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#tokenized = remove_rare_words(tokenized)" ] },
{ "cell_type": "code", "execution_count": 93, "metadata": { "collapsed": true }, "outputs": [], "source": [ "tokenized_noStopWords = remove_stopwords(tokenized)\n", "tokenized_noStopWords = remove_punctuation(tokenized_noStopWords)" ] },
{ "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "# Topic modeling\n", "# NOTE: Highly dependent on the number of topics you use (the coherence sketch above is one way to pick it)\n", "\n", "(dictionary,corpus,topics) = topic_modeling(tokenized_noStopWords,ignore=set(['UNK','LINK']),num_topics=5)" ] },
{ "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(0,\n", " '0.009*\"cant\" + 0.008*\"d\" + 
0.007*\"like\" + 0.006*\"today\" + 0.006*\"feel\" + 0.004*\"want\" + 0.004*\"see\" + 0.004*\"okay\" + 0.004*\"im\" + 0.004*\"makes\"'),\n", " (1,\n", " '0.023*\"im\" + 0.011*\"want\" + 0.008*\"like\" + 0.008*\"kik\" + 0.007*\"know\" + 0.007*\"snapchat\" + 0.006*\"thanks\" + 0.004*\"tired\" + 0.004*\"pls\" + 0.004*\"sick\"'),\n", " (2,\n", " '0.018*\"miss\" + 0.009*\"sad\" + 0.008*\"much\" + 0.007*\"cant\" + 0.006*\"get\" + 0.006*\"thank\" + 0.006*\"good\" + 0.006*\"u\" + 0.006*\"one\" + 0.006*\"oh\"'),\n", " (3,\n", " '0.017*\"please\" + 0.015*\"follow\" + 0.013*\"u\" + 0.012*\"》\" + 0.012*\"♛\" + 0.011*\"back\" + 0.010*\"love\" + 0.010*\"justinbieber\" + 0.007*\"day\" + 0.007*\"sorry\"'),\n", " (4,\n", " '0.015*\"im\" + 0.011*\"followed\" + 0.009*\"get\" + 0.008*\"go\" + 0.007*\"thanks\" + 0.007*\"still\" + 0.007*\"3\" + 0.007*\"see\" + 0.007*\"wanna\" + 0.006*\"please\"')]" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "topics.print_topics()" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['@joyster2012',\n", " '@cathstaincliffe',\n", " 'good',\n", " 'for',\n", " 'you',\n", " ',',\n", " 'girl',\n", " '!',\n", " '!',\n", " 'best',\n", " 'wishes',\n", " ':-)']" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenized[40]" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(0, 0.028808361042268887),\n", " (1, 0.028776925978230294),\n", " (2, 0.88457221079167814),\n", " (3, 0.028668412518234668),\n", " (4, 0.029174089669588135)]" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "topics.get_document_topics(corpus[40])" ] }, { "cell_type": "code", "execution_count": 157, "metadata": {}, "outputs": [], "source": [ "# Sentiment analysis\n", "sent = sentiment(tokenized[:10])" ] }, { "cell_type": "code", "execution_count": 159, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "'#followfriday @france_inte @pkuchly57 @milipol_paris for being top engaged members in my community this week : -RRB-': 2 Neutral\n", "'@lamb2ja hey james !': 2 Neutral\n", "'how odd : / please call our contact centre on 02392441234 and we will be able to assist you :-RRB- many thanks !': 2 Neutral\n", "'@despiteofficial we had a listen last night :-RRB- as you bleed is an amazing track .': 3 Positive\n", "'when are you in scotland ?': 2 Neutral\n", "'!': 2 Neutral\n", "'@ 97sides congrats : -RRB-': 2 Neutral\n", "'yeaaaah yippppy !': 3 Positive\n", "'!': 2 Neutral\n", "'!': 2 Neutral\n", "'my accnt verified rqst has succeed got a blue tick mark on my fb profile :-RRB- in 15 days': 1 Negative\n", "'@bhaktisbanter @pallaviruhail this one is irresistible :-RRB- #flipkartfashionfriday LINK': 3 Positive\n", "'we do n't like to keep our lovely customers waiting for long !': 1 Negative\n", "'we hope you enjoy !': 3 Positive\n", "'happy friday !': 3 Positive\n", "'- lwwf :-RRB- LINK': 2 Neutral\n", "'@impatientraider on second thought , there ' s just not enough time for a dd :-RRB- but new shorts entering system .': 2 Neutral\n", "'sheep must be buying .': 1 Negative\n", "'jgh , but we have to go to bayan :d bye': 1 Negative\n", "'as an act of mischievousness , am calling the etl layer of our in-house warehousing app katamari .': 1 Negative\n", "'well ... 
as the name implies :p .': 3 Positive\n" ] } ], "source": [ "for res in sent:\n", " for s in res[\"sentences\"]:\n", " print(\"'%s': %s %s\" % (\n", " \" \".join([t[\"word\"] for t in s[\"tokens\"]]),\n", " s[\"sentimentValue\"], s[\"sentiment\"]))" ] }, { "cell_type": "code", "execution_count": 160, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sent = sentiment(tokenized[-10:])" ] }, { "cell_type": "code", "execution_count": 161, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "'i want it to be my birthday already : -LRB-': 2 Neutral\n", "'@louanndavies completely agree .': 2 Neutral\n", "'the press wo n't : -LRB-': 2 Neutral\n", "'im super duper tired : -LRB-': 1 Negative\n", "'having boring time :-LRB- do n't know what to do ...': 1 Negative\n", "'ill be on soon , i promise :-LRB- waaah': 1 Negative\n", "'i wan na change my avi but usanele : -LRB-': 1 Negative\n", "'my puppy broke her foot : -LRB-': 2 Neutral\n", "'where 's all the jaebum baby pictures :-LRB- -LRB-': 2 Neutral\n", "'but but mr ahmad maslan cooks too :-LRB- LINK': 1 Negative\n", "'@eawoman as a hull supporter i am expecting a misserable few weeks : - -LRB-': 2 Neutral\n" ] } ], "source": [ "for res in sent:\n", " for s in res[\"sentences\"]:\n", " print(\"'%s': %s %s\" % (\n", " \" \".join([t[\"word\"] for t in s[\"tokens\"]]),\n", " s[\"sentimentValue\"], s[\"sentiment\"]))" ] }, { "cell_type": "code", "execution_count": 177, "metadata": { "collapsed": true }, "outputs": [], "source": [ "justinbeiber = [sentence for sentence in tokenized if '@justinbieber' in ' '.join(sentence)]" ] }, { "cell_type": "code", "execution_count": 180, "metadata": {}, "outputs": [], "source": [ "sent = sentiment(justinbeiber)" ] }, { "cell_type": "code", "execution_count": 182, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "'@justinbieber :-RRB- always smile': 3 Positive\n", "'@justinbieber can you please follow me @caitecat1209 ♡ ♡ ♡ please jb follow me i love you always and forever .': 3 Positive\n", "'i 'm a belieber ♡ ♡ ♡ :-RRB- i love u': 2 Neutral\n", "'this is why im standing and always being a belieber :-RRB- its all bcause of him @justinbieber 2009 until die : -RRB-': 2 Neutral\n", "''' @justinbieber : :-RRB- '' why baby ?': 1 Negative\n", "'😘 😘 😘': 2 Neutral\n", "'@justinbieber it makesme happy to see this :-RRB- keep smiling we love u': 3 Positive\n", "'@justinbieber when you smile , i smile : -RRB-': 3 Positive\n", "'@justinbieber :-RRB- back at ya': 2 Neutral\n", "'@justinbieber i love u : -RRB-': 2 Neutral\n", "'@justinbieber you are daddy af ... 
: - -RRB-': 2 Neutral\n", "'justin where are you ?': 2 Neutral\n", "':-LRB- @justinbieber': 2 Neutral\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'350 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'349 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'348 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'347 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'346 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'345 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'344 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'343 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'342 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'341 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'340 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'339 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'338 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'337 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'336 》 》 》 see me ♛ ♛ 
♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'335 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'334 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'333 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'332 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'331 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'330 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'329 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'328 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'327 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'326 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'325 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'324 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'323 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'322 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'321 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve 
that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'320 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'319 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'318 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'317 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'♛ ♛ ♛ 》 》 》 i love you so much .': 3 Positive\n", "'i beli ̇ eve that he wi ̇ ll follow .': 1 Negative\n", "'please follow me please justi ̇ n @justinbieber :-LRB- x15 .': 1 Negative\n", "'316 》 》 》 see me ♛ ♛ ♛': 1 Negative\n", "'@xbiebsft5sos followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@lucybanjifede followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@biebsxmoonlight followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@haticebieberr followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@_avonsparadise_ followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@bloutangelina followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@gaganch57575757 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@lewiismynewt followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@palmirabieber followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@extended5h followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@biebersxgalaxy followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@cloud9cabello followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@likeafigure8 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@tyareramirez followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@mariajosecleri1 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@gilinskyscherry followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@_angelina_horan followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@justinbieber if u see this , can u follow me ?': 1 Negative\n", "'i 'm waiting so long : -LRB-': 2 Neutral\n", "'@princesswhut followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@marixyanchik1 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@kissingjal followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@cutelikejdb followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@kargadouri followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@rfarghaly123 followed me thanks , and @justinbieber please 
followed me too : -LRB-': 1 Negative\n", "'@camy19994 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@kidrauhlogan followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@buterairlines followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@amerazjm followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@zeynepirdal followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@cro_marta followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@austin_love_am followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@natvolpato1 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@josselynramos01 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@lill_hippie followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@rhiska22 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@pablonerudaofic followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@colon_valeria followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@kendrahatesu followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@bocagirlslayed followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@camss59 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@waqasalirajput4 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@emre_lavigne followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@frantaandbiebxr followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@jessicarios468 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@nashyy_niall followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@juzzyftmahone followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@jime_jb21 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@salweimar followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@eveh1_1 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@gbiebs17 followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@aylinguvenkaya_ followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@boyhuptuquruq followed me thanks , and @justinbieber please followed me too : -LRB-': 1 Negative\n", "'@victoria199412 follow back me , thanks @justinbieber please : -LRB-': 3 Positive\n", "'@jkcorden @justinbieber hey i miss him too : -LRB-': 1 Negative\n", "'@justinbieber you do n't follow me : -LRB-': 1 Negative\n", "'@justinbieber follback me pls : -LRB-': 1 Negative\n", "'@iperfectyonce @justinbieber its my biggest dream can u follow me brooo :-LRB- -LRB- -LRB-': 1 Negative\n" ] } ], "source": [ "all_sentiment = []\n", "for res in sent:\n", " for s in res[\"sentences\"]:\n", " print(\"'%s': %s %s\" % (\n", " \" \".join([t[\"word\"] for t in s[\"tokens\"]]),\n", " s[\"sentimentValue\"], s[\"sentiment\"]))\n", " 
all_sentiment.append(s['sentiment'])" ] }, { "cell_type": "code", "execution_count": 184, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "40" ] }, "execution_count": 184, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_sentiment.count('Positive')" ] }, { "cell_type": "code", "execution_count": 185, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "162" ] }, "execution_count": 185, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_sentiment.count('Negative')" ] }, { "cell_type": "code", "execution_count": 186, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 186, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_sentiment.count('Neutral')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }