import settings import tweepy import dataset from textblob import TextBlob from sqlalchemy.exc import ProgrammingError import json import sys import urllib3.contrib.pyopenssl urllib3.contrib.pyopenssl.inject_into_urllib3() tcount = 0 db = dataset.connect(settings.CONNECTION_STRING) class StreamListener(tweepy.StreamListener): def on_status(self, status): global tcount description = status.user.description loc = status.user.location text = status.text coords = status.coordinates geo = status.geo name = status.user.screen_name user_created = status.user.created_at followers = status.user.followers_count id_str = status.id_str created = status.created_at retweets = status.retweet_count bg_color = status.user.profile_background_color blob = TextBlob(text) sent = blob.sentiment if geo is not None: geo = json.dumps(geo) if coords is not None: coords = json.dumps(coords) table = db[settings.TABLE_NAME] try: table.insert(dict( user_description=description, user_location=loc, coordinates=coords, text=text, geo=geo, user_name=name, user_created=user_created, user_followers=followers, id_str=id_str, created=created, retweet_count=retweets, user_bg_color=bg_color, polarity=sent.polarity, subjectivity=sent.subjectivity, )) tcount += 1 except ProgrammingError as err: print(err) def on_error(self, status_code): if status_code == 420: #returning False in on_data disconnects the stream return False # # Setup of tweepy; assumes the correct variables have been defined in private.py # auth = tweepy.OAuthHandler(settings.TWITTER_APP_KEY, settings.TWITTER_APP_SECRET) auth.set_access_token(settings.TWITTER_KEY, settings.TWITTER_SECRET) api = tweepy.API(auth) stream_listener = StreamListener() stream = tweepy.Stream(auth=api.auth, listener=stream_listener) try: stream.filter(track=settings.TRACK_TERMS) except KeyboardInterrupt: print "Scraping interrupted, " + ("%d" % tcount) + " results written to " + settings.CONNECTION_STRING except: print("Unexpected error:", sys.exc_info()[0]) raise