# Scraping stocks from Google and Alpha Vantage # @author: Andres L. Suarez-Cetrulo import pandas as pd import numpy as np import datetime """ Global values """ PATH = "/home/YOUR_USER/DATA_REPO" RAW_DATA_PATH = PATH+"/WAREHOUSE/" MAX_TRIES = 10 # downloading a symbol """ Retrieve intraday stock data from Alpha Vantage API. """ #Alpha Vantage API to download 15 days of minute data (only if required) from alpha_vantage.timeseries import TimeSeries from alpha_vantage.cryptocurrencies import CryptoCurrencies apikey='YOUR_ALPHA_VANTAGE_API_KEY' # Get pandas object with the intraday data and another with the call's metadata # Get the same for crypto ts = TimeSeries(key=apikey, output_format='pandas') cc = CryptoCurrencies(key=apikey, output_format='pandas') """ Retrieve intraday stock data from Google Finance. """ import csv import datetime import re import pandas as pd import requests def get_google_finance_intraday(ticker, period=60, days=1, exchange='USD', debug=False): """ Retrieve intraday stock data from Google Finance. Parameters ---------- ticker : str Company ticker symbol. period : int Interval between stock values in seconds. days : int Number of days of data to retrieve. Returns ------- df : pandas.DataFrame DataFrame containing the opening price, high price, low price, closing price, and volume. The index contains the times associated with the retrieved price values. """ uri = 'https://finance.google.com/finance/getprices' \ '?&p={days}d&f=d,o,h,l,c,v&q={ticker}&i={period}?x={exchange}'.format(ticker=ticker, period=period, days=days, exchange=exchange) if(debug): print (uri) page = requests.get(uri) reader = csv.reader(page.content.splitlines()) columns = ['Open', 'High', 'Low', 'Close', 'Volume'] rows = [] times = [] for row in reader: if re.match('^[a\d]', row[0]): if row[0].startswith('a'): start = datetime.datetime.fromtimestamp(int(row[0][1:])) times.append(start) else: times.append(start+datetime.timedelta(seconds=period*int(row[0]))) rows.append(map(float, row[1:])) if len(rows): return pd.DataFrame(rows, index=pd.DatetimeIndex(times, name='Date'), columns=columns) else: return pd.DataFrame(rows, index=pd.DatetimeIndex(times, name='Date')) """ Download price for a given symbol using either Google Finance or Alpha Vantage """ def download_single_price_from(symbol,period=60,days=20,exchange='USD',site="google",debug=True, \ path="default" ,name="default"): #real max days at 1min level is 15... df = pd.DataFrame({'A' : []}) site_option = "" # Download index price if site=="google_finance": df = get_google_finance_intraday(symbol,period,days,exchange,debug) elif site=="alpha_vantage": df, meta_data = ts.get_intraday(symbol, interval='1min', outputsize='full') elif site=="avantage_crypto": df, meta_data = cc.get_digital_currency_intraday(symbol, exchange) # Save index prices output_file=check_or_create_path(path)+"/"+name+"_"+str(datetime.date.today())+".csv.gz" # df.to_csv(output_file, sep=';', encoding='utf-8', compression='gzip') df.to_csv(output_file, sep=';', compression='gzip') # encoding='utf-8', # each compressed file can be read after as: # df = pd.read_csv(output_file, compression='gzip') """ Trying to download the symbols '$MAX_TRIES' times. After, we assume that the symbol is not available in the given API/Provider. """ def try_download(symbol,period,days,exchange,site,debug,path, name, tries_count): try: download_single_price_from(symbol=symbol,period=period,days=days,exchange=exchange,\ site=site,debug=debug,path=path,name=symbol) except: # catch *all* exceptions e = sys.exc_info()[0] print( "

Error: %s

" % e ) # Recursive function that tries again from pointer when crashing (awaits 5 seconds to retry) if (tries_count