#Z0096 # import from python libraries and modules import pandas as pd import numpy as np from os.path import isfile # import functions from created modules from env import get_connection #################### Acquire Data #################### def get_sql(query, db_name, use_csv=True): ''' Takes SQL query and database name as `string` and runs through pandas read_sql function, using get_connection from env.py. Requires Codeup database login credentials. use_csv=True will use data from existing CSV file if one exists, default behavior use_csv=False will obtain new query return and overwrite existing CSV if one exists ''' # check if CSV file already exists or use_csv=False if use_csv == False or isfile(f'{db_name}.csv') == False: # acquire data from database df = pd.read_sql(query, get_connection(db_name)) # write acquired data to CSV file df.to_csv(f'{db_name}.csv', index=False) # drop any existing duplicates df.drop_duplicates() else: # read into DataFrame from CSV file df = pd.read_csv(f'{db_name}.csv') return df def acquire_mvp(use_csv=False): ''' Using get_sql function we pass a specific query to obtain the MVP' variables to create a DataFrame using the property_id unique identifier as the index This function acquires only the data need to construct the MVP ''' # assign sql query for specified data query = ''' SELECT properties_2017.id AS property_id, bedroomcnt AS bedrooms, bathroomcnt AS bathrooms, fips, calculatedfinishedsquarefeet AS square_feet, taxamount AS tax_amount_usd, taxvaluedollarcnt AS tax_value_usd FROM properties_2017 INNER JOIN predictions_2017 USING(parcelid) LEFT JOIN propertylandusetype USING(propertylandusetypeid) WHERE propertylandusetypeid IN (261, 263, 264, 266, 268, 275, 276, 279) AND CAST(transactiondate AS DATE) BETWEEN 20170501 AND 20170831 ORDER BY properties_2017.id ASC ;''' # use get_sql function to read into DataFrame df = get_sql(query, 'zillow', use_csv=use_csv) # set id to index df = df.set_index('property_id') # drop rows with null values df = df.dropna() return df