{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas\n", "import pandas_gbq" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##excel1 = 'Spark Q419 dj reads sample.xlsx'\n", "##excel2 = 'Spark Q419 local radio sample.xlsx'\n", "excel1 = 'APEX Q320 sample.xlsx'\n", "excel2 = 'APEX Sep 2020 sample.xlsx'\n", "\n", "project_id = 'marketing-bigquery-project'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df1 = pandas.read_excel(excel1)\n", "df2 = pandas.read_excel(excel2)\n", "##df3 = pandas.read_excel(excel3)\n", "\n", "values1 = df1[['CLI','MARKET NAME','STA','STA TYPE NAME','DEMO #1','DP','LEN','AIR DATE','AFF','FULL TIME','ISCI CODE']]\n", "values2 = df2[['CLI','MARKET NAME','STA','STA TYPE NAME','DEMO #1','DP','LEN','AIR DATE','AFF','FULL TIME','ISCI CODE']]\n", "##values3 = df3[['CLI','MARKET NAME','STA','STA TYPE NAME','DEMO #1','DP','LEN','AIR DATE','AFF','FULL TIME','ISCI CODE']]\n", "\n", "dataframes = [values1, values2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "joinedData = pandas.concat(dataframes)\n", "##joinedData = values1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Below code extracts the first 3 letters from the AIR DATE column and creates a new column Month with the corresponding number of the month" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'JAN', 'MONTH'] = '1'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'FEB', 'MONTH'] = '2'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'MAR', 'MONTH'] = '3'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'APR', 'MONTH'] = '4'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'MAY', 'MONTH'] = '5'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'JUN', 'MONTH'] = '6'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'JUL', 'MONTH'] = '7'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'AUG', 'MONTH'] = '8'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'SEP', 'MONTH'] = '9'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'OCT', 'MONTH'] = '10'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'NOV', 'MONTH'] = '11'\n", "joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'DEC', 'MONTH'] = '12'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create Date column by extracting the last 5 characters from the AIR DATE string" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "joinedData['Date'] = joinedData['AIR DATE'].str.slice(3,6) + '20' + joinedData['AIR DATE'].str.slice(6,8)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create full date column by concatenating the MONTH and Date column" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "joinedData['full date'] = joinedData['MONTH'] + '/' + joinedData['Date']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create time column by extracting and formatting the time from the TIME column" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "joinedData['time'] = joinedData['FULL TIME'].str.slice(0,2) + ':' + joinedData['FULL TIME'].str.slice(2,4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "joinedData['am/pm'] = joinedData['FULL TIME'].str.slice(4,5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Replace spaces with underscore because GBQ does not accept spaces in the column names" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "joinedDataFormatted = joinedData.rename(columns = {'MARKET NAME': 'MARKET_NAME', 'STA TYPE NAME': 'STA_TYPE_NAME', 'DEMO #1': 'DEMO',\n", " 'DP': 'daypart', 'AIR DATE': 'AIR_DATE', 'AFF': 'DAY', 'FULL TIME': 'FULL_TIME',\n", " 'ISCI CODE': 'ISCI_CODE', 'full date': 'full_date', 'am/pm': 'am_pm'} )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "joinedDataFormatted.head(20)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pandas_gbq.to_gbq(joinedDataFormatted, 'Spike_Attribution_Model.fct_radio_log_data_q3_2020', project_id = project_id, \n", " table_schema = [{'name': 'CLI', 'type': 'STRING'},\n", " {'name': 'MARKET_NAME', 'type': 'STRING'},\n", " {'name': 'STA', 'type': 'STRING'},\n", " {'name': 'STA_TYPE_NAME', 'type': 'STRING'},\n", " {'name': 'DEMO', 'type': 'STRING'},\n", " {'name': 'daypart', 'type': 'STRING'},\n", " {'name': 'LEN', 'type': 'STRING'},\n", " {'name': 'AIR_DATE', 'type': 'STRING'},\n", " {'name': 'DAY', 'type': 'STRING'},\n", " {'name': 'FULL_TIME', 'type': 'STRING'},\n", " {'name': 'ISCI_CODE', 'type': 'STRING'},\n", " {'name': 'MONTH', 'type': 'STRING'},\n", " {'name': 'DATE', 'type': 'STRING'},\n", " {'name': 'full_date', 'type': 'STRING'},\n", " {'name': 'time', 'type': 'STRING'},\n", " {'name': 'am_pm', 'type': 'STRING'}], if_exists = 'replace')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }