{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Churn analytics\n",
    "\n",
    "Reads the dataset named by `input_prefix + input_file` through `churn.etl.read_df`, runs `churn.eda.gen_summary` on it, and charts the `Count` column of the `cube_3` table by tenure (in quarters), contract and churn."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "# notebook parameters\n",
    "# This cell carries the `parameters` tag (the papermill convention for values\n",
    "# that may be overridden per run), so keep the settings below as plain assignments.\n",
    "\n",
    "import os\n",
    "import sys\n",
    "\n",
    "# replace this with your notebook working directory\n",
    "sys.path.append(\"/root/telco-churn-augmentation/\")\n",
    "\n",
    "\n",
    "# Spark connection\n",
    "spark_master = \"yarn\"\n",
    "app_name = \"churn-analytics\"\n",
    "\n",
    "# input: read from input_prefix + input_file, stored as input_kind\n",
    "input_prefix = \"\"\n",
    "input_file = \"churn-etl\"\n",
    "input_kind = \"parquet\"\n",
    "\n",
    "# output options handed to churn.etl.register_options\n",
    "output_prefix = \"\"\n",
    "output_mode = \"overwrite\"\n",
    "output_kind = \"parquet\"\n",
    "\n",
    "# Spark memory settings\n",
    "driver_memory = '8g'\n",
    "executor_memory = '8g'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pyspark\n",
    "\n",
    "session = (\n",
    "    pyspark.sql.SparkSession.builder\n",
    "    .master(spark_master)\n",
    "    .appName(app_name)\n",
    "    # event logs are written to the current working directory\n",
    "    .config(\"spark.eventLog.enabled\", True)\n",
    "    .config(\"spark.eventLog.dir\", \".\")\n",
    "    .config(\"spark.driver.memory\", driver_memory)\n",
    "    .config(\"spark.executor.memory\", executor_memory)\n",
    "    .config(\"spark.rapids.memory.pinnedPool.size\", \"2G\")\n",
    "    .config(\"spark.sql.shuffle.partitions\", 16)\n",
    "    .config(\"spark.sql.files.maxPartitionBytes\", \"4096MB\")\n",
    "    # RAPIDS Accelerator: 4 cores per executor, 1 CPU per task; each task\n",
    "    # requests half a GPU and up to 2 tasks may use the GPU concurrently\n",
    "    .config(\"spark.rapids.sql.enabled\", True)\n",
    "    .config(\"spark.executor.cores\", 4)\n",
    "    .config(\"spark.task.cpus\", 1)\n",
    "    .config(\"spark.rapids.sql.concurrentGpuTasks\", 2)\n",
    "    .config(\"spark.task.resource.gpu.amount\", .5)\n",
    "    .config(\"spark.rapids.sql.variableFloatAgg.enabled\", True)\n",
    "    # report the parts of each query plan that did not run on the GPU\n",
    "    .config(\"spark.rapids.sql.explain\", \"NOT_ON_GPU\")\n",
    "    .config(\"spark.rapids.sql.decimalType.enabled\", \"True\")\n",
    "    .getOrCreate()\n",
    ")\n",
    "session"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the `churn` package is importable because of the sys.path entry added in the parameters cell\n",
    "import churn.eda\n",
    "import churn.etl\n",
    "\n",
    "# pass the notebook's input/output settings on to the ETL helpers\n",
    "churn.etl.register_options(\n",
    "    output_prefix = output_prefix,\n",
    "    output_mode = output_mode,\n",
    "    output_kind = output_kind,\n",
    "    input_kind = input_kind\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = churn.etl.read_df(session, input_prefix + input_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generating reports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "# NOTE(review): `summary` is not used again in this notebook; the call is presumably\n",
    "# also what registers the tables listed in the next cell (e.g. `cube_3`) -- confirm in churn.eda\n",
    "summary = churn.eda.gen_summary(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "session.catalog.listTables()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Slicing and dicing by tenure in quarters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pull the columns needed for plotting into pandas and give the tenure column a readable name\n",
    "grouped_by_quarters = (\n",
    "    session.table(\"cube_3\")\n",
    "    .select(\"3_month_spans\", \"Contract\", \"PaperlessBilling\", \"Churn\", \"Count\")\n",
    "    .toPandas()\n",
    "    .rename(columns = {'3_month_spans' : 'tenure_in_quarters'})\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import altair as alt\n",
    "\n",
    "# store chart data in a JSON side file instead of embedding it in the chart spec\n",
    "alt.data_transformers.enable('json')\n",
    "\n",
    "\n",
    "def tenure_chart(data, color, column):\n",
    "    \"\"\"Bar chart of summed `Count` per tenure quarter.\n",
    "\n",
    "    Rows of `data` containing missing values are dropped before plotting.\n",
    "    `color` and `column` are Altair encoding shorthands (for example\n",
    "    'Churn:N') choosing the field used for bar color and for the\n",
    "    column facet, respectively. Returns the Altair chart.\n",
    "    \"\"\"\n",
    "    return alt.Chart(data.dropna()).mark_bar().encode(\n",
    "        x = 'tenure_in_quarters:O',\n",
    "        y = 'sum(Count):Q',\n",
    "        color = color,\n",
    "        column = column\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one panel per contract type, bars colored by churn\n",
    "tenure_chart(grouped_by_quarters, color = 'Churn:N', column = 'Contract:N')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one panel per churn value, bars colored by contract type\n",
    "tenure_chart(grouped_by_quarters, color = 'Contract:N', column = 'Churn:N')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Finishing up"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Left disabled so the session stays available for further exploration;\n",
    "# uncomment to stop the Spark session when you are done.\n",
    "# session.stop()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}