{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11183b59-8662-4a14-ac6a-35e44c1209f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "Oracle AI Data Platform v1.0\n",
    "\n",
    "Copyright © 2025, Oracle and/or its affiliates.\n",
    "\n",
    "Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "079f9106-82b1-48b2-b42b-bacff883df9e",
   "metadata": {
    "type": "python"
   },
   "source": [
    "# Delta Schema Evolution\n",
    "**This notebook illustrates how you can configure your AI Data Platform to allow for a schema that evolves over time.**\n",
    " \n",
    "It covers:\n",
    " \n",
    " 1. **Direct Write to OCI Object Storage**\n",
    " 2. **Accessing OCI Object Storage via an External Table**\n",
    "    - Creating an external table\n",
    "    - Querying an external table\n",
    " 3. **Writing new data with additional column to OCI Object Storage**\n",
    "    - Writing to object storage\n",
    "    - Reading data\n",
    " \n",
    "# **Parameters**\n",
    " - OCI Object Storage bucket name and Namespace name\n",
    " - Catalog name\n",
    " - Schema name\n",
    " - External table name\n",
    " - Folder name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6cf09b6f-ae3f-41cd-9e0a-9e5baed672ba",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2025-03-28T17:28:49.092Z"
    }
   },
   "outputs": [],
   "source": [
    "# Change these values to your env\n",
    "oci_bucket=oidlUtils.parameters.getParameter(\"OCI_BUCKET\", \"oci://replace_bucket_name@replace_namespace\")\n",
    "p_catalog_name=oidlUtils.parameters.getParameter(\"CATALOG_NAME\", \"default\")\n",
    "p_schema_name=oidlUtils.parameters.getParameter(\"SCHEMA_NAME\", \"default\")\n",
    "p_table_name=oidlUtils.parameters.getParameter(\"TABLE_NAME\", \"deltatab\")\n",
    "p_folder_name=oidlUtils.parameters.getParameter(\"FOLDER_NAME\", \"mydata\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8a1f34ad-2dca-41e4-bf7b-bf0df0ebf999",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2025-03-28T17:27:06.122Z"
    }
   },
   "outputs": [],
   "source": [
    "import pyspark\n",
    "from pyspark.sql.types import *\n",
    "from pyspark.sql.functions import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2b9afc19-63d9-4f88-a7cf-55e78a3178e4",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2025-03-28T17:27:22.807Z"
    }
   },
   "outputs": [],
   "source": [
    "# Create a spark dataframe and write as a delta table\n",
    "data = [(\"Robert\", \"Baratheon\", \"Baratheon\", \"Storms End\", 48),\n",
    "        (\"Eddard\", \"Stark\", \"Stark\", \"Winterfell\", 46),\n",
    "        (\"Jamie\", \"Lannister\", \"Lannister\", \"Casterly Rock\", 29)\n",
    "        ]\n",
    "schema = StructType([\n",
    "    StructField(\"firstname\", StringType(), True),\n",
    "    StructField(\"lastname\", StringType(), True),\n",
    "    StructField(\"house\", StringType(), True),\n",
    "    StructField(\"location\", StringType(), True),\n",
    "    StructField(\"age\", IntegerType(), True)\n",
    "])\n",
    "\n",
    "sample_dataframe = spark.createDataFrame(data=data, schema=schema)\n",
    "sample_dataframe.write.mode(saveMode=\"overwrite\").format(\"delta\").save(oci_bucket+'/'+p_folder_name+'/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "60467d58-51cd-4b93-8ce1-7f39ab19a8d1",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2025-03-28T17:27:33.435Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre>DataFrame[]\n",
       "</pre>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create an external table on the folder for the delta table above\n",
    "spark.sql(\"create table if not exists \"+p_catalog_name+\".\"+p_schema_name+\".\"+p_table_name+\" USING DELTA LOCATION '\"+oci_bucket+\"/\"+p_folder_name+\"/'\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "67be876f-2362-4f6f-a38b-a0c12b9825c2",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2025-03-28T17:27:44.987Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre>+---------+---------+---------+-------------+---+\n",
       "|firstname| lastname|    house|     location|age|\n",
       "+---------+---------+---------+-------------+---+\n",
       "|   Robert|Baratheon|Baratheon|   Storms End| 48|\n",
       "|   Eddard|    Stark|    Stark|   Winterfell| 46|\n",
       "|    Jamie|Lannister|Lannister|Casterly Rock| 29|\n",
       "+---------+---------+---------+-------------+---+\n",
       "\n",
       "</pre>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Query the Delta table\n",
    "spark.sql(\"select * from \" + p_catalog_name+\".\"+p_schema_name+\".\"+p_table_name).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b46d9ee6-3273-4eb8-967a-15e872740ecf",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2025-03-28T17:27:50.509Z"
    }
   },
   "outputs": [],
   "source": [
    "# Let's evolve the schema, add a salary column\n",
    "data = [(\"Jim\", \"Benson\", \"Hillmount\", \"Glasgow\", 34,100),\n",
    "        (\"Jen\", \"Oliver\", \"Sleepy Hollow\", \"Cheddar\", 37,200)\n",
    "        ]\n",
    "schema = StructType([\n",
    "    StructField(\"firstname\", StringType(), True),\n",
    "    StructField(\"lastname\", StringType(), True),\n",
    "    StructField(\"house\", StringType(), True),\n",
    "    StructField(\"location\", StringType(), True),\n",
    "    StructField(\"age\", IntegerType(), True),\n",
    "    StructField(\"salary\", IntegerType(), True)\n",
    "])\n",
    "\n",
    "sample_dataframe = spark.createDataFrame(data=data, schema=schema)\n",
    "sample_dataframe.write.mode(saveMode=\"append\").format(\"delta\").option(\"mergeSchema\", \"true\").save(oci_bucket+'/'+p_folder_name+'/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c559af35-bde2-47b2-8475-26e2e36de4e2",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2025-03-28T17:28:02.486Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre>+---------+---------+-------------+-------------+---+------+\n",
       "|firstname| lastname|        house|     location|age|salary|\n",
       "+---------+---------+-------------+-------------+---+------+\n",
       "|      Jen|   Oliver|Sleepy Hollow|      Cheddar| 37|   200|\n",
       "|      Jim|   Benson|    Hillmount|      Glasgow| 34|   100|\n",
       "|   Robert|Baratheon|    Baratheon|   Storms End| 48|  NULL|\n",
       "|   Eddard|    Stark|        Stark|   Winterfell| 46|  NULL|\n",
       "|    Jamie|Lannister|    Lannister|Casterly Rock| 29|  NULL|\n",
       "+---------+---------+-------------+-------------+---+------+\n",
       "\n",
       "</pre>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Query the Delta table and see the new column 'salary', the null values in the table for historical rows\n",
    "\n",
    "spark.sql(\"select * from \" + p_catalog_name+\".\"+p_schema_name+\".\"+p_table_name).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "43e4af8d-8812-4cbc-93f4-06c4df5582d1",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2025-03-28T17:28:26.052Z"
    },
    "type": "python"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre>DataFrame[]\n",
       "</pre>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "spark.sql(\"drop table \"+p_catalog_name+\".\"+p_schema_name+\".\"+p_table_name);"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}