{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "71f0d318-672c-4d14-818f-9560e52d1b8c", "metadata": {}, "outputs": [], "source": [ "Oracle AI Data Platform v1.0\n", "\n", "Copyright © 2025, Oracle and/or its affiliates.\n", "\n", "Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/" ] }, { "cell_type": "markdown", "id": "8a76b357-10b2-43f5-864e-2747ce0040dc", "metadata": { "type": "python" }, "source": [ "# Spark Delta SQL Examples\n", " **Learn basic Delta features in IDL**\n", "\n", " See https://docs.delta.io/latest/index.html for detailed documentation.\n", " \n", " This notebook demonstrates different ways to read and write data from Delta tables in AI Data Platform. It covers:\n", " \n", " 1. **Setup catalog and schemas**\n", " 2. **Create Delta Table**\n", " 3. **Delta Table operations**\n", " - Insert data\n", " - Update data\n", " - Query Data\n", " 4. **Schema exploration**\n", " - Listing schemas and tables\n", " - Describing delta table history data\n", " 5. **Merge**\n", " - Merge data\n", " 6. **Time travel**\n", " - Query by version\n", " - Query by timestamp\n", " 7. **Advanced Delta**\n", " " ] }, { "cell_type": "markdown", "id": "1a670927-fc2f-445c-8d2c-338645fa428b", "metadata": {}, "source": [ "# **1. Setup catalog and schemas**" ] }, { "cell_type": "markdown", "id": "f843ec04-b72b-44f6-9a72-2be2a289cd58", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:16:21.700Z" }, "type": "markdown" }, "source": [ "## Show Catalogs" ] }, { "cell_type": "code", "execution_count": 1, "id": "4031b6f5-d673-474d-86e2-41bd8113d678", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:21:33.090Z" } }, "outputs": [ { "data": { "text/html": [ "
+------------+--------+-----------+--------------------+-------------+\n",
       "|catalog_name|    type|source_type|          created_by|   created_at|\n",
       "+------------+--------+-----------+--------------------+-------------+\n",
       "|        hive|INTERNAL|        N.A|ocid1.user.oc1..a...|1742660232068|\n",
       "|  discovered|INTERNAL|        N.A|ocid1.user.oc1..a...|1742752860364|\n",
       "|        lake|INTERNAL|        N.A|ocid1.user.oc1..a...|1742755977633|\n",
       "|lake_example|INTERNAL|        N.A|ocid1.user.oc1..a...|1742668284598|\n",
       "+------------+--------+-----------+--------------------+-------------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "spark.sql(\"show catalogs\").show()" ] }, { "cell_type": "markdown", "id": "15bb6d74-ff08-4327-9cbe-bb80e16cb9e7", "metadata": {}, "source": [ "## Create Managed Catalog" ] }, { "cell_type": "code", "execution_count": 1, "id": "6bbeaab3-2a11-4ec1-acec-b1c816bb78fd", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:21:37.884Z" } }, "outputs": [], "source": [ "# Create managed catalog\n", "spark.sql(\"create catalog if not exists lake\")\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "5691975e-f0bc-4ffe-9c98-ac1af61cc1eb", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:21:47.091Z" } }, "outputs": [ { "data": { "text/html": [ "
+------------------+--------------------+\n",
       "|         attribute|     attribute_value|\n",
       "+------------------+--------------------+\n",
       "|              name|                lake|\n",
       "|         createdOn|       1742755977633|\n",
       "|         updatedOn|       1742755977275|\n",
       "|connection_details|                  {}|\n",
       "|   catalog_details|{\"createdOn\":\"174...|\n",
       "|         createdBy|ocid1.user.oc1..a...|\n",
       "|              type|            INTERNAL|\n",
       "|         updatedBy|ocid1.user.oc1..a...|\n",
       "+------------------+--------------------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Describe Catalog\n", "spark.sql(\"describe catalog lake\").show()" ] }, { "cell_type": "markdown", "id": "55560927-d68d-4321-8e9b-9ec13d6e97f1", "metadata": {}, "source": [ "## Create Schema in Managed Catalog" ] }, { "cell_type": "code", "execution_count": 1, "id": "72fe94b6-e810-4613-8aa3-a79c736fc4e1", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:21:52.078Z" } }, "outputs": [], "source": [ "# Create Schema\n", "spark.sql(\"create schema if not exists lake.bronze\")" ] }, { "cell_type": "code", "execution_count": 1, "id": "1be56743-d407-4612-b2ae-231188946db0", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:04:41.651Z" } }, "outputs": [ { "data": { "text/html": [ "
+--------------+----------+\n",
       "|     info_name|info_value|\n",
       "+--------------+----------+\n",
       "|  Catalog Name|      lake|\n",
       "|Namespace Name|    bronze|\n",
       "+--------------+----------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Describe Schema\n", "spark.sql(\"describe schema lake.bronze\").show()" ] }, { "cell_type": "markdown", "id": "a63df75e-333d-455a-a419-4b950a266b3e", "metadata": { "type": "markdown" }, "source": [ "# **2. Create Delta Table**" ] }, { "cell_type": "markdown", "id": "0f67c7b9-900b-40c0-b98f-c7d4d133134f", "metadata": {}, "source": [ "## Create Target Delta Table" ] }, { "cell_type": "code", "execution_count": 1, "id": "939ae194-fa80-46c3-bffc-65d297498039", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:05:32.933Z" } }, "outputs": [], "source": [ "from pyspark.sql import SparkSession\n", "# Sample patient data\n", "data = [\n", " (\"2025-03-24T08:00:00Z\", \"P001\", 72, \"120/80\", 98, 36.8, \"302A\", \"stable\"),\n", " (\"2025-03-24T08:05:00Z\", \"P001\", 75, \"118/78\", 97, 36.9, \"302A\", \"stable\"),\n", " (\"2025-03-24T08:10:00Z\", \"P002\", 90, \"140/90\", 92, 38.2, \"215B\", \"critical\"),\n", " (\"2025-03-24T08:15:00Z\", \"P002\", 95, \"145/95\", 90, 38.5, \"215B\", \"critical\")\n", "]\n", "\n", "columns = [\"timestamp\", \"patient_id\", \"heart_rate_bpm\", \"blood_pressure\", \"oxygen_saturation\", \"temperature_C\", \"room_number\", \"status\"]\n", "\n", "df = spark.createDataFrame(data, columns)\n", "\n", "# Save as Delta Table\n", "df.write.format(\"delta\").mode(\"overwrite\").saveAsTable(\"lake.bronze.patient\")\n" ] }, { "cell_type": "markdown", "id": "bcaf3b6c-a747-4917-9464-9b3012ece36d", "metadata": { "type": "markdown" }, "source": [ "# **3. Delta Table Operations**" ] }, { "cell_type": "markdown", "id": "52ba5b85-1814-4af5-b2cc-59bb8fac7582", "metadata": {}, "source": [ "# Update Data in Delta Table" ] }, { "cell_type": "code", "execution_count": 1, "id": "6ede6a23-56b8-4d86-b495-6e0a7ccaaef6", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:05:42.700Z" } }, "outputs": [], "source": [ "spark.sql(\"update lake.bronze.patient set room_number='302X' where patient_id='P001'\")" ] }, { "cell_type": "markdown", "id": "019a0092-2684-4d4e-9c02-cfe7cbf41a2e", "metadata": {}, "source": [ "## Query Data" ] }, { "cell_type": "code", "execution_count": 1, "id": "67341f65-d4e4-4235-98bd-c228e763d1d1", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:05:50.149Z" } }, "outputs": [ { "data": { "text/html": [ "
+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "|           timestamp|patient_id|heart_rate_bpm|blood_pressure|oxygen_saturation|temperature_C|room_number|  status|\n",
       "+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "|2025-03-24T08:10:00Z|      P002|            90|        140/90|               92|         38.2|       215B|critical|\n",
       "|2025-03-24T08:15:00Z|      P002|            95|        145/95|               90|         38.5|       215B|critical|\n",
       "|2025-03-24T08:00:00Z|      P001|            72|        120/80|               98|         36.8|       302X|  stable|\n",
       "|2025-03-24T08:05:00Z|      P001|            75|        118/78|               97|         36.9|       302X|  stable|\n",
       "+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Query managed table\n", "spark.sql(\"select * from lake.bronze.patient\").show()" ] }, { "cell_type": "markdown", "id": "e6c983b9-b0c3-480f-9ba2-0d111569c88b", "metadata": { "type": "markdown" }, "source": [ "# **4. Schema Exploration**" ] }, { "cell_type": "markdown", "id": "694f0bd2-8760-40c5-a41e-1ffc8779959f", "metadata": {}, "source": [ "## Show Schemas, Tables, Describe Tables" ] }, { "cell_type": "code", "execution_count": 1, "id": "0aabaf9d-b680-46ec-8310-83253c634148", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:05:55.007Z" } }, "outputs": [ { "data": { "text/html": [ "
+---------+\n",
       "|namespace|\n",
       "+---------+\n",
       "|  default|\n",
       "|   bronze|\n",
       "+---------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "spark.sql(\"show schemas in lake\").show()" ] }, { "cell_type": "code", "execution_count": 1, "id": "b399a240-5092-40d1-b226-7f19c8c3f549", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:06:01.661Z" } }, "outputs": [ { "data": { "text/html": [ "
+---------+---------+-----------+\n",
       "|namespace|tableName|isTemporary|\n",
       "+---------+---------+-----------+\n",
       "|   bronze|  patient|      false|\n",
       "+---------+---------+-----------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "spark.sql(\"show tables in lake.bronze\").show()" ] }, { "cell_type": "code", "execution_count": 1, "id": "b83c9dbe-2bfe-49ed-853d-91a34ccb03ae", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:06:05.830Z" } }, "outputs": [ { "data": { "text/html": [ "
+---------+---------+-----------+\n",
       "|namespace|tableName|isTemporary|\n",
       "+---------+---------+-----------+\n",
       "|   bronze|  patient|      false|\n",
       "+---------+---------+-----------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# List tables using a pattern eg. tables beginning with e\n", "spark.sql(\"show tables in lake.bronze like 'p.*'\").show()" ] }, { "cell_type": "code", "execution_count": 1, "id": "0e726160-679d-48e8-8641-9d20f0f80568", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:06:09.609Z" } }, "outputs": [ { "data": { "text/html": [ "
+--------------------+--------------------+-------+\n",
       "|            col_name|           data_type|comment|\n",
       "+--------------------+--------------------+-------+\n",
       "|           timestamp|              string|   NULL|\n",
       "|          patient_id|              string|   NULL|\n",
       "|      heart_rate_bpm|              bigint|   NULL|\n",
       "|      blood_pressure|              string|   NULL|\n",
       "|   oxygen_saturation|              bigint|   NULL|\n",
       "|       temperature_C|              double|   NULL|\n",
       "|         room_number|              string|   NULL|\n",
       "|              status|              string|   NULL|\n",
       "|                    |                    |       |\n",
       "|# Detailed Table ...|                    |       |\n",
       "|                Name| lake.bronze.patient|       |\n",
       "|                Type|             MANAGED|       |\n",
       "|            Location|oci://nnn...        |       |\n",
       "|            Provider|               delta|       |\n",
       "|               Owner|ocid1.user.oc1..a...|       |\n",
       "|    Table Properties|[delta.minReaderV...|       |\n",
       "+--------------------+--------------------+-------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Describe managed table, see type is MANAGED and provider is DELTA\n", "spark.sql(\"describe table extended lake.bronze.patient\").show()" ] }, { "cell_type": "code", "execution_count": 1, "id": "28201018-8fae-4d4e-b928-679bf051a9df", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:06:18.784Z" } }, "outputs": [ { "data": { "text/html": [ "
+-------+-------------------+------+--------+------------+--------------------+----+--------+---------+-----------+--------------+-------------+--------------------+------------+--------------------+\n",
       "|version|          timestamp|userId|userName|   operation| operationParameters| job|notebook|clusterId|readVersion|isolationLevel|isBlindAppend|    operationMetrics|userMetadata|          engineInfo|\n",
       "+-------+-------------------+------+--------+------------+--------------------+----+--------+---------+-----------+--------------+-------------+--------------------+------------+--------------------+\n",
       "|      2|2025-03-24 17:05:41|  NULL|    NULL|      UPDATE|{predicate -> [\"(...|NULL|    NULL|     NULL|          1|  Serializable|        false|{numRemovedFiles ...|        NULL|Apache-Spark/3.5....|\n",
       "|      1|2025-03-24 17:05:31|  NULL|    NULL|       WRITE|{mode -> Append, ...|NULL|    NULL|     NULL|          0|  Serializable|         true|{numFiles -> 2, n...|        NULL|Apache-Spark/3.5....|\n",
       "|      0|2025-03-24 17:05:22|  NULL|    NULL|CREATE TABLE|{partitionBy -> [...|NULL|    NULL|     NULL|       NULL|  Serializable|         true|                  {}|        NULL|Apache-Spark/3.5....|\n",
       "+-------+-------------------+------+--------+------------+--------------------+----+--------+---------+-----------+--------------+-------------+--------------------+------------+--------------------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Describe history of Delta table\n", "spark.sql(\"describe history lake.bronze.patient\").show()" ] }, { "cell_type": "markdown", "id": "70d38c33-f674-4263-93f6-a6ececfac98d", "metadata": {}, "source": [ "# List all tables in all schemas in a catalog" ] }, { "cell_type": "code", "execution_count": 1, "id": "8a97a248-3131-40b8-9ac0-29c95744e890", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:06:26.945Z" } }, "outputs": [ { "data": { "text/html": [ "
[\"lake.bronze.patient\"]\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "catalog_name=\"lake\"\n", "databases = [\n", " catalog_name+\".\"+db.namespace \n", " for db in spark.sql('show schemas in '+catalog_name).collect()\n", "]\n", "tables = [\n", " catalog_name+\".\"+f\"{row['namespace']}.{row['tableName']}\" #.. format\n", " for db_rows in [\n", " spark.sql(f'show tables in {db}').collect() for db in databases\n", " ] \n", " for row in db_rows\n", "]\n", "print(tables)" ] }, { "cell_type": "markdown", "id": "8030274e-f560-451b-bfd4-04af47656212", "metadata": { "type": "markdown" }, "source": [ "# **5. Merge Operation**" ] }, { "cell_type": "markdown", "id": "c1f56c4f-6aff-44c6-9bd1-418e575a7368", "metadata": {}, "source": [ "## Merge into Delta Table" ] }, { "cell_type": "code", "execution_count": 1, "id": "c65028fb-91e1-4a4d-86cd-e9095c632070", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:08:54.789Z" } }, "outputs": [], "source": [ "# Load Delta Table\n", "deltaTable = spark.sql(\"select * from lake.bronze.patient\")\n", "\n", "# New data update\n", "new_data = spark.createDataFrame([\n", " (\"2025-03-24T08:20:00Z\", \"P002\", 100, \"150/100\", 88, 39.0, \"ICU\", \"critical\") # Moved to ICU\n", "], columns)\n", "\n", "new_data.createOrReplaceTempView(\"src\")\n", "\n", "#\n", "spark.sql(\"merge into lake.bronze.patient as dest \\\n", "using src as src \\\n", "on src.patient_id = dest.patient_id \\\n", "when matched then update set room_number = src.room_number, status=src.status \\\n", "when not matched then insert *\")\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "dd7a94fb-70f8-4479-9eb1-4c8e2893c24e", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:09:03.026Z" } }, "outputs": [ { "data": { "text/html": [ "
+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "|           timestamp|patient_id|heart_rate_bpm|blood_pressure|oxygen_saturation|temperature_C|room_number|  status|\n",
       "+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "|2025-03-24T08:10:00Z|      P002|            90|        140/90|               92|         38.2|        ICU|critical|\n",
       "|2025-03-24T08:15:00Z|      P002|            95|        145/95|               90|         38.5|        ICU|critical|\n",
       "|2025-03-24T08:00:00Z|      P001|            72|        120/80|               98|         36.8|       302X|  stable|\n",
       "|2025-03-24T08:05:00Z|      P001|            75|        118/78|               97|         36.9|       302X|  stable|\n",
       "+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "spark.read.table(\"lake.bronze.patient\").show()" ] }, { "cell_type": "markdown", "id": "e62fbc02-a940-42ac-997f-e96366521309", "metadata": { "type": "markdown" }, "source": [ "# **6. Time Travel**" ] }, { "cell_type": "markdown", "id": "96c3d232-92cd-4a23-94b0-7e95f9ecb5b7", "metadata": {}, "source": [ "# Querying Delta Tables - Time Travel" ] }, { "cell_type": "code", "execution_count": 1, "id": "a0a57e9c-0126-45eb-bc91-383948198cf9", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:09:56.806Z" }, "type": "python" }, "outputs": [ { "data": { "text/html": [ "
+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "|           timestamp|patient_id|heart_rate_bpm|blood_pressure|oxygen_saturation|temperature_C|room_number|  status|\n",
       "+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "|2025-03-24T08:10:00Z|      P002|            90|        140/90|               92|         38.2|       215B|critical|\n",
       "|2025-03-24T08:15:00Z|      P002|            95|        145/95|               90|         38.5|       215B|critical|\n",
       "|2025-03-24T08:00:00Z|      P001|            72|        120/80|               98|         36.8|       302A|  stable|\n",
       "|2025-03-24T08:05:00Z|      P001|            75|        118/78|               97|         36.9|       302A|  stable|\n",
       "+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Querying as of version - try versions to see the data at specific versions in time\n", "spark.sql(\"select * from lake.bronze.patient VERSION AS OF 1\").show()" ] }, { "cell_type": "code", "execution_count": 1, "id": "21947e92-2195-4b97-9b6b-f85dd24638f1", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:10:41.585Z" } }, "outputs": [ { "data": { "text/html": [ "
+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "|           timestamp|patient_id|heart_rate_bpm|blood_pressure|oxygen_saturation|temperature_C|room_number|  status|\n",
       "+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "|2025-03-24T08:10:00Z|      P002|            90|        140/90|               92|         38.2|       215B|critical|\n",
       "|2025-03-24T08:15:00Z|      P002|            95|        145/95|               90|         38.5|       215B|critical|\n",
       "|2025-03-24T08:00:00Z|      P001|            72|        120/80|               98|         36.8|       302X|  stable|\n",
       "|2025-03-24T08:05:00Z|      P001|            75|        118/78|               97|         36.9|       302X|  stable|\n",
       "+--------------------+----------+--------------+--------------+-----------------+-------------+-----------+--------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Query as of timestamp - change the date/make generic\n", "spark.sql(\"select * from lake.bronze.patient TIMESTAMP AS OF '2025-03-24 17:05:45.000Z'\").show()" ] }, { "cell_type": "markdown", "id": "02e045f8-abda-4456-90e4-3a8c437cd4f9", "metadata": { "type": "markdown" }, "source": [ "# **7. Advanced Delta**" ] }, { "cell_type": "markdown", "id": "8ae9a291-fecd-405c-a65d-e4dc7ef379f0", "metadata": {}, "source": [ "# Liquid Clustering \n", "\n", "https://docs.delta.io/latest/delta-clustering.html\n", "\n", "Liquid clustering improves the existing partitioning and ZORDER techniques by simplifying data layout decisions in order to optimize query performance. Liquid clustering provides flexibility to redefine clustering columns without rewriting existing data, allowing data layout to evolve alongside analytic needs over time.\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "9f330d87-c06e-4418-bf71-f276a9ccf38b", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:30:33.675Z" } }, "outputs": [], "source": [ "spark.sql(\"create table lake.bronze.clusteredtable (c1 int, c2 string) USING DELTA CLUSTER BY (c1)\")" ] }, { "cell_type": "code", "execution_count": 1, "id": "df0dfb22-d9f7-4e00-96e9-25f43eb99985", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:31:53.830Z" } }, "outputs": [], "source": [ "spark.sql(\"insert into lake.bronze.clusteredtable select 1 c1, 'hello world' c2\")" ] }, { "cell_type": "markdown", "id": "b6e0ced7-84f0-43d2-976d-7ceaeb10fe03", "metadata": {}, "source": [ "# Vacuum - Remove files no longer referenced\n", "\n", "https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table" ] }, { "cell_type": "code", "execution_count": 1, "id": "04dce1aa-66d6-49ea-aac3-533114aa9630", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:32:58.707Z" } }, "outputs": [], "source": [ "spark.sql(\"vacuum lake.bronze.clusteredtable \")" ] }, { "cell_type": "markdown", "id": "84e638e6-e881-470e-bfc8-6147ff6318f1", "metadata": {}, "source": [ "# Restore from history" ] }, { "cell_type": "code", "execution_count": 1, "id": "3000f374-3d41-4cfc-a375-1e335dc87522", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:33:22.425Z" } }, "outputs": [ { "data": { "text/html": [ "
+-------+-------------------+------+--------+------------+--------------------+----+--------+---------+-----------+--------------+-------------+--------------------+------------+--------------------+\n",
       "|version|          timestamp|userId|userName|   operation| operationParameters| job|notebook|clusterId|readVersion|isolationLevel|isBlindAppend|    operationMetrics|userMetadata|          engineInfo|\n",
       "+-------+-------------------+------+--------+------------+--------------------+----+--------+---------+-----------+--------------+-------------+--------------------+------------+--------------------+\n",
       "|      2|2025-03-24 15:31:52|  NULL|    NULL|       WRITE|{mode -> Append, ...|NULL|    NULL|     NULL|          1|  Serializable|         true|{numFiles -> 1, n...|        NULL|Apache-Spark/3.5....|\n",
       "|      1|2025-03-24 15:31:37|  NULL|    NULL|       WRITE|{mode -> Append, ...|NULL|    NULL|     NULL|          0|  Serializable|         true|{numFiles -> 1, n...|        NULL|Apache-Spark/3.5....|\n",
       "|      0|2025-03-24 15:30:30|  NULL|    NULL|CREATE TABLE|{partitionBy -> [...|NULL|    NULL|     NULL|       NULL|  Serializable|         true|                  {}|        NULL|Apache-Spark/3.5....|\n",
       "+-------+-------------------+------+--------+------------+--------------------+----+--------+---------+-----------+--------------+-------------+--------------------+------------+--------------------+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "spark.sql(\"describe history lake.bronze.clusteredtable \").show()" ] }, { "cell_type": "code", "execution_count": 1, "id": "75bfb379-d781-4288-965f-a93d1007a18d", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:33:47.857Z" } }, "outputs": [], "source": [ "spark.sql(\"RESTORE TABLE lake.bronze.clusteredtable TO VERSION AS OF 1\")" ] }, { "cell_type": "code", "execution_count": 1, "id": "ef81710d-a6ab-493a-8684-6cb5b6781190", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T15:34:33.518Z" }, "type": "python" }, "outputs": [ { "data": { "text/html": [ "
+---+-----+\n",
       "| c1|   c2|\n",
       "+---+-----+\n",
       "|  1|hello|\n",
       "+---+-----+\n",
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "spark.sql(\"select * from lake.bronze.clusteredtable\").show()" ] }, { "cell_type": "code", "execution_count": 1, "id": "c29a6704-90dc-4802-a655-c58540789bb0", "metadata": { "execution": { "iopub.status.busy": "2025-03-24T17:11:05.703Z" }, "type": "python" }, "outputs": [ { "data": { "text/html": [ "\"Image\"" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "# Convert Delta Table to Pandas\n", "deltaTable = spark.sql(\"select * from lake.bronze.patient\")\n", "df_pandas = deltaTable.toPandas()\n", "\n", "# Plot\n", "plt.figure(figsize=(10,5))\n", "for patient in df_pandas[\"patient_id\"].unique():\n", " patient_data = df_pandas[df_pandas[\"patient_id\"] == patient]\n", " plt.plot(patient_data[\"timestamp\"], patient_data[\"heart_rate_bpm\"], marker='o', label=f\"Patient {patient}\")\n", "\n", "plt.xlabel(\"Timestamp\")\n", "plt.ylabel(\"Heart Rate (BPM)\")\n", "plt.title(\"Patient Heart Rate Over Time\")\n", "plt.legend()\n", "plt.xticks(rotation=45)\n", "plt" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }