{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "5c27dfd1-4fe0-4a97-92e6-ddf78889aa93",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "source": [
    "### Install the latest .whl package\n",
    "\n",
    "Check [here](https://pypi.org/project/semantic-link-labs/) to see the latest version."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5cae9db-cef9-48a8-a351-9c5fcc99645c",
   "metadata": {
    "jupyter": {
     "outputs_hidden": true,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "%pip install semantic-link-labs"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b195eae8",
   "metadata": {},
   "source": [
    "### Import the library and set the parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1344e286",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sempy_labs as labs\n",
    "\n",
    "# These three values are passed to every Delta Analyzer run below.\n",
    "table_name = 'MyTable' # Enter the name of the delta table\n",
    "lakehouse = 'MyLakehouse' # Enter the name or ID of the lakehouse in which the delta table resides\n",
    "workspace = 'MyWorkspace' # Enter the name or ID of the workspace in which the lakehouse resides"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "baa24264",
   "metadata": {},
   "source": [
    "### Run Delta Analyzer for a given table in the specified lakehouse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a1903c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Baseline run: approx_distinct_count and export are left at their defaults.\n",
    "delta_stats = labs.delta_analyzer(\n",
    "    table_name=table_name,\n",
    "    lakehouse=lakehouse,\n",
    "    workspace=workspace,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "285986d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show every named result of the baseline run.\n",
    "for name, df in delta_stats.items():\n",
    "    print(name)\n",
    "    display(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1d118dd",
   "metadata": {},
   "source": [
    "### Get actual (not approximate) distinct counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d1ecf538",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stored under its own name so the baseline results above are not overwritten.\n",
    "delta_stats_exact = labs.delta_analyzer(\n",
    "    table_name=table_name,\n",
    "    approx_distinct_count=False,\n",
    "    lakehouse=lakehouse,\n",
    "    workspace=workspace,\n",
    ")\n",
    "\n",
    "# Show every named result of the exact-count run.\n",
    "for name, df in delta_stats_exact.items():\n",
    "    print(name)\n",
    "    display(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "60aa6592",
   "metadata": {},
   "source": [
    "### Export the results of Delta Analyzer to a set of delta tables in your lakehouse\n",
    "\n",
    "The export always appends results to the delta tables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d4235ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# export=True also writes the results out; re-running this cell exports again (see the note above).\n",
    "delta_stats_exported = labs.delta_analyzer(\n",
    "    table_name=table_name,\n",
    "    lakehouse=lakehouse,\n",
    "    workspace=workspace,\n",
    "    export=True,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "synapse_pyspark"
  },
  "kernelspec": {
   "display_name": "Synapse PySpark",
   "language": "Python",
   "name": "synapse_pyspark"
  },
  "language_info": {
   "name": "python"
  },
  "microsoft": {
   "language": "python"
  },
  "nteract": {
   "version": "nteract-front-end@1.0.0"
  },
  "spark_compute": {
   "compute_id": "/trident/default"
  },
  "synapse_widget": {
   "state": {},
   "version": "0.1"
  },
  "widgets": {}
 },
 "nbformat": 4,
 "nbformat_minor": 5
}