{ "cells": [ { "cell_type": "markdown", "source": [ "# Credential Scan on Azure Data Explorer\n", "\n", "__Notebook Version:__ 1.0
\n", "__Python Version:__ Python 3.6 - AzureML
\n", "__Required Packages:__ No
\n", "__Platforms Supported:__ Azure Machine Learning Notebooks\n", " \n", "__Data Source Required:__ Data Explorer data tables \n", " \n", "### Description\n", "This notebook provides step-by-step instructions and sample code to detect credential leak into Azure Data Explorer using Azure SDK for Python and KQL.
\n", "*** Need to download and installPython module for Azure Data Explorer. ***
\n", "*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". ***
\n", "Need to know more about KQL? [Getting started with Kusto Query Language](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/concepts/).\n", "\n", "## Table of Contents\n", "1. Warm-up\n", "2. Azure Authentication\n", "3. Azure data Explorer Queries" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 1. Warm-up" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# If you need to know what Python modules are available, you may run this:\n", "# help(\"modules\")\n", "# During the installation, there maybe some incompatible errors, you can safely ignore them.\n", "!pip install azure-kusto-data\n", "!pip install azure-mgmt-kusto" ], "outputs": [], "execution_count": null, "metadata": { "gather": { "logged": 1627592899981 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } }, "scrolled": true } }, { "cell_type": "code", "source": [ "# Load Python libraries that will be used in this notebook\n", "from azure.common.client_factory import get_client_from_cli_profile\n", "from azure.common.credentials import get_azure_cli_credentials\n", "from azure.mgmt.resource import ResourceManagementClient\n", "\n", "from azure.mgmt.kusto import KustoManagementClient\n", "from azure.kusto.data.exceptions import KustoServiceError\n", "from azure.kusto.data.helpers import dataframe_from_result_table\n", "from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties\n", "\n", "import pandas as pd\n", "import json\n", "import ipywidgets\n", "from IPython.display import display, HTML, Markdown\n", "import warnings" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592915090 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Functions will be used in this notebook\n", "def read_config_values(file_path):\n", " \"This loads pre-generated parameters for Sentinel Workspace\"\n", " with open(file_path) as json_file:\n", " if json_file:\n", " json_config = json.load(json_file)\n", " return (json_config[\"tenant_id\"],\n", " json_config[\"subscription_id\"],\n", " json_config[\"resource_group\"],\n", " json_config[\"workspace_id\"],\n", " json_config[\"workspace_name\"],\n", " json_config[\"user_alias\"],\n", " json_config[\"user_object_id\"])\n", " return None\n", "\n", "def has_valid_token():\n", " \"Check to see if there is a valid AAD token\"\n", " try:\n", " credentials, sub_id = get_azure_cli_credentials()\n", " creds = credentials._get_cred(resource=None)\n", " token = creds._token_retriever()[2]\n", " print(\"Successfully signed in.\")\n", " return True\n", " except Exception as ex:\n", " if \"Please run 'az login' to setup account\" in str(ex):\n", " print(str(ex))\n", " return False\n", " elif \"AADSTS70043: The refresh token has expired\" in str(ex):\n", " message = \"**The refresh token has expired.
Please continue your login process. Then:
1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking 'Compute' on left menu, then select the instance, clicking 'Restart';
2. Otherwise, you may just restart the kernel from top menu.
Finally, close and re-load the notebook, then re-run cells one by one from the top.**\"\n", " display(Markdown(message))\n", " return False\n", " except:\n", " print(\"Please restart the kernel, and run 'az login'.\")\n", " return False\n", "\n", "def get_credscan_kql_where_clause(column_name):\n", " \"This function return the KQL where clause for credscan\"\n", " where_clause = \" | where {0} \"\n", " regex_string = \"\"\n", " regex_list = [\n", " r\"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\\\- ]?(key|secret)[^,a-z]|\\\\.azuredatabricks\\\\.net).{0,10}(dapi)?[a-z0-9/+]{22}\",\n", " r\"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\\\.[a-z0-9/+]{40}[^a-z0-9/+])\",\n", " r\"(?-i)\\\\WAIza(?i)[a-z0-9_\\\\\\\\\\\\-]{35}\\\\W\",\n", " r\"(?i)(\\\\Wsig\\\\W|Secret(Value)?|IssuerSecret|(\\\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\\\.azure\\\\-devices\\\\.net|\\\\.(core|servicebus|redis\\\\.cache|accesscontrol|mediaservices)\\\\.(windows\\\\.net|chinacloudapi\\\\.cn|cloudapi\\\\.de|usgovcloudapi\\\\.net)|New\\\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)\",\n", " r\"(?i)visualstudio\\\\.com.{1,100}\\\\W(?-i)[a-z2-7]{52}\\\\W\",\n", " r\"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d\",\n", " r\"(?i)(x-functions-key|ApiKey|Code=|\\\\.azurewebsites\\\\.net/api/).{0,100}[a-z0-9/\\\\+]{54}={2}\",\n", " r\"(?i)code=[a-z0-9%]{54,74}(%3d){2}\",\n", " r\"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\\\+]{60}\\\\W\",\n", " r\"(?i)[^a-z0-9/\\\\+][a-z0-9/\\\\+]{86}==\",\n", " r\"(?-i)\\\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\\\-{5}\",\n", " r\"(?i)(app(lication)?|client)[_\\\\- ]?(key(url)?|secret)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\-]\",\n", " r\"(?i)refresh[_\\\\-]?token([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})(\\\"data:text/plain,.+\\\"|[a-z0-9/+=_.-]{20,200})\",\n", " r\"(?i)AccessToken(Secret)?([\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2}|[\\\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}\",\n", " r\"(?i)[a-z0-9]{3,5}://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n", " r\"(?i)snmp(\\\\-server)?\\\\.exe.{0,100}(priv|community)\",\n", " r\"(?i)(ConvertTo\\\\-?SecureString\\\\s*((\\\\(|\\\\Wstring)\\\\s*)?['\\\"]+)\",\n", " r\"(?i)(Consumer|api)[_\\\\- ]?(Secret|Key)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>,\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\s]{5,}\",\n", " r\"(?i)authorization[,\\\\[:= \\\"']+([dbaohmnsv])\",\n", " r\"(?i)-u\\\\s+.{2,100}-p\\\\s+[^\\\\-/]\",\n", " r\"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n", " r\"(?i)(\\\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\\\W[a-z0-9/\\\\+]{40}\",\n", " r\"(?-i)(eyJ0eXAiOiJKV1Qi|eyJhbGci)\",\n", " r\"(?i)@(\\\\.(on)?)?microsoft\\\\.com[ -~\\\\s]{1,100}?(\\\\w?pass\\\\w?)\",\n", " r\"(?i)net(\\\\.exe)?.{1,5}(user\\\\s+|share\\\\s+/user:|user-?secrets? set)\\\\s+[a-z0-9]\",\n", " r\"(?i)xox[pbar]\\\\-[a-z0-9]\",\n", " r\"(?i)[\\\":\\\\s=]((x?corp|extranet(test)?|ntdev)(\\\\.microsoft\\\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\\\W.{0,100}(password|\\\\Wpwd|\\\\Wpass|\\\\Wpw\\\\W|userpass)\",\n", " r\"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\\\s]{0,100}?@([a-z0-9.]+\\\\.(on)?)?microsoft\\\\.com['\\\"]?\",\n", " r\"(?i)(\\\\.database\\\\.azure\\\\.com|\\\\.database(\\\\.secure)?\\\\.windows\\\\.net|\\\\.cloudapp\\\\.net|\\\\.database\\\\.usgovcloudapi\\\\.net|\\\\.database\\\\.chinacloudapi\\\\.cn|\\\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\\\Wpwd\\\\W)\",\n", " r\"(?i)(secret(.?key)?|password)[\\\"']?\\\\s*[:=]\\\\s*[\\\"'][^\\\\s]+?[\\\"']\",\n", " r\"(?i)[^a-z\\\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\\\s?account)\\\\s*[^\\\\w\\\\s,]([ -~\\\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\\\s\\\\$]|\\\\s)\\\\s*(DB_PASS|(sql|service)?password|pwd)\",\n", " r\"(?i)(password|secret(key)?)[ \\\\t]*[=:]+[ \\\\t]*([^:\\\\s\\\"';,<]{2,200})\",\n", " ]\n", "\n", " for (i, re_str) in enumerate(regex_list):\n", " if i != 0:\n", " if i == 27:\n", " regex_string += \" and \"\n", " else:\n", " regex_string += \" or \" \n", "\n", " regex_string += \" \" + column_name + \" matches regex \\\"\" + re_str + \"\\\"\"\n", "\n", " return where_clause.format(regex_string)\n", "\n", "def set_continuation_flag(flag):\n", " if flag == False:\n", " print(\"continuation flag is false.\")\n", " return flag" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592917986 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Calling the above function to populate Sentinel workspace parameters\n", "# The file, config.json, was generated by the system, however, you may modify the values, or manually set the variables\n", "tenant_id, subscription_id, resource_group, workspace_id, workspace_name, user_alias, user_object_id = read_config_values('config.json');" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592923975 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 2. Azure Authentication" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Azure CLI is used to get device code to login into Azure, you need to copy the code and open the DeviceLogin site.\n", "# You may add [--tenant $tenant_id] to the command\n", "if has_valid_token() == False:\n", " !az login --tenant $tenant_id --use-device-code\n", "\n", "kusto_client = get_client_from_cli_profile(KustoManagementClient, subscription_id = subscription_id)\n", "resource_client = get_client_from_cli_profile(ResourceManagementClient, subscription_id = subscription_id)\n", "\n", "# Set continuation_flag\n", "if resource_client == None:\n", " continuation_flag = set_continuation_flag(False)\n", "else:\n", " continuation_flag = set_continuation_flag(True)" ], "outputs": [], "execution_count": null, "metadata": { "gather": { "logged": 1627592928487 } } }, { "cell_type": "code", "source": [ "# If you encounter error like: \"got an unexpected keyword argument 'user_agent'\" at the above cell, you may run the following command as a temporarily work-around to continue:\r\n", "# Please uncomment the following line and run it:\r\n", "# !pip install --upgrade azure-cli\r\n", "# Then re-run the cell above" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Select Azure Resource Group\n", "if continuation_flag:\n", " group_list = resource_client.resource_groups.list()\n", " group_dropdown = ipywidgets.Dropdown(options=sorted([g.name for g in group_list]), description='Groups:')\n", " display(group_dropdown)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592932260 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 3. Azure Data Explorer Queries" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "#Select a Kusto cluster for the subscription\n", "if continuation_flag and group_dropdown.value != None:\n", " cluster_list = list(kusto_client.clusters.list_by_resource_group(group_dropdown.value))\n", " if cluster_list != None:\n", " cluster_dropdown = ipywidgets.Dropdown(options=sorted([c.name for c in cluster_list]), description='Clusters:')\n", " display(cluster_dropdown)\n", " else:\n", " continuation_flag = set_continuation_flag(False)\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592964202 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "#Select a database for the subscription\n", "if continuation_flag and cluster_dropdown.value != None:\n", " database_list = kusto_client.databases.list_by_cluster(resource_group_name=group_dropdown.value, cluster_name=cluster_dropdown.value)\n", " if database_list != None:\n", " database_dropdown = ipywidgets.Dropdown(options=sorted([c.name for c in database_list]), description='Databases:')\n", " display(database_dropdown)\n", " else:\n", " continuation_flag = set_continuation_flag(False)\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592969017 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Initialize Kusto data client and create table list\n", "if continuation_flag and database_dropdown.value != None:\n", " cluster_uris = [c.uri for c in cluster_list if c.name == cluster_dropdown.value]\n", " kusto_data_client = KustoClient(KustoConnectionStringBuilder.with_az_cli_authentication(cluster_uris[0]))\n", " if kusto_data_client != None:\n", " try:\n", " db_name = database_dropdown.value.split(\"/\")[1]\n", " tables = kusto_data_client.execute_mgmt(db_name, \".show tables details\")\n", " table_list = dataframe_from_result_table(tables.primary_results[0]).TableName.tolist()\n", " if table_list != None:\n", " table_dropdown = ipywidgets.Dropdown(options=sorted(table_list), description='Tables:')\n", " display(table_dropdown)\n", " else:\n", " continuation_flag = set_continuation_flag(False)\n", " except:\n", " continuation_flag = set_continuation_flag(False)\n", " else:\n", " continuation_flag = set_continuation_flag(False)\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592974897 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Select a Column in the selected table to scan\n", "# However, PLEASE NOTE: you may safely ignore this cell if you decide to scan all columns for the selected table!\n", "if continuation_flag and table_dropdown.value != None:\n", " warnings.filterwarnings('ignore')\n", " all_columns_query = \"{0} | getschema | project ColumnName | order by ColumnName asc\".format(table_dropdown.value)\n", " columns_result = kusto_data_client.execute_query(database=db_name, query=all_columns_query)\n", " if columns_result != None:\n", " column_list = dataframe_from_result_table(columns_result.primary_results[0]).ColumnName.tolist()\n", " column_dropdown = ipywidgets.Dropdown(options=sorted(column_list), description='Columns:')\n", " display(column_dropdown)\n", " else:\n", " column_list= []\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592981481 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# This cell will run Credential Scanner regex\n", "# You may adjust the query based on your needs.\n", "# To look at the query, you may run: print(query)\n", "if continuation_flag and table_dropdown.value != None:\n", " if 'column_list' in vars() and column_dropdown.value != None:\n", " column_name = \"tostring(['{0}'])\".format(column_dropdown.value)\n", " else:\n", " column_name = \"*\"\n", "\n", " table_name = table_dropdown.value\n", " kql_where_clause = get_credscan_kql_where_clause(column_name)\n", " query = \"{0} {1}\".format(table_name, kql_where_clause)\n", " \n", " #print(\"Query: \" + query)\n", "\n", " # Run query\n", " result = kusto_data_client.execute_query(database=db_name, query=query)\n", "\n", " # Display Result\n", " final_result = dataframe_from_result_table(result.primary_results[0])\n", " if final_result.size == 0:\n", " print(\"No leaked credentials found\")\n", " else:\n", " display(final_result)\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592984920 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Save results to a csv file in the current file system\n", "if continuation_flag and final_result.empty == False and final_result.size > 0: \n", " final_result.to_csv('credscan_adx.csv')" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1621614956590 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } } ], "metadata": { "celltoolbar": "Tags", "kernel_info": { "name": "python3-azureml" }, "kernelspec": { "name": "python3-azureml", "language": "python", "display_name": "Python 3.6 - AzureML" }, "language_info": { "name": "python", "version": "3.6.9", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "microsoft": { "host": { "AzureML": { "notebookHasBeenCompleted": true } } }, "nteract": { "version": "nteract-front-end@1.0.0" } }, "nbformat": 4, "nbformat_minor": 2 }