{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Credential Scan on Azure Data Explorer\n",
"\n",
"__Notebook Version:__ 1.0
\n",
"__Python Version:__ Python 3.6 - AzureML
\n",
"__Required Packages:__ No
\n",
"__Platforms Supported:__ Azure Machine Learning Notebooks\n",
" \n",
"__Data Source Required:__ Data Explorer data tables \n",
" \n",
"### Description\n",
"This notebook provides step-by-step instructions and sample code to detect credential leak into Azure Data Explorer using Azure SDK for Python and KQL.
\n",
"*** Need to download and installPython module for Azure Data Explorer. ***
\n",
"*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". ***
\n",
"Need to know more about KQL? [Getting started with Kusto Query Language](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/concepts/).\n",
"\n",
"## Table of Contents\n",
"1. Warm-up\n",
"2. Azure Authentication\n",
"3. Azure data Explorer Queries"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 1. Warm-up"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# If you need to know what Python modules are available, you may run this:\n",
"# help(\"modules\")\n",
"# During the installation, there maybe some incompatible errors, you can safely ignore them.\n",
"!pip install azure-kusto-data\n",
"!pip install azure-mgmt-kusto"
],
"outputs": [],
"execution_count": null,
"metadata": {
"gather": {
"logged": 1627592899981
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"scrolled": true
}
},
{
"cell_type": "code",
"source": [
"# Load Python libraries that will be used in this notebook\n",
"from azure.common.client_factory import get_client_from_cli_profile\n",
"from azure.common.credentials import get_azure_cli_credentials\n",
"from azure.mgmt.resource import ResourceManagementClient\n",
"\n",
"from azure.mgmt.kusto import KustoManagementClient\n",
"from azure.kusto.data.exceptions import KustoServiceError\n",
"from azure.kusto.data.helpers import dataframe_from_result_table\n",
"from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties\n",
"\n",
"import pandas as pd\n",
"import json\n",
"import ipywidgets\n",
"from IPython.display import display, HTML, Markdown\n",
"import warnings"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592915090
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Functions will be used in this notebook\n",
"def read_config_values(file_path):\n",
" \"This loads pre-generated parameters for Sentinel Workspace\"\n",
" with open(file_path) as json_file:\n",
" if json_file:\n",
" json_config = json.load(json_file)\n",
" return (json_config[\"tenant_id\"],\n",
" json_config[\"subscription_id\"],\n",
" json_config[\"resource_group\"],\n",
" json_config[\"workspace_id\"],\n",
" json_config[\"workspace_name\"],\n",
" json_config[\"user_alias\"],\n",
" json_config[\"user_object_id\"])\n",
" return None\n",
"\n",
"def has_valid_token():\n",
" \"Check to see if there is a valid AAD token\"\n",
" try:\n",
" credentials, sub_id = get_azure_cli_credentials()\n",
" creds = credentials._get_cred(resource=None)\n",
" token = creds._token_retriever()[2]\n",
" print(\"Successfully signed in.\")\n",
" return True\n",
" except Exception as ex:\n",
" if \"Please run 'az login' to setup account\" in str(ex):\n",
" print(str(ex))\n",
" return False\n",
" elif \"AADSTS70043: The refresh token has expired\" in str(ex):\n",
" message = \"**The refresh token has expired.
Please continue your login process. Then:
1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking 'Compute' on left menu, then select the instance, clicking 'Restart';
2. Otherwise, you may just restart the kernel from top menu.
Finally, close and re-load the notebook, then re-run cells one by one from the top.**\"\n",
" display(Markdown(message))\n",
" return False\n",
" except:\n",
" print(\"Please restart the kernel, and run 'az login'.\")\n",
" return False\n",
"\n",
"def get_credscan_kql_where_clause(column_name):\n",
" \"This function return the KQL where clause for credscan\"\n",
" where_clause = \" | where {0} \"\n",
" regex_string = \"\"\n",
" regex_list = [\n",
" r\"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\\\- ]?(key|secret)[^,a-z]|\\\\.azuredatabricks\\\\.net).{0,10}(dapi)?[a-z0-9/+]{22}\",\n",
" r\"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\\\.[a-z0-9/+]{40}[^a-z0-9/+])\",\n",
" r\"(?-i)\\\\WAIza(?i)[a-z0-9_\\\\\\\\\\\\-]{35}\\\\W\",\n",
" r\"(?i)(\\\\Wsig\\\\W|Secret(Value)?|IssuerSecret|(\\\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\\\.azure\\\\-devices\\\\.net|\\\\.(core|servicebus|redis\\\\.cache|accesscontrol|mediaservices)\\\\.(windows\\\\.net|chinacloudapi\\\\.cn|cloudapi\\\\.de|usgovcloudapi\\\\.net)|New\\\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)\",\n",
" r\"(?i)visualstudio\\\\.com.{1,100}\\\\W(?-i)[a-z2-7]{52}\\\\W\",\n",
" r\"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d\",\n",
" r\"(?i)(x-functions-key|ApiKey|Code=|\\\\.azurewebsites\\\\.net/api/).{0,100}[a-z0-9/\\\\+]{54}={2}\",\n",
" r\"(?i)code=[a-z0-9%]{54,74}(%3d){2}\",\n",
" r\"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\\\+]{60}\\\\W\",\n",
" r\"(?i)[^a-z0-9/\\\\+][a-z0-9/\\\\+]{86}==\",\n",
" r\"(?-i)\\\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\\\-{5}\",\n",
" r\"(?i)(app(lication)?|client)[_\\\\- ]?(key(url)?|secret)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\-]\",\n",
" r\"(?i)refresh[_\\\\-]?token([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})(\\\"data:text/plain,.+\\\"|[a-z0-9/+=_.-]{20,200})\",\n",
" r\"(?i)AccessToken(Secret)?([\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2}|[\\\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}\",\n",
" r\"(?i)[a-z0-9]{3,5}://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n",
" r\"(?i)snmp(\\\\-server)?\\\\.exe.{0,100}(priv|community)\",\n",
" r\"(?i)(ConvertTo\\\\-?SecureString\\\\s*((\\\\(|\\\\Wstring)\\\\s*)?['\\\"]+)\",\n",
" r\"(?i)(Consumer|api)[_\\\\- ]?(Secret|Key)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>,\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\s]{5,}\",\n",
" r\"(?i)authorization[,\\\\[:= \\\"']+([dbaohmnsv])\",\n",
" r\"(?i)-u\\\\s+.{2,100}-p\\\\s+[^\\\\-/]\",\n",
" r\"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n",
" r\"(?i)(\\\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\\\W[a-z0-9/\\\\+]{40}\",\n",
" r\"(?-i)(eyJ0eXAiOiJKV1Qi|eyJhbGci)\",\n",
" r\"(?i)@(\\\\.(on)?)?microsoft\\\\.com[ -~\\\\s]{1,100}?(\\\\w?pass\\\\w?)\",\n",
" r\"(?i)net(\\\\.exe)?.{1,5}(user\\\\s+|share\\\\s+/user:|user-?secrets? set)\\\\s+[a-z0-9]\",\n",
" r\"(?i)xox[pbar]\\\\-[a-z0-9]\",\n",
" r\"(?i)[\\\":\\\\s=]((x?corp|extranet(test)?|ntdev)(\\\\.microsoft\\\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\\\W.{0,100}(password|\\\\Wpwd|\\\\Wpass|\\\\Wpw\\\\W|userpass)\",\n",
" r\"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\\\s]{0,100}?@([a-z0-9.]+\\\\.(on)?)?microsoft\\\\.com['\\\"]?\",\n",
" r\"(?i)(\\\\.database\\\\.azure\\\\.com|\\\\.database(\\\\.secure)?\\\\.windows\\\\.net|\\\\.cloudapp\\\\.net|\\\\.database\\\\.usgovcloudapi\\\\.net|\\\\.database\\\\.chinacloudapi\\\\.cn|\\\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\\\Wpwd\\\\W)\",\n",
" r\"(?i)(secret(.?key)?|password)[\\\"']?\\\\s*[:=]\\\\s*[\\\"'][^\\\\s]+?[\\\"']\",\n",
" r\"(?i)[^a-z\\\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\\\s?account)\\\\s*[^\\\\w\\\\s,]([ -~\\\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\\\s\\\\$]|\\\\s)\\\\s*(DB_PASS|(sql|service)?password|pwd)\",\n",
" r\"(?i)(password|secret(key)?)[ \\\\t]*[=:]+[ \\\\t]*([^:\\\\s\\\"';,<]{2,200})\",\n",
" ]\n",
"\n",
" for (i, re_str) in enumerate(regex_list):\n",
" if i != 0:\n",
" if i == 27:\n",
" regex_string += \" and \"\n",
" else:\n",
" regex_string += \" or \" \n",
"\n",
" regex_string += \" \" + column_name + \" matches regex \\\"\" + re_str + \"\\\"\"\n",
"\n",
" return where_clause.format(regex_string)\n",
"\n",
"def set_continuation_flag(flag):\n",
" if flag == False:\n",
" print(\"continuation flag is false.\")\n",
" return flag"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592917986
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Calling the above function to populate Sentinel workspace parameters\n",
"# The file, config.json, was generated by the system, however, you may modify the values, or manually set the variables\n",
"tenant_id, subscription_id, resource_group, workspace_id, workspace_name, user_alias, user_object_id = read_config_values('config.json');"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592923975
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 2. Azure Authentication"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Azure CLI is used to get device code to login into Azure, you need to copy the code and open the DeviceLogin site.\n",
"# You may add [--tenant $tenant_id] to the command\n",
"if has_valid_token() == False:\n",
" !az login --tenant $tenant_id --use-device-code\n",
"\n",
"kusto_client = get_client_from_cli_profile(KustoManagementClient, subscription_id = subscription_id)\n",
"resource_client = get_client_from_cli_profile(ResourceManagementClient, subscription_id = subscription_id)\n",
"\n",
"# Set continuation_flag\n",
"if resource_client == None:\n",
" continuation_flag = set_continuation_flag(False)\n",
"else:\n",
" continuation_flag = set_continuation_flag(True)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"gather": {
"logged": 1627592928487
}
}
},
{
"cell_type": "code",
"source": [
"# If you encounter error like: \"got an unexpected keyword argument 'user_agent'\" at the above cell, you may run the following command as a temporarily work-around to continue:\r\n",
"# Please uncomment the following line and run it:\r\n",
"# !pip install --upgrade azure-cli\r\n",
"# Then re-run the cell above"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Select Azure Resource Group\n",
"if continuation_flag:\n",
" group_list = resource_client.resource_groups.list()\n",
" group_dropdown = ipywidgets.Dropdown(options=sorted([g.name for g in group_list]), description='Groups:')\n",
" display(group_dropdown)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592932260
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 3. Azure Data Explorer Queries"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"#Select a Kusto cluster for the subscription\n",
"if continuation_flag and group_dropdown.value != None:\n",
" cluster_list = list(kusto_client.clusters.list_by_resource_group(group_dropdown.value))\n",
" if cluster_list != None:\n",
" cluster_dropdown = ipywidgets.Dropdown(options=sorted([c.name for c in cluster_list]), description='Clusters:')\n",
" display(cluster_dropdown)\n",
" else:\n",
" continuation_flag = set_continuation_flag(False)\n",
"else:\n",
" continuation_flag = set_continuation_flag(False)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592964202
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"#Select a database for the subscription\n",
"if continuation_flag and cluster_dropdown.value != None:\n",
" database_list = kusto_client.databases.list_by_cluster(resource_group_name=group_dropdown.value, cluster_name=cluster_dropdown.value)\n",
" if database_list != None:\n",
" database_dropdown = ipywidgets.Dropdown(options=sorted([c.name for c in database_list]), description='Databases:')\n",
" display(database_dropdown)\n",
" else:\n",
" continuation_flag = set_continuation_flag(False)\n",
"else:\n",
" continuation_flag = set_continuation_flag(False)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592969017
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Initialize Kusto data client and create table list\n",
"if continuation_flag and database_dropdown.value != None:\n",
" cluster_uris = [c.uri for c in cluster_list if c.name == cluster_dropdown.value]\n",
" kusto_data_client = KustoClient(KustoConnectionStringBuilder.with_az_cli_authentication(cluster_uris[0]))\n",
" if kusto_data_client != None:\n",
" try:\n",
" db_name = database_dropdown.value.split(\"/\")[1]\n",
" tables = kusto_data_client.execute_mgmt(db_name, \".show tables details\")\n",
" table_list = dataframe_from_result_table(tables.primary_results[0]).TableName.tolist()\n",
" if table_list != None:\n",
" table_dropdown = ipywidgets.Dropdown(options=sorted(table_list), description='Tables:')\n",
" display(table_dropdown)\n",
" else:\n",
" continuation_flag = set_continuation_flag(False)\n",
" except:\n",
" continuation_flag = set_continuation_flag(False)\n",
" else:\n",
" continuation_flag = set_continuation_flag(False)\n",
"else:\n",
" continuation_flag = set_continuation_flag(False)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592974897
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Select a Column in the selected table to scan\n",
"# However, PLEASE NOTE: you may safely ignore this cell if you decide to scan all columns for the selected table!\n",
"if continuation_flag and table_dropdown.value != None:\n",
" warnings.filterwarnings('ignore')\n",
" all_columns_query = \"{0} | getschema | project ColumnName | order by ColumnName asc\".format(table_dropdown.value)\n",
" columns_result = kusto_data_client.execute_query(database=db_name, query=all_columns_query)\n",
" if columns_result != None:\n",
" column_list = dataframe_from_result_table(columns_result.primary_results[0]).ColumnName.tolist()\n",
" column_dropdown = ipywidgets.Dropdown(options=sorted(column_list), description='Columns:')\n",
" display(column_dropdown)\n",
" else:\n",
" column_list= []\n",
"else:\n",
" continuation_flag = set_continuation_flag(False)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592981481
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# This cell will run Credential Scanner regex\n",
"# You may adjust the query based on your needs.\n",
"# To look at the query, you may run: print(query)\n",
"if continuation_flag and table_dropdown.value != None:\n",
" if 'column_list' in vars() and column_dropdown.value != None:\n",
" column_name = \"tostring(['{0}'])\".format(column_dropdown.value)\n",
" else:\n",
" column_name = \"*\"\n",
"\n",
" table_name = table_dropdown.value\n",
" kql_where_clause = get_credscan_kql_where_clause(column_name)\n",
" query = \"{0} {1}\".format(table_name, kql_where_clause)\n",
" \n",
" #print(\"Query: \" + query)\n",
"\n",
" # Run query\n",
" result = kusto_data_client.execute_query(database=db_name, query=query)\n",
"\n",
" # Display Result\n",
" final_result = dataframe_from_result_table(result.primary_results[0])\n",
" if final_result.size == 0:\n",
" print(\"No leaked credentials found\")\n",
" else:\n",
" display(final_result)\n",
"else:\n",
" continuation_flag = set_continuation_flag(False)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592984920
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Save results to a csv file in the current file system\n",
"if continuation_flag and final_result.empty == False and final_result.size > 0: \n",
" final_result.to_csv('credscan_adx.csv')"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1621614956590
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
}
],
"metadata": {
"celltoolbar": "Tags",
"kernel_info": {
"name": "python3-azureml"
},
"kernelspec": {
"name": "python3-azureml",
"language": "python",
"display_name": "Python 3.6 - AzureML"
},
"language_info": {
"name": "python",
"version": "3.6.9",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"microsoft": {
"host": {
"AzureML": {
"notebookHasBeenCompleted": true
}
}
},
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}