{ "cells": [ { "cell_type": "markdown", "source": [ "# Credential Scan on Azure Log Analytics\n", "\n", "__Notebook Version:__ 1.0
\n", "__Python Version:__ Python 3.8 - AzureML
\n", "__Required Packages:__ No
\n", "__Platforms Supported:__ Azure Machine Learning Notebooks\n", " \n", "__Data Source Required:__ Log Analytics tables \n", " \n", "### Description\n", "This notebook provides step-by-step instructions and sample code to detect credential leak into Azure Log Analytics using Azure SDK for Python and KQL.
\n", "*** No need to download and install any other Python modules. ***
\n", "*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". ***
\n", "Need to know more about KQL? [Getting started with Kusto Query Language](https://docs.microsoft.com/azure/data-explorer/kusto/concepts/).\n", "\n", "## Table of Contents\n", "1. Warm-up\n", "2. Azure Authentication\n", "3. Azure Log Analytics Data Queries" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 1. Warm-up" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# If you need to know what Python modules are available, you may run this:\n", "# help(\"modules\")\n", "!pip install azure-monitor-query" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685046982904 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Load Python libraries that will be used in this notebook\n", "from azure.mgmt.loganalytics import LogAnalyticsManagementClient\n", "from azure.monitor.query import LogsQueryClient, MetricsQueryClient, LogsQueryStatus\n", "from azure.identity import AzureCliCredential, DefaultAzureCredential\n", "\n", "from datetime import datetime, timezone\n", "import pandas as pd\n", "import json\n", "import ipywidgets\n", "from IPython.display import display, HTML, Markdown" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685047045861 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Functions will be used in this notebook\n", "def read_config_values(file_path):\n", " \"This loads pre-generated parameters for Microsoft Sentinel Workspace\"\n", " with open(file_path) as json_file:\n", " if json_file:\n", " json_config = json.load(json_file)\n", " return (json_config[\"tenant_id\"],\n", " json_config[\"subscription_id\"],\n", " json_config[\"resource_group\"],\n", " json_config[\"workspace_id\"],\n", " json_config[\"workspace_name\"],\n", " json_config[\"user_alias\"],\n", " json_config[\"user_object_id\"])\n", " return None\n", "\n", "def has_valid_token():\n", " \"Check to see if there is a valid AAD token\"\n", " try:\n", " error = \"Please run 'az login'\"\n", " expired = \"AADSTS70043: The refresh token has expired or is invalid\"\n", " failed = \"failed\"\n", " validator = !az account get-access-token\n", " \n", " if any(expired in item for item in validator.get_list()):\n", " return '**The refresh token has expired.
Please continue your login process. Then:
1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking \"Compute\" on left menu, then select the instance, clicking \"Restart\";
2. Otherwise, you may just restart the kernel from top menu.
Finally, close and re-load the notebook, then re-run cells one by one from the top.**'\n", " elif any(error in item for item in validator.get_list()) or any(failed in item for item in validator.get_list()):\n", " return \"Please run 'az login' to setup account\"\n", " else:\n", " return None\n", " except:\n", " return \"Please login\"\n", " \n", "def get_credscan_kql_where_clause(column_name):\n", " \"This function return the KQL where clause for credscan\"\n", " where_clause = \" | where TimeGenerated > ago({0}) | where {1} \"\n", " time_range = \"7d\"\n", " regex_string = \"\"\n", " regex_list = [\n", " r\"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\\\- ]?(key|secret)[^,a-z]|\\\\.azuredatabricks\\\\.net).{0,10}(dapi)?[a-z0-9/+]{22}\",\n", " r\"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\\\.[a-z0-9/+]{40}[^a-z0-9/+])\",\n", " r\"(?-i)\\\\WAIza(?i)[a-z0-9_\\\\\\\\\\\\-]{35}\\\\W\",\n", " r\"(?i)(\\\\Wsig\\\\W|Secret(Value)?|IssuerSecret|(\\\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\\\.azure\\\\-devices\\\\.net|\\\\.(core|servicebus|redis\\\\.cache|accesscontrol|mediaservices)\\\\.(windows\\\\.net|chinacloudapi\\\\.cn|cloudapi\\\\.de|usgovcloudapi\\\\.net)|New\\\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)\",\n", " r\"(?i)visualstudio\\\\.com.{1,100}\\\\W(?-i)[a-z2-7]{52}\\\\W\",\n", " r\"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d\",\n", " r\"(?i)(x-functions-key|ApiKey|Code=|\\\\.azurewebsites\\\\.net/api/).{0,100}[a-z0-9/\\\\+]{54}={2}\",\n", " r\"(?i)code=[a-z0-9%]{54,74}(%3d){2}\",\n", " r\"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\\\+]{60}\\\\W\",\n", " r\"(?i)[^a-z0-9/\\\\+][a-z0-9/\\\\+]{86}==\",\n", " r\"(?-i)\\\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\\\-{5}\",\n", " r\"(?i)(app(lication)?|client)[_\\\\- ]?(key(url)?|secret)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\-]\",\n", " r\"(?i)refresh[_\\\\-]?token([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})(\\\"data:text/plain,.+\\\"|[a-z0-9/+=_.-]{20,200})\",\n", " r\"(?i)AccessToken(Secret)?([\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2}|[\\\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}\",\n", " r\"(?i)[a-z0-9]{3,5}://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n", " r\"(?i)snmp(\\\\-server)?\\\\.exe.{0,100}(priv|community)\",\n", " r\"(?i)(ConvertTo\\\\-?SecureString\\\\s*((\\\\(|\\\\Wstring)\\\\s*)?['\\\"]+)\",\n", " r\"(?i)(Consumer|api)[_\\\\- ]?(Secret|Key)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>,\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\s]{5,}\",\n", " r\"(?i)authorization[,\\\\[:= \\\"']+([dbaohmnsv])\",\n", " r\"(?i)-u\\\\s+.{2,100}-p\\\\s+[^\\\\-/]\",\n", " r\"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n", " r\"(?i)(\\\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\\\W[a-z0-9/\\\\+]{40}\",\n", " r\"(?-i)(eyJ0eXAiOiJKV1Qi|eyJhbGci)\",\n", " r\"(?i)@(\\\\.(on)?)?microsoft\\\\.com[ -~\\\\s]{1,100}?(\\\\w?pass\\\\w?)\",\n", " r\"(?i)net(\\\\.exe)?.{1,5}(user\\\\s+|share\\\\s+/user:|user-?secrets? set)\\\\s+[a-z0-9]\",\n", " r\"(?i)xox[pbar]\\\\-[a-z0-9]\",\n", " r\"(?i)[\\\":\\\\s=]((x?corp|extranet(test)?|ntdev)(\\\\.microsoft\\\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\\\W.{0,100}(password|\\\\Wpwd|\\\\Wpass|\\\\Wpw\\\\W|userpass)\",\n", " r\"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\\\s]{0,100}?@([a-z0-9.]+\\\\.(on)?)?microsoft\\\\.com['\\\"]?\",\n", " r\"(?i)(\\\\.database\\\\.azure\\\\.com|\\\\.database(\\\\.secure)?\\\\.windows\\\\.net|\\\\.cloudapp\\\\.net|\\\\.database\\\\.usgovcloudapi\\\\.net|\\\\.database\\\\.chinacloudapi\\\\.cn|\\\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\\\Wpwd\\\\W)\",\n", " r\"(?i)(secret(.?key)?|password)[\\\"']?\\\\s*[:=]\\\\s*[\\\"'][^\\\\s]+?[\\\"']\",\n", " r\"(?i)[^a-z\\\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\\\s?account)\\\\s*[^\\\\w\\\\s,]([ -~\\\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\\\s\\\\$]|\\\\s)\\\\s*(DB_PASS|(sql|service)?password|pwd)\",\n", " r\"(?i)(password|secret(key)?)[ \\\\t]*[=:]+[ \\\\t]*([^:\\\\s\\\"';,<]{2,200})\",\n", " ]\n", "\n", " for (i, re_str) in enumerate(regex_list):\n", " if i != 0:\n", " if i == 27:\n", " regex_string += \" and \"\n", " else:\n", " regex_string += \" or \" \n", "\n", " regex_string += \" \" + column_name + \" matches regex \\\"\" + re_str + \"\\\"\"\n", "\n", " return where_clause.format(time_range, regex_string)\n" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685047048410 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Calling the above function to populate Microsoft Sentinel workspace parameters\n", "# The file, config.json, was generated by the system, however, you may modify the values, or manually set the variables\n", "tenant_id, subscription_id, resource_group, workspace_id, workspace_name, user_alias, user_object_id = read_config_values('config.json');" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685047431482 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 2. Azure Authentication" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Azure CLI is used to get device code to login into Azure, you need to copy the code and open the DeviceLogin site.\n", "# You may add [--tenant $tenant_id] to the command\n", "if has_valid_token() != None:\n", " message = '**The refresh token has expired.
Please continue your login process. Then:
1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking \"Compute\" on left menu, then select the instance, clicking \"Restart\";
2. Otherwise, you may just restart the kernel from top menu.
Finally, close and re-load the notebook, then re-run cells one by one from the top.**'\n", " display(Markdown(message))\n", " !echo -e '\\e[42m'\n", " !az login --tenant $tenant_id --use-device-code" ], "outputs": [], "execution_count": null, "metadata": { "gather": { "logged": 1685047434386 } } }, { "cell_type": "code", "source": [ "# Initialzie Azure LogAnalyticsDataClient, which is used to access Microsoft Sentinel log data in Azure Log Analytics. \n", "# You may need to change resource_uri for various cloud environments.\n", "resource_uri = \"https://api.loganalytics.io\"\n", "la_client = LogAnalyticsManagementClient(AzureCliCredential(), subscription_id = subscription_id)\n", "credential = DefaultAzureCredential()\n", "la_data_client = LogsQueryClient(credential)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685047436188 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 3. Azure Log Analytics Data Queries" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Get all tables available using Kusto query language. If you need to know more about KQL, please check out the link provided at the introductory section.\n", "tables_result = None\n", "table_list = None\n", "start_time=datetime(2023, 1, 1, tzinfo=timezone.utc)\n", "end_time=datetime(2024, 12, 31, tzinfo=timezone.utc)\n", "\n", "all_tables_query = \"union withsource = SentinelTableName * | distinct SentinelTableName | sort by SentinelTableName asc\"\n", "tables_result = la_data_client.query_workspace(\n", " workspace_id=workspace_id,\n", " query=all_tables_query,\n", " timespan=(start_time, end_time))\n", "\n", "if tables_result.status == LogsQueryStatus.SUCCESS:\n", " df = pd.DataFrame(data=tables_result.tables[0].rows, columns=tables_result.tables[0].columns)\n", " table_list = list(df[\"SentinelTableName\"])\n", " table_dropdown = ipywidgets.Dropdown(options=table_list, description='Tables:')\n", " display(table_dropdown)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685048001252 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Select a Column in the selected table to scan\n", "# However, you may safely ignore this cell if you decide to scan all columns for the selected table!\n", "columns_result = None\n", "column_list = None\n", "all_columns_query = \"{0} | getschema | project ColumnName | order by ColumnName asc\".format(table_dropdown.value)\n", "columns_result = la_data_client.query_workspace(\n", " workspace_id=workspace_id,\n", " query=all_columns_query,\n", " timespan=(start_time, end_time))\n", "\n", "if columns_result.status == LogsQueryStatus.SUCCESS:\n", " df = pd.DataFrame(data=columns_result.tables[0].rows, columns=columns_result.tables[0].columns)\n", " col_list = list(df[\"ColumnName\"])\n", " column_dropdown = ipywidgets.Dropdown(options=col_list, description='Columns:')\n", " display(column_dropdown)\n", "else:\n", " column_list= []\n", "\n" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685048012023 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# This cell will run Credential Scanner regex\n", "# You may adjust the query based on your needs.\n", "# To look at the query, you may run: print(query)\n", "\n", "if table_list != None:\n", " if 'column_list' in vars() and column_list == None and column_dropdown.value != None:\n", " column_name = \"tostring({0})\".format(column_dropdown.value)\n", " else:\n", " column_name = \"*\"\n", "\n", " table_name = table_dropdown.value\n", " kql_where_clause = get_credscan_kql_where_clause(column_name)\n", " query = \"{0} {1}\".format(table_name, kql_where_clause)\n", " \n", " #print(\"Query: \" + query)\n", "\n", " # Run query\n", " result = la_data_client.query_workspace(\n", " workspace_id=workspace_id,\n", " query=query,\n", " timespan=(start_time, end_time))\n", "\n", " # Display Result\n", " df = pd.DataFrame(data=result.tables[0].rows, columns=result.tables[0].columns)\n", " print(df)\n", "\n", " " ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685048016754 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } } ], "metadata": { "celltoolbar": "Tags", "kernel_info": { "name": "python38-azureml" }, "kernelspec": { "name": "python38-azureml", "language": "python", "display_name": "Python 3.8 - AzureML" }, "language_info": { "name": "python", "version": "3.8.5", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "microsoft": { "host": { "AzureML": { "notebookHasBeenCompleted": true } }, "ms_spell_check": { "ms_spell_check_language": "en" } }, "nteract": { "version": "nteract-front-end@1.0.0" } }, "nbformat": 4, "nbformat_minor": 0 }