{ "cells": [ { "cell_type": "markdown", "source": [ "# Credential Scan on Azure Blob Storage\n", "\n", "__Notebook Version:__ 1.0
\n", "__Python Version:__ Python 3.8 - AzureML
\n", "__Required Packages:__ None
\n", "__Platforms Supported:__ Azure Machine Learning Notebooks\n", " \n", "__Data Source Required:__ No \n", " \n", "### Description\n", "This notebook provides step-by-step instructions and sample code to detect credentials leaked into Azure Blob Storage using the Azure SDK for Python.
\n", "*** No need to download and install any other Python modules. ***
\n", "*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". ***
\n", "\n", "## Table of Contents\n", "1. Warm-up\n", "2. Authentication to Azure Storage\n", "3. Scan Azure Blob for Leaking Credentials" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 1. Warm-up" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# If you need to know what Python modules are available, you may run this:\n", "# help(\"modules\")" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1617837106035 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } }, "tags": [ "parameters" ] } }, { "cell_type": "code", "source": [ "# Load Python libraries that will be used in this notebook\n", "from azure.common.client_factory import get_client_from_cli_profile\n", "from azure.common.credentials import get_azure_cli_credentials\n", "from azure.mgmt.storage import StorageManagementClient\n", "from azure.identity import DefaultAzureCredential\n", "from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__\n", "from azure.mgmt.resource import ResourceManagementClient\n", "\n", "import json\n", "import os\n", "import csv\n", "import ipywidgets\n", "from IPython.display import display, HTML, Markdown\n", "import re" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592262386 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Functions will be used in this notebook\n", "def read_config_values(file_path):\n", " \"This loads pre-generated parameters for Sentinel Workspace\"\n", " with open(file_path) as json_file:\n", " if json_file:\n", " json_config = json.load(json_file)\n", " return (json_config[\"tenant_id\"],\n", " 
json_config[\"subscription_id\"],\n", " json_config[\"resource_group\"],\n", " json_config[\"workspace_id\"],\n", " json_config[\"workspace_name\"],\n", " json_config[\"user_alias\"],\n", " json_config[\"user_object_id\"])\n", " return None\n", "\n", "def has_valid_token():\n", " \"Check to see if there is a valid AAD token\"\n", " try:\n", " credentials, sub_id = get_azure_cli_credentials()\n", " creds = credentials._get_cred(resource=None)\n", " token = creds._token_retriever()[2]\n", " print(\"Successfully signed in.\")\n", " return True\n", " except Exception as ex:\n", " if \"Please run 'az login' to setup account\" in str(ex):\n", " print(\"Please sign in first.\")\n", " return False\n", " elif \"AADSTS70043: The refresh token has expired\" in str(ex):\n", " message = \"**The refresh token has expired.
Please continue your login process. Then:
1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking 'Compute' on left menu, then select the instance, clicking 'Restart';
2. Otherwise, you may just restart the kernel from top menu.
Finally, close and re-load the notebook, then re-run cells one by one from the top.**\"\n", " display(Markdown(message))\n", " return False\n", " except:\n", " print(\"Please restart the kernel, and run 'az login'.\")\n", " return False\n", "\n", "def get_file_content(blob):\n", " \"Decoding file content\"\n", " try:\n", " content = blob.content_as_text(max_concurrency=1, encoding='UTF-8')\n", " except UnicodeDecodeError:\n", " content = blob.content_as_text(max_concurrency=1, encoding='UTF-16')\n", " return content\n", "\n", "def get_regex_list():\n", " \"This function return RegEx list for credscan\"\n", " regex_list = [\n", " \"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\\\- ]?(key|secret)[^,a-z]|\\\\.azuredatabricks\\\\.net).{0,10}(dapi)?[a-z0-9/+]{22}\",\n", " \"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\\\.[a-z0-9/+]{40}[^a-z0-9/+])\",\n", " \"(?-i:)\\\\WAIza(?i)[a-z0-9_\\\\\\\\\\\\-]{35}\\\\W\",\n", " \"(?i)(\\\\Wsig\\\\W|Secret(Value)?|IssuerSecret|(\\\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\\\.azure\\\\-devices\\\\.net|\\\\.(core|servicebus|redis\\\\.cache|accesscontrol|mediaservices)\\\\.(windows\\\\.net|chinacloudapi\\\\.cn|cloudapi\\\\.de|usgovcloudapi\\\\.net)|New\\\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)\",\n", " \"(?i)visualstudio\\\\.com.{1,100}\\\\W(?-i:)[a-z2-7]{52}\\\\W\",\n", " \"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d\",\n", " \"(?i)(x-functions-key|ApiKey|Code=|\\\\.azurewebsites\\\\.net/api/).{0,100}[a-z0-9/\\\\+]{54}={2}\",\n", " \"(?i)code=[a-z0-9%]{54,74}(%3d){2}\",\n", " \"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\\\+]{60}\\\\W\",\n", " \"(?i)[^a-z0-9/\\\\+][a-z0-9/\\\\+]{86}==\",\n", " \"(?-i:)\\\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? 
PRIVATE KEY( BLOCK)?\\\\-{5}\",\n", " \"(?i)(app(lication)?|client)[_\\\\- ]?(key(url)?|secret)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\-]\",\n", " \"(?i)refresh[_\\\\-]?token([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})(\\\"data:text/plain,.+\\\"|[a-z0-9/+=_.-]{20,200})\",\n", " \"(?i)AccessToken(Secret)?([\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2}|[\\\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}\",\n", " \"(?i)[a-z0-9]{3,5}://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n", " \"(?i)snmp(\\\\-server)?\\\\.exe.{0,100}(priv|community)\",\n", " \"(?i)(ConvertTo\\\\-?SecureString\\\\s*((\\\\(|\\\\Wstring)\\\\s*)?['\\\"]+)\",\n", " \"(?i)(Consumer|api)[_\\\\- ]?(Secret|Key)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>,\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\s]{5,}\",\n", " \"(?i)authorization[,\\\\[:= \\\"']+([dbaohmnsv])\",\n", " \"(?i)-u\\\\s+.{2,100}-p\\\\s+[^\\\\-/]\",\n", " \"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n", " \"(?i)(\\\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\\\W[a-z0-9/\\\\+]{40}\",\n", " \"(?-i:)(eyJ0eXAiOiJKV1Qi|eyJhbGci)\",\n", " \"(?i)@(\\\\.(on)?)?microsoft\\\\.com[ -~\\\\s]{1,100}?(\\\\w?pass\\\\w?)\",\n", " \"(?i)net(\\\\.exe)?.{1,5}(user\\\\s+|share\\\\s+/user:|user-?secrets? 
set)\\\\s+[a-z0-9]\",\n", " \"(?i)xox[pbar]\\\\-[a-z0-9]\",\n", " \"(?i)[\\\":\\\\s=]((x?corp|extranet(test)?|ntdev)(\\\\.microsoft\\\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\\\W.{0,100}(password|\\\\Wpwd|\\\\Wpass|\\\\Wpw\\\\W|userpass)\",\n", " \"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\\\s]{0,100}?@([a-z0-9.]+\\\\.(on)?)?microsoft\\\\.com['\\\"]?\",\n", " \"(?i)(\\\\.database\\\\.azure\\\\.com|\\\\.database(\\\\.secure)?\\\\.windows\\\\.net|\\\\.cloudapp\\\\.net|\\\\.database\\\\.usgovcloudapi\\\\.net|\\\\.database\\\\.chinacloudapi\\\\.cn|\\\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\\\Wpwd\\\\W)\",\n", " \"(?i)(secret(.?key)?|password)[\\\"']?\\\\s*[:=]\\\\s*[\\\"'][^\\\\s]+?[\\\"']\",\n", " \"(?i)[^a-z\\\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\\\s?account)\\\\s*[^\\\\w\\\\s,]([ -~\\\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\\\s\\\\$]|\\\\s)\\\\s*(DB_PASS|(sql|service)?password|pwd)\",\n", " \"(?i)(password|secret(key)?)[ \\\\t]*[=:]+[ \\\\t]*([^:\\\\s\\\"';,<]{2,200})\",\n", " ]\n", "\n", " return regex_list\n", "\n", "def set_continuation_flag(flag):\n", " if flag == False:\n", " print(\"continuation flag is false.\")\n", " return flag\n", "\n", "def convert_result_to_string(result_row):\n", " if (type(result_row)) == str:\n", " return result_row\n", " elif (type(result_row)) == tuple:\n", " return ','.join([m for m in result_row if len(m) > 0])\n", "\n", "def export_csv(file_name, data_list):\n", " with open(file_name, 'w') as f:\n", " w = csv.writer(f, delimiter = ',')\n", " w.writerows([x.split(',') for x in data_list])" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592263255 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": 
"code", "source": [ "# Calling the above function to populate Sentinel workspace parameters\n", "# The file, config.json, was generated by the system, however, you may modify the values, or manually set the variables\n", "tenant_id, subscription_id, resource_group, workspace_id, workspace_name, user_alias, user_object_id = read_config_values('config.json');" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592265955 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 2. Authentication to Azure Storage" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Azure CLI is used to get device code to login into Azure, you need to copy the code and open the DeviceLogin site.\n", "# You may add [--tenant $tenant_id] to the command\n", "if has_valid_token() == False:\n", " !az login --tenant $tenant_id --use-device-code\n", "\n", "# Initializing Azure Storage and Azure Resource Python clients\n", "storage_client = get_client_from_cli_profile(StorageManagementClient, subscription_id = subscription_id)\n", "resource_client = get_client_from_cli_profile(ResourceManagementClient, subscription_id = subscription_id)\n", "\n", "# Set continuation_flag\n", "if resource_client == None:\n", " continuation_flag = set_continuation_flag(False)\n", "else:\n", " continuation_flag = set_continuation_flag(True)" ], "outputs": [], "execution_count": null, "metadata": { "gather": { "logged": 1627592286648 } } }, { "cell_type": "code", "source": [ "# If you encounter error like: \"got an unexpected keyword argument 'user_agent'\" at the above cell, you may run the following command as a temporarily work-around to continue:\r\n", "# Please uncomment the following line and run it:\r\n", "# !pip install --upgrade azure-cli\r\n", "# Then re-run the cell above" ], "outputs": [], 
"execution_count": null, "metadata": { "collapsed": true, "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Select Azure Resource Group\n", "if continuation_flag:\n", " group_list = resource_client.resource_groups.list()\n", " group_dropdown = ipywidgets.Dropdown(options=sorted([g.name for g in group_list]), description='Groups:')\n", " display(group_dropdown)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592289368 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Select Azure Storage Account\n", "if continuation_flag and group_dropdown.value != None:\n", " resource_list = resource_client.resources.list_by_resource_group(\n", " group_dropdown.value,\n", " filter=\"resourceType eq 'Microsoft.Storage/storageAccounts'\",\n", " )\n", " storage_account_dropdown = ipywidgets.Dropdown(options=sorted([r.name for r in resource_list]), description='Accounts:')\n", " display(storage_account_dropdown)\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592308969 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 3. 
Scan Azure Blob for Leaking Credentials" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Select a blob container for a specified Azure Storage account\n", "if continuation_flag and storage_account_dropdown.value != None:\n", " storage_keys = storage_client.storage_accounts.list_keys(group_dropdown.value,storage_account_dropdown.value)\n", " if storage_keys != None:\n", " storage_key = {v.key_name: v.value for v in storage_keys.keys}['key1']\n", "\n", " blob_service_client = BlobServiceClient(\n", " account_url=\"https://{0}.blob.core.windows.net\".format(storage_account_dropdown.value),\n", " credential=storage_key\n", " )\n", " if blob_service_client != None:\n", " container_list = blob_service_client.list_containers()\n", " container_dropdown = ipywidgets.Dropdown(options=sorted([r.name for r in container_list]), description='Containers:')\n", " display(container_dropdown)\n", " else:\n", " continuation_flag = set_continuation_flag(False)\n", " else:\n", " continuation_flag = set_continuation_flag(False)\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592317478 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Select a blob from a specified blob container\n", "if continuation_flag and container_dropdown.value != None:\n", " container_client = blob_service_client.get_container_client(container_dropdown.value)\n", " if container_client != None:\n", " blob_list = container_client.list_blobs()\n", " blob_dropdown = ipywidgets.Dropdown(options=sorted([r.name for r in blob_list]), description='Blobs:')\n", " display(blob_dropdown)\n", " else:\n", " continuation_flag = set_continuation_flag(False)\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], 
"execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592321570 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Get blob content\n", "if continuation_flag and blob_dropdown.value != None:\n", " selected_blob = container_client.download_blob(blob_dropdown.value)\n", " if selected_blob != None:\n", " content = get_file_content(selected_blob)\n", " else:\n", " continuation_flag = set_continuation_flag(False)\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592325608 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Run Regex strings on the file content\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "result_list = []\n", "csv_string = \"\"\n", "if continuation_flag and content != None:\n", " has_leaking = False\n", " regex_list = get_regex_list()\n", " for regex in regex_list:\n", " re.compile(regex)\n", " results = re.findall(regex, content)\n", " if results:\n", " print(\"================================================\")\n", " print(\"MATCHED REGEX:\\n\" + regex)\n", " print(\"------------------------------------------------\")\n", " print(\"FILE: \" + blob_dropdown.value + \"\\n\")\n", " #print(content)\n", " print(\"---------------MATCHED CONTENT -----------------\")\n", " for result in results:\n", " print(str(result))\n", " csv_string = convert_result_to_string(result)\n", " result_list.append(csv_string) \n", " print(\"================================================\")\n", " has_leaking = True \n", " \n", " if has_leaking == False:\n", " print('No leaking data found')\n", "\n", "else:\n", " continuation_flag = set_continuation_flag(False)" ], "outputs": [], "execution_count": 
null, "metadata": { "collapsed": true, "gather": { "logged": 1627592327916 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Save results to a csv file in the current file system\n", "if continuation_flag and len(result_list) > 0:\n", " export_csv(\"credscan_blob.csv\", result_list)\n", "else:\n", " print(\"No data\")" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1627592332992 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } } ], "metadata": { "celltoolbar": "Tags", "kernel_info": { "name": "python38-azureml" }, "kernelspec": { "name": "python38-azureml", "language": "python", "display_name": "Python 3.8 - AzureML" }, "language_info": { "name": "python", "version": "3.8.1", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "microsoft": { "host": { "AzureML": { "notebookHasBeenCompleted": true } } }, "nteract": { "version": "nteract-front-end@1.0.0" } }, "nbformat": 4, "nbformat_minor": 2 }