{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Credential Scan on Azure Blob Storage\n",
"\n",
"__Notebook Version:__ 1.0<br>\n",
"__Python Version:__ Python 3.8 - AzureML<br>\n",
"__Required Packages:__ No<br>\n",
"__Platforms Supported:__ Azure Machine Learning Notebooks\n",
" \n",
"__Data Source Required:__ No \n",
" \n",
"### Description\n",
"This notebook provides step-by-step instructions and sample code to detect credentials leaked into Azure Blob Storage using the Azure SDK for Python.<br>\n",
"*** No need to download and install any other Python modules. ***<br>\n",
"*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". ***<br>\n",
"\n",
"## Table of Contents\n",
"1. Warm-up\n",
"2. Authentication to Azure Storage\n",
"3. Scan Azure Blob for Leaking Credentials"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 1. Warm-up"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# If you need to know what Python modules are available, you may run this:\n",
"# help(\"modules\")"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1617837106035
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"tags": [
"parameters"
]
}
},
{
"cell_type": "code",
"source": [
"# Load Python libraries that will be used in this notebook\n",
"from azure.common.client_factory import get_client_from_cli_profile\n",
"from azure.common.credentials import get_azure_cli_credentials\n",
"from azure.mgmt.storage import StorageManagementClient\n",
"from azure.identity import DefaultAzureCredential\n",
"from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__\n",
"from azure.mgmt.resource import ResourceManagementClient\n",
"\n",
"import json\n",
"import os\n",
"import csv\n",
"import ipywidgets\n",
"from IPython.display import display, HTML, Markdown\n",
"import re"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592262386
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Functions will be used in this notebook\n",
"def read_config_values(file_path):\n",
"    \"\"\"Load the pre-generated Sentinel workspace parameters from a JSON file.\n",
"\n",
"    Args:\n",
"        file_path: Path of the JSON configuration file to read.\n",
"\n",
"    Returns:\n",
"        A 7-tuple (tenant_id, subscription_id, resource_group, workspace_id,\n",
"        workspace_name, user_alias, user_object_id) read from the file.\n",
"\n",
"    Raises:\n",
"        KeyError: If one of the seven keys is missing from the file.\n",
"    \"\"\"\n",
"    # Order matters: callers unpack the result positionally.\n",
"    config_keys = (\n",
"        \"tenant_id\",\n",
"        \"subscription_id\",\n",
"        \"resource_group\",\n",
"        \"workspace_id\",\n",
"        \"workspace_name\",\n",
"        \"user_alias\",\n",
"        \"user_object_id\",\n",
"    )\n",
"    with open(file_path) as config_file:\n",
"        settings = json.load(config_file)\n",
"    return tuple(settings[key] for key in config_keys)\n",
"\n",
"def has_valid_token():\n",
"    \"\"\"Check whether the Azure CLI credentials can produce an AAD token.\n",
"\n",
"    A short status message is printed (or rendered as Markdown) for the user.\n",
"\n",
"    Returns:\n",
"        True when the token lookup succeeds, False for every failure.\n",
"        The result is always a real bool, so a caller comparing it with\n",
"        ``== False`` reliably detects the failure case.\n",
"    \"\"\"\n",
"    try:\n",
"        credentials, _ = get_azure_cli_credentials()\n",
"        # Uses underscore-prefixed (private) members of the credentials object\n",
"        # to force a token lookup; any exception means the login is unusable.\n",
"        creds = credentials._get_cred(resource=None)\n",
"        creds._token_retriever()[2]\n",
"    except Exception as ex:\n",
"        error_text = str(ex)\n",
"        if \"Please run 'az login' to setup account\" in error_text:\n",
"            print(\"Please sign in first.\")\n",
"        elif \"AADSTS70043: The refresh token has expired\" in error_text:\n",
"            message = (\n",
"                \"**The refresh token has expired. <br> \"\n",
"                \"Please continue your login process. Then: <br> \"\n",
"                \"1. If you plan to run multiple notebooks on the same compute instance today, \"\n",
"                \"you may restart the compute instance by clicking 'Compute' on left menu, \"\n",
"                \"then select the instance, clicking 'Restart'; <br> \"\n",
"                \"2. Otherwise, you may just restart the kernel from top menu. <br> \"\n",
"                \"Finally, close and re-load the notebook, then re-run cells one by one from the top.**\"\n",
"            )\n",
"            display(Markdown(message))\n",
"        else:\n",
"            # Unrecognised failure: this used to fall through and return None,\n",
"            # which a `== False` comparison does not treat as a failure.\n",
"            print(\"Please restart the kernel, and run 'az login'.\")\n",
"        return False\n",
"    else:\n",
"        print(\"Successfully signed in.\")\n",
"        return True\n",
"\n",
"def get_file_content(blob):\n",
"    \"\"\"Return the text of a downloaded blob, trying UTF-8 and then UTF-16.\n",
"\n",
"    Args:\n",
"        blob: Object exposing ``content_as_text(max_concurrency=..., encoding=...)``.\n",
"\n",
"    Returns:\n",
"        The text returned by ``content_as_text``.\n",
"\n",
"    Raises:\n",
"        UnicodeDecodeError: If the UTF-16 fallback cannot decode the data either.\n",
"    \"\"\"\n",
"    try:\n",
"        return blob.content_as_text(max_concurrency=1, encoding='UTF-8')\n",
"    except UnicodeDecodeError:\n",
"        # Not valid UTF-8: retry once with UTF-16 and let any error propagate.\n",
"        return blob.content_as_text(max_concurrency=1, encoding='UTF-16')\n",
"\n",
"def get_regex_list():\n",
" \"This function return RegEx list for credscan\"\n",
" regex_list = [\n",
" \"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\\\- ]?(key|secret)[^,a-z]|\\\\.azuredatabricks\\\\.net).{0,10}(dapi)?[a-z0-9/+]{22}\",\n",
" \"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\\\.[a-z0-9/+]{40}[^a-z0-9/+])\",\n",
" \"(?-i:)\\\\WAIza(?i)[a-z0-9_\\\\\\\\\\\\-]{35}\\\\W\",\n",
" \"(?i)(\\\\Wsig\\\\W|Secret(Value)?|IssuerSecret|(\\\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\\\.azure\\\\-devices\\\\.net|\\\\.(core|servicebus|redis\\\\.cache|accesscontrol|mediaservices)\\\\.(windows\\\\.net|chinacloudapi\\\\.cn|cloudapi\\\\.de|usgovcloudapi\\\\.net)|New\\\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)\",\n",
" \"(?i)visualstudio\\\\.com.{1,100}\\\\W(?-i:)[a-z2-7]{52}\\\\W\",\n",
" \"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d\",\n",
" \"(?i)(x-functions-key|ApiKey|Code=|\\\\.azurewebsites\\\\.net/api/).{0,100}[a-z0-9/\\\\+]{54}={2}\",\n",
" \"(?i)code=[a-z0-9%]{54,74}(%3d){2}\",\n",
" \"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\\\+]{60}\\\\W\",\n",
" \"(?i)[^a-z0-9/\\\\+][a-z0-9/\\\\+]{86}==\",\n",
" \"(?-i:)\\\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\\\-{5}\",\n",
" \"(?i)(app(lication)?|client)[_\\\\- ]?(key(url)?|secret)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\-]\",\n",
" \"(?i)refresh[_\\\\-]?token([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})(\\\"data:text/plain,.+\\\"|[a-z0-9/+=_.-]{20,200})\",\n",
" \"(?i)AccessToken(Secret)?([\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2}|[\\\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}\",\n",
" \"(?i)[a-z0-9]{3,5}://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n",
" \"(?i)snmp(\\\\-server)?\\\\.exe.{0,100}(priv|community)\",\n",
" \"(?i)(ConvertTo\\\\-?SecureString\\\\s*((\\\\(|\\\\Wstring)\\\\s*)?['\\\"]+)\",\n",
" \"(?i)(Consumer|api)[_\\\\- ]?(Secret|Key)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>,\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\s]{5,}\",\n",
" \"(?i)authorization[,\\\\[:= \\\"']+([dbaohmnsv])\",\n",
" \"(?i)-u\\\\s+.{2,100}-p\\\\s+[^\\\\-/]\",\n",
" \"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n",
" \"(?i)(\\\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\\\W[a-z0-9/\\\\+]{40}\",\n",
" \"(?-i:)(eyJ0eXAiOiJKV1Qi|eyJhbGci)\",\n",
" \"(?i)@(\\\\.(on)?)?microsoft\\\\.com[ -~\\\\s]{1,100}?(\\\\w?pass\\\\w?)\",\n",
" \"(?i)net(\\\\.exe)?.{1,5}(user\\\\s+|share\\\\s+/user:|user-?secrets? set)\\\\s+[a-z0-9]\",\n",
" \"(?i)xox[pbar]\\\\-[a-z0-9]\",\n",
" \"(?i)[\\\":\\\\s=]((x?corp|extranet(test)?|ntdev)(\\\\.microsoft\\\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\\\W.{0,100}(password|\\\\Wpwd|\\\\Wpass|\\\\Wpw\\\\W|userpass)\",\n",
" \"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\\\s]{0,100}?@([a-z0-9.]+\\\\.(on)?)?microsoft\\\\.com['\\\"]?\",\n",
" \"(?i)(\\\\.database\\\\.azure\\\\.com|\\\\.database(\\\\.secure)?\\\\.windows\\\\.net|\\\\.cloudapp\\\\.net|\\\\.database\\\\.usgovcloudapi\\\\.net|\\\\.database\\\\.chinacloudapi\\\\.cn|\\\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\\\Wpwd\\\\W)\",\n",
" \"(?i)(secret(.?key)?|password)[\\\"']?\\\\s*[:=]\\\\s*[\\\"'][^\\\\s]+?[\\\"']\",\n",
" \"(?i)[^a-z\\\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\\\s?account)\\\\s*[^\\\\w\\\\s,]([ -~\\\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\\\s\\\\$]|\\\\s)\\\\s*(DB_PASS|(sql|service)?password|pwd)\",\n",
" \"(?i)(password|secret(key)?)[ \\\\t]*[=:]+[ \\\\t]*([^:\\\\s\\\"';,<]{2,200})\",\n",
" ]\n",
"\n",
" return regex_list\n",
"\n",
"def set_continuation_flag(flag):\n",
"    \"\"\"Return ``flag`` unchanged, printing a notice when it equals False.\n",
"\n",
"    Args:\n",
"        flag: The value to hand back to the caller.\n",
"\n",
"    Returns:\n",
"        The same ``flag`` object that was passed in.\n",
"    \"\"\"\n",
"    # Equality (not truthiness) is deliberate: None stays silent.\n",
"    is_off = flag == False\n",
"    if is_off:\n",
"        print(\"continuation flag is false.\")\n",
"    return flag\n",
"\n",
"def convert_result_to_string(result_row):\n",
"    \"\"\"Flatten one regex match into a single comma-separated string.\n",
"\n",
"    Args:\n",
"        result_row: A str, or a tuple of sized items (such as group strings).\n",
"\n",
"    Returns:\n",
"        The string itself for a str; the non-empty items joined with ','\n",
"        for a tuple; None for any other type.\n",
"    \"\"\"\n",
"    row_type = type(result_row)\n",
"    if row_type == tuple:\n",
"        non_empty = [part for part in result_row if len(part) > 0]\n",
"        return ','.join(non_empty)\n",
"    if row_type == str:\n",
"        return result_row\n",
"    return None\n",
"\n",
"def export_csv(file_name, data_list):\n",
"    \"\"\"Write the scan results to a CSV file.\n",
"\n",
"    Args:\n",
"        file_name: Path of the CSV file to create; an existing file is overwritten.\n",
"        data_list: Iterable of strings; each one is split on ',' and written\n",
"            as one CSV row.\n",
"    \"\"\"\n",
"    # newline='' lets the csv module control line endings (avoids blank rows\n",
"    # on Windows); an explicit encoding keeps non-ASCII matches writable\n",
"    # regardless of the platform default.\n",
"    with open(file_name, 'w', newline='', encoding='utf-8') as csv_file:\n",
"        writer = csv.writer(csv_file, delimiter=',')\n",
"        writer.writerows(row.split(',') for row in data_list)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592263255
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Populate the Sentinel workspace parameters from config.json.\n",
"# The file was generated by the system; you may edit its values or set these variables manually.\n",
"(\n",
"    tenant_id,\n",
"    subscription_id,\n",
"    resource_group,\n",
"    workspace_id,\n",
"    workspace_name,\n",
"    user_alias,\n",
"    user_object_id,\n",
") = read_config_values('config.json')"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592265955
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 2. Authentication to Azure Storage"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# When no valid token is available, sign in with the Azure CLI device-code flow:\n",
"# copy the code printed by the command and open the DeviceLogin site.\n",
"# The command below already passes --tenant $tenant_id.\n",
"signed_in = has_valid_token()\n",
"if signed_in == False:\n",
"    !az login --tenant $tenant_id --use-device-code\n",
"\n",
"# Create the Azure Storage and Azure Resource management clients from the CLI profile\n",
"storage_client = get_client_from_cli_profile(StorageManagementClient, subscription_id=subscription_id)\n",
"resource_client = get_client_from_cli_profile(ResourceManagementClient, subscription_id=subscription_id)\n",
"\n",
"# continuation_flag is False when no resource client is available, True otherwise\n",
"continuation_flag = set_continuation_flag(not (resource_client == None))"
],
"outputs": [],
"execution_count": null,
"metadata": {
"gather": {
"logged": 1627592286648
}
}
},
{
"cell_type": "code",
"source": [
"# If you encounter an error like: \"got an unexpected keyword argument 'user_agent'\" in the cell above, you may run the following command as a temporary workaround to continue:\r\n",
"# Please uncomment the following line and run it:\r\n",
"# !pip install --upgrade azure-cli\r\n",
"# Then re-run the cell above"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Let the user pick an Azure resource group from a dropdown\n",
"if continuation_flag:\n",
"    group_list = resource_client.resource_groups.list()\n",
"    group_names = sorted([group.name for group in group_list])\n",
"    group_dropdown = ipywidgets.Dropdown(options=group_names, description='Groups:')\n",
"    display(group_dropdown)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592289368
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Let the user pick a storage account from the selected resource group\n",
"if not (continuation_flag and group_dropdown.value != None):\n",
"    continuation_flag = set_continuation_flag(False)\n",
"else:\n",
"    # Only storage accounts are listed, via the resource-type filter.\n",
"    resource_list = resource_client.resources.list_by_resource_group(\n",
"        group_dropdown.value,\n",
"        filter=\"resourceType eq 'Microsoft.Storage/storageAccounts'\",\n",
"    )\n",
"    account_names = sorted([resource.name for resource in resource_list])\n",
"    storage_account_dropdown = ipywidgets.Dropdown(options=account_names, description='Accounts:')\n",
"    display(storage_account_dropdown)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592308969
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 3. Scan Azure Blob for Leaking Credentials"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Let the user pick a blob container from the selected storage account\n",
"if not (continuation_flag and storage_account_dropdown.value != None):\n",
"    continuation_flag = set_continuation_flag(False)\n",
"else:\n",
"    storage_keys = storage_client.storage_accounts.list_keys(group_dropdown.value, storage_account_dropdown.value)\n",
"    if storage_keys != None:\n",
"        # Index the account keys by name and use the one named 'key1'.\n",
"        keys_by_name = {entry.key_name: entry.value for entry in storage_keys.keys}\n",
"        storage_key = keys_by_name['key1']\n",
"\n",
"        account_url = \"https://{0}.blob.core.windows.net\".format(storage_account_dropdown.value)\n",
"        blob_service_client = BlobServiceClient(account_url=account_url, credential=storage_key)\n",
"        if blob_service_client != None:\n",
"            container_list = blob_service_client.list_containers()\n",
"            container_names = sorted([item.name for item in container_list])\n",
"            container_dropdown = ipywidgets.Dropdown(options=container_names, description='Containers:')\n",
"            display(container_dropdown)\n",
"        else:\n",
"            continuation_flag = set_continuation_flag(False)\n",
"    else:\n",
"        continuation_flag = set_continuation_flag(False)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592317478
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Let the user pick a blob from the selected container\n",
"if not (continuation_flag and container_dropdown.value != None):\n",
"    continuation_flag = set_continuation_flag(False)\n",
"else:\n",
"    container_client = blob_service_client.get_container_client(container_dropdown.value)\n",
"    if container_client != None:\n",
"        blob_list = container_client.list_blobs()\n",
"        blob_names = sorted([item.name for item in blob_list])\n",
"        blob_dropdown = ipywidgets.Dropdown(options=blob_names, description='Blobs:')\n",
"        display(blob_dropdown)\n",
"    else:\n",
"        continuation_flag = set_continuation_flag(False)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592321570
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Download the selected blob and decode it to text\n",
"if not (continuation_flag and blob_dropdown.value != None):\n",
"    continuation_flag = set_continuation_flag(False)\n",
"else:\n",
"    selected_blob = container_client.download_blob(blob_dropdown.value)\n",
"    if selected_blob != None:\n",
"        content = get_file_content(selected_blob)\n",
"    else:\n",
"        continuation_flag = set_continuation_flag(False)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592325608
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Run every credential regex against the blob content and collect the matches\n",
"import warnings\n",
"\n",
"result_list = []\n",
"csv_string = \"\"\n",
"if continuation_flag and content != None:\n",
"    has_leaking = False\n",
"    regex_list = get_regex_list()\n",
"    for regex in regex_list:\n",
"        # Silence warnings only while compiling and matching this pattern,\n",
"        # instead of disabling every warning for the rest of the session.\n",
"        with warnings.catch_warnings():\n",
"            warnings.simplefilter('ignore')\n",
"            pattern = re.compile(regex)\n",
"            results = pattern.findall(content)\n",
"        if not results:\n",
"            continue\n",
"\n",
"        print(\"================================================\")\n",
"        print(\"MATCHED REGEX:\\n\" + regex)\n",
"        print(\"------------------------------------------------\")\n",
"        print(\"FILE: \" + blob_dropdown.value + \"\\n\")\n",
"        print(\"---------------MATCHED CONTENT -----------------\")\n",
"        for result in results:\n",
"            print(str(result))\n",
"            # findall yields a str or a tuple of groups; flatten it for the CSV export.\n",
"            csv_string = convert_result_to_string(result)\n",
"            result_list.append(csv_string)\n",
"        print(\"================================================\")\n",
"        has_leaking = True\n",
"\n",
"    if not has_leaking:\n",
"        print('No leaking data found')\n",
"\n",
"else:\n",
"    continuation_flag = set_continuation_flag(False)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592327916
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Write the collected matches to credscan_blob.csv in the current working directory\n",
"if not (continuation_flag and len(result_list) > 0):\n",
"    print(\"No data\")\n",
"else:\n",
"    export_csv(\"credscan_blob.csv\", result_list)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"gather": {
"logged": 1627592332992
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
}
],
"metadata": {
"celltoolbar": "Tags",
"kernel_info": {
"name": "python38-azureml"
},
"kernelspec": {
"name": "python38-azureml",
"language": "python",
"display_name": "Python 3.8 - AzureML"
},
"language_info": {
"name": "python",
"version": "3.8.1",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"microsoft": {
"host": {
"AzureML": {
"notebookHasBeenCompleted": true
}
}
},
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}