{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Hands on - Surfing Your Data using Azure SDK for Python\r\n",
"\r\n",
"__Notebook Version:__ 1.0
\r\n",
"__Python Version:__ Python 3.6 - AzureML
\r\n",
"__Required Packages:__ No
\r\n",
"__Platforms Supported:__ Azure Machine Learning Notebooks\r\n",
" \r\n",
"__Data Source Required:__ Log Analytics tables \r\n",
" \r\n",
"### Description\r\n",
"This notebook will provide step-by-step instructions and sample code to guide you through Azure authentication, Sentinel log data discovery by using Azure SDK for Python and Kusto Query Language (KQL).
\r\n",
"*** No need to download and install any other Python modules. ***
\r\n",
"*** Please run the cells sequentially to avoid errors. ***
\r\n",
"Need to know more about KQL? [Getting started with Kusto Query Language](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/concepts/).\r\n",
"\r\n",
"## Table of Contents\r\n",
"1. Warm-up\r\n",
"2. Azure Authentication\r\n",
"3. Log Analytics Data Queries\r\n",
"4. Bonus: Sentinel Watchlist Items Retrieval"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 1. Warm-up"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# If you need to know what Python modules are available, you may run this:\r\n",
"# help(\"modules\")"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605055050943
}
}
},
{
"cell_type": "code",
"source": [
"# Load Python libraries that will be used in this notebook\r\n",
"from azure.common.client_factory import get_client_from_cli_profile\r\n",
"from azure.common.credentials import get_azure_cli_credentials\r\n",
"from azure.loganalytics.models import QueryBody\r\n",
"from azure.mgmt.loganalytics import LogAnalyticsManagementClient\r\n",
"from azure.loganalytics import LogAnalyticsDataClient\r\n",
"\r\n",
"from pandas.io.json import json_normalize\r\n",
"import json\r\n",
"import ipywidgets\r\n",
"import matplotlib.pyplot as plt"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605204950057
}
}
},
{
"cell_type": "code",
"source": [
"# Functions will be used in this notebook\r\n",
"def read_config_values(file_path):\r\n",
" \"This loads pre-generated parameters for Sentinel Workspace\"\r\n",
" with open(file_path) as json_file:\r\n",
" if json_file:\r\n",
" json_config = json.load(json_file)\r\n",
" return (json_config[\"tenant_id\"],\r\n",
" json_config[\"subscription_id\"],\r\n",
" json_config[\"resource_group\"],\r\n",
" json_config[\"workspace_id\"],\r\n",
" json_config[\"workspace_name\"])\r\n",
" return None\r\n",
"\r\n",
"def process_result(result):\r\n",
" \"This function processes data returned from Azure LogAnalyticsDataClient, it returns pandas DataFrame.\"\r\n",
" json_result = result.as_dict()\r\n",
" cols = json_normalize(json_result['tables'][0], 'columns')\r\n",
" final_result = json_normalize(json_result['tables'][0], 'rows')\r\n",
" if final_result.shape[0] != 0:\r\n",
" final_result.columns = cols.name\r\n",
"\r\n",
" return final_result"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605204951320
}
}
},
{
"cell_type": "code",
"source": [
"# Calling the above function to populate Sentinel workspace parameters\r\n",
"# The file, config.json, was generated by the system, however, you may modify the values, or manually set the variables\r\n",
"tenant_id, subscription_id, resource_group, workspace_id, workspace_name = read_config_values('config.json');"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605204954598
}
}
},
{
"cell_type": "markdown",
"source": [
"## 2. Azure Authentication"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Azure CLI is used to get device code to login into Azure, you need to copy the code and open the DeviceLogin site.\r\n",
"# You may add [--tenant $tenant_id] to the command\r\n",
"!az login --tenant $tenant_id --use-device-code"
],
"outputs": [],
"execution_count": null,
"metadata": {
"gather": {
"logged": 1605202763487
}
}
},
{
"cell_type": "code",
"source": [
"# Initialzie Azure LogAnalyticsDataClient, which is used to access Sentinel log data in Azure Log Analytics. \r\n",
"# You may need to change resource_uri for various cloud environments.\r\n",
"resource_uri = \"https://api.loganalytics.io\"\r\n",
"la_client = get_client_from_cli_profile(LogAnalyticsManagementClient, subscription_id = subscription_id)\r\n",
"creds, _ = get_azure_cli_credentials(resource=resource_uri)\r\n",
"la_data_client = LogAnalyticsDataClient(creds)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605204960035
}
}
},
{
"cell_type": "markdown",
"source": [
"## 3. Log Analytics Data Queries"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Get all tables available using Kusto query language. If you need to know more about KQL, please check out the link provided at the introductory section.\r\n",
"tables_result = None\r\n",
"table_list = None\r\n",
"all_tables_query = \"union withsource = SentinelTableName * | distinct SentinelTableName | sort by SentinelTableName asc\"\r\n",
"if la_data_client != None:\r\n",
" tables_result = la_data_client.query(workspace_id, QueryBody(query=all_tables_query))"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605204962622
}
}
},
{
"cell_type": "code",
"source": [
"# Process the data using above function, then convert DataFrame to list\r\n",
"if tables_result != None:\r\n",
" table_list = process_result(tables_result)\r\n",
" tables = sorted(table_list.SentinelTableName.tolist())\r\n",
" table_dropdown = ipywidgets.Dropdown(options=tables, description='Tables:')\r\n",
" display(table_dropdown)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605204964273
}
}
},
{
"cell_type": "code",
"source": [
"# You may query the table based on your needs, here I use TimeGenerated column as an example, going back to 7 days, counting events per day\r\n",
"# Then process the data and display the result\r\n",
"# To look at the query, you may run: print(sample_query)\r\n",
"date_column_name = \"TimeGenerated\"\r\n",
"count_column_name = \"Count\"\r\n",
"if table_list.empty == False:\r\n",
" table_name = table_dropdown.value\r\n",
" sample_query = \"{0} | where {1} >= ago(7d) | summarize {2}=count() by format_datetime({1}, 'yyyy-M-dd') | order by {1} asc\".format(table_name, date_column_name, count_column_name)\r\n",
" print(\"Query:\" + sample_query)\r\n",
" print(\"===================\")\r\n",
" result_sample = la_data_client.query(workspace_id, QueryBody(query=sample_query))\r\n",
" sample_result = process_result(result_sample)\r\n",
" print(sample_result)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605205041782
}
}
},
{
"cell_type": "code",
"source": [
"# Then plot a bar chart \r\n",
"if sample_result.empty == False:\r\n",
" plt.bar(sample_result[date_column_name], sample_result[count_column_name])\r\n",
" plt.rcParams['figure.figsize'] = [14,2.5]"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605205047959
}
}
},
{
"cell_type": "markdown",
"source": [
"## 4. Bonus: Sentinel Watchlist Items Retrieval\r\n"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Here we are going to use the Sentinel Watchlist name that you got in previous Hands-on notebook to get all Watchlist items \r\n",
"# First, please set the watchlist_name\r\n",
"watchlist_name = ipywidgets.Text(value='[[YOUR WATCHLIST NAME]]',description='watchlist_name: ')\r\n",
"display(watchlist_name)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605205052532
}
}
},
{
"cell_type": "code",
"source": [
"# Using Aazure SDK for Python: LogAnalyticsDataClient to get items\r\n",
"watchlist_query = \"_GetWatchlist('{0}')\".format(watchlist_name.value)\r\n",
"result_watchlist = la_data_client.query(workspace_id, QueryBody(query=watchlist_query))\r\n",
"my_watchlist_items = process_result(result_watchlist)\r\n",
"print(my_watchlist_items)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1605205072248
}
}
},
{
"cell_type": "code",
"source": [],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
}
],
"metadata": {
"kernelspec": {
"name": "python3-azureml",
"language": "python",
"display_name": "Python 3.6 - AzureML"
},
"language_info": {
"name": "python",
"version": "3.6.9",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"kernel_info": {
"name": "python3-azureml"
},
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}