{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Hands on - Surfing Your Data using Azure SDK for Python\n",
        "\n",
        "__Notebook Version:__ 1.0<br>\n",
        "__Python Version:__ Python 3.8 - AzureML<br>\n",
        "__Required Packages:__ No<br>\n",
        "__Platforms Supported:__  Azure Machine Learning Notebooks\n",
        "     \n",
        "__Data Source Required:__ Log Analytics tables \n",
        "    \n",
        "### Description\n",
        "This notebook will provide step-by-step instructions and sample code to guide you through Azure authentication, Sentinel log data discovery by using Azure SDK for Python and Kusto Query Language (KQL).<br>\n",
        "*** No need to download and install any other Python modules. ***<br>\n",
        "*** Please run the cells sequentially to avoid errors. *** <br>\n",
        "Need to know more about KQL? [Getting started with Kusto Query Language](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/concepts/).\n",
        "\n",
        "## Table of Contents\n",
        "1. Warm-up\n",
        "2. Azure Authentication\n",
        "3. Log Analytics Data Queries\n",
        "4. Bonus: Sentinel Watchlist Items Retrieval"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 1. Warm-up"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# If you need to know what Python modules are available, you may run this:\n",
        "# help(\"modules\")"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1605055050943
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Load Python libraries that will be used in this notebook\n",
        "from azure.common.client_factory import get_client_from_cli_profile\n",
        "from azure.common.credentials import get_azure_cli_credentials\n",
        "from azure.loganalytics.models import QueryBody\n",
        "from azure.mgmt.loganalytics import LogAnalyticsManagementClient\n",
        "from azure.loganalytics import LogAnalyticsDataClient\n",
        "\n",
        "from IPython.display import display, HTML, Markdown\n",
        "import pandas as pd\n",
        "import json\n",
        "import ipywidgets\n",
        "import matplotlib.pyplot as plt"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1627596157670
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Functions will be used in this notebook\n",
        "def read_config_values(file_path):\n",
        "    \"This loads pre-generated parameters for Sentinel Workspace\"\n",
        "    with open(file_path) as json_file:\n",
        "        if json_file:\n",
        "            json_config = json.load(json_file)\n",
        "            return (json_config[\"tenant_id\"],\n",
        "                    json_config[\"subscription_id\"],\n",
        "                    json_config[\"resource_group\"],\n",
        "                    json_config[\"workspace_id\"],\n",
        "                    json_config[\"workspace_name\"],\n",
        "                    json_config[\"user_alias\"],\n",
        "                    json_config[\"user_object_id\"])\n",
        "    return None\n",
        "\n",
        "def has_valid_token():\n",
        "    \"Check to see if there is a valid AAD token\"\n",
        "    try:\n",
        "        credentials, sub_id = get_azure_cli_credentials()\n",
        "        creds = credentials._get_cred(resource=None)\n",
        "        token = creds._token_retriever()[2]\n",
        "        print(\"Successfully signed in.\")\n",
        "        return True\n",
        "    except Exception as ex:\n",
        "        if \"Please run 'az login' to setup account\" in str(ex):\n",
        "            print(str(ex))\n",
        "            return False\n",
        "        elif \"AADSTS70043: The refresh token has expired\" in str(ex):\n",
        "            message = \"**The refresh token has expired. <br> Please continue your login process. Then: <br> 1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking 'Compute' on left menu, then select the instance, clicking 'Restart'; <br> 2. Otherwise, you may just restart the kernel from top menu. <br> Finally, close and re-load the notebook, then re-run cells one by one from the top.**\"\n",
        "            display(Markdown(message))\n",
        "            return False\n",
        "    except:\n",
        "        print(\"Please restart the kernel, and run 'az login'.\")\n",
        "        return False\n",
        "\n",
        "def process_result(result):\n",
        "    \"This function processes data returned from Azure LogAnalyticsDataClient, it returns pandas DataFrame.\"\n",
        "    json_result = result.as_dict()\n",
        "    cols = pd.json_normalize(json_result['tables'][0], 'columns')\n",
        "    final_result = pd.json_normalize(json_result['tables'][0], 'rows')\n",
        "    if final_result.shape[0] != 0:\n",
        "        final_result.columns = cols.name\n",
        "\n",
        "    return final_result"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1627596159719
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Calling the above function to populate Sentinel workspace parameters\n",
        "# The file, config.json, was generated by the system, however, you may modify the values, or manually set the variables\n",
        "tenant_id, subscription_id, resource_group, workspace_id, workspace_name, user_alias, user_object_id = read_config_values('config.json');"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1627596164365
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 2. Azure Authentication"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Azure CLI is used to get device code to login into Azure, you need to copy the code and open the DeviceLogin site.\n",
        "# You may add [--tenant $tenant_id] to the command\n",
        "if has_valid_token() == False:\n",
        "    !az login --tenant $tenant_id --use-device-code\n",
        "\n",
        "# Initialzie Azure LogAnalyticsDataClient, which is used to access Sentinel log data in Azure Log Analytics.  \n",
        "# You may need to change resource_uri for various cloud environments.\n",
        "resource_uri = \"https://api.loganalytics.io\"\n",
        "la_client = get_client_from_cli_profile(LogAnalyticsManagementClient, subscription_id = subscription_id)\n",
        "creds, _ = get_azure_cli_credentials(resource=resource_uri)\n",
        "la_data_client = LogAnalyticsDataClient(creds)"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "gather": {
          "logged": 1627596167655
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# If you encounter error like: \"got an unexpected keyword argument 'user_agent'\" at the above cell, you may run the following command as a temporarily work-around to continue:\r\n",
        "# Please uncomment the following line and run it:\r\n",
        "# !pip install --upgrade azure-cli\r\n",
        "# Then re-run the cell above"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1627596170786
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 3. Log Analytics Data Queries"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Get all tables available using Kusto query language.  If you need to know more about KQL, please check out the link provided at the introductory section.\n",
        "tables_result = None\n",
        "table_list = None\n",
        "all_tables_query = \"union withsource = SentinelTableName * | distinct SentinelTableName | sort by SentinelTableName asc\"\n",
        "if la_data_client != None:\n",
        "    tables_result = la_data_client.query(workspace_id, QueryBody(query=all_tables_query))"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1627596175658
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Process the data using above function, then convert DataFrame to list\n",
        "if tables_result != None:\n",
        "    table_list = process_result(tables_result)\n",
        "    tables = sorted(table_list.SentinelTableName.tolist())\n",
        "    table_dropdown = ipywidgets.Dropdown(options=tables, description='Tables:')\n",
        "    display(table_dropdown)"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1627596178815
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# You may query the table based on your needs, here I use TimeGenerated column as an example, going back to 7 days, counting events per day\n",
        "# Then process the data and display the result\n",
        "# To look at the query, you may run: print(sample_query)\n",
        "date_column_name = \"TimeGenerated\"\n",
        "count_column_name = \"Count\"\n",
        "if table_list.empty == False:\n",
        "    table_name = table_dropdown.value\n",
        "    sample_query = \"{0} | where {1} >= ago(7d) | summarize {2}=count() by format_datetime({1}, 'yyyy-M-dd') | order by {1} asc\".format(table_name, date_column_name, count_column_name)\n",
        "    print(\"Query:\" + sample_query)\n",
        "    print(\"===================\")\n",
        "    result_sample = la_data_client.query(workspace_id, QueryBody(query=sample_query))\n",
        "    sample_result = process_result(result_sample)\n",
        "    print(sample_result)"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1627596186625
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Then plot a bar chart \n",
        "if sample_result.empty == False:\n",
        "    plt.bar(sample_result[date_column_name], sample_result[count_column_name])\n",
        "    plt.rcParams['figure.figsize'] = [14,2.5]"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1627596191716
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 4. Bonus: Sentinel Watchlist Items Retrieval\n"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Here we are going to use the Sentinel Watchlist name that you got in previous Hands-on notebook to get all Watchlist items \n",
        "# First, please set the watchlist_name\n",
        "watchlist_name = ipywidgets.Text(value='[[YOUR WATCHLIST NAME]]',description='watchlist_name: ')\n",
        "display(watchlist_name)"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1619562234520
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "tags": [
          "parameters"
        ]
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Using Aazure SDK for Python: LogAnalyticsDataClient to get items\n",
        "watchlist_query = \"_GetWatchlist('{0}')\".format(watchlist_name.value)\n",
        "result_watchlist = la_data_client.query(workspace_id, QueryBody(query=watchlist_query))\n",
        "my_watchlist_items = process_result(result_watchlist)\n",
        "print(my_watchlist_items)"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "gather": {
          "logged": 1619562245589
        },
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    }
  ],
  "metadata": {
    "celltoolbar": "Tags",
    "kernel_info": {
      "name": "python38-azureml"
    },
    "kernelspec": {
      "name": "python38-azureml",
      "language": "python",
      "display_name": "Python 3.8 - AzureML"
    },
    "language_info": {
      "name": "python",
      "version": "3.8.1",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "microsoft": {
      "host": {
        "AzureML": {
          "notebookHasBeenCompleted": true
        }
      }
    },
    "nteract": {
      "version": "nteract-front-end@1.0.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}