[Notebooks] Update hunting notebooks (#229)
zhzhao8888 authored Aug 15, 2023
1 parent 4e09def commit 42af024
Showing 2 changed files with 111 additions and 223 deletions.
@@ -3,7 +3,7 @@
{
"cell_type": "markdown",
"source": [
"# Hunting - Automated Data Query and Ingestion to Custom Table\n",
"# Hunting - Automated Data Query and MDTI API and Ingestion to Custom Table\n",
"\n",
"__Notebook Version:__ 1.0<br>\n",
"__Python Version:__ Python 3.8<br>\n",
@@ -32,6 +32,44 @@
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"%pip install azure.mgmt.loganalytics"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"%pip install azure.monitor.query"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
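The two new cells above install the management and query SDKs one package at a time. As a hedged alternative, the remaining Azure packages imported further down could be installed in a single cell; the package names below are inferred from those imports, not taken from the commit.

```python
# Optional consolidated install (assumed equivalent to the per-package cells above).
# azure-monitor-ingestion and azure-identity are inferred from the import cell below.
%pip install azure-mgmt-loganalytics azure-monitor-query azure-monitor-ingestion azure-identity
```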
{
"cell_type": "code",
"source": [
@@ -57,7 +95,7 @@
"# Load Python libraries that will be used in this notebook\n",
"from azure.mgmt.loganalytics import LogAnalyticsManagementClient\n",
"from azure.monitor.query import LogsQueryClient, MetricsQueryClient, LogsQueryStatus\n",
"#from azure.identity.aio import DefaultAzureCredential\n",
"##from azure.identity.aio import DefaultAzureCredential\n",
"from azure.monitor.ingestion import LogsIngestionClient\n",
"\n",
"from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential\n",
@@ -78,17 +116,13 @@
{
"cell_type": "code",
"source": [
"# User inputs\r\n",
"tenant_id = ''\r\n",
"subscription_id = ''\r\n",
"akv_name = ''\r\n",
"akv_link_name = ''\r\n",
"workspace_id = ''\r\n",
"client_id_name = ''\r\n",
"client_secret_name = ''\r\n",
"resource_group_name = \"\"\r\n",
"location = \"\"\r\n",
"workspace_name = ''"
"client_secret_name = ''"
],
"outputs": [],
"execution_count": null,
@@ -101,13 +135,15 @@
{
"cell_type": "code",
"source": [
"# User inputs\r\n",
"# Parameters for provisioning resources\"\r\n",
"workspace_resource_id = '/subscriptions/{subscription_id}}/resourceGroups/{resource_group_name}/providers/Microsoft.OperationalInsights/workspaces/{workspace_name}'\r\n",
"# Parameters for provisioning resources\r\n",
"resource_group_name = \"\"\r\n",
"location = \"\"\r\n",
"workspace_name = ''\r\n",
"workspace_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.OperationalInsights/workspaces/{2}'.format(subscription_id, resource_group_name, workspace_name)\r\n",
"dataCollection_endpoint_name = \"\"\r\n",
"dataCollection_rule_name = \"\"\r\n",
"stream_name = \"\"\r\n",
"immutable_rule_id = \"\"\r\n",
"dataCollection_rule_name_for_enrichment = \"\"\r\n",
"stream_name_for_enrichment = \"\"\r\n",
"immutable_rule_id_for_enrichment = \"\"\r\n",
"dce_endpoint = ''"
],
"outputs": [],
@@ -174,7 +210,7 @@
"\r\n",
"end_time = datetime.now(timezone.utc)\r\n",
"start_time = end_time - timedelta(5)\r\n",
"query = \"DynamicSummary | where TimeGenerated > ago(3d) | project TimeGenerated, MyData = SummaryStatus\"\r\n",
"query = \"YOURTABLE | where TimeGenerated > ago(3d) | project TimeGenerated, Url\"\r\n",
"query_result = la_data_client.query_workspace(\r\n",
" workspace_id=workspace_id,\r\n",
" query=query,\r\n",
@@ -215,7 +251,7 @@
" return response\r\n",
"\r\n",
"def get_token_for_graph():\r\n",
" resource_uri = \"https://graph.microsoft.com/ThreatIntelligence.Read.All\"\r\n",
" resource_uri = \"https://graph.microsoft.com\"\r\n",
" client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)\r\n",
" client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)\r\n",
"\r\n",
@@ -243,10 +279,9 @@
{
"cell_type": "code",
"source": [
"# Calling Microsoft Sentinel Watchlist API\r\n",
"# If you don't have Watchlist, you may create one, or try to access different features, such as Bookmarks.\r\n",
"# Calling MDTI API, hosts as example\r\n",
"header_token_value = \"Bearer {}\".format(get_token_for_graph())\r\n",
"response_mdti = call_mdti_api_for_read(header_token_value, \"hosts('riskiq.net')\")"
"response_mdti_host = call_mdti_api_for_read(header_token_value, \"hosts('www.microsoft.com')\")"
],
"outputs": [],
"execution_count": null,
@@ -265,9 +300,10 @@
{
"cell_type": "code",
"source": [
"if response_mdti != None:\r\n",
" print(response_mdti)\r\n",
" #df_api_data = pd.DataFrame(response_watchlist.json()[\"value\"])"
"# Data process\r\n",
"df_host = pd.json_normalize(response_mdti_host.json())\r\n",
"df_merged = pd.merge(df_la_query, df_host[['id','firstSeenDateTime','registrar']], left_on='Url', right_on='id', how=\"outer\")\r\n",
"df_final = df_merged.rename(columns = {'TimeGenerated': 'TimeGenerated', 'Url': 'Url', 'registrar': 'Fact'})[['TimeGenerated', 'Url', 'Fact']]"
],
"outputs": [],
"execution_count": null,
@@ -284,44 +320,11 @@
}
},
{
"cell_type": "code",
"cell_type": "markdown",
"source": [
"# Calling Microsoft Sentinel API for List, the same template can be used for calling other Azure REST APIs with different parameters.\r\n",
"# For different environments, such as national clouds, you may need to use different root_url, please contact with your admins.\r\n",
"# It can be ---.azure.us, ---.azure.microsoft.scloud, ---.azure.eaglex.ic.gov, etc.\r\n",
"def call_azure_rest_api_for_get_watchlist_items(token, resource_group_name, sentinel_workspace_name, resource_alias, api_version):\r\n",
" \"Calling Microsoft Sentinel REST API\"\r\n",
" headers = {\"Authorization\": token, \"content-type\":\"application/json\" }\r\n",
" provider_name = \"Microsoft.OperationalInsights\"\r\n",
" provider2_name = \"Microsoft.SecurityInsights\"\r\n",
" target_resource_name = \"watchlists\"\r\n",
" sub_target_source_name = \"watchlistItems\"\r\n",
" api_version = api_version\r\n",
" root_url = \"https://management.azure.com\"\r\n",
" arm_rest_url_template_for_list = \"{0}/subscriptions/{1}/resourceGroups/{2}/providers/{3}/workspaces/{4}/providers/{5}/{6}/{7}/{8}?api-version={9}\"\r\n",
" arm_rest_url = arm_rest_url_template_for_list.format(root_url, subscription_id, resource_group_name, provider_name, sentinel_workspace_name, provider2_name, target_resource_name, resource_alias, sub_target_source_name, api_version)\r\n",
" response = requests.get(arm_rest_url, headers=headers, verify=True)\r\n",
" return response\r\n",
"\r\n",
"def get_token_for_azure():\r\n",
" resource_uri = \"https://management.azure.com/\"\r\n",
" client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)\r\n",
" client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)\r\n",
"\r\n",
" credential = ClientSecretCredential(\r\n",
" tenant_id=tenant_id, \r\n",
" client_id=client_id, \r\n",
" client_secret=client_secret)\r\n",
" access_token = credential.get_token(resource_uri + \"/.default\")\r\n",
" return access_token[0]\r\n"
"## 3. Save result to Azure Log Analytics Custom Table"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
@@ -332,6 +335,7 @@
{
"cell_type": "code",
"source": [
"# function for data converting\r\n",
"def convert_dataframe_to_list_of_dictionaries(df, hasTimeGeneratedColumn):\r\n",
" list = df.to_dict('records')\r\n",
"\r\n",
@@ -340,7 +344,7 @@
" if hasTimeGeneratedColumn and str(row['TimeGenerated']) != \"NaT\":\r\n",
" row['TimeGenerated']= row['TimeGenerated'].strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\")\r\n",
" \r\n",
" return list\r\n"
" return list"
],
"outputs": [],
"execution_count": null,
@@ -359,10 +363,9 @@
{
"cell_type": "code",
"source": [
"# Calling Microsoft Sentinel Watchlist API\r\n",
"# If you don't have Watchlist, you may create one, or try to access different features, such as Bookmarks.\r\n",
"header_token_value = \"Bearer {}\".format(get_token_for_azure())\r\n",
"response_watchlist = call_azure_rest_api_for_get_watchlist_items(header_token_value, \"zhzhaopitest\", \"zhzhaoasi\", \"zz20220801\", \"2023-02-01\")"
"# Construct data body for LA data ingestion\r\n",
"list_final = convert_dataframe_to_list_of_dictionaries(df_final, True)\r\n",
"body = list_final"
],
"outputs": [],
"execution_count": null,
@@ -381,139 +384,11 @@
{
"cell_type": "code",
"source": [
"if response_watchlist != None:\r\n",
" df_api_data = pd.DataFrame(response_watchlist.json()[\"value\"])"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"df_union = pd.concat([df_la_query, df_api_data])\r\n",
"#display(df_union)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"final_result1 = convert_dataframe_to_list_of_dictionaries(df_la_query, True)\r\n",
"display(final_result1)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"final_result2 = convert_dataframe_to_list_of_dictionaries(df_api_data, False)\r\n",
"#display(final_result2)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"final_result = convert_dataframe_to_list_of_dictionaries(df_union, True)\r\n",
"#display(final_result)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 3. Save result to Azure Log Analytics Custom Table"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"from azure.core.exceptions import HttpResponseError\r\n",
"from azure.identity import DefaultAzureCredential\r\n",
"from azure.monitor.ingestion import LogsIngestionClient\r\n",
"\r\n",
"body = [\r\n",
" {\r\n",
" \"TimeGenerated \": \"2023-08-04T14:51:14.1104269Z\",\r\n",
" \"MyData\": \"Computer1\",\r\n",
" },\r\n",
" {\r\n",
" \"TimeGenerated \": \"2023-08-04T14:41:14.1104269Z\",\r\n",
" \"MyData\": \"Computer2\",\r\n",
" }\r\n",
" ]\r\n",
"\r\n",
"\r\n",
"# Data ingestion to LA custom table\r\n",
"client = LogsIngestionClient(endpoint=dce_endpoint, credential=credential, logging_enable=True)\r\n",
"\r\n",
"try:\r\n",
" ingestion_result = client.upload(rule_id=immutable_rule_id, stream_name=stream_name, logs=body)\r\n",
" ingestion_result = client.upload(rule_id=immutable_rule_id_for_enrichment, stream_name=stream_name_for_enrichment, logs=body)\r\n",
"except HttpResponseError as e:\r\n",
" print(f\"Upload failed: {e}\")"
],
@@ -533,7 +408,9 @@
},
{
"cell_type": "code",
"source": [],
"source": [
"ingestion_result"
],
"outputs": [],
"execution_count": null,
"metadata": {