From a38ff175dd338a486bc6e2d20d33f1701779c888 Mon Sep 17 00:00:00 2001 From: Tyler Erickson Date: Thu, 30 May 2024 10:35:03 -0700 Subject: [PATCH] initial forest example (~10 regions failing) --- example_forest_by_admin2.ipynb | 877 +++++++++++++++++++++++++++++++++ 1 file changed, 877 insertions(+) create mode 100644 example_forest_by_admin2.ipynb diff --git a/example_forest_by_admin2.ipynb b/example_forest_by_admin2.ipynb new file mode 100644 index 0000000..c2f1756 --- /dev/null +++ b/example_forest_by_admin2.ipynb @@ -0,0 +1,877 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "59b88b8e-295e-4dee-bc19-cc7ae7d44740", + "metadata": {}, + "source": [ + "# Example: Forested Area by Admin Level 2" + ] + }, + { + "cell_type": "markdown", + "id": "37c3de9a-1e5e-40d9-a016-784ee181e8b1", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "88a95b34-4d8e-4725-aaaf-227966e5cad1", + "metadata": {}, + "outputs": [], + "source": [ + "import ee\n", + "from earthengine_dask.core import ClusterGEE\n", + "import google.auth" + ] + }, + { + "cell_type": "markdown", + "id": "2019c415-4711-4299-a40c-4c5faafa6bf3", + "metadata": {}, + "source": [ + "## Authenticate & Initialize Earth Engine" + ] + }, + { + "cell_type": "markdown", + "id": "9026fd54-1285-4d65-a886-07115f452c8c", + "metadata": {}, + "source": [ + "Get credentials and the GCP project ID, authenticating if necessary." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7a15d6b4-2ff0-4a50-952d-4f2860de0624", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " credentials, project_id = google.auth.default()\n", + "except google.auth.exceptions.DefaultCredentialsError:\n", + " !gcloud auth application-default login\n", + " credentials, project_id = google.auth.default()\n", + "try:\n", + " ee.Initialize(credentials=credentials, project=project_id)\n", + "except google.auth.exceptions.RefreshError:\n", + " !gcloud auth application-default login\n", + " credentials, project_id = google.auth.default()\n", + "ee.Initialize(credentials=credentials, project=project_id)" + ] + }, + { + "cell_type": "markdown", + "id": "84e59c6f-4d7f-4acf-ba20-6f15a43f98c4", + "metadata": {}, + "source": [ + "# Input Data" + ] + }, + { + "cell_type": "markdown", + "id": "36265600-a446-4044-9efe-3422b5f0a21e", + "metadata": {}, + "source": [ + "## Input: Forest Baseline\n", + "\n", + "This example will use the [European Commission Joint Research Centre's 2020 global map of forest cover](https://data.jrc.ec.europa.eu/dataset/10d1b337-b7d1-4938-a048-686c8185b290) for the forest baseline. The dataset is [available in Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/JRC_GFC2020_V1)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bc7df765-5c8b-45da-8bb6-705601e6743d", + "metadata": {}, + "outputs": [], + "source": [ + "ic = ee.ImageCollection(\"JRC/GFC2020/V1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f313f69c-04b5-46ca-b3b6-31458e3a4e74", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There is 1 image in the collection.\n" + ] + } + ], + "source": [ + "print(f'There is {ic.size().getInfo()} image in the collection.')" + ] + }, + { + "cell_type": "markdown", + "id": "2949b205-635e-4a48-9f33-cc8e1a8ec66f", + "metadata": {}, + "source": [ + "... which we will use as the forest baseline." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a26ba1fd-1116-49b3-a908-d9189696fd63", + "metadata": {}, + "outputs": [], + "source": [ + "forest_baseline = ic.first()" + ] + }, + { + "cell_type": "markdown", + "id": "5d39a93e-db34-407d-80a2-134308b480f2", + "metadata": {}, + "source": [ + "Looking at the projection information, the image is in decimal degrees of latitude and longitude (EPSG:4326)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6a41140f-bb9b-4c7e-909c-b8212ef50289", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'type': 'Projection',\n", + " 'crs': 'EPSG:4326',\n", + " 'transform': [8.983152841195215e-05,\n", + " 0,\n", + " -170.00005897568744,\n", + " 0,\n", + " -8.983152841195215e-05,\n", + " 80.03737653225383]}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "proj_info = forest_baseline.projection().getInfo()\n", + "proj_info" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "97270963-97cb-4416-8fbd-e1ffb51e69af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The nominal scale (at the equator) is 10 meters/pixel.\n" + ] + } + ], + "source": [ + "print(f'The nominal scale (at the equator) is '\n", + " f'{forest_baseline.projection().nominalScale().getInfo()} meters/pixel.')" + ] + }, + { + "cell_type": "markdown", + "id": "5c8fe28b-cd0b-4a33-aa8c-5df008e4a7ec", + "metadata": {}, + "source": [ + "## Input: Administrative Boundaries\n", + "\n", + "We will use the municipal level (ADM2) boundaries provided by the [geoBoundaries](https://www.geoboundaries.org/) global database of political administrative boundaries v6.0, which is also [available in Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/WM_geoLab_geoBoundaries_600_ADM2)." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c5073cf1-abdd-4413-a37e-99dbdd71a79f", + "metadata": {}, + "outputs": [], + "source": [ + "admin = ee.FeatureCollection(\"WM/geoLab/geoBoundaries/600/ADM2\")\n", + "# admin = admin.filter(ee.Filter.eq('shapeGroup', 'USA'))\n", + "# admin = admin.filter(ee.Filter.eq('shapeName', 'Colorado'))\n", + "# admin = admin.filter(ee.Filter.eq('shapeName', 'Boulder'))\n", + "\n", + "# roi = ee.Geometry.Polygon(\n", + "# [[[-109.01952260759319, 40.971552045695994],\n", + "# [-109.01952260759319, 37.01127149086416],\n", + "# [-101.99925893571819, 37.01127149086416],\n", + "# [-101.99925893571819, 40.971552045695994]]], None, False)\n", + "# admin = admin.filterBounds(roi)" + ] + }, + { + "cell_type": "markdown", + "id": "c34921bb-7257-4b49-b024-929e2f296427", + "metadata": {}, + "source": [ + "There are quite a few features in the collection." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cdaa4d75-a6eb-4381-a916-9f4c88b02528", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 49617 features in the collection.\n" + ] + } + ], + "source": [ + "print(f'There are {admin.size().getInfo()} features in the collection.')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "53a0dda6-71fa-423d-9b21-f6ad1bb50017", + "metadata": {}, + "outputs": [], + "source": [ + "# admin.aggregate_histogram('shapeName').getInfo()" + ] + }, + { + "cell_type": "markdown", + "id": "0cdf35fe-6240-46eb-bf00-e2950b997fd8", + "metadata": {}, + "source": [ + "# Analysis" + ] + }, + { + "cell_type": "markdown", + "id": "0bcd5ea2-76ca-424f-98eb-8bd46e88ade2", + "metadata": {}, + "source": [ + "Define a function that calculates the forested area, and adds it back to the feature." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b16d9786-ae03-4e73-bf10-6110b34c2bc0", + "metadata": {}, + "outputs": [], + "source": [ + "def get_area(img, shape_id):\n", + " fc = ee.FeatureCollection(\n", + " admin.filter(ee.Filter.eq('shapeID', shape_id))\n", + " )\n", + " \n", + " stats_sum = ee.Number(\n", + " img.multiply(ee.Image.pixelArea()).reduceRegions(\n", + " collection=fc,\n", + " reducer=ee.Reducer.sum(),\n", + " ).aggregate_array('sum').get(0)\n", + " )\n", + " \n", + " return_dict = ee.Dictionary({\n", + " 'shapeName': fc.aggregate_array('shapeName').get(0),\n", + " 'shapeGroup': fc.aggregate_array('shapeGroup').get(0),\n", + " 'shapeType': fc.aggregate_array('shapeType').get(0),\n", + " 'area_km2': stats_sum.round().multiply(1e-6),\n", + " }).getInfo()\n", + " return_dict['shape_id'] = shape_id\n", + " return return_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "fdc66b67-ad59-465a-b63e-ab3317b00f4d", + "metadata": {}, + "outputs": [], + "source": [ + "# # Use for debugging to count the total area, rather than forested area\n", + "# forest_baseline = forest_baseline.unmask().multiply(0).add(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a377039f-4792-4d12-85a1-24c8b5d58b91", + "metadata": {}, + "outputs": [], + "source": [ + "# Try it out.\n", + "# shape_ids = admin.aggregate_array('shapeID').distinct().getInfo()\n", + "# shape_ids" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a1ef0f78-9fc1-404c-9132-0203c690799f", + "metadata": {}, + "outputs": [], + "source": [ + "# # tileScale=1\n", + "# get_area(img=forest_baseline, shape_id='42512837B26705409874577')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "7690cfb9-2cac-447e-b235-45ad5782c413", + "metadata": {}, + "outputs": [], + "source": [ + "# # tileScale=16\n", + "# get_area(img=forest_baseline, shape_id='42512837B26705409874577')" + ] + }, + { + "cell_type": "markdown", + "id": "50373e8c-cbe0-445d-873c-164edf14bc26", + "metadata": {}, + "source": [ + "## Start Dask Cluster\n", + "\n", + "Start up a Earth Engine enabled cluster. This may take a few minutes to complete." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6693099f-82e7-4f0e-bd2a-a4da6dbcb91a", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5423963efa914028ae0052da0455ca51", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
╭──────────────────────────────── Package Info ────────────────────────────────╮\n",
+       "│                    ╷                                                         │\n",
+       "│   Package           Note                                                    │\n",
+       "│ ╶──────────────────┼───────────────────────────────────────────────────────╴ │\n",
+       "│   earthengine-dask │ Wheel built from                                        │\n",
+       "│                    │ ~/Documents/GitHub/VorGeo/earthengine-dask              │\n",
+       "│                    ╵                                                         │\n",
+       "╰──────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭──────────────────────────────── \u001b[1;32mPackage Info\u001b[0m ────────────────────────────────╮\n", + "│ ╷ │\n", + "│ \u001b[1m \u001b[0m\u001b[1mPackage \u001b[0m\u001b[1m \u001b[0m│\u001b[1m \u001b[0m\u001b[1mNote \u001b[0m\u001b[1m \u001b[0m │\n", + "│ ╶──────────────────┼───────────────────────────────────────────────────────╴ │\n", + "│ earthengine-dask │ Wheel built from │\n", + "│ │ ~/Documents/GitHub/VorGeo/earthengine-dask │\n", + "│ ╵ │\n", + "╰──────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "df7992996b414d3d80035cbd86e6e38e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Google Application Default Credentials have been written to a file on your Coiled VM(s).\n",
+      "These credentials will potentially be valid until explicitly revoked by running\n",
+      "gcloud auth application-default revoke\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:coiled.credentials.google:Could not get token from client GCP session. This is not a concern unless you're planning to use forwarded GCP credentials on your cluster. The error was: Reauthentication is needed. Please run `gcloud auth application-default login` to reauthenticate.\n"
+     ]
+    }
+   ],
+   "source": [
+    "cluster = ClusterGEE(\n",
+    "    name='test-cluster-forest-by-admin-temp',\n",
+    "    n_workers=3,\n",
+    "    worker_cpu=8,\n",
+    "    # spot_policy=\"spot_with_fallback\",\n",
+    "    region='us-west1',\n",
+    "    idle_timeout=\"4 hours\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9a2c7acb-0564-4e3b-90ab-0e56134ab8cb",
+   "metadata": {},
+   "source": [
+    "Retrieve a client for the cluster, and display it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "02bb4842-09e3-4fb0-8899-97514ce283ca",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "
\n", + "
\n", + "

Client

\n", + "

Client-a74fd23a-1e10-11ef-89ea-fe11494405b6

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
Connection method: Cluster objectCluster type: earthengine_dask.ClusterGEE
\n", + " Dashboard: https://cluster-tycwu.dask.host/EkZZTbw0iKRTOHKq/status\n", + "
\n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "

Cluster Info

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "

ClusterGEE

\n", + "

test-cluster-forest-by-admin-temp

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Dashboard: https://cluster-tycwu.dask.host/EkZZTbw0iKRTOHKq/status\n", + " \n", + " Workers: 3\n", + "
\n", + " Total threads: 24\n", + " \n", + " Total memory: 91.77 GiB\n", + "
\n", + "\n", + "
\n", + " \n", + "

Scheduler Info

\n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "

Scheduler

\n", + "

Scheduler-fd0096a5-374b-4485-952d-b277dbbf7eeb

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Comm: tls://10.1.0.9:8786\n", + " \n", + " Workers: 3\n", + "
\n", + " Dashboard: http://10.1.0.9:8787/status\n", + " \n", + " Total threads: 24\n", + "
\n", + " Started: Just now\n", + " \n", + " Total memory: 91.77 GiB\n", + "
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "

Workers

\n", + "
\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: test-cluster-forest-by-admin-temp-worker-0c0f656e4f

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tls://10.1.0.8:35395\n", + " \n", + " Total threads: 8\n", + "
\n", + " Dashboard: http://10.1.0.8:8787/status\n", + " \n", + " Memory: 30.59 GiB\n", + "
\n", + " Nanny: tls://10.1.0.8:32977\n", + "
\n", + " Local directory: /scratch/dask-scratch-space/worker-7k1yritw\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: test-cluster-forest-by-admin-temp-worker-771328c5c9

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tls://10.1.0.7:45921\n", + " \n", + " Total threads: 8\n", + "
\n", + " Dashboard: http://10.1.0.7:8787/status\n", + " \n", + " Memory: 30.59 GiB\n", + "
\n", + " Nanny: tls://10.1.0.7:37941\n", + "
\n", + " Local directory: /scratch/dask-scratch-space/worker-jru6dk8k\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: test-cluster-forest-by-admin-temp-worker-7ca04f3446

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tls://10.1.0.5:37289\n", + " \n", + " Total threads: 8\n", + "
\n", + " Dashboard: http://10.1.0.5:8787/status\n", + " \n", + " Memory: 30.59 GiB\n", + "
\n", + " Nanny: tls://10.1.0.5:35463\n", + "
\n", + " Local directory: /scratch/dask-scratch-space/worker-o8otlm70\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-29 21:29:36,801 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client\n" + ] + } + ], + "source": [ + "client = cluster.get_client()\n", + "client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "457b3b8c-cd92-4215-99e7-86fcdb1258a4", + "metadata": {}, + "outputs": [], + "source": [ + "#shape_ids = admin.aggregate_array('shapeID').distinct().getInfo()\n", + "# shape_ids = ['42512837B26705409874577']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "248653a8-60fb-4b6c-87ec-054d1ec548c7", + "metadata": {}, + "outputs": [], + "source": [ + "# Create and submit jobs among the workers.\n", + "# Allow for retries to handle \"Too many concurrent aggregations.\" errors\n", + "submitted_jobs = [\n", + " {\n", + " 'shape_id': shape_id, \n", + " 'area':client.submit(\n", + " get_area, forest_baseline, shape_id, \n", + " retries=2\n", + " )\n", + " }\n", + " for shape_id in shape_ids\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "72089446-90c5-4abd-8cc6-95cb9ad47dd9", + "metadata": {}, + "outputs": [], + "source": [ + "## Debug issue with large regions by trying a problematic shape_id\n", + "# submitted_jobs = [\n", + "# {\n", + "# 'shape_id': '42512837B26705409874577',\n", + "# 'tile_scale': tile_scale,\n", + "# 'area':client.submit(\n", + "# get_area, forest_baseline, '42512837B26705409874577',\n", + "# retries=1\n", + "# )\n", + "# }\n", + "# ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9a3d6ed-23b6-4b4c-89a9-f5568380c00e", + "metadata": {}, + "outputs": [], + "source": [ + "submitted_jobs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e411a64c-fbcf-4549-a6e1-f7b4a4c4d8a0", + "metadata": {}, + "outputs": [], + "source": [ + "results = client.gather(submitted_jobs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eac249a2-b763-414e-af35-063f916ca184", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "pd.DataFrame(client.gather(submitted_jobs[:10]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c68ab0e-b2cb-476e-a07f-865227f90e19", + "metadata": {}, + "outputs": [], + "source": [ + "for job in submitted_jobs:\n", + " if job['area'].status in ['error']:\n", + " future = job['area']\n", + " print(future.exception())\n", + " print(job['tile_scale'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf051a21-9c79-4a40-a607-050fd46e1513", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}