From 68531011572937b936d74fa073ab2f9ab9486783 Mon Sep 17 00:00:00 2001 From: Ghislain Vieilledent Date: Thu, 30 May 2024 17:31:36 +1100 Subject: [PATCH] Updating country_download to include geefcc --- forestatrisk/__init__.py | 2 +- forestatrisk/data/__init__.py | 2 +- forestatrisk/data/compute/compute_forest.py | 137 ++++++------------ forestatrisk/data/country_compute.py | 12 +- forestatrisk/data/country_download.py | 7 +- forestatrisk/data/download/download_gadm.py | 31 +--- forestatrisk/data/download/download_srtm.py | 22 +-- .../{extent_shp.py => get_vector_extent.py} | 0 8 files changed, 73 insertions(+), 140 deletions(-) rename forestatrisk/data/{extent_shp.py => get_vector_extent.py} (100%) diff --git a/forestatrisk/__init__.py b/forestatrisk/__init__.py index 99a21ff..123793a 100644 --- a/forestatrisk/__init__.py +++ b/forestatrisk/__init__.py @@ -17,7 +17,7 @@ # Local imports # Data -from .data import sample, extent_shp, get_vector_extent +from .data import sample, get_vector_extent, extent_shp # Model from .model import cellneigh, cellneigh_ctry diff --git a/forestatrisk/data/__init__.py b/forestatrisk/data/__init__.py index d37d167..8104941 100644 --- a/forestatrisk/data/__init__.py +++ b/forestatrisk/data/__init__.py @@ -3,6 +3,6 @@ from .country_compute import country_compute from .country_download import country_download from .sample import sample -from .extent_shp import extent_shp, get_vector_extent +from .get_vector_extent import get_vector_extent, extent_shp # EOF diff --git a/forestatrisk/data/compute/compute_forest.py b/forestatrisk/data/compute/compute_forest.py index af0926d..9b12daa 100644 --- a/forestatrisk/data/compute/compute_forest.py +++ b/forestatrisk/data/compute/compute_forest.py @@ -1,14 +1,13 @@ """Processing forest data.""" import subprocess -from glob import glob from osgeo import gdal from .compute_distance import compute_distance -def compute_forest(iso, proj, extent, verbose=False): +def compute_forest(proj, extent, verbose=False): """Processing forest data. :param iso: Country iso code. @@ -28,10 +27,6 @@ def compute_forest(iso, proj, extent, verbose=False): # Creation options copts = ["COMPRESS=LZW", "PREDICTOR=2", "BIGTIFF=YES"] - # Build vrt file - tif_forest_files = glob("forest_*.tif") - gdal.BuildVRT("forest.vrt", tif_forest_files, callback=cback) - # Reproject param = gdal.WarpOptions( options=["overwrite"], @@ -47,49 +42,37 @@ def compute_forest(iso, proj, extent, verbose=False): ) gdal.Warp("forest_src.tif", "forest.vrt", options=param) + # Number of bands (or years) + with gdal.Open("forest_src.tif") as ds: + nbands = ds.RasterCount + + # Index offset if band == 3 + k = 1 if nbands == 3 else 0 + # Separate bands - gdal.Translate("forest_t1_src.tif", "forest_src.tif", - maskBand=None, - bandList=[1], creationOptions=copts, - callback=cback) - gdal.Translate("forest_t2_src.tif", "forest_src.tif", - maskBand=None, - bandList=[3], creationOptions=copts, - callback=cback) - gdal.Translate("forest_t3_src.tif", "forest_src.tif", - maskBand=None, - bandList=[5], creationOptions=copts, - callback=cback) - gdal.Translate("forest_2005_src.tif", "forest_src.tif", - maskBand=None, - bandList=[2], creationOptions=copts, - callback=cback) - gdal.Translate("forest_2015_src.tif", "forest_src.tif", - maskBand=None, - bandList=[4], creationOptions=copts, - callback=cback) + for i in range(nbands): + gdal.Translate(f"forest_t{i + k}_src.tif", "forest_src.tif", + maskBand=None, + bandList=[i + 1], creationOptions=copts, + callback=cback) # Rasterize country border # (by default: zero outside, without nodata value) - gdal.Rasterize("ctry_PROJ.tif", "ctry_PROJ.shp", + gdal.Rasterize("ctry_PROJ.tif", "ctry_PROJ.gpkg", outputBounds=extent, xRes=30, yRes=30, targetAlignedPixels=True, burnValues=[1], outputType=gdal.GDT_Byte, creationOptions=copts, callback=cback) - # Compute distance to forest edge at t1 for modelling - compute_distance("forest_t1_src.tif", "dist_edge_t1.tif", - values=0, nodata=0, verbose=verbose) + # Compute distance to forest edge at t1, t2, and t3 + # for modelling, validating, and forecasting, respectively + for i in range(3): + compute_distance(f"forest_t{i + 1}_src.tif", + f"dist_edge_t{i + 1}.tif", + values=0, nodata=0, verbose=verbose) - # Compute distance to forest edge at t2 for validating - compute_distance("forest_t2_src.tif", "dist_edge_t2.tif", - values=0, nodata=0, verbose=verbose) - - # Compute distance to forest edge at t3 for forecasting - compute_distance("forest_t3_src.tif", "dist_edge_t3.tif", - values=0, nodata=0, verbose=verbose) - - # Compute fcc12_src.tif + # Compute fcc + # Command to compute fcc cmd_str = ( 'gdal_calc.py --overwrite ' '-A {0} -B {1} ' @@ -97,42 +80,28 @@ def compute_forest(iso, proj, extent, verbose=False): '--calc="255-254*(A==1)*(B==1)-255*(A==1)*(B==0)" ' '--co "COMPRESS=LZW" --co "PREDICTOR=2" --co "BIGTIFF=YES" ' '--NoDataValue=255 --quiet') - rast_a = "forest_t1_src.tif" - rast_b = "forest_t2_src.tif" - rast_out = "fcc12_src.tif" - cmd = cmd_str.format(rast_a, rast_b, rast_out) - subprocess.run(cmd, shell=True, check=True, - capture_output=False) - - # Compute distance to past deforestation at t2 for modelling - compute_distance("fcc12_src.tif", "dist_defor_t2.tif", - values=0, nodata=0, input_nodata=True, - verbose=verbose) - - # Compute fcc23_src.tif - rast_a = "forest_t2_src.tif" - rast_b = "forest_t3_src.tif" - rast_out = "fcc23_src.tif" - cmd = cmd_str.format(rast_a, rast_b, rast_out) - subprocess.run(cmd, shell=True, check=True, - capture_output=False) + # Loop on bands + for i in range(nbands - 1): + # Compute fcc{i}{i + 1}_src.tif + rast_a = f"forest_t{i + k}_src.tif" + rast_b = f"forest_t{i + 1 + k}_src.tif" + fcc_out = f"fcc{i + k}{i + 1 + k}_src.tif" + cmd = cmd_str.format(rast_a, rast_b, fcc_out) + subprocess.run(cmd, shell=True, check=True, + capture_output=False) - # Compute distance to past deforestation at t3 for forecasting - compute_distance("fcc23_src.tif", "dist_defor_t3.tif", - values=0, nodata=0, input_nodata=True, - verbose=verbose) + # Compute distance to past deforestation only if 4 bands + if nbands == 4: + for i in range(nbands - 1): + # Compute distance to past deforestation at t1, t2, and t3 + dist_out = f"dist_defor_t{i + 1}.tif" + compute_distance(fcc_out, dist_out, + values=0, nodata=0, input_nodata=True, + verbose=verbose) # Mask forest rasters with country border - rast_in = [ - "forest_t1_src.tif", "forest_t2_src.tif", - "forest_t3_src.tif", "forest_2005_src.tif", - "forest_2015_src.tif" - ] - rast_out = [ - "forest_t1.tif", "forest_t2.tif", - "forest_t3.tif", "forest_2005.tif", - "forest_2015.tif" - ] + rast_in = [f"forest_t{i + k}_src.tif" for i in range(nbands)] + rast_out = [f"forest_t{i + k}.tif" for i in range(nbands)] cmd_str = ( 'gdal_calc.py --overwrite ' '-A {0} -B ctry_PROJ.tif ' @@ -146,9 +115,9 @@ def compute_forest(iso, proj, extent, verbose=False): subprocess.run(cmd, shell=True, check=True, capture_output=False) - # Mask fcc12 and fcc23 with country border - rast_in = ["fcc12_src.tif", "fcc23_src.tif"] - rast_out = ["fcc12.tif", "fcc23.tif"] + # Mask fcc with country border + rast_in = [f"fcc{i + k}{i + k + 1}_src.tif" for i in range(nbands - 1)] + rast_out = [f"fcc{i + k}{i + k + 1}.tif" for i in range(nbands - 1)] cmd_str = ( 'gdal_calc.py --overwrite ' '-A {0} -B ctry_PROJ.tif ' @@ -164,7 +133,7 @@ def compute_forest(iso, proj, extent, verbose=False): capture_output=False) # Create raster fcc123.tif - # (0: nodata, 1: for2000, 2: for2010, 3: for2020) + # (0: nodata, 1: t1, 2: t2, 3: t3) cmd = ( 'gdal_calc.py --overwrite ' '-A forest_t1.tif -B forest_t2.tif -C forest_t3.tif ' @@ -176,20 +145,4 @@ def compute_forest(iso, proj, extent, verbose=False): subprocess.run(cmd, shell=True, check=True, capture_output=False) - # Create raster fcc12345.tif - # (0: nodata, 1: for2000, 2: for2005, - # 3: for2010, 4: for2015, 5: for2020) - cmd = ( - 'gdal_calc.py --overwrite ' - '-A forest_t1.tif -B forest_2005.tif -C forest_t2.tif ' - '-D forest_2015.tif -E forest_t3.tif ' - '--outfile=fcc12345.tif --type=Byte ' - '--calc="A+B+C+D+E" ' - '--co "COMPRESS=LZW" --co "PREDICTOR=2" --co "BIGTIFF=YES" ' - '--NoDataValue=0 --quiet' - ) - subprocess.run(cmd, shell=True, check=True, - capture_output=False) - - # End diff --git a/forestatrisk/data/country_compute.py b/forestatrisk/data/country_compute.py index 3f40b3c..704261e 100644 --- a/forestatrisk/data/country_compute.py +++ b/forestatrisk/data/country_compute.py @@ -11,7 +11,7 @@ from .compute import compute_biomass_avitabile, compute_osm from .compute import compute_srtm, compute_wdpa from .compute import compute_forest -from .extent_shp import extent_shp +from .get_vector_extent import get_vector_extent def country_compute( @@ -53,21 +53,21 @@ def country_compute( """ # Reproject GADM - ofile = os.path.join(temp_dir, "ctry_PROJ.shp") - ifile = os.path.join(temp_dir, "gadm36_" + iso3 + "_0.shp") + ofile = os.path.join(temp_dir, "ctry_PROJ.gpkg") + ifile = os.path.join(temp_dir, "gadm41_" + iso3 + "_0.gpkg") param = gdal.VectorTranslateOptions( accessMode="overwrite", srcSRS="EPSG:4326", dstSRS=proj, reproject=True, - format="ESRI Shapefile", + format="GPKG", layerCreationOptions=["ENCODING=UTF-8"], ) gdal.VectorTranslate(ofile, ifile, options=param) # Compute extent - ifile = os.path.join(temp_dir, "ctry_PROJ.shp") - extent_proj = extent_shp(ifile) + ifile = os.path.join(temp_dir, "ctry_PROJ.gpkg") + extent_proj = get_vector_extent(ifile) # Region with buffer of 5km xmin_reg = np.floor(extent_proj[0] - 5000) diff --git a/forestatrisk/data/country_download.py b/forestatrisk/data/country_download.py index 32a08eb..f756e49 100644 --- a/forestatrisk/data/country_download.py +++ b/forestatrisk/data/country_download.py @@ -1,11 +1,15 @@ """Download country geospatial data.""" +import os + import geefcc as gf from ..misc import make_dir from .download import download_gadm, download_osm from .download import download_srtm, download_wdpa +opj = os.path.join + def country_download( get_fcc_args, @@ -60,7 +64,8 @@ def country_download( # Download if gadm: - download_gadm(iso3=iso3, output_dir=output_dir) + ofile = opj(output_dir, "gadm41_" + iso3 + "_0.gpkg") + download_gadm(iso3=iso3, output_file=ofile) if srtm: download_srtm(iso3=iso3, output_dir=output_dir) if wdpa: diff --git a/forestatrisk/data/download/download_gadm.py b/forestatrisk/data/download/download_gadm.py index 7f68ad3..d0c7d80 100644 --- a/forestatrisk/data/download/download_gadm.py +++ b/forestatrisk/data/download/download_gadm.py @@ -1,13 +1,10 @@ """Download GADM data.""" import os -from zipfile import ZipFile from urllib.request import urlretrieve -from ...misc import make_dir - -def download_gadm(iso3, output_dir="."): +def download_gadm(iso3, output_file): """Download GADM data for a country. Download GADM (Global Administrative Areas) for a specific @@ -15,29 +12,17 @@ def download_gadm(iso3, output_dir="."): :param iso3: Country ISO 3166-1 alpha-3 code. - :param output_dir: Directory where data is downloaded. Default to - current working directory. + :param output_file: Path to output GPKG file. """ - # Create directory - make_dir(output_dir) - - # Check for existing data - shp_name = os.path.join(output_dir, "gadm41_" + iso3 + "_0.shp") - if os.path.isfile(shp_name) is not True: - - # Download the zipfile from gadm.org - fname = os.path.join(output_dir, iso3 + "_shp.zip") - url = ( - "https://geodata.ucdavis.edu/gadm/gadm4.1/" - "shp/gadm41_" + iso3 + "_shp.zip" - ) - urlretrieve(url, fname) + # Check for existing file + if not os.path.isfile(output_file): - # Extract files from zip - with ZipFile(fname) as file: - file.extractall(output_dir) + # Download the file from gadm.org + url = ("https://geodata.ucdavis.edu/gadm/gadm4.1/" + f"gpkg/gadm41_{iso3}.gpkg") + urlretrieve(url, output_file) # End diff --git a/forestatrisk/data/download/download_srtm.py b/forestatrisk/data/download/download_srtm.py index 70f6719..e928e38 100644 --- a/forestatrisk/data/download/download_srtm.py +++ b/forestatrisk/data/download/download_srtm.py @@ -13,8 +13,9 @@ from urllib2 import HTTPError # HTTPError with Python 2 from ...misc import make_dir -from ..extent_shp import extent_shp +from ..get_vector_extent import get_vector_extent from .tiles_srtm import tiles_srtm +from .download_gadm import download_gadm def download_srtm(iso3, output_dir="."): @@ -38,23 +39,12 @@ def download_srtm(iso3, output_dir="."): make_dir(output_dir) # Check for existing data - shp_name = os.path.join(output_dir, "gadm36_" + iso3 + "_0.shp") - if os.path.isfile(shp_name) is not True: - - # Download the zipfile from gadm.org - fname = os.path.join(output_dir, iso3 + "_shp.zip") - url = ( - "https://biogeo.ucdavis.edu/data/gadm3.6/" - "shp/gadm36_" + iso3 + "_shp.zip" - ) - urlretrieve(url, fname) - - # Extract files from zip - with ZipFile(fname) as file: - file.extractall(output_dir) + gpkg_name = os.path.join(output_dir, "gadm41_" + iso3 + "_0.gpkg") + if os.path.isfile(gpkg_name) is not True: + download_gadm(iso3, gpkg_name) # Compute extent and SRTM tiles - extent_latlong = extent_shp(shp_name) + extent_latlong = get_vector_extent(gpkg_name) tiles_long, tiles_lat = tiles_srtm(extent_latlong) tiles_long = tiles_long.split("-") tiles_lat = tiles_lat.split("-") diff --git a/forestatrisk/data/extent_shp.py b/forestatrisk/data/get_vector_extent.py similarity index 100% rename from forestatrisk/data/extent_shp.py rename to forestatrisk/data/get_vector_extent.py