From 0c3dd98a9febed2418fef47553041532f92bf07d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 15:24:11 +0000 Subject: [PATCH 01/40] Bump aws-actions/amazon-ecr-login from 1 to 2 Bumps [aws-actions/amazon-ecr-login](https://github.com/aws-actions/amazon-ecr-login) from 1 to 2. - [Release notes](https://github.com/aws-actions/amazon-ecr-login/releases) - [Changelog](https://github.com/aws-actions/amazon-ecr-login/blob/main/CHANGELOG.md) - [Commits](https://github.com/aws-actions/amazon-ecr-login/compare/v1...v2) --- updated-dependencies: - dependency-name: aws-actions/amazon-ecr-login dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/test-and-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml index df20bb54..87b7d5c6 100644 --- a/.github/workflows/test-and-build.yml +++ b/.github/workflows/test-and-build.yml @@ -54,7 +54,7 @@ jobs: aws-region: ${{ env.AWS_REGION }} - name: Login to Amazon ECR - uses: aws-actions/amazon-ecr-login@v1 + uses: aws-actions/amazon-ecr-login@v2 - name: Fetch GAMMA run: | From 4415430d0521b4bfcbcec7f1a035d94c390a6ed8 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 30 Oct 2023 15:39:56 -0500 Subject: [PATCH 02/40] reproject into the correct coordinate system --- hyp3_gamma/water_mask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index e29438bf..14e1e905 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -45,7 +45,7 @@ def create_water_mask(input_tif: str, output_tif: str): mask = geopandas.read_file(mask_location, mask=extent) with TemporaryDirectory() as temp_shapefile: - mask.to_file(temp_shapefile, driver='ESRI Shapefile') + mask.to_file(temp_shapefile, crs='EPSG:32632', driver='ESRI Shapefile') with GDALConfigManager(OGR_ENABLE_PARTIAL_REPROJECTION='YES'): gdal.Rasterize(dst_ds, temp_shapefile, allTouched=True, burnValues=[1]) From 483a534a754ff6f4edbdfba578606257e28004ba Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Mon, 30 Oct 2023 21:41:13 -0800 Subject: [PATCH 03/40] need libspatialindex-dev and rtree library to make the geopandas.clip work, modify the create_water_mask function to corrrect wrong mask shapefile and make the gdal.rasteize work --- Dockerfile | 2 +- environment.yml | 1 + hyp3_gamma/water_mask.py | 16 +++++++++++++--- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index c7222e72..fb3d9bf0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,7 +33,7 @@ RUN apt update \ python-is-python3 \ python3-numpy python3-matplotlib python3-scipy python3-shapely python3-packaging \ # Additional installs - python3-pip wget git vim \ + python3-pip wget git vim libspatialindex-dev \ && apt clean \ && rm -rf /var/lib/apt/lists/* diff --git a/environment.yml b/environment.yml index fc334c40..f809f44d 100644 --- a/environment.yml +++ b/environment.yml @@ -29,3 +29,4 @@ dependencies: - pip: - lxml==4.8.0 - hyp3lib==1.7.0 + - rtree diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 14e1e905..cb0f9b3c 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -4,6 +4,7 @@ from tempfile import TemporaryDirectory import geopandas +from shapely.geometry import Polygon from osgeo import gdal from hyp3_gamma.util import GDALConfigManager @@ -34,18 +35,27 @@ def create_water_mask(input_tif: str, output_tif: str): mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' src_ds = gdal.Open(input_tif) - dst_ds = gdal.GetDriverByName('GTiff').Create(output_tif, src_ds.RasterXSize, src_ds.RasterYSize, 1, gdal.GDT_Byte) dst_ds.SetGeoTransform(src_ds.GetGeoTransform()) dst_ds.SetProjection(src_ds.GetProjection()) dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) + # need assign values for the band, otherwise, the gdal.Rasterize does not work correctly. + data = dst_ds.ReadAsArray() + data[:, :] = 0 + dst_ds.GetRasterBand(1).WriteArray(data) + extent = gdal.Info(input_tif, format='json')['wgs84Extent'] extent = split_geometry_on_antimeridian(extent) - mask = geopandas.read_file(mask_location, mask=extent) + poly = Polygon(extent['coordinates'][0]) + mask = (geopandas.read_file(mask_location)).clip(poly) # very slow, but it produces correct water_mask.tif + + # mask = geopandas.read_file(mask_location, mask=extent), + # This mask has a wrong extent, can not be used to produce water_mask.tif + with TemporaryDirectory() as temp_shapefile: - mask.to_file(temp_shapefile, crs='EPSG:32632', driver='ESRI Shapefile') + mask.to_file(temp_shapefile, driver='ESRI Shapefile') with GDALConfigManager(OGR_ENABLE_PARTIAL_REPROJECTION='YES'): gdal.Rasterize(dst_ds, temp_shapefile, allTouched=True, burnValues=[1]) From 5001bc225dca4996fd60a637dd0d50691579f4dc Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Mon, 30 Oct 2023 21:45:42 -0800 Subject: [PATCH 04/40] modify the code style --- hyp3_gamma/water_mask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index cb0f9b3c..fbf5534c 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -4,8 +4,8 @@ from tempfile import TemporaryDirectory import geopandas -from shapely.geometry import Polygon from osgeo import gdal +from shapely.geometry import Polygon from hyp3_gamma.util import GDALConfigManager From 83b80bd616581f3d760a50f1071e39216a730b66 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 31 Oct 2023 12:48:09 -0500 Subject: [PATCH 05/40] added hyp3-isce2's water masking improvements --- hyp3_gamma/water_mask.py | 105 +++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 27 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index fbf5534c..023be038 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -1,17 +1,42 @@ """Create and apply a water body mask""" import json import subprocess +from pathlib import Path from tempfile import TemporaryDirectory -import geopandas +import geopandas as gpd +import numpy as np +import pandas as pd +import s3fs +import shapely from osgeo import gdal -from shapely.geometry import Polygon +from shapely import geometry, wkt -from hyp3_gamma.util import GDALConfigManager +from hyp3_isce2.utils import GDALConfigManager gdal.UseExceptions() +def get_geo_partition(coordinate, partition_size=90): + """Get the geo-partition for a given coordinate (i.e., the lat/lon box it falls in) + + Args: + coordinate: A coordinate tuple (lon, lat) + partition_size: The partition size (in degrees) to use for the geo-partition + using a value of 90 will result in geo-partitions of 90x90 degrees + + Returns: + A string representing the geo-partition for the given coordinate and partition size + """ + x, y = coordinate + x_rounded = int(np.floor(x / partition_size)) * partition_size + y_rounded = int(np.floor(y / partition_size)) * partition_size + x_fill = str(x_rounded).zfill(4) + y_fill = str(y_rounded).zfill(4) + partition = f'{y_fill}_{x_fill}' + return partition + + def split_geometry_on_antimeridian(geometry: dict): geometry_as_bytes = json.dumps(geometry).encode() cmd = ['ogr2ogr', '-wrapdateline', '-datelineoffset', '20', '-f', 'GeoJSON', '/vsistdout/', '/vsistdin/'] @@ -19,7 +44,32 @@ def split_geometry_on_antimeridian(geometry: dict): return json.loads(geojson_str)['features'][0]['geometry'] -def create_water_mask(input_tif: str, output_tif: str): +def get_water_mask_gdf(extent: geometry.Polygon) -> gpd.GeoDataFrame: + """Get a GeoDataFrame of the water mask for a given extent + + Args: + extent: The extent to get the water mask for + + Returns: + GeoDataFrame of the water mask for the given extent + """ + mask_location = 'asf-dem-west/WATER_MASK/GSHHG/hyp3_water_mask_20220912' + corrected_extent = split_geometry_on_antimeridian(json.loads(shapely.to_geojson(extent))) + + filters = list(set([('lat_lon', '=', get_geo_partition(coord)) for coord in extent.exterior.coords])) + s3_fs = s3fs.S3FileSystem(anon=True, default_block_size=5 * (2**20)) + + # TODO the conversion from pd -> gpd can be removed when gpd adds the filter param for read_parquet + df = pd.read_parquet(mask_location, filesystem=s3_fs, filters=filters) + df['geometry'] = df['geometry'].apply(wkt.loads) + df['lat_lon'] = df['lat_lon'].astype(str) + gdf = gpd.GeoDataFrame(df, crs='EPSG:4326') + + mask = gpd.clip(gdf, geometry.shape(corrected_extent)) + return mask + + +def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): """Create a water mask GeoTIFF with the same geometry as a given input GeoTIFF The water mask is assembled from GSHHG v2.3.7 Levels 1, 2, 3, and 5 at full resolution. To learn more, visit @@ -29,34 +79,35 @@ def create_water_mask(input_tif: str, output_tif: str): land in the pixel. Args: - input_tif: Path for the input GeoTIFF - output_tif: Path for the output GeoTIFF + input_imge: Path for the input GDAL-compatible image + output_image: Path for the output image + gdal_format: GDAL format name to create output image as """ - mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' - - src_ds = gdal.Open(input_tif) - dst_ds = gdal.GetDriverByName('GTiff').Create(output_tif, src_ds.RasterXSize, src_ds.RasterYSize, 1, gdal.GDT_Byte) + src_ds = gdal.Open(input_image) + + driver_options = [] + if gdal_format == 'GTiff': + driver_options = ['COMPRESS=LZW', 'TILED=YES', 'NUM_THREADS=ALL_CPUS'] + + dst_ds = gdal.GetDriverByName(gdal_format).Create( + output_image, + src_ds.RasterXSize, + src_ds.RasterYSize, + 1, + gdal.GDT_Byte, + driver_options, + ) dst_ds.SetGeoTransform(src_ds.GetGeoTransform()) dst_ds.SetProjection(src_ds.GetProjection()) dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) - # need assign values for the band, otherwise, the gdal.Rasterize does not work correctly. - data = dst_ds.ReadAsArray() - data[:, :] = 0 - dst_ds.GetRasterBand(1).WriteArray(data) - - extent = gdal.Info(input_tif, format='json')['wgs84Extent'] - extent = split_geometry_on_antimeridian(extent) - - poly = Polygon(extent['coordinates'][0]) - mask = (geopandas.read_file(mask_location)).clip(poly) # very slow, but it produces correct water_mask.tif - - # mask = geopandas.read_file(mask_location, mask=extent), - # This mask has a wrong extent, can not be used to produce water_mask.tif + extent = gdal.Info(input_image, format='json')['wgs84Extent'] + mask = get_water_mask_gdf(geometry.shape(extent)) - with TemporaryDirectory() as temp_shapefile: - mask.to_file(temp_shapefile, driver='ESRI Shapefile') + with TemporaryDirectory() as temp_dir: + temp_file = str(Path(temp_dir) / 'mask.shp') + mask.to_file(temp_file, driver='ESRI Shapefile') with GDALConfigManager(OGR_ENABLE_PARTIAL_REPROJECTION='YES'): - gdal.Rasterize(dst_ds, temp_shapefile, allTouched=True, burnValues=[1]) + gdal.Rasterize(dst_ds, temp_file, allTouched=True, burnValues=[1]) - del src_ds, dst_ds + del src_ds, dst_ds \ No newline at end of file From ce2c1685d71f1988a1518219d90f366718cb3f0e Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 31 Oct 2023 10:31:14 -0800 Subject: [PATCH 06/40] read parquet file, add needed libraries in the environmenmt file --- Dockerfile | 2 +- environment.yml | 3 ++- hyp3_gamma/water_mask.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index fb3d9bf0..c7222e72 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,7 +33,7 @@ RUN apt update \ python-is-python3 \ python3-numpy python3-matplotlib python3-scipy python3-shapely python3-packaging \ # Additional installs - python3-pip wget git vim libspatialindex-dev \ + python3-pip wget git vim \ && apt clean \ && rm -rf /var/lib/apt/lists/* diff --git a/environment.yml b/environment.yml index f809f44d..b7161819 100644 --- a/environment.yml +++ b/environment.yml @@ -26,7 +26,8 @@ dependencies: # Addresses https://github.com/ASFHyP3/hyp3-gamma/issues/421 # - hyp3lib>=1.7.0,<2 # - lxml + - s3fs - pip: - lxml==4.8.0 - hyp3lib==1.7.0 - - rtree + - pyarrow diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 023be038..e9ea9ec3 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -12,7 +12,7 @@ from osgeo import gdal from shapely import geometry, wkt -from hyp3_isce2.utils import GDALConfigManager +from hyp3_gamma.util import GDALConfigManager gdal.UseExceptions() From a0029e581f8c1ec76d1d01d0466e39c3650ba297 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 31 Oct 2023 13:36:29 -0500 Subject: [PATCH 07/40] added newline --- hyp3_gamma/water_mask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index e9ea9ec3..9c3f87e1 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -110,4 +110,4 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): with GDALConfigManager(OGR_ENABLE_PARTIAL_REPROJECTION='YES'): gdal.Rasterize(dst_ds, temp_file, allTouched=True, burnValues=[1]) - del src_ds, dst_ds \ No newline at end of file + del src_ds, dst_ds From 0baea5af509f0ea9e80d5902fa2417c687946d42 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 31 Oct 2023 13:36:50 -0500 Subject: [PATCH 08/40] updated changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4939c836..956da27a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/) and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [6.4.1] + +### Fixed +- A bug where the water mask would be significantly incorrect over some regions in Europe. + ## [6.4.0] ### Added From 1dace922e697bf9cca7882610b39e9ac506c3aae Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 31 Oct 2023 14:24:19 -0500 Subject: [PATCH 09/40] add s3fs to requirements --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 9311d076..1f0263bf 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ 'numpy>=1.21,<1.22', 'pillow', 'python-dateutil', + 's3fs' ], extras_require={ From d4b74c7270eef4a589f995cf612dd8e22722cdcf Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 31 Oct 2023 14:30:34 -0500 Subject: [PATCH 10/40] add pyarrow to requirements --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1f0263bf..9f24653a 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,8 @@ 'numpy>=1.21,<1.22', 'pillow', 'python-dateutil', - 's3fs' + 's3fs', + 'pyarrow' ], extras_require={ From 4d5427de5578b6643d1d192ae86c221ba478194c Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 31 Oct 2023 16:30:15 -0800 Subject: [PATCH 11/40] add shapely>=2.0 in the setup.py and environment.yml --- environment.yml | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index b7161819..88a0cf45 100644 --- a/environment.yml +++ b/environment.yml @@ -20,6 +20,7 @@ dependencies: - gdal>=3.4,<3.5 - geopandas - jinja2 + - shapely>=2.0 - numpy>=1.21,<1.22 - pillow - python-dateutil diff --git a/setup.py b/setup.py index 9f24653a..45e509b2 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ 'geopandas', 'hyp3lib>=1.7.0,<2', 'jinja2', + 'shapely>=2.0', 'lxml', 'numpy>=1.21,<1.22', 'pillow', From 7bd14d63087d2561d796a4f362b2a7a5e49fe118 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Wed, 1 Nov 2023 09:05:23 -0800 Subject: [PATCH 12/40] downgrade shapely to v 1.8.0, in order to keep its compibible to GAMMA version --- environment.yml | 1 - hyp3_gamma/water_mask.py | 9 +++++---- setup.py | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/environment.yml b/environment.yml index 88a0cf45..b7161819 100644 --- a/environment.yml +++ b/environment.yml @@ -20,7 +20,6 @@ dependencies: - gdal>=3.4,<3.5 - geopandas - jinja2 - - shapely>=2.0 - numpy>=1.21,<1.22 - pillow - python-dateutil diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 9c3f87e1..c538dd17 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -44,7 +44,7 @@ def split_geometry_on_antimeridian(geometry: dict): return json.loads(geojson_str)['features'][0]['geometry'] -def get_water_mask_gdf(extent: geometry.Polygon) -> gpd.GeoDataFrame: +def get_water_mask_gdf(extent: dict) -> gpd.GeoDataFrame: """Get a GeoDataFrame of the water mask for a given extent Args: @@ -54,9 +54,10 @@ def get_water_mask_gdf(extent: geometry.Polygon) -> gpd.GeoDataFrame: GeoDataFrame of the water mask for the given extent """ mask_location = 'asf-dem-west/WATER_MASK/GSHHG/hyp3_water_mask_20220912' - corrected_extent = split_geometry_on_antimeridian(json.loads(shapely.to_geojson(extent))) + # extent_poly = geometry.shape(extent) + corrected_extent = split_geometry_on_antimeridian(extent) - filters = list(set([('lat_lon', '=', get_geo_partition(coord)) for coord in extent.exterior.coords])) + filters = list(set([('lat_lon', '=', get_geo_partition(coord)) for coord in geometry.shape(extent).exterior.coords])) s3_fs = s3fs.S3FileSystem(anon=True, default_block_size=5 * (2**20)) # TODO the conversion from pd -> gpd can be removed when gpd adds the filter param for read_parquet @@ -102,7 +103,7 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) extent = gdal.Info(input_image, format='json')['wgs84Extent'] - mask = get_water_mask_gdf(geometry.shape(extent)) + mask = get_water_mask_gdf(extent) with TemporaryDirectory() as temp_dir: temp_file = str(Path(temp_dir) / 'mask.shp') diff --git a/setup.py b/setup.py index 45e509b2..9f24653a 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,6 @@ 'geopandas', 'hyp3lib>=1.7.0,<2', 'jinja2', - 'shapely>=2.0', 'lxml', 'numpy>=1.21,<1.22', 'pillow', From ca3646077128184e9d802229c5c21ed608d44d70 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Wed, 1 Nov 2023 09:29:56 -0800 Subject: [PATCH 13/40] remove import shapely statement in water_mask.py --- hyp3_gamma/water_mask.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index c538dd17..dfbd904a 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -8,7 +8,6 @@ import numpy as np import pandas as pd import s3fs -import shapely from osgeo import gdal from shapely import geometry, wkt From 0760fc0c24f5e666e59ab9b4c33e99a9d0958594 Mon Sep 17 00:00:00 2001 From: cirrusasf <62269400+cirrusasf@users.noreply.github.com> Date: Wed, 1 Nov 2023 09:34:39 -0800 Subject: [PATCH 14/40] Update hyp3_gamma/water_mask.py Co-authored-by: Andrew Player --- hyp3_gamma/water_mask.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index dfbd904a..c4b118de 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -56,7 +56,8 @@ def get_water_mask_gdf(extent: dict) -> gpd.GeoDataFrame: # extent_poly = geometry.shape(extent) corrected_extent = split_geometry_on_antimeridian(extent) - filters = list(set([('lat_lon', '=', get_geo_partition(coord)) for coord in geometry.shape(extent).exterior.coords])) + shape_coords = geometry.shape(extent).exterior.coords + filters = list(set([('lat_lon', '=', get_geo_partition(coord)) for coord in shape_coords])) s3_fs = s3fs.S3FileSystem(anon=True, default_block_size=5 * (2**20)) # TODO the conversion from pd -> gpd can be removed when gpd adds the filter param for read_parquet From 7273723cbbc2e6a45c696e997f364a75ee2e4fb6 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Wed, 1 Nov 2023 12:04:55 -0800 Subject: [PATCH 15/40] need to add libspatialindex-dev and rtree for making the geopandas.clip() work --- Dockerfile | 2 +- environment.yml | 3 ++- setup.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index c7222e72..fb3d9bf0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,7 +33,7 @@ RUN apt update \ python-is-python3 \ python3-numpy python3-matplotlib python3-scipy python3-shapely python3-packaging \ # Additional installs - python3-pip wget git vim \ + python3-pip wget git vim libspatialindex-dev \ && apt clean \ && rm -rf /var/lib/apt/lists/* diff --git a/environment.yml b/environment.yml index b7161819..2e888b3b 100644 --- a/environment.yml +++ b/environment.yml @@ -30,4 +30,5 @@ dependencies: - pip: - lxml==4.8.0 - hyp3lib==1.7.0 - - pyarrow + - rtree + - pyarrow \ No newline at end of file diff --git a/setup.py b/setup.py index 9f24653a..4b541ac0 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,7 @@ 'pillow', 'python-dateutil', 's3fs', + 'rtree', 'pyarrow' ], From 54dad01bd6e81b4a4339cced28dfdb9638f29191 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Wed, 1 Nov 2023 14:09:22 -0800 Subject: [PATCH 16/40] modify environment.yml to install rtree and pyarrow with conda --- environment.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/environment.yml b/environment.yml index 2e888b3b..627cf298 100644 --- a/environment.yml +++ b/environment.yml @@ -27,8 +27,8 @@ dependencies: # - hyp3lib>=1.7.0,<2 # - lxml - s3fs + - rtree + - pyarrow - pip: - lxml==4.8.0 - - hyp3lib==1.7.0 - - rtree - - pyarrow \ No newline at end of file + - hyp3lib==1.7.0 \ No newline at end of file From 27dd145e2d40ba58032bd4c5ed9ba7899636c939 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 1 Nov 2023 17:48:41 -0800 Subject: [PATCH 17/40] Update environment.yml --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 627cf298..31cb5ad8 100644 --- a/environment.yml +++ b/environment.yml @@ -31,4 +31,4 @@ dependencies: - pyarrow - pip: - lxml==4.8.0 - - hyp3lib==1.7.0 \ No newline at end of file + - hyp3lib==1.7.0 From fcbf4900f47c8317365405134d5820075f75641c Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Thu, 2 Nov 2023 11:51:32 -0800 Subject: [PATCH 18/40] back to the previous way of reasding the mask with geopandas.read_file, add clip with extent --- environment.yml | 2 +- hyp3_gamma/water_mask.py | 7 ++++++- setup.py | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/environment.yml b/environment.yml index 31cb5ad8..4bc0eb2e 100644 --- a/environment.yml +++ b/environment.yml @@ -27,7 +27,7 @@ dependencies: # - hyp3lib>=1.7.0,<2 # - lxml - s3fs - - rtree + - pyogrio - pyarrow - pip: - lxml==4.8.0 diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index c4b118de..e93741ae 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -103,7 +103,12 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) extent = gdal.Info(input_image, format='json')['wgs84Extent'] - mask = get_water_mask_gdf(extent) + extent = split_geometry_on_antimeridian(extent) + # mask = get_water_mask_gdf(extent) + + mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' + mask = geopandas.read_file(mask_location, mask=extent, engine="pyogrio") + mask = mask.clip(gdf, geometry.shape(extent)) with TemporaryDirectory() as temp_dir: temp_file = str(Path(temp_dir) / 'mask.shp') diff --git a/setup.py b/setup.py index 4b541ac0..56a71642 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ 'pillow', 'python-dateutil', 's3fs', - 'rtree', + 'pyogrio', 'pyarrow' ], From 7afee62bebf3697c27c21942d6697d9f19e5001f Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Thu, 2 Nov 2023 11:55:53 -0800 Subject: [PATCH 19/40] keep rtree library to make geopandas.clip works --- Dockerfile | 2 +- environment.yml | 1 + setup.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fb3d9bf0..c7222e72 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,7 +33,7 @@ RUN apt update \ python-is-python3 \ python3-numpy python3-matplotlib python3-scipy python3-shapely python3-packaging \ # Additional installs - python3-pip wget git vim libspatialindex-dev \ + python3-pip wget git vim \ && apt clean \ && rm -rf /var/lib/apt/lists/* diff --git a/environment.yml b/environment.yml index 4bc0eb2e..5102749c 100644 --- a/environment.yml +++ b/environment.yml @@ -27,6 +27,7 @@ dependencies: # - hyp3lib>=1.7.0,<2 # - lxml - s3fs + - rtree - pyogrio - pyarrow - pip: diff --git a/setup.py b/setup.py index 56a71642..2c29bd08 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,7 @@ 'pillow', 'python-dateutil', 's3fs', + 'rtree', 'pyogrio', 'pyarrow' ], From 8f65479be2c3f0712b6fed902e425e80adb2bead Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Thu, 2 Nov 2023 15:18:18 -0800 Subject: [PATCH 20/40] add geopandasa.clip() to clip mask to the extent --- environment.yml | 1 - hyp3_gamma/water_mask.py | 6 +++--- setup.py | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/environment.yml b/environment.yml index 5102749c..31cb5ad8 100644 --- a/environment.yml +++ b/environment.yml @@ -28,7 +28,6 @@ dependencies: # - lxml - s3fs - rtree - - pyogrio - pyarrow - pip: - lxml==4.8.0 diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index e93741ae..813d707e 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -103,12 +103,12 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) extent = gdal.Info(input_image, format='json')['wgs84Extent'] - extent = split_geometry_on_antimeridian(extent) + corrected_extent = split_geometry_on_antimeridian(extent) # mask = get_water_mask_gdf(extent) mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' - mask = geopandas.read_file(mask_location, mask=extent, engine="pyogrio") - mask = mask.clip(gdf, geometry.shape(extent)) + mask = gpd.read_file(mask_location, mask=corrected_extent) + mask = mask.clip(mask, geometry.shape(corrected_extent)) with TemporaryDirectory() as temp_dir: temp_file = str(Path(temp_dir) / 'mask.shp') diff --git a/setup.py b/setup.py index 2c29bd08..4b541ac0 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,6 @@ 'python-dateutil', 's3fs', 'rtree', - 'pyogrio', 'pyarrow' ], From 9fdc17d3b5c308bb357f5b6bf121b6fb92e26018 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 3 Nov 2023 16:04:42 -0800 Subject: [PATCH 21/40] solve the partition issue when read the parquet file and filter format, solve the antimeridian issue --- hyp3_gamma/water_mask.py | 55 ++++++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 813d707e..01af5176 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -28,7 +28,9 @@ def get_geo_partition(coordinate, partition_size=90): A string representing the geo-partition for the given coordinate and partition size """ x, y = coordinate - x_rounded = int(np.floor(x / partition_size)) * partition_size + x_rounded = 0 if int(np.floor(x / partition_size)) * partition_size == 180 \ + else int(np.floor(x / partition_size)) * partition_size + y_rounded = int(np.floor(y / partition_size)) * partition_size x_fill = str(x_rounded).zfill(4) y_fill = str(y_rounded).zfill(4) @@ -43,6 +45,37 @@ def split_geometry_on_antimeridian(geometry: dict): return json.loads(geojson_str)['features'][0]['geometry'] +def poly_from_box(box): + ''' + create a polygon with box + Args: + box: [min_lon, min_lat, max_lon, max_lat] + Returns: + polygon + ''' + p1 = geometry.Point(box[0], box[1]) + p2 = geometry.Point(box[2], box[1]) + p3 = geometry.Point(box[2], box[3]) + p4 = geometry.Point(box[0], box[3]) + return geometry.Polygon([p1,p2,p3,p4,p1]) + + +def envelope(corrected_extent): + if corrected_extent["type"] == 'Polygon': + polys = geometry.MultiPolygon([geometry.shape(corrected_extent)]) + return polys + else: + polys = geometry.shape(corrected_extent) + bounds = [list(poly.bounds) for poly in polys.geoms] + lat_min = min([bound[1] for bound in bounds]) + lat_max = max([bound[3] for bound in bounds]) + for i in range(len(bounds)): + bounds[i][1] = lat_min + bounds[i][3] = lat_max + + return geometry.MultiPolygon([poly_from_box(bound) for bound in bounds]) + + def get_water_mask_gdf(extent: dict) -> gpd.GeoDataFrame: """Get a GeoDataFrame of the water mask for a given extent @@ -53,11 +86,17 @@ def get_water_mask_gdf(extent: dict) -> gpd.GeoDataFrame: GeoDataFrame of the water mask for the given extent """ mask_location = 'asf-dem-west/WATER_MASK/GSHHG/hyp3_water_mask_20220912' - # extent_poly = geometry.shape(extent) + corrected_extent = split_geometry_on_antimeridian(extent) - shape_coords = geometry.shape(extent).exterior.coords - filters = list(set([('lat_lon', '=', get_geo_partition(coord)) for coord in shape_coords])) + polys = envelope(corrected_extent) + + filters = [] + for poly in polys.geoms: + tmp = list(set([('lat_lon', '=', get_geo_partition(coord)) for coord in poly.envelope.exterior.coords])) + for i in tmp: + filters.append([i]) + s3_fs = s3fs.S3FileSystem(anon=True, default_block_size=5 * (2**20)) # TODO the conversion from pd -> gpd can be removed when gpd adds the filter param for read_parquet @@ -66,7 +105,7 @@ def get_water_mask_gdf(extent: dict) -> gpd.GeoDataFrame: df['lat_lon'] = df['lat_lon'].astype(str) gdf = gpd.GeoDataFrame(df, crs='EPSG:4326') - mask = gpd.clip(gdf, geometry.shape(corrected_extent)) + mask = gpd.clip(gdf, polys) return mask @@ -103,12 +142,8 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) extent = gdal.Info(input_image, format='json')['wgs84Extent'] - corrected_extent = split_geometry_on_antimeridian(extent) - # mask = get_water_mask_gdf(extent) - mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' - mask = gpd.read_file(mask_location, mask=corrected_extent) - mask = mask.clip(mask, geometry.shape(corrected_extent)) + mask = get_water_mask_gdf(extent) with TemporaryDirectory() as temp_dir: temp_file = str(Path(temp_dir) / 'mask.shp') From af0619706a6734860344d982c9884ab4e0776229 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 3 Nov 2023 16:06:50 -0800 Subject: [PATCH 22/40] modify code style --- hyp3_gamma/water_mask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 01af5176..3908a0f8 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -57,7 +57,7 @@ def poly_from_box(box): p2 = geometry.Point(box[2], box[1]) p3 = geometry.Point(box[2], box[3]) p4 = geometry.Point(box[0], box[3]) - return geometry.Polygon([p1,p2,p3,p4,p1]) + return geometry.Polygon([p1, p2, p3, p4, p1]) def envelope(corrected_extent): From 9faeb97b696360c914390efabc465684b87e4762 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 6 Nov 2023 13:26:13 -0600 Subject: [PATCH 23/40] refactoring --- hyp3_gamma/water_mask.py | 59 +++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 3908a0f8..c2caddaa 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -45,33 +45,44 @@ def split_geometry_on_antimeridian(geometry: dict): return json.loads(geojson_str)['features'][0]['geometry'] -def poly_from_box(box): - ''' - create a polygon with box +def poly_from_box(bbox: list): + """ Create a polygon from a bbox + Args: - box: [min_lon, min_lat, max_lon, max_lat] + bbox: The extent bounding box, e.g. [min_lon, min_lat, max_lon, max_lat] + Returns: - polygon - ''' - p1 = geometry.Point(box[0], box[1]) - p2 = geometry.Point(box[2], box[1]) - p3 = geometry.Point(box[2], box[3]) - p4 = geometry.Point(box[0], box[3]) - return geometry.Polygon([p1, p2, p3, p4, p1]) - - -def envelope(corrected_extent): - if corrected_extent["type"] == 'Polygon': - polys = geometry.MultiPolygon([geometry.shape(corrected_extent)]) + Polygon: The extent Polygon: [(min_lon, min_lat), (min_lon, max_lat), ...] + """ + min_lon = geometry.Point(bbox[0], bbox[1]) + min_lat = geometry.Point(bbox[2], bbox[1]) + max_lon = geometry.Point(bbox[2], bbox[3]) + max_lat = geometry.Point(bbox[0], bbox[3]) + return geometry.Polygon([min_lon, min_lat, max_lon, max_lat]) + + +def envelope(extent: dict): + """Returns an evelope surrounding a split extent. + + Args: + extent: A dict representing a Polygon or MultiPolygon from split_geometry_on_antimeridian. + + Returns: + A MultiPolygon representing the envelope surrounding the input Polygon(s). + """ + min_lat_col = 1 + max_lat_col = 3 + if extent["type"] == 'Polygon': + polys = geometry.MultiPolygon([geometry.shape(extent)]) return polys else: - polys = geometry.shape(corrected_extent) - bounds = [list(poly.bounds) for poly in polys.geoms] - lat_min = min([bound[1] for bound in bounds]) - lat_max = max([bound[3] for bound in bounds]) + polys = geometry.shape(extent) + bounds = np.asarray([list(poly.bounds) for poly in polys.geoms]) + lat_min = bounds[:, min_lat_col].min() + lat_max = bounds[:, max_lat_col].max() for i in range(len(bounds)): - bounds[i][1] = lat_min - bounds[i][3] = lat_max + bounds[i][min_lat_col] = lat_min + bounds[i][max_lat_col] = lat_max return geometry.MultiPolygon([poly_from_box(bound) for bound in bounds]) @@ -87,9 +98,7 @@ def get_water_mask_gdf(extent: dict) -> gpd.GeoDataFrame: """ mask_location = 'asf-dem-west/WATER_MASK/GSHHG/hyp3_water_mask_20220912' - corrected_extent = split_geometry_on_antimeridian(extent) - - polys = envelope(corrected_extent) + polys = envelope(split_geometry_on_antimeridian(extent)) filters = [] for poly in polys.geoms: From 4bb89c70ccaf87a95194ecdfe510274b9381d4a1 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 6 Nov 2023 13:28:21 -0600 Subject: [PATCH 24/40] refactoring --- hyp3_gamma/water_mask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index c2caddaa..cb018e59 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -62,7 +62,7 @@ def poly_from_box(bbox: list): def envelope(extent: dict): - """Returns an evelope surrounding a split extent. + """Returns an evelope surrounding a split extent. Args: extent: A dict representing a Polygon or MultiPolygon from split_geometry_on_antimeridian. From 171b4627bd277e5403213f2a31455ff909559768 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 6 Nov 2023 15:24:09 -0600 Subject: [PATCH 25/40] fixed errors in water masking --- hyp3_gamma/water_mask.py | 105 ++------------------------------------- 1 file changed, 5 insertions(+), 100 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index cb018e59..46c8eed1 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -5,39 +5,14 @@ from tempfile import TemporaryDirectory import geopandas as gpd -import numpy as np -import pandas as pd -import s3fs from osgeo import gdal -from shapely import geometry, wkt +from shapely import geometry from hyp3_gamma.util import GDALConfigManager gdal.UseExceptions() -def get_geo_partition(coordinate, partition_size=90): - """Get the geo-partition for a given coordinate (i.e., the lat/lon box it falls in) - - Args: - coordinate: A coordinate tuple (lon, lat) - partition_size: The partition size (in degrees) to use for the geo-partition - using a value of 90 will result in geo-partitions of 90x90 degrees - - Returns: - A string representing the geo-partition for the given coordinate and partition size - """ - x, y = coordinate - x_rounded = 0 if int(np.floor(x / partition_size)) * partition_size == 180 \ - else int(np.floor(x / partition_size)) * partition_size - - y_rounded = int(np.floor(y / partition_size)) * partition_size - x_fill = str(x_rounded).zfill(4) - y_fill = str(y_rounded).zfill(4) - partition = f'{y_fill}_{x_fill}' - return partition - - def split_geometry_on_antimeridian(geometry: dict): geometry_as_bytes = json.dumps(geometry).encode() cmd = ['ogr2ogr', '-wrapdateline', '-datelineoffset', '20', '-f', 'GeoJSON', '/vsistdout/', '/vsistdin/'] @@ -45,79 +20,6 @@ def split_geometry_on_antimeridian(geometry: dict): return json.loads(geojson_str)['features'][0]['geometry'] -def poly_from_box(bbox: list): - """ Create a polygon from a bbox - - Args: - bbox: The extent bounding box, e.g. [min_lon, min_lat, max_lon, max_lat] - - Returns: - Polygon: The extent Polygon: [(min_lon, min_lat), (min_lon, max_lat), ...] - """ - min_lon = geometry.Point(bbox[0], bbox[1]) - min_lat = geometry.Point(bbox[2], bbox[1]) - max_lon = geometry.Point(bbox[2], bbox[3]) - max_lat = geometry.Point(bbox[0], bbox[3]) - return geometry.Polygon([min_lon, min_lat, max_lon, max_lat]) - - -def envelope(extent: dict): - """Returns an evelope surrounding a split extent. - - Args: - extent: A dict representing a Polygon or MultiPolygon from split_geometry_on_antimeridian. - - Returns: - A MultiPolygon representing the envelope surrounding the input Polygon(s). - """ - min_lat_col = 1 - max_lat_col = 3 - if extent["type"] == 'Polygon': - polys = geometry.MultiPolygon([geometry.shape(extent)]) - return polys - else: - polys = geometry.shape(extent) - bounds = np.asarray([list(poly.bounds) for poly in polys.geoms]) - lat_min = bounds[:, min_lat_col].min() - lat_max = bounds[:, max_lat_col].max() - for i in range(len(bounds)): - bounds[i][min_lat_col] = lat_min - bounds[i][max_lat_col] = lat_max - - return geometry.MultiPolygon([poly_from_box(bound) for bound in bounds]) - - -def get_water_mask_gdf(extent: dict) -> gpd.GeoDataFrame: - """Get a GeoDataFrame of the water mask for a given extent - - Args: - extent: The extent to get the water mask for - - Returns: - GeoDataFrame of the water mask for the given extent - """ - mask_location = 'asf-dem-west/WATER_MASK/GSHHG/hyp3_water_mask_20220912' - - polys = envelope(split_geometry_on_antimeridian(extent)) - - filters = [] - for poly in polys.geoms: - tmp = list(set([('lat_lon', '=', get_geo_partition(coord)) for coord in poly.envelope.exterior.coords])) - for i in tmp: - filters.append([i]) - - s3_fs = s3fs.S3FileSystem(anon=True, default_block_size=5 * (2**20)) - - # TODO the conversion from pd -> gpd can be removed when gpd adds the filter param for read_parquet - df = pd.read_parquet(mask_location, filesystem=s3_fs, filters=filters) - df['geometry'] = df['geometry'].apply(wkt.loads) - df['lat_lon'] = df['lat_lon'].astype(str) - gdf = gpd.GeoDataFrame(df, crs='EPSG:4326') - - mask = gpd.clip(gdf, polys) - return mask - - def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): """Create a water mask GeoTIFF with the same geometry as a given input GeoTIFF @@ -151,8 +53,11 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) extent = gdal.Info(input_image, format='json')['wgs84Extent'] + corrected_extent = split_geometry_on_antimeridian(extent) - mask = get_water_mask_gdf(extent) + mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' + mask = gpd.read_file(mask_location, mask=corrected_extent, engine="fiona") + mask = gpd.clip(mask, geometry.shape(corrected_extent)) with TemporaryDirectory() as temp_dir: temp_file = str(Path(temp_dir) / 'mask.shp') From 3a7b0f3be69c20f2426f04f331cd95087fa408b1 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Mon, 6 Nov 2023 22:12:01 -0900 Subject: [PATCH 26/40] do not split the antimeridian extent, but convert it into UTM, then do read_file and clip with the envelope of extent --- hyp3_gamma/water_mask.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 46c8eed1..3c3d72c3 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -7,6 +7,7 @@ import geopandas as gpd from osgeo import gdal from shapely import geometry +from pyproj import CRS from hyp3_gamma.util import GDALConfigManager @@ -52,12 +53,21 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): dst_ds.SetProjection(src_ds.GetProjection()) dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) - extent = gdal.Info(input_image, format='json')['wgs84Extent'] - corrected_extent = split_geometry_on_antimeridian(extent) + info = gdal.Info(input_image, format='json') + prj = CRS.from_wkt(info["coordinateSystem"]["wkt"]) + epsg = prj.to_epsg() + + extent = info['wgs84Extent'] + + extent_gdf = gpd.GeoDataFrame(index=[0], geometry=[geometry.shape(extent)], crs='EPSG:4326').to_crs(epsg) + + envelope = extent_gdf.envelope mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' - mask = gpd.read_file(mask_location, mask=corrected_extent, engine="fiona") - mask = gpd.clip(mask, geometry.shape(corrected_extent)) + + mask = gpd.read_file(mask_location, mask=envelope, engine="fiona").to_crs(epsg) + + mask =gpd.clip(mask, envelope) with TemporaryDirectory() as temp_dir: temp_file = str(Path(temp_dir) / 'mask.shp') From cc8fe60a41c5dfa64123322c4e196d4aea0ea7d2 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Mon, 6 Nov 2023 22:17:38 -0900 Subject: [PATCH 27/40] code style --- hyp3_gamma/water_mask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 3c3d72c3..6bf2640f 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -6,8 +6,8 @@ import geopandas as gpd from osgeo import gdal -from shapely import geometry from pyproj import CRS +from shapely import geometry from hyp3_gamma.util import GDALConfigManager From 80dbe288174fa1e25b0abc2179f871f9dddff582 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Mon, 6 Nov 2023 22:19:20 -0900 Subject: [PATCH 28/40] code style --- hyp3_gamma/water_mask.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 6bf2640f..482e60a9 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -53,7 +53,7 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): dst_ds.SetProjection(src_ds.GetProjection()) dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) - info = gdal.Info(input_image, format='json') + info = gdal.Info(input_image, format='json') prj = CRS.from_wkt(info["coordinateSystem"]["wkt"]) epsg = prj.to_epsg() @@ -67,7 +67,7 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): mask = gpd.read_file(mask_location, mask=envelope, engine="fiona").to_crs(epsg) - mask =gpd.clip(mask, envelope) + mask = gpd.clip(mask, envelope) with TemporaryDirectory() as temp_dir: temp_file = str(Path(temp_dir) / 'mask.shp') From 37805882fdacd12fed9d34fee4bbe66c945e0d9c Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 7 Nov 2023 07:56:32 -0900 Subject: [PATCH 29/40] refactor code --- hyp3_gamma/water_mask.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 482e60a9..2d831c4c 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -20,6 +20,23 @@ def split_geometry_on_antimeridian(geometry: dict): geojson_str = subprocess.run(cmd, input=geometry_as_bytes, stdout=subprocess.PIPE, check=True).stdout return json.loads(geojson_str)['features'][0]['geometry'] +def get_envelope(input_image: str): + """ + get the envelope of the input_image + Args: + input_image: Path for the input GDAL-compatible image + + Returns: + (envelope, epsg): polygon of the envelope of the geotiff in the crs of the geotiff, epsg str + + """ + info = gdal.Info(input_image, format='json') + prj = CRS.from_wkt(info["coordinateSystem"]["wkt"]) + epsg = prj.to_epsg() + extent = info['wgs84Extent'] + extent_gdf = gpd.GeoDataFrame(index=[0], geometry=[geometry.shape(extent)], crs='EPSG:4326').to_crs(epsg) + return extent_gdf.envelope, epsg + def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): """Create a water mask GeoTIFF with the same geometry as a given input GeoTIFF @@ -31,7 +48,7 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): land in the pixel. Args: - input_imge: Path for the input GDAL-compatible image + input_image: Path for the input GDAL-compatible image output_image: Path for the output image gdal_format: GDAL format name to create output image as """ @@ -53,19 +70,11 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): dst_ds.SetProjection(src_ds.GetProjection()) dst_ds.SetMetadataItem('AREA_OR_POINT', src_ds.GetMetadataItem('AREA_OR_POINT')) - info = gdal.Info(input_image, format='json') - prj = CRS.from_wkt(info["coordinateSystem"]["wkt"]) - epsg = prj.to_epsg() - - extent = info['wgs84Extent'] - - extent_gdf = gpd.GeoDataFrame(index=[0], geometry=[geometry.shape(extent)], crs='EPSG:4326').to_crs(epsg) - - envelope = extent_gdf.envelope + envelope, epsg = get_envelope(input_image) mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' - mask = gpd.read_file(mask_location, mask=envelope, engine="fiona").to_crs(epsg) + mask = gpd.read_file(mask_location, mask=envelope).to_crs(epsg) mask = gpd.clip(mask, envelope) From 812485d0ee2cb0bd5972e0ed39d36c2916de4ad4 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 7 Nov 2023 07:58:16 -0900 Subject: [PATCH 30/40] code style --- hyp3_gamma/water_mask.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 2d831c4c..39a1febc 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -20,6 +20,7 @@ def split_geometry_on_antimeridian(geometry: dict): geojson_str = subprocess.run(cmd, input=geometry_as_bytes, stdout=subprocess.PIPE, check=True).stdout return json.loads(geojson_str)['features'][0]['geometry'] + def get_envelope(input_image: str): """ get the envelope of the input_image From eb3d7475bbc6073bea25c368b4cada70ee949480 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 7 Nov 2023 09:57:11 -0900 Subject: [PATCH 31/40] remove split_antimeridian function and its test function --- hyp3_gamma/water_mask.py | 9 -------- tests/test_water_mask.py | 46 ---------------------------------------- 2 files changed, 55 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 39a1febc..c3b5f3f7 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -1,6 +1,4 @@ """Create and apply a water body mask""" -import json -import subprocess from pathlib import Path from tempfile import TemporaryDirectory @@ -14,13 +12,6 @@ gdal.UseExceptions() -def split_geometry_on_antimeridian(geometry: dict): - geometry_as_bytes = json.dumps(geometry).encode() - cmd = ['ogr2ogr', '-wrapdateline', '-datelineoffset', '20', '-f', 'GeoJSON', '/vsistdout/', '/vsistdin/'] - geojson_str = subprocess.run(cmd, input=geometry_as_bytes, stdout=subprocess.PIPE, check=True).stdout - return json.loads(geojson_str)['features'][0]['geometry'] - - def get_envelope(input_image: str): """ get the envelope of the input_image diff --git a/tests/test_water_mask.py b/tests/test_water_mask.py index c4587c8a..79850b3b 100644 --- a/tests/test_water_mask.py +++ b/tests/test_water_mask.py @@ -6,52 +6,6 @@ gdal.UseExceptions() -def test_split_geometry_on_antimeridian(): - geometry = { - 'type': 'Polygon', - 'coordinates': [[ - [170, 50], - [175, 55], - [-170, 55], - [-175, 50], - [170, 50], - ]], - } - result = water_mask.split_geometry_on_antimeridian(geometry) - assert result == { - 'type': 'MultiPolygon', - 'coordinates': [ - [[ - [175.0, 55.0], - [180.0, 55.0], - [180.0, 50.0], - [170.0, 50.0], - [175.0, 55.0], - ]], - [[ - [-170.0, 55.0], - [-175.0, 50.0], - [-180.0, 50.0], - [-180.0, 55.0], - [-170.0, 55.0], - ]], - ], - } - - geometry = { - 'type': 'Polygon', - 'coordinates': [[ - [150, 50], - [155, 55], - [-150, 55], - [-155, 50], - [150, 50], - ]], - } - result = water_mask.split_geometry_on_antimeridian(geometry) - assert result == geometry - - def test_create_water_mask_with_no_water(tmp_path, test_data_dir): input_tif = str(test_data_dir / 'test_geotiff.tif') output_tif = str(tmp_path / 'water_mask.tif') From a796cc13fb49f58c4bb85b5acc5ce21b302cc1a4 Mon Sep 17 00:00:00 2001 From: cirrusasf <62269400+cirrusasf@users.noreply.github.com> Date: Tue, 7 Nov 2023 10:00:46 -0900 Subject: [PATCH 32/40] Update hyp3_gamma/water_mask.py Co-authored-by: Andrew Player --- hyp3_gamma/water_mask.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index c3b5f3f7..49778238 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -13,14 +13,13 @@ def get_envelope(input_image: str): - """ - get the envelope of the input_image + """ Get the envelope of the input_image + Args: input_image: Path for the input GDAL-compatible image Returns: - (envelope, epsg): polygon of the envelope of the geotiff in the crs of the geotiff, epsg str - + (envelope, epsg): Envelope of the geotiff as a Polygon, and the EPSG code of the geotiff as a string. """ info = gdal.Info(input_image, format='json') prj = CRS.from_wkt(info["coordinateSystem"]["wkt"]) From 76e35e22fb85c5231c942b8322d82a8fb2cd3528 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 7 Nov 2023 10:24:21 -0900 Subject: [PATCH 33/40] use pyogrio engine to do geopandas.read_file --- hyp3_gamma/water_mask.py | 6 +++++- tests/test_water_mask.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 49778238..d2a62343 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -6,6 +6,7 @@ from osgeo import gdal from pyproj import CRS from shapely import geometry +import pyogrio from hyp3_gamma.util import GDALConfigManager @@ -65,7 +66,10 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' - mask = gpd.read_file(mask_location, mask=envelope).to_crs(epsg) + bounds = envelope.to_crs(4326).bounds + bbox = (bounds['minx'][0], bounds['miny'][0],bounds['maxx'][0], bounds['maxy'][0]) + + mask = gpd.read_file(mask_location, bbox=bbox, engine='pyogrio').to_crs(epsg) mask = gpd.clip(mask, envelope) diff --git a/tests/test_water_mask.py b/tests/test_water_mask.py index 79850b3b..68e9fa1c 100644 --- a/tests/test_water_mask.py +++ b/tests/test_water_mask.py @@ -1,5 +1,6 @@ import numpy as np from osgeo import gdal +import pyorgio from hyp3_gamma import water_mask From c493ed728e6e5aa52d7506b878f1ba31da323079 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 7 Nov 2023 10:26:37 -0900 Subject: [PATCH 34/40] add pyogrio library in the environment.yml and setup.py --- environment.yml | 1 + setup.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 31cb5ad8..3fd10586 100644 --- a/environment.yml +++ b/environment.yml @@ -29,6 +29,7 @@ dependencies: - s3fs - rtree - pyarrow + - pyogrio - pip: - lxml==4.8.0 - hyp3lib==1.7.0 diff --git a/setup.py b/setup.py index 4b541ac0..f326886f 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,8 @@ 'python-dateutil', 's3fs', 'rtree', - 'pyarrow' + 'pyarrow', + 'pyogrio' ], extras_require={ From 1210f3d60fca989d5743372da14e73970c2e06e2 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 7 Nov 2023 10:30:37 -0900 Subject: [PATCH 35/40] code style --- hyp3_gamma/water_mask.py | 4 ++-- tests/test_water_mask.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index d2a62343..9ebf0c4d 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -6,7 +6,7 @@ from osgeo import gdal from pyproj import CRS from shapely import geometry -import pyogrio + from hyp3_gamma.util import GDALConfigManager @@ -67,7 +67,7 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' bounds = envelope.to_crs(4326).bounds - bbox = (bounds['minx'][0], bounds['miny'][0],bounds['maxx'][0], bounds['maxy'][0]) + bbox = (bounds['minx'][0], bounds['miny'][0], bounds['maxx'][0], bounds['maxy'][0]) mask = gpd.read_file(mask_location, bbox=bbox, engine='pyogrio').to_crs(epsg) diff --git a/tests/test_water_mask.py b/tests/test_water_mask.py index 68e9fa1c..79850b3b 100644 --- a/tests/test_water_mask.py +++ b/tests/test_water_mask.py @@ -1,6 +1,5 @@ import numpy as np from osgeo import gdal -import pyorgio from hyp3_gamma import water_mask From d59250439ea6804b5b2cbdfb895bc1e02dc431a9 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 7 Nov 2023 10:54:55 -0900 Subject: [PATCH 36/40] decide use geopandas.read_file with mask equal to envelope in the input reference file projection --- environment.yml | 1 - hyp3_gamma/water_mask.py | 5 +---- setup.py | 3 +-- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/environment.yml b/environment.yml index 3fd10586..31cb5ad8 100644 --- a/environment.yml +++ b/environment.yml @@ -29,7 +29,6 @@ dependencies: - s3fs - rtree - pyarrow - - pyogrio - pip: - lxml==4.8.0 - hyp3lib==1.7.0 diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index 9ebf0c4d..be40d2c0 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -66,10 +66,7 @@ def create_water_mask(input_image: str, output_image: str, gdal_format='GTiff'): mask_location = '/vsicurl/https://asf-dem-west.s3.amazonaws.com/WATER_MASK/GSHHG/hyp3_water_mask_20220912.shp' - bounds = envelope.to_crs(4326).bounds - bbox = (bounds['minx'][0], bounds['miny'][0], bounds['maxx'][0], bounds['maxy'][0]) - - mask = gpd.read_file(mask_location, bbox=bbox, engine='pyogrio').to_crs(epsg) + mask = gpd.read_file(mask_location, mask=envelope).to_crs(epsg) mask = gpd.clip(mask, envelope) diff --git a/setup.py b/setup.py index f326886f..4b541ac0 100644 --- a/setup.py +++ b/setup.py @@ -43,8 +43,7 @@ 'python-dateutil', 's3fs', 'rtree', - 'pyarrow', - 'pyogrio' + 'pyarrow' ], extras_require={ From f24d9a474472f98db61b96dc5d112840939381c7 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 8 Nov 2023 16:18:08 -0600 Subject: [PATCH 37/40] remove unused imports --- environment.yml | 3 --- setup.py | 5 +---- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/environment.yml b/environment.yml index 31cb5ad8..fc334c40 100644 --- a/environment.yml +++ b/environment.yml @@ -26,9 +26,6 @@ dependencies: # Addresses https://github.com/ASFHyP3/hyp3-gamma/issues/421 # - hyp3lib>=1.7.0,<2 # - lxml - - s3fs - - rtree - - pyarrow - pip: - lxml==4.8.0 - hyp3lib==1.7.0 diff --git a/setup.py b/setup.py index 4b541ac0..dc30d0c2 100644 --- a/setup.py +++ b/setup.py @@ -40,10 +40,7 @@ 'lxml', 'numpy>=1.21,<1.22', 'pillow', - 'python-dateutil', - 's3fs', - 'rtree', - 'pyarrow' + 'python-dateutil' ], extras_require={ From 2637c5548dbdb767cecf44669df8a9e438de149b Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Wed, 8 Nov 2023 15:05:22 -0900 Subject: [PATCH 38/40] keep rtree and delete s3fs and pyarrow in both environment.yml and setup.py --- environment.yml | 1 + setup.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index fc334c40..f79a6aa2 100644 --- a/environment.yml +++ b/environment.yml @@ -23,6 +23,7 @@ dependencies: - numpy>=1.21,<1.22 - pillow - python-dateutil + - rtree # Addresses https://github.com/ASFHyP3/hyp3-gamma/issues/421 # - hyp3lib>=1.7.0,<2 # - lxml diff --git a/setup.py b/setup.py index dc30d0c2..8ead1fa0 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,8 @@ 'lxml', 'numpy>=1.21,<1.22', 'pillow', - 'python-dateutil' + 'python-dateutil', + 'rtree' ], extras_require={ From 4de59275d0960414e859d876a3306b94ce22a730 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 9 Nov 2023 12:36:26 -0600 Subject: [PATCH 39/40] fixed docstring --- hyp3_gamma/water_mask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyp3_gamma/water_mask.py b/hyp3_gamma/water_mask.py index be40d2c0..0ee493da 100755 --- a/hyp3_gamma/water_mask.py +++ b/hyp3_gamma/water_mask.py @@ -20,7 +20,7 @@ def get_envelope(input_image: str): input_image: Path for the input GDAL-compatible image Returns: - (envelope, epsg): Envelope of the geotiff as a Polygon, and the EPSG code of the geotiff as a string. + (envelope, epsg): The envelope and epsg code of the GeoTIFF. """ info = gdal.Info(input_image, format='json') prj = CRS.from_wkt(info["coordinateSystem"]["wkt"]) From 4c63fb7fe3c7d25a34bef6d488e5cdd995ff0830 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 9 Nov 2023 13:00:03 -0600 Subject: [PATCH 40/40] better changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 956da27a..0b672cab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [6.4.1] ### Fixed -- A bug where the water mask would be significantly incorrect over some regions in Europe. +- Incorrect / blank water masks in some areas (primarily Europe), by clipping the water mask to the envelope of the product before rasterization. ## [6.4.0]