From e0a3608a71f3b8ab2c305ddf4db1e0d1b530f21b Mon Sep 17 00:00:00 2001 From: Jannic Veith Date: Tue, 1 Oct 2024 11:14:55 +0200 Subject: [PATCH] Add script to import geometries --- .../.gitignore | 165 ++++++++++++++++++ .../README.md | 14 ++ .../2024-10-01_import-new-geometries/main.py | 68 ++++++++ .../requirements.txt | 2 + 4 files changed, 249 insertions(+) create mode 100644 scripts/2024-10-01_import-new-geometries/.gitignore create mode 100644 scripts/2024-10-01_import-new-geometries/README.md create mode 100644 scripts/2024-10-01_import-new-geometries/main.py create mode 100644 scripts/2024-10-01_import-new-geometries/requirements.txt diff --git a/scripts/2024-10-01_import-new-geometries/.gitignore b/scripts/2024-10-01_import-new-geometries/.gitignore new file mode 100644 index 00000000..eec7044f --- /dev/null +++ b/scripts/2024-10-01_import-new-geometries/.gitignore @@ -0,0 +1,165 @@ +*.sql +*.gpkg + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ diff --git a/scripts/2024-10-01_import-new-geometries/README.md b/scripts/2024-10-01_import-new-geometries/README.md new file mode 100644 index 00000000..96ca6541 --- /dev/null +++ b/scripts/2024-10-01_import-new-geometries/README.md @@ -0,0 +1,14 @@ +# Script to import new Geometries into the assets database + +The script takes a geopackage file as input and creates two files: + +- a file `sgsids.txt` which contains all sgsids whose geometries are to be updated +- a SQL Script `import_geometries.sql` which, for each asset with a relevant sgsid, deletes the existing geometries and inserts the new ones + +Getting started: + +- Install Python 3.12 +- Install requirements.txt +- Adjust the parameters at the top of `main.py` +- Run `main.py` +- Run the resulting SQL Script directly in the database. In case of a large number of geometries, consider running the script in smaller transaction blocks. Tools like DataGrip can do this directly. 
"""Generate a SQL script that replaces asset geometries from a GeoPackage.

Script path in the repository:
``scripts/2024-10-01_import-new-geometries/main.py``.

The point, line and polygon layers of the GeoPackage are read, rows whose
``Bemerkung`` value is not allowed are dropped, and two files are written
into ``filedir``:

* ``sgsids.txt`` -- one affected sgsid per line.
* ``import_geometries.sql`` -- DELETE statements removing the existing
  geometries of every affected asset, followed by INSERT statements for the
  new geometries.
"""
import os
from typing import TextIO

import geopandas as gpd
import numpy as np
from geopandas import GeoDataFrame

# The following parameters need to be adjusted to the correct values:
filedir = ''
filename = 'geometries.gpkg'
output_filename = 'import_geometries.sql'
output_sgsids_filename = 'sgsids.txt'

# The following sgsids are not in the database, they were manually translated to the probably correct sgsid:
sgsid_translation_dict = {}

# Only use entries with Bemerkung equal to one of the following:
allowed_bemerkung = ['erraten', 'erraten_B', 'neu', 'neu (?)', 'unsich kein B']


def _format_sgsid(value) -> str:
    """Render an sgsid as an integer SQL literal.

    The id column may be read as floats (e.g. ``1234.0``). Interpolating such
    a value directly would put ``1234.0`` -- or ``nan`` for a missing id --
    into the generated SQL, so the value is normalised to a plain integer.

    Raises:
        ValueError: if ``value`` is missing, non-numeric or not a whole number.
    """
    if isinstance(value, (int, np.integer)):
        return str(int(value))
    try:
        number = float(value)
    except (TypeError, ValueError) as err:
        raise ValueError(f'Invalid sgsid {value!r}: not a number') from err
    # float.is_integer() is False for NaN and +/-inf as well.
    if not number.is_integer():
        raise ValueError(f'Invalid sgsid {value!r}: not a whole number')
    return str(int(number))


def read_gdf(file: str, layer: str, allowed_bemerkung: list, translation_dict: dict) -> GeoDataFrame:
    """Read one layer and return the rows to import.

    Args:
        file: Path of the file passed to ``geopandas.read_file``.
        layer: Name of the layer to read.
        allowed_bemerkung: Rows are kept only if their ``Bemerkung`` value is
            in this list.
        translation_dict: Mapping applied to the ``IDSGS_neu`` column
            (old sgsid -> corrected sgsid).

    Returns:
        The filtered frame with translated ``IDSGS_neu`` values.
    """
    gdf = gpd.read_file(file, layer=layer)
    # Printed before filtering so the operator sees every Bemerkung value in the layer.
    print(gdf['Bemerkung'].value_counts())
    gdf = gdf[gdf['Bemerkung'].isin(allowed_bemerkung)]
    # Use the returned copy: an in-place replace on a filtered frame can
    # trigger pandas' SettingWithCopyWarning.
    return gdf.replace({'IDSGS_neu': translation_dict})


def write_insert_statements(f: TextIO, gdf: GeoDataFrame, geom_type: str) -> None:
    """Write one INSERT statement per row of ``gdf`` to ``f``.

    Args:
        f: Open text file receiving the SQL.
        gdf: Frame providing the ``IDSGS_neu`` and ``geometry`` columns.
        geom_type: Suffix of the target table ``study_<geom_type>``.

    Raises:
        ValueError: if a row has an sgsid that is not a whole number.
    """
    f.writelines(
        f"INSERT INTO study_{geom_type} (asset_id, geom_quality_item_code, geom) "
        f"VALUES ((SELECT asset_id FROM asset WHERE sgs_id = {_format_sgsid(sgsid)}), "
        f"'revised', ST_GeomFromText('{geometry.wkt}', 2056));\n"
        for sgsid, geometry in zip(gdf['IDSGS_neu'], gdf['geometry'])
    )


def main() -> None:
    """Read the three layers and write ``sgsids.txt`` and the SQL import script."""
    path = os.path.join(filedir, filename)
    gdf_points = read_gdf(file=path, layer='backup_20240829__points',
                          allowed_bemerkung=allowed_bemerkung, translation_dict=sgsid_translation_dict)
    gdf_lines = read_gdf(file=path, layer='backup_20240829__lines',
                         allowed_bemerkung=allowed_bemerkung, translation_dict=sgsid_translation_dict)
    gdf_polygons = read_gdf(file=path, layer='backup_20240829__polygons',
                            allowed_bemerkung=allowed_bemerkung, translation_dict=sgsid_translation_dict)
    gdf_lines = gdf_lines.explode()  # explode multi-linestrings to linestrings
    gdf_polygons = gdf_polygons.explode()  # explode multi-polygons to polygons
    gdf_points = gdf_points[~gdf_points['geometry'].is_empty]  # remove empty geometries

    sgs_ids = np.unique(np.concatenate(
        [gdf_lines['IDSGS_neu'].unique(),
         gdf_points['IDSGS_neu'].unique(),
         gdf_polygons['IDSGS_neu'].unique()]))
    # Validate and format every id once, before any file is written, so that
    # sgsids.txt and the SQL script always contain exactly the same literals.
    sgsid_literals = [_format_sgsid(sgsid) for sgsid in sgs_ids]

    with open(os.path.join(filedir, output_sgsids_filename), 'w', encoding='utf-8') as f:
        f.writelines(f"{sgsid}\n" for sgsid in sgsid_literals)

    # For each sgsid, delete all existing geometries of the matching asset.
    # No transaction control is emitted here; split the script into
    # transaction blocks when executing it if necessary.
    with open(os.path.join(filedir, output_filename), 'w', encoding='utf-8') as f:
        for sgsid in sgsid_literals:
            f.write(
                f"DELETE FROM study_location WHERE asset_id = (SELECT asset_id FROM asset WHERE sgs_id = {sgsid});\n")
            f.write(f"DELETE FROM study_trace WHERE asset_id = (SELECT asset_id FROM asset WHERE sgs_id = {sgsid});\n")
            f.write(f"DELETE FROM study_area WHERE asset_id = (SELECT asset_id FROM asset WHERE sgs_id = {sgsid});\n")

        f.write('SELECT \'Finished deleting previous studies.\';\n\n')

        # Insert the new geometries: points, then lines, then polygons.
        f.write('SELECT \'Creating locations.\';\n')
        write_insert_statements(f, gdf_points, 'location')
        f.write('\nSELECT \'Creating trace.\';\n')
        write_insert_statements(f, gdf_lines, 'trace')
        f.write('\nSELECT \'Creating area.\';\n')
        write_insert_statements(f, gdf_polygons, 'area')


if __name__ == '__main__':
    main()

# Patch residue that shared these source lines -- contents of
# scripts/2024-10-01_import-new-geometries/requirements.txt:
#   geopandas
#   numpy
at end of file