Merge pull request #488 from PnX-SI/develop
Develop > Master / 2.2.3
camillemonchicourt authored Sep 28, 2023
2 parents ae6f30c + 3277e11 commit 74d7940
Showing 17 changed files with 246 additions and 97 deletions.
23 changes: 14 additions & 9 deletions .github/workflows/pytest.yml
@@ -22,17 +22,22 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
+        debian-version: [ '10', '11', '12' ]
         include:
-          - name: "Debian 10"
+          - debian-version: '10'
             python-version: "3.7"
-            postgres-version: 11
-            postgis-version: 2.5
-          - name: "Debian 11"
-            python-version: "3.9"
-            postgres-version: 13
-            postgis-version: 3.2
+            postgres-version: '11'
+            postgis-version: '2.5'
+          - debian-version: '11'
+            python-version: '3.9'
+            postgres-version: '13'
+            postgis-version: '3.2'
+          - debian-version: '12'
+            python-version: '3.11'
+            postgres-version: '15'
+            postgis-version: '3.3'

-    name: ${{ matrix.name }}
+    name: Debian ${{ matrix.debian-version }}

     services:
       postgres:
@@ -118,7 +123,7 @@ jobs:
             GEONATURE_CONFIG_FILE: dependencies/GeoNature/config/test_config.toml
             GEONATURE_SETTINGS: gn_module_import.test_config
       - name: Upload coverage to Codecov
-        if: ${{ matrix.name == 'Debian 11' }}
+        if: ${{ matrix.debian-version == '12' }}
         uses: codecov/codecov-action@v2
         with:
           flags: pytest
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-2.2.2
+2.2.3
26 changes: 19 additions & 7 deletions backend/gn_module_import/checks/dataframe/__init__.py
@@ -2,7 +2,9 @@
 from uuid import uuid4
 from itertools import chain

+from sqlalchemy import func
 from sqlalchemy.orm.exc import NoResultFound
+from sqlalchemy.dialects.postgresql import insert as pg_insert
 from flask import current_app

 from geonature.utils.env import db
@@ -118,11 +120,21 @@ def run_all_checks(imprt, fields: Dict[str, BibFields], df):
         ordered_invalid_rows = sorted(invalid_rows["line_no"])
         column = generated_fields.get(error["column"], error["column"])
         column = imprt.fieldmapping.get(column, column)
-        error = ImportUserError(
-            imprt=imprt,
-            type=error_type,
-            column=column,
-            rows=ordered_invalid_rows,
-            comment=error.get("comment"),
+        # If an error already exists for the same import, column and error type,
+        # concatenate the existing erroneous rows with the current ones.
+        stmt = pg_insert(ImportUserError).values(
+            {
+                "id_import": imprt.id_import,
+                "id_error": error_type.pk,
+                "column_error": column,
+                "id_rows": ordered_invalid_rows,
+                "comment": error.get("comment"),
+            }
         )
-        db.session.add(error)
+        stmt = stmt.on_conflict_do_update(
+            constraint="t_user_errors_un",  # unique (import, error_type, column)
+            set_={
+                "id_rows": func.array_cat(ImportUserError.rows, stmt.excluded["id_rows"]),
+            },
+        )
+        db.session.execute(stmt)
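The replacement relies on PostgreSQL's INSERT … ON CONFLICT DO UPDATE: when a batch reports an error already recorded for the same import, error type and column, array_cat appends its row numbers to the stored ones. A minimal self-contained sketch of the same pattern; the errors table below is an illustrative stand-in, not the module's actual model:

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ARRAY, insert as pg_insert

metadata = sa.MetaData()
# Illustrative stand-in for gn_imports.t_user_errors
errors = sa.Table(
    "errors",
    metadata,
    sa.Column("id_import", sa.Integer, primary_key=True),
    sa.Column("id_error", sa.Integer, primary_key=True),
    sa.Column("column_error", sa.Unicode, primary_key=True),
    sa.Column("id_rows", ARRAY(sa.Integer)),
)

stmt = pg_insert(errors).values(
    id_import=1, id_error=2, column_error="cd_nom", id_rows=[10, 12]
)
stmt = stmt.on_conflict_do_update(
    index_elements=["id_import", "id_error", "column_error"],
    # Append the newly reported rows to those already recorded
    set_={"id_rows": sa.func.array_cat(errors.c.id_rows, stmt.excluded.id_rows)},
)
# with engine.begin() as conn:  # engine = sa.create_engine("postgresql://...")
#     conn.execute(stmt)

Naming the conflict target by constraint (as the module does) and listing its index_elements (as here) are equivalent ways of identifying the unique constraint the upsert resolves against.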
61 changes: 30 additions & 31 deletions backend/gn_module_import/checks/sql/__init__.py
@@ -2,7 +2,7 @@

 from flask import current_app
 from sqlalchemy import func
-from sqlalchemy.sql.expression import select, update, insert, literal
+from sqlalchemy.sql.expression import select, update, insert, literal, join
 from sqlalchemy.sql import column
 import sqlalchemy as sa
 from sqlalchemy.dialects.postgresql import array_agg, aggregate_order_by
@@ -174,61 +174,59 @@ def check_nomenclatures(imprt, fields):
     )


-def set_column_from_referential(imprt, field, reference, error_type, whereclause=None):
-    source_field = getattr(ImportSyntheseData, field.source_field)
+def check_referential(imprt, field, reference_field, error_type, reference_table=None):
     synthese_field = getattr(ImportSyntheseData, field.synthese_field)
-    stmt = (
-        update(ImportSyntheseData)
-        .values(
-            {
-                synthese_field: reference,
-            }
-        )
-        .where(
-            sa.and_(
-                source_field == sa.cast(reference, sa.Unicode),
-                ImportSyntheseData.id_import == imprt.id_import,
+    if reference_table is None:
+        reference_table = reference_field.class_
+    # Outer join the referential and select rows with a value in the synthese field
+    # but none in the referential, i.e. the value matched nothing in the referential.
+    cte = (
+        select([ImportSyntheseData.line_no])
+        .select_from(
+            join(
+                ImportSyntheseData,
+                reference_table,
+                synthese_field == reference_field,
+                isouter=True,
             )
         )
+        .where(ImportSyntheseData.imprt == imprt)
+        .where(synthese_field != None)
+        .where(reference_field == None)
+        .cte("invalid_ref")
     )
-    if whereclause is not None:
-        stmt = stmt.where(whereclause)
-    db.session.execute(stmt)
     report_erroneous_rows(
         imprt,
         error_type=error_type,
         error_column=field.name_field,
-        whereclause=sa.and_(
-            source_field != None,
-            source_field != "",
-            synthese_field == None,
-        ),
+        whereclause=ImportSyntheseData.line_no == cte.c.line_no,
     )


-def set_cd_nom(imprt, fields):
+def check_cd_nom(imprt, fields):
     if "cd_nom" not in fields:
         return
     field = fields["cd_nom"]
-    whereclause = None
     # Filter on a taxhub list if provided
     list_id = current_app.config["IMPORT"].get("ID_LIST_TAXA_RESTRICTION", None)
     if list_id is not None:
-        whereclause = sa.and_(
-            CorNomListe.id_liste == list_id,
-            BibNoms.id_nom == CorNomListe.id_nom,
-            Taxref.cd_nom == BibNoms.cd_nom,
+        reference_table = join(Taxref, BibNoms).join(
+            CorNomListe,
+            sa.and_(BibNoms.id_nom == CorNomListe.id_nom, CorNomListe.id_liste == list_id),
         )
-    set_column_from_referential(
-        imprt, field, Taxref.cd_nom, "CD_NOM_NOT_FOUND", whereclause=whereclause
+    else:
+        reference_table = Taxref
+    check_referential(
+        imprt, field, Taxref.cd_nom, "CD_NOM_NOT_FOUND", reference_table=reference_table
     )


-def set_cd_hab(imprt, fields):
+def check_cd_hab(imprt, fields):
     if "cd_hab" not in fields:
         return
     field = fields["cd_hab"]
-    set_column_from_referential(imprt, field, Habref.cd_hab, "CD_HAB_NOT_FOUND")
+    check_referential(imprt, field, Habref.cd_hab, "CD_HAB_NOT_FOUND")


 def set_altitudes(imprt, fields):
@@ -380,6 +378,7 @@ def set_uuid(imprt, fields):
     db.session.execute(stmt)


+# Currently unused, as this check is performed during the dataframe checks
 def check_mandatory_fields(imprt, fields):
     for field in fields.values():
         if not field.mandatory or not field.synthese_field:
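The rewritten check is a classic anti-join: LEFT OUTER JOIN the referential and keep the rows where a value was provided but nothing matched. A self-contained sketch of the pattern, in the same select([...]) style the module uses; the data and taxref tables here are illustrative stand-ins, not the module's actual schema:

import sqlalchemy as sa
from sqlalchemy.sql.expression import select, join

metadata = sa.MetaData()
data = sa.Table(
    "data",
    metadata,
    sa.Column("line_no", sa.Integer, primary_key=True),
    sa.Column("cd_nom", sa.Integer),
)
taxref = sa.Table(
    "taxref", metadata, sa.Column("cd_nom", sa.Integer, primary_key=True)
)

invalid_lines = (
    select([data.c.line_no])
    .select_from(join(data, taxref, data.c.cd_nom == taxref.c.cd_nom, isouter=True))
    .where(data.c.cd_nom != None)    # a value was provided…
    .where(taxref.c.cd_nom == None)  # …but it matched nothing in the referential
)

Unlike the old set_column_from_referential, the check only reports invalid rows and no longer populates the synthese column from the referential; keeping unmatched values in the synthese columns is also why the cd_nom/cd_hab foreign keys are dropped in the ea67bf7b6888 migration below.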
1 change: 1 addition & 0 deletions backend/gn_module_import/conf_schema_toml.py
@@ -155,3 +155,4 @@ class GnModuleSchemaConf(Schema):
     # are in the list. Otherwise throws an error
     ID_LIST_TAXA_RESTRICTION = fields.Integer(load_default=None)
     MODULE_URL = fields.String(load_default="/import")
+    DATAFRAME_BATCH_SIZE = fields.Integer(load_default=10000)
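The new DATAFRAME_BATCH_SIZE option suggests the dataframe checks now process the source file in batches rather than as a single dataframe; the batching code itself is outside this diff. A hedged sketch of the general technique it points to, assuming pandas-style chunked reading (check_batch is a hypothetical stand-in, not a module function):

import pandas as pd

BATCH_SIZE = 10000  # mirrors the option's default

def check_batch(df: pd.DataFrame) -> None:
    """Hypothetical stand-in for the module's dataframe checks."""
    ...

# Memory stays bounded by the batch size rather than by the file size.
for chunk in pd.read_csv("source.csv", sep=";", chunksize=BATCH_SIZE):
    check_batch(chunk)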
@@ -0,0 +1,31 @@
"""unique import error

Revision ID: 2896cf965dd6
Revises: ea67bf7b6888
Create Date: 2023-09-28 10:19:10.133530

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "2896cf965dd6"
down_revision = "ea67bf7b6888"
branch_labels = None
depends_on = None


def upgrade():
op.create_unique_constraint(
schema="gn_imports",
table_name="t_user_errors",
columns=["id_import", "id_error", "column_error"],
constraint_name="t_user_errors_un",
)


def downgrade():
op.drop_constraint(
schema="gn_imports", table_name="t_user_errors", constraint_name="t_user_errors_un"
)
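This unique constraint is the conflict target of the on_conflict_do_update(constraint="t_user_errors_un", …) call added in checks/dataframe/__init__.py above: PostgreSQL resolves ON CONFLICT ON CONSTRAINT against it, so the upsert fails unless this migration has been applied first.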
50 changes: 50 additions & 0 deletions backend/gn_module_import/migrations/ea67bf7b6888_remove_cd_fk.py
@@ -0,0 +1,50 @@
"""remove cd fk

Revision ID: ea67bf7b6888
Revises: d6bf8eaf088c
Create Date: 2023-09-27 15:37:19.286693

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "ea67bf7b6888"
down_revision = "d6bf8eaf088c"
branch_labels = None
depends_on = None


def upgrade():
op.drop_constraint(
schema="gn_imports",
table_name="t_imports_synthese",
constraint_name="t_imports_synthese_cd_nom_fkey",
)
op.drop_constraint(
schema="gn_imports",
table_name="t_imports_synthese",
constraint_name="t_imports_synthese_cd_hab_fkey",
)


def downgrade():
op.create_foreign_key(
constraint_name="t_imports_synthese_cd_nom_fkey",
source_schema="gn_imports",
source_table="t_imports_synthese",
local_cols=["cd_nom"],
referent_schema="taxonomie",
referent_table="taxref",
remote_cols=["cd_nom"],
)
op.create_foreign_key(
constraint_name="t_imports_synthese_cd_hab_fkey",
source_schema="gn_imports",
source_table="t_imports_synthese",
local_cols=["cd_hab"],
referent_schema="ref_habitats",
referent_table="habref",
remote_cols=["cd_hab"],
)
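Dropping these foreign keys complements the new SQL checks: a row with an unknown cd_nom or cd_hab must be storable in t_imports_synthese so check_referential can flag it, instead of the database rejecting the INSERT outright. A hedged illustration of the old failure mode; the column list is inferred from the constraint names, not verified against the full schema:

import sqlalchemy as sa
from sqlalchemy.exc import IntegrityError
from geonature.utils.env import db

try:
    # With t_imports_synthese_cd_nom_fkey in place, an unknown cd_nom
    # fails at INSERT time, before any check can report line 42 nicely.
    db.session.execute(
        sa.text(
            "INSERT INTO gn_imports.t_imports_synthese (id_import, line_no, cd_nom) "
            "VALUES (:id_import, :line_no, :cd_nom)"
        ),
        {"id_import": 1, "line_no": 42, "cd_nom": 999999999},
    )
    db.session.commit()
except IntegrityError:
    db.session.rollback()  # the whole import would abort here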
1 change: 1 addition & 0 deletions backend/gn_module_import/models.py
@@ -47,6 +47,7 @@ def generate_input_url_for_dataset(self, dataset):
         return f"/import/process/upload?datasetId={dataset.id_dataset}"

     generate_input_url_for_dataset.label = "Importer des données"
+    generate_input_url_for_dataset.object_code = "IMPORT"

     def generate_module_url_for_source(self, source):
         id_import = re.search(r"^Import\(id=(?P<id>\d+)\)$", source.name_source).group("id")
19 changes: 11 additions & 8 deletions backend/gn_module_import/routes/imports.py
@@ -1,6 +1,5 @@
-from io import BytesIO
-import codecs
-from io import StringIO
+from io import BytesIO, StringIO, TextIOWrapper
 import csv
 import unicodedata

@@ -215,7 +214,7 @@ def upload_file(scope, import_id):
 @blueprint.route("/imports/<int:import_id>/decode", methods=["POST"])
 @permissions.check_cruved_scope("C", get_scope=True, module_code="IMPORT", object_code="IMPORT")
 def decode_file(scope, import_id):
-    imprt = TImports.query.options(undefer("source_file")).get_or_404(import_id)
+    imprt = TImports.query.get_or_404(import_id)
     if not imprt.has_instance_permission(scope):
         raise Forbidden
     if not imprt.dataset.active:
@@ -257,15 +256,19 @@ def decode_file(scope, import_id):
     except ValueError:
         raise BadRequest(description="decode parameter must be an int")
     if decode:
+        csvfile = TextIOWrapper(BytesIO(imprt.source_file), encoding=imprt.encoding)
+        csvreader = csv.reader(csvfile, delimiter=imprt.separator)
         try:
-            csvfile = StringIO(imprt.source_file.decode(imprt.encoding))
+            columns = next(csvreader)
+            while True:  # read the full file to ensure no encoding errors occur
+                next(csvreader)
         except UnicodeError as e:
             raise BadRequest(
                 description="Erreur d’encodage lors de la lecture du fichier source. "
                 "Avez-vous sélectionné le bon encodage de votre fichier ?"
             )
-        csvreader = csv.reader(csvfile, delimiter=imprt.separator)
-        columns = next(csvreader)
+        except StopIteration:
+            pass
         duplicates = set([col for col in columns if columns.count(col) > 1])
         if duplicates:
             raise BadRequest(f"Duplicate column names: {duplicates}")
@@ -519,7 +522,7 @@ def get_import_invalid_rows_as_csv(scope, import_id):
     Export invalid data in CSV.
     """
-    imprt = TImports.query.options(undefer("source_file")).get_or_404(import_id)
+    imprt = TImports.query.get_or_404(import_id)
     if not imprt.has_instance_permission(scope):
         raise Forbidden
     if not imprt.processed:
@@ -530,7 +533,7 @@ def get_import_invalid_rows_as_csv(scope, import_id):

     @stream_with_context
     def generate_invalid_rows_csv():
-        sourcefile = StringIO(imprt.source_file.decode(imprt.encoding))
+        sourcefile = TextIOWrapper(BytesIO(imprt.source_file), encoding=imprt.encoding)
         destfile = StringIO()
         csvreader = csv.reader(sourcefile, delimiter=imprt.separator)
         csvwriter = csv.writer(destfile, dialect=csvreader.dialect, lineterminator="\n")
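Both endpoints now wrap the raw bytes in TextIOWrapper instead of decoding the whole file with bytes.decode(). The two are equivalent for well-formed input, but the wrapper decodes incrementally as the CSV reader consumes it, avoiding a second full in-memory copy of the file. A small self-contained comparison:

import csv
from io import BytesIO, StringIO, TextIOWrapper

raw = "col1;col2\na;b\n".encode("utf-8")

# Eager: materialises the entire file a second time, as one str.
eager = csv.reader(StringIO(raw.decode("utf-8")), delimiter=";")

# Streaming: decodes chunk by chunk; UnicodeError surfaces only when
# (and if) a bad byte sequence is actually read, hence the new
# "read the full file" loop in decode_file above.
lazy = csv.reader(TextIOWrapper(BytesIO(raw), encoding="utf-8"), delimiter=";")

assert list(eager) == list(lazy) == [["col1", "col2"], ["a", "b"]]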