Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE]fix(mtd): fixing several anomalies for mtd sync with inpn mtd #3230

Draft
wants to merge 25 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
a02c616
fix(mtd): fix return of sync_af function
VincentCauchois Jun 4, 2024
0064984
fix(mtd): fix typo in associate_actors function
VincentCauchois Jun 4, 2024
c8735b6
fix(mtd): skip associate_actors for an af not retrieved
VincentCauchois Oct 29, 2024
77fdd4f
docs(mtd): use numpy docstring for sync_af_and_ds_by_user
VincentCauchois Oct 29, 2024
08617c1
refactor(mtd): add list type hint for return of some methods
VincentCauchois Oct 29, 2024
1519249
feat(mtd): improve logging for function sync_ds
VincentCauchois Oct 31, 2024
7410c64
chore(mtd): comply with numpy docstring format for `sync_af`
VincentCauchois Oct 31, 2024
2a9134a
feat(mtd): improve logging for function sync_af
VincentCauchois Oct 31, 2024
fd4af6f
fix(mtd): skip sync of af for an af without uuid
VincentCauchois Oct 31, 2024
e1931a6
docs(mtd): improve docstring for function associate_actors
VincentCauchois Oct 31, 2024
1e4667e
refactor(mtd): add type hints to associate_actors
VincentCauchois Oct 31, 2024
3e75dde
refactor(mtd): refact function associate_actors
VincentCauchois Oct 31, 2024
b0fcdcd
fix(mtd): add exception handling for associate_actors
VincentCauchois Oct 31, 2024
eaa6c40
fix(mtd): do not set organism name to "" if none
VincentCauchois Oct 31, 2024
9de1d2e
fix(mtd): ensure role retrieved from email is a user
VincentCauchois Oct 31, 2024
b035fe5
fix(mtd): fix index_elements for .on_conflict_do_nothing
VincentCauchois Oct 31, 2024
65735af
fix(mtd): handle associate_actors if no uuid for organism
VincentCauchois Oct 31, 2024
86871b7
fix(mtd): make association to organism prior to user
VincentCauchois Oct 31, 2024
280de22
fix(mtd): handle mtd with no contact principal retrieved
VincentCauchois Oct 31, 2024
37de38f
fix(mtd): retrieve af even if actors association impossible
VincentCauchois Oct 31, 2024
182f4c2
fix(mtd): add formatting af uuid in ds xml parsing
VincentCauchois Oct 31, 2024
1fe49af
refactor(mtd): remove imports for unused modules and functions
VincentCauchois Nov 4, 2024
3a7c8e3
refactor(mtd): rename function `parse_acquisition_framwork_xml`
VincentCauchois Nov 4, 2024
252e5c7
refactor(mtd): add function `parse_acquisition_frameworks_xml`
VincentCauchois Nov 4, 2024
c1b5846
[WIP](mtd): several TODOs
VincentCauchois Oct 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 52 additions & 27 deletions backend/geonature/core/gn_meta/mtd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
from sqlalchemy import func, select

from .mtd_utils import associate_actors, sync_af, sync_ds
from .xml_parser import parse_acquisition_framework, parse_acquisition_framwork_xml, parse_jdd_xml
from .xml_parser import (
parse_acquisition_framework,
parse_single_acquisition_framework_xml,
parse_jdd_xml,
parse_acquisition_frameworks_xml,
)

# create logger
logger = logging.getLogger("MTD_SYNC")
Expand All @@ -36,6 +41,7 @@
class MTDInstanceApi:
af_path = "/mtd/cadre/export/xml/GetRecordsByInstanceId?id={ID_INSTANCE}"
ds_path = "/mtd/cadre/jdd/export/xml/GetRecordsByInstanceId?id={ID_INSTANCE}"
# TODO: check whether there are endpoints to retrieve metadata for a given user and a given instance, and not only for a given user regardless of the instance
ds_user_path = "/mtd/cadre/jdd/export/xml/GetRecordsByUserId?id={ID_ROLE}"
af_user_path = "/mtd/cadre/export/xml/GetRecordsByUserId?id={ID_ROLE}"
single_af_path = "/mtd/cadre/export/xml/GetRecordById?id={ID_AF}" # NOTE: `ID_AF` is actually a UUID, not an ID, from the point of view of the GeoNature database.
Expand All @@ -60,20 +66,14 @@ def _get_xml(self, path):
def _get_af_xml(self):
return self._get_xml(self.af_path)

def get_af_list(self):
def get_af_list(self) -> list:
xml = self._get_af_xml()
_xml_parser = etree.XMLParser(ns_clean=True, recover=True, encoding="utf-8")
root = etree.fromstring(xml, parser=_xml_parser)
af_iter = root.iterfind(".//{http://inpn.mnhn.fr/mtd}CadreAcquisition")
af_list = []
for af in af_iter:
af_list.append(parse_acquisition_framework(af))
return af_list
return parse_acquisition_frameworks_xml(xml)

def _get_ds_xml(self):
return self._get_xml(self.ds_path)

def get_ds_list(self):
def get_ds_list(self) -> list:
xml = self._get_ds_xml()
return parse_jdd_xml(xml)

Expand Down Expand Up @@ -119,10 +119,7 @@ def get_list_af_for_user(self):
warning_message = f"""{warning_message} > Probably no acquisition framework found for the user with ID '{self.id_role}'"""
logger.warning(warning_message)
return []
_xml_parser = etree.XMLParser(ns_clean=True, recover=True, encoding="utf-8")
root = etree.fromstring(xml, parser=_xml_parser)
af_iter = root.findall(".//{http://inpn.mnhn.fr/mtd}CadreAcquisition")
af_list = [parse_acquisition_framework(af) for af in af_iter]
af_list = parse_acquisition_frameworks_xml(xml)
return af_list

def get_single_af(self, af_uuid):
Expand All @@ -142,7 +139,7 @@ def get_single_af(self, af_uuid):
url = urljoin(self.api_endpoint, self.single_af_path)
url = url.format(ID_AF=af_uuid)
xml = self._get_xml_by_url(url)
return parse_acquisition_framwork_xml(xml)
return parse_single_acquisition_framework_xml(xml)


class INPNCAS:
Expand Down Expand Up @@ -206,12 +203,22 @@ def process_af_and_ds(af_list, ds_list, id_role=None):
add_unexisting_digitizer(af["id_digitizer"] if not id_role else id_role)
user_add_total_time += time.time() - start_add_user_time
af = sync_af(af)
# TODO: choose whether or not to commit retrieval of the AF before association of actors
# and possibly retrieve an AF without any actor associated to it
# Commit here so that the AF is retrieved even if the subsequent association of actors fails
db.session.commit()
# If the AF has not been retrieved, associated actors cannot be retrieved either
# and thus we continue to the next AF
if not af:
continue
associate_actors(
actors,
CorAcquisitionFrameworkActor,
"id_acquisition_framework",
af.id_acquisition_framework,
af.unique_acquisition_framework_id,
)
# TODO: check the following TODO:
# TODO: remove actors removed from MTD
db.session.commit()
logger.debug("MTD - PROCESS DS LIST")
Expand All @@ -227,7 +234,13 @@ def process_af_and_ds(af_list, ds_list, id_role=None):
user_add_total_time += time.time() - start_add_user_time
ds = sync_ds(ds, list_cd_nomenclature)
if ds is not None:
associate_actors(actors, CorDatasetActor, "id_dataset", ds.id_dataset)
associate_actors(
actors,
CorDatasetActor,
"id_dataset",
ds.id_dataset,
ds.unique_dataset_id,
)

user_add_total_time = round(user_add_total_time, 2)
db.session.commit()
Expand All @@ -253,9 +266,12 @@ def sync_af_and_ds_by_user(id_role, id_af=None):
"""
Method to trigger MTD sync on user authentication.

Args:
id_role (int): The ID of the role (group or user).
id_af (str, optional): The ID of the AF (Acquisition Framework). Defaults to None.
Parameters
-----------
id_role : int
The ID of the role (group or user).
id_af : str, optional
The ID of an AF (Acquisition Framework).
"""

logger.info("MTD - SYNC USER : START")
Expand All @@ -270,19 +286,24 @@ def sync_af_and_ds_by_user(id_role, id_af=None):
ds_list = mtd_api.get_ds_user_list()

if not id_af:
# Get the unique UUIDs of the acquisition frameworks for the user
set_user_af_uuids = {ds["uuid_acquisition_framework"] for ds in ds_list}
user_af_uuids = list(set_user_af_uuids)

# TODO - voir avec INPN pourquoi les AF par user ne sont pas dans l'appel global des AF
# Ce code ne fonctionne pas pour cette raison -> AF manquants
# af_list = mtd_api.get_af_list()
# af_list = [af for af in af_list if af["unique_acquisition_framework_id"] in user_af_uuids]
# TODO: check the following commented section
# - Choose how to get the list of AFs to be retrieved for the user
# # Get the unique UUIDs of the acquisition frameworks for the user
# set_user_af_uuids = {ds["uuid_acquisition_framework"] for ds in ds_list}
# user_af_uuids = list(set_user_af_uuids)
#
# # TODO - voir avec INPN pourquoi les AF par user ne sont pas dans l'appel global des AF
# # Ce code ne fonctionne pas pour cette raison -> AF manquants
# # af_list = mtd_api.get_af_list()
# # af_list = [af for af in af_list if af["unique_acquisition_framework_id"] in user_af_uuids]

# Get the list of acquisition frameworks for the user
# call INPN API for each AF to retrieve info
# TODO: check whether any AF is retrieved while not being associated with the current instance
# this may theoretically happen as AFs from the XML file are not yet filtered by instance ID
af_list = mtd_api.get_list_af_for_user()
else:
# TODO: Check the following TODO
# TODO: handle the case where the AF corresponding to the provided `id_af` does not exist yet in the database
# this case should not happen from a user action, because the only case where `id_af` is provided is when the user clicks to unroll an AF in the Metadata module, in which case the AF already exists in the database.
# It would still be better to handle the case where the AF does not exist in the database, and to first retrieve the AF from 'INPN Métadonnées' in this case
Expand All @@ -293,6 +314,10 @@ def sync_af_and_ds_by_user(id_role, id_af=None):
af_list = [mtd_api.get_single_af(uuid_af)]

# Filter the datasets based on the specified UUID
# TODO: check whether there could be a difference for the AF between:
# - information displayed in the frontend page with the list of metadata
# - information retrieved from INPN MTD sync and committed to the database meanwhile
# If so, determine whether this is problematic (in which case the frontend content should be updated accordingly) or not
ds_list = [ds for ds in ds_list if ds["uuid_acquisition_framework"] == uuid_af]

# Process the acquisition frameworks and datasets
Expand Down
Loading
Loading