Skip to content

Commit

Permalink
ENH: Stop generating metadata when config is not valid
Browse files Browse the repository at this point in the history
  • Loading branch information
tnatt committed Sep 23, 2024
1 parent 4787c91 commit e83d1fb
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 54 deletions.
16 changes: 6 additions & 10 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,9 @@ def _get_meta_fmu(fmudata: FmuProvider) -> schema.InternalFMU | None:


def _get_meta_access(dataio: ExportData) -> fields.SsdlAccess:
assert isinstance(dataio.config, GlobalConfiguration)
return fields.SsdlAccess(
asset=(
dataio.config.access.asset
if isinstance(dataio.config, GlobalConfiguration)
else fields.Asset(name="")
),
asset=dataio.config.access.asset,
classification=dataio._classification,
ssdl=fields.Ssdl(
access_level=dataio._classification,
Expand Down Expand Up @@ -104,6 +101,9 @@ def generate_export_metadata(
"""

if not isinstance(dataio.config, GlobalConfiguration):
raise ValueError("Can't produce metadata when the global config is invalid")

objdata = objectdata_provider_factory(obj, dataio)

return schema.InternalObjectMetadata(
Expand All @@ -112,11 +112,7 @@ def generate_export_metadata(
source=SOURCE,
class_=objdata.classname,
fmu=_get_meta_fmu(fmudata) if fmudata else None,
masterdata=(
dataio.config.masterdata
if isinstance(dataio.config, GlobalConfiguration)
else None
),
masterdata=dataio.config.masterdata,
access=_get_meta_access(dataio),
data=objdata.get_metadata(),
file=_get_meta_filedata(dataio, obj, objdata, fmudata, compute_md5),
Expand Down
4 changes: 2 additions & 2 deletions src/fmu/dataio/_model/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ class InternalObjectMetadata(JsonSchemaMetadata):
enums.FMUClass.dictionary,
] = Field(alias="class")
fmu: Optional[InternalFMU]
masterdata: Optional[fields.Masterdata]
access: Optional[fields.SsdlAccess]
masterdata: fields.Masterdata
access: fields.SsdlAccess
data: Union[data.AnyData, InternalAnyData]
file: fields.File
display: fields.Display
Expand Down
3 changes: 3 additions & 0 deletions src/fmu/dataio/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ def export_file(
) -> str:
"""Export a valid object to file"""

# create output folder if not existing
filename.parent.mkdir(parents=True, exist_ok=True)

if filename.suffix == ".gri" and isinstance(obj, xtgeo.RegularSurface):
obj.to_file(filename, fformat="irap_binary")
elif filename.suffix == ".csv" and isinstance(obj, (xtgeo.Polygons, xtgeo.Points)):
Expand Down
62 changes: 45 additions & 17 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
from .aggregation import AggregatedData
from .case import CreateCaseMetadata
from .preprocessed import ExportPreprocessedData
from .providers._filedata import FileDataProvider
from .providers._fmu import FmuProvider, get_fmu_context_from_environment
from .providers.objectdata._provider import objectdata_provider_factory

# DATAIO_EXAMPLES: Final = dataio_examples()
INSIDE_RMS: Final = detect_inside_rms()
Expand Down Expand Up @@ -728,6 +730,16 @@ def _update_fmt_flag(self) -> None:

def _update_check_settings(self, newsettings: dict) -> None:
"""Update instance settings (properties) from other routines."""
# if no newsettings (kwargs) this routine is not needed
if not newsettings:
return

warnings.warn(
"In the future it will not be possible to enter following arguments "
f"inside the export() / generate_metadata() methods: {list(newsettings)}. "
"Please move them up to initialization of the ExportData instance.",
FutureWarning,
)
logger.info("Try new settings %s", newsettings)

# derive legal input from dataclass signature
Expand Down Expand Up @@ -801,6 +813,25 @@ def _get_fmu_provider(self) -> FmuProvider:
workflow=self.workflow,
)

def _export_without_metadata(self, obj: types.Inferrable) -> str:
    """
    Write the object to disk without producing a metadata file.

    The absolute export path is resolved by asking the FileDataProvider
    directly. The runpath is only looked up from the FMU provider when
    ``self._fmurun`` is set; otherwise ``None`` is passed on.

    Returns the value from ``export_file``, i.e. a string with the full
    path to the exported item.
    """
    self._update_fmt_flag()

    fmu_provider = None
    if self._fmurun:
        fmu_provider = self._get_fmu_provider()

    # build the object provider before reading the runpath
    objdata = objectdata_provider_factory(obj, self)
    runpath = fmu_provider.get_runpath() if fmu_provider else None

    file_provider = FileDataProvider(dataio=self, objdata=objdata, runpath=runpath)
    target = file_provider.get_metadata().absolute_path

    assert target is not None  # for mypy
    return export_file(obj, filename=target, flag=self._usefmtflag)

# ==================================================================================
# Public methods:
# ==================================================================================
Expand Down Expand Up @@ -837,14 +868,11 @@ def generate_metadata(
logger.info("Generate metadata...")
logger.info("KW args %s", kwargs)

if kwargs:
warnings.warn(
"In the future it will not be possible to enter following arguments "
f"inside the export() / generate_metadata() methods: {list(kwargs)}. "
"Please move them up to initialization of the ExportData instance.",
FutureWarning,
)
self._update_check_settings(kwargs)
if not isinstance(self.config, GlobalConfiguration):
warnings.warn("Can't produce metadata when the global config is invalid")
return {}

self._update_check_settings(kwargs)

if isinstance(obj, (str, Path)):
if self.casepath is None:
Expand Down Expand Up @@ -899,21 +927,21 @@ def export(
is_observation=self.is_observation,
).export(obj)

metadata = self.generate_metadata(obj, compute_md5=True, **kwargs)
logger.info("Object type is: %s", type(obj))

# export only the object (no metadata) if the config is not valid
if not isinstance(self.config, GlobalConfiguration):
warnings.warn("Data will be exported, but without metadata.", UserWarning)
self._update_check_settings(kwargs)
return self._export_without_metadata(obj)

metadata = self.generate_metadata(obj, compute_md5=True, **kwargs)
outfile = Path(metadata["file"]["absolute_path"])
# create output folders if they don't exist
outfile.parent.mkdir(parents=True, exist_ok=True)
metafile = outfile.parent / f".{outfile.name}.yml"

export_file(obj, outfile, flag=self._usefmtflag)
logger.info("Actual file is: %s", outfile)

if isinstance(self.config, GlobalConfiguration):
export_metadata_file(metafile, metadata, savefmt=self.meta_format)
logger.info("Metadata file is: %s", metafile)
else:
warnings.warn("Data will be exported, but without metadata.", UserWarning)

export_metadata_file(metafile, metadata, savefmt=self.meta_format)
logger.info("Metadata file is: %s", metafile)
return str(outfile)
111 changes: 98 additions & 13 deletions tests/test_units/test_dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ def test_missing_or_wrong_config_exports_with_warning(monkeypatch, tmp_path, reg
with pytest.warns(UserWarning, match="The global config"):
edata = ExportData(config={}, content="depth", name="mysurface")

meta = edata.generate_metadata(regsurf)
with pytest.warns(UserWarning, match="Can't produce metadata "):
meta = edata.generate_metadata(regsurf)
assert "masterdata" not in meta

# check that obj is created but no metadata is found
Expand All @@ -60,6 +61,93 @@ def test_missing_or_wrong_config_exports_with_warning(monkeypatch, tmp_path, reg
read_metadata(out)


def test_wrong_config_exports_correctly_ouside_fmu(
    monkeypatch, tmp_path, globalconfig1, regsurf
):
    """
    Outside an fmu run, an invalid config must not change the export path.

    With an invalid config the object is exported without metadata; the
    returned path shall equal the one obtained with a valid config.
    """
    monkeypatch.chdir(tmp_path)
    name = "mysurface"

    # invalid config: expect both the config warning and the no-metadata warning
    with pytest.warns(UserWarning, match="The global config"):
        with pytest.warns(UserWarning, match="without metadata"):
            invalid_exporter = ExportData(config={}, content="depth", name=name)
            objpath_cfg_invalid = invalid_exporter.export(regsurf)

    # valid config: same arguments, metadata is produced as normal
    valid_exporter = ExportData(config=globalconfig1, content="depth", name=name)
    objpath_cfg_valid = valid_exporter.export(regsurf)

    expected_path = tmp_path / f"share/results/maps/{name}.gri"
    assert Path(objpath_cfg_invalid) == expected_path
    assert Path(objpath_cfg_invalid).exists()
    assert Path(objpath_cfg_valid).exists()
    assert objpath_cfg_invalid == objpath_cfg_valid

    # the deprecated pattern (arguments given to export()) shall give the same path
    deprecated_exporter = None
    with pytest.warns(FutureWarning):
        deprecated_exporter = ExportData(config=globalconfig1)
        objpath_cfg_valid = deprecated_exporter.export(
            regsurf, content="depth", name=name
        )
    assert objpath_cfg_invalid == objpath_cfg_valid


def test_wrong_config_exports_correctly_in_fmu(
    monkeypatch, fmurun_w_casemetadata, globalconfig1, regsurf
):
    """
    Inside an fmu run, an invalid config must not change the export path.

    With an invalid config the object is exported without metadata; the
    returned path shall equal the one obtained with a valid config.
    """
    monkeypatch.chdir(fmurun_w_casemetadata)
    name = "mysurface"

    # invalid config: expect both the config warning and the no-metadata warning
    with pytest.warns(UserWarning, match="The global config"):
        with pytest.warns(UserWarning, match="without metadata"):
            invalid_exporter = ExportData(config={}, content="depth", name=name)
            objpath_cfg_invalid = invalid_exporter.export(regsurf)

    # valid config: same arguments, metadata is produced as normal
    valid_exporter = ExportData(config=globalconfig1, content="depth", name=name)
    objpath_cfg_valid = valid_exporter.export(regsurf)

    expected_path = fmurun_w_casemetadata / f"share/results/maps/{name}.gri"
    assert Path(objpath_cfg_invalid) == expected_path
    assert Path(objpath_cfg_invalid).exists()
    assert Path(objpath_cfg_valid).exists()
    assert objpath_cfg_invalid == objpath_cfg_valid

    # the deprecated pattern (arguments given to export()) shall give the same path
    deprecated_exporter = None
    with pytest.warns(FutureWarning):
        deprecated_exporter = ExportData(config=globalconfig1)
        objpath_cfg_valid = deprecated_exporter.export(
            regsurf, content="depth", name=name
        )
    assert objpath_cfg_invalid == objpath_cfg_valid


def test_config_miss_required_fields(monkeypatch, tmp_path, globalconfig1, regsurf):
"""Global config exists but missing critical data; export file but skip metadata."""

Expand Down Expand Up @@ -156,7 +244,7 @@ def test_update_check_settings_shall_fail(globalconfig1):

newsettings = {"invalidkey": "some"}
some = ExportData(config=globalconfig1, content="depth")
with pytest.raises(KeyError):
with pytest.warns(FutureWarning), pytest.raises(KeyError):
some._update_check_settings(newsettings)


Expand Down Expand Up @@ -282,12 +370,6 @@ def test_classification(globalconfig1, regsurf):
mymeta = exp.generate_metadata(regsurf)
assert mymeta["access"]["classification"] == "restricted"

# verify that classification is defaulted to internal
with pytest.warns(UserWarning):
exp = ExportData(config={}, content="depth")
mymeta = exp.generate_metadata(regsurf)
assert mymeta["access"]["classification"] == "internal"


def test_rep_include(globalconfig1, regsurf):
"""Test that 'classification' is set correctly."""
Expand All @@ -308,16 +390,19 @@ def test_rep_include(globalconfig1, regsurf):
mymeta = exp.generate_metadata(regsurf)
assert mymeta["access"]["ssdl"]["rep_include"] is True

# test that rep_include is taken from config if not provided
# test that rep_include is defaulted to false if not provided
exp = ExportData(config=globalconfig1, content="depth")
mymeta = exp.generate_metadata(regsurf)
assert mymeta["access"]["ssdl"]["rep_include"] is False

# test that rep_include is defaulted False
with pytest.warns(UserWarning):
exp = ExportData(config={}, content="depth")
# add ssdl.rep_include to the config
config = deepcopy(globalconfig1)
config["access"]["ssdl"] = {"rep_include": True}

# test that rep_include can be read from config
exp = ExportData(config=config, content="depth")
mymeta = exp.generate_metadata(regsurf)
assert mymeta["access"]["ssdl"]["rep_include"] is False
assert mymeta["access"]["ssdl"]["rep_include"] is True


def test_unit_is_none(globalconfig1, regsurf):
Expand Down
14 changes: 6 additions & 8 deletions tests/test_units/test_metadata_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ def test_metadata_populate_masterdata_is_empty(globalconfig1, regsurf):

assert not some.config

mymeta = generate_export_metadata(regsurf, some)
assert "masterdata" not in mymeta
with pytest.raises(ValueError, match="Can't produce metadata"):
generate_export_metadata(regsurf, some)


def test_metadata_populate_masterdata_is_present_ok(edataobj1, edataobj2, regsurf):
Expand All @@ -207,9 +207,8 @@ def test_metadata_populate_access_miss_cfg_access(globalconfig1, regsurf):
edata = dio.ExportData(config=cfg1_edited, content="depth")
assert not edata.config

mymeta = generate_export_metadata(regsurf, edata)
# check that the default "internal" is used
assert mymeta.access.classification == "internal"
with pytest.raises(ValueError, match="Can't produce metadata"):
generate_export_metadata(regsurf, edata)


def test_metadata_populate_access_ok_config(edataobj2, regsurf):
Expand Down Expand Up @@ -283,9 +282,8 @@ def test_metadata_populate_wrong_config(globalconfig1, regsurf):

assert not edata.config

# use default 'internal' if wrong in config
meta = generate_export_metadata(regsurf, edata)
assert meta.access.classification == "internal"
with pytest.raises(ValueError, match="Can't produce metadata"):
generate_export_metadata(regsurf, edata)


def test_metadata_populate_wrong_argument(globalconfig1):
Expand Down
5 changes: 1 addition & 4 deletions tests/test_units/test_preprocessed.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,7 @@ def test_export_preprocessed_file_exportdata_casepath_on_export(
edata = dataio.ExportData(config=rmsglobalconfig, is_observation=True)

# test that error is thrown when missing casepath
# (UserWarning initially in ExportData)
with pytest.warns(UserWarning, match="case metadata"), pytest.raises(
TypeError, match="No 'casepath' argument provided"
):
with pytest.raises(TypeError, match="No 'casepath' argument provided"):
edata.export(surfacepath)

# test that export() works if casepath is provided
Expand Down

0 comments on commit e83d1fb

Please sign in to comment.