Skip to content

Commit

Permalink
DEP: Always compute checksum_md5 when generating metadata (#844)
Browse files Browse the repository at this point in the history
  • Loading branch information
tnatt committed Oct 9, 2024
1 parent acb2f25 commit 6e79086
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 35 deletions.
2 changes: 2 additions & 0 deletions docs/src/dataio_3_migration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ Change to this instead 👇:
Additionally

- The ``return_symlink`` argument to ``export()`` is deprecated. It is redundant and can be removed.
- The ``compute_md5`` argument to ``generate_metadata()`` is deprecated and can be removed, as
an MD5 checksum is now always computed.


Getting partial metadata from generate_metadata() when config is invalid
Expand Down
11 changes: 2 additions & 9 deletions schema/definitions/0.8.0/schema/fmu_results.json
Original file line number Diff line number Diff line change
Expand Up @@ -2859,18 +2859,11 @@
"title": "Absolute Path Symlink"
},
"checksum_md5": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"examples": [
"kjhsdfvsdlfk23knerknvk23"
],
"title": "Checksum Md5"
"title": "Checksum Md5",
"type": "string"
},
"relative_path": {
"examples": [
Expand Down
5 changes: 1 addition & 4 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,13 @@ def _get_meta_filedata(
obj: types.Inferrable,
objdata: ObjectDataProvider,
fmudata: FmuProvider | None,
compute_md5: bool,
) -> fields.File:
"""Derive metadata for the file."""
return FileDataProvider(
dataio=dataio,
objdata=objdata,
runpath=fmudata.get_runpath() if fmudata else None,
obj=obj,
compute_md5=compute_md5,
).get_metadata()


Expand Down Expand Up @@ -75,7 +73,6 @@ def generate_export_metadata(
obj: types.Inferrable,
dataio: ExportData,
fmudata: FmuProvider | None = None,
compute_md5: bool = True,
) -> schema.InternalObjectMetadata:
"""
Main function to generate the full metadata
Expand Down Expand Up @@ -119,7 +116,7 @@ def generate_export_metadata(
),
access=_get_meta_access(dataio),
data=objdata.get_metadata(),
file=_get_meta_filedata(dataio, obj, objdata, fmudata, compute_md5),
file=_get_meta_filedata(dataio, obj, objdata, fmudata),
tracklog=fields.Tracklog.initialize(),
display=_get_meta_display(dataio, objdata),
preprocessed=dataio.preprocessed,
Expand Down
2 changes: 1 addition & 1 deletion src/fmu/dataio/_model/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class File(BaseModel):
)
"""The path of a file relative to the case root."""

checksum_md5: Optional[str] = Field(examples=["kjhsdfvsdlfk23knerknvk23"])
checksum_md5: str = Field(examples=["kjhsdfvsdlfk23knerknvk23"])
"""A valid MD5 checksum of the file."""

size_bytes: Optional[int] = Field(default=None)
Expand Down
22 changes: 14 additions & 8 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,7 @@ def _export_without_metadata(self, obj: types.Inferrable) -> str:
filemeta = FileDataProvider(
dataio=self,
objdata=objdata,
obj=obj,
runpath=fmudata.get_runpath() if fmudata else None,
).get_metadata()

Expand All @@ -842,17 +843,12 @@ def generate_metadata(
Args:
obj: XTGeo instance, a Pandas Dataframe instance or other supported object.
compute_md5: If True, compute a MD5 checksum for the exported file.
compute_md5: Deprecated, an MD5 checksum will always be computed.
**kwargs: Using other ExportData() input keys is now deprecated, input the
arguments when initializing the ExportData() instance instead.
Returns:
A dictionary with all metadata.
Note:
If the ``compute_md5`` key is False, the ``file.checksum_md5`` will be
empty. If true, the MD5 checksum will be generated based on export to
a temporary file, which may be time-consuming if the file is large.
"""

logger.info("Generate metadata...")
Expand All @@ -865,6 +861,14 @@ def generate_metadata(
FutureWarning,
)

if not compute_md5:
warnings.warn(
"Using the 'compute_md5=False' option to prevent an MD5 checksum "
"from being computed is now deprecated. This option has no longer "
"an effect and will be removed in the near future.",
UserWarning,
)

self._update_check_settings(kwargs)

if isinstance(obj, (str, Path)):
Expand All @@ -879,7 +883,9 @@ def generate_metadata(
fmudata = self._get_fmu_provider() if self._fmurun else None

return generate_export_metadata(
obj, self, fmudata, compute_md5=compute_md5
obj=obj,
dataio=self,
fmudata=fmudata,
).model_dump(mode="json", exclude_none=True, by_alias=True)

def export(
Expand Down Expand Up @@ -927,7 +933,7 @@ def export(
self._update_check_settings(kwargs)
return self._export_without_metadata(obj)

metadata = self.generate_metadata(obj, compute_md5=True, **kwargs)
metadata = self.generate_metadata(obj, **kwargs)
outfile = Path(metadata["file"]["absolute_path"])
metafile = outfile.parent / f".{outfile.name}.yml"

Expand Down
7 changes: 3 additions & 4 deletions src/fmu/dataio/providers/_filedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING, Final, Optional
from typing import TYPE_CHECKING, Final

from fmu.dataio._logging import null_logger
from fmu.dataio._model import enums, fields
Expand Down Expand Up @@ -49,9 +49,8 @@ class FileDataProvider(Provider):
# input
dataio: ExportData
objdata: ObjectDataProvider
obj: types.Inferrable
runpath: Path | None = None
obj: Optional[types.Inferrable] = None
compute_md5: bool = False

@property
def name(self) -> str:
Expand Down Expand Up @@ -88,7 +87,7 @@ def get_metadata(self) -> fields.File:
return fields.File(
absolute_path=absolute_path.resolve(),
relative_path=relative_path,
checksum_md5=self._compute_md5() if self.compute_md5 else None,
checksum_md5=self._compute_md5(),
)

def _get_share_folders(self) -> Path:
Expand Down
15 changes: 6 additions & 9 deletions tests/test_units/test_filedataprovider_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,7 @@ def test_get_filestem(
edataobj1.parent = parentname
edataobj1.name = ""

fdata = FileDataProvider(
edataobj1,
objdata,
)
fdata = FileDataProvider(edataobj1, objdata, regsurf)

stem = fdata._get_filestem()
assert stem == expected
Expand Down Expand Up @@ -163,7 +160,7 @@ def test_get_filestem_shall_fail(
edataobj1.parent = parentname
edataobj1.name = ""

fdata = FileDataProvider(edataobj1, objdata)
fdata = FileDataProvider(edataobj1, objdata, regsurf)

with pytest.raises(ValueError) as msg:
_ = fdata._get_filestem()
Expand All @@ -178,7 +175,7 @@ def test_get_share_folders(regsurf, globalconfig2):
objdata = objectdata_provider_factory(regsurf, edataobj1)
objdata.name = "some"

fdata = FileDataProvider(edataobj1, objdata)
fdata = FileDataProvider(edataobj1, objdata, regsurf)
share_folders = fdata._get_share_folders()
assert isinstance(share_folders, Path)
assert share_folders == Path(f"share/results/{ExportFolder.maps.value}")
Expand All @@ -200,7 +197,7 @@ def test_get_share_folders_with_subfolder(regsurf, globalconfig2):
objdata = objectdata_provider_factory(regsurf, edataobj1)
objdata.name = "some"

fdata = FileDataProvider(edataobj1, objdata)
fdata = FileDataProvider(edataobj1, objdata, regsurf)
share_folders = fdata._get_share_folders()
assert share_folders == Path("share/results/maps/sub")

Expand Down Expand Up @@ -229,7 +226,7 @@ def test_filedata_provider(regsurf, tmp_path, globalconfig2):
objdata.time0 = datetime.strptime(t1, "%Y%m%d")
objdata.time1 = datetime.strptime(t2, "%Y%m%d")

fdata = FileDataProvider(cfg, objdata)
fdata = FileDataProvider(cfg, objdata, regsurf)
filemeta = fdata.get_metadata()

assert isinstance(filemeta, fields.File)
Expand All @@ -250,6 +247,6 @@ def test_filedata_has_nonascii_letters(regsurf, tmp_path, globalconfig2):
objdata = objectdata_provider_factory(regsurf, edataobj1)
objdata.name = "anynõme"

fdata = FileDataProvider(edataobj1, objdata)
fdata = FileDataProvider(edataobj1, objdata, regsurf)
with pytest.raises(ValueError, match="Path has non-ascii elements"):
fdata.get_metadata()

0 comments on commit 6e79086

Please sign in to comment.