diff --git a/docs/src/dataio_3_migration.rst b/docs/src/dataio_3_migration.rst index f95fa2a6a..7f0fd2bbb 100644 --- a/docs/src/dataio_3_migration.rst +++ b/docs/src/dataio_3_migration.rst @@ -195,6 +195,8 @@ Change to this instead 👇: Additionally - The ``return_symlink`` argument to ``export()`` is deprecated. It is redundant and can be removed. + - The ``compute_md5`` argument to ``generate_metadata()`` is deprecated and can be removed, as + an MD5 checksum is always computed by default. Getting partial metadata from generate_metadata() when config is invalid diff --git a/schema/definitions/0.8.0/schema/fmu_results.json b/schema/definitions/0.8.0/schema/fmu_results.json index 2836bdc83..37bf94938 100644 --- a/schema/definitions/0.8.0/schema/fmu_results.json +++ b/schema/definitions/0.8.0/schema/fmu_results.json @@ -2859,18 +2859,11 @@ "title": "Absolute Path Symlink" }, "checksum_md5": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], "examples": [ "kjhsdfvsdlfk23knerknvk23" ], - "title": "Checksum Md5" + "title": "Checksum Md5", + "type": "string" }, "relative_path": { "examples": [ diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index b2439e93e..1082b24d5 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -31,7 +31,6 @@ def _get_meta_filedata( obj: types.Inferrable, objdata: ObjectDataProvider, fmudata: FmuProvider | None, - compute_md5: bool, ) -> fields.File: """Derive metadata for the file.""" return FileDataProvider( @@ -39,7 +38,6 @@ def _get_meta_filedata( objdata=objdata, runpath=fmudata.get_runpath() if fmudata else None, obj=obj, - compute_md5=compute_md5, ).get_metadata() @@ -75,7 +73,6 @@ def generate_export_metadata( obj: types.Inferrable, dataio: ExportData, fmudata: FmuProvider | None = None, - compute_md5: bool = True, ) -> schema.InternalObjectMetadata: """ Main function to generate the full metadata @@ -119,7 +116,7 @@ def generate_export_metadata( ), 
access=_get_meta_access(dataio), data=objdata.get_metadata(), - file=_get_meta_filedata(dataio, obj, objdata, fmudata, compute_md5), + file=_get_meta_filedata(dataio, obj, objdata, fmudata), tracklog=fields.Tracklog.initialize(), display=_get_meta_display(dataio, objdata), preprocessed=dataio.preprocessed, diff --git a/src/fmu/dataio/_model/fields.py b/src/fmu/dataio/_model/fields.py index 3996135ad..14d4be03e 100644 --- a/src/fmu/dataio/_model/fields.py +++ b/src/fmu/dataio/_model/fields.py @@ -103,7 +103,7 @@ class File(BaseModel): ) """The path of a file relative to the case root.""" - checksum_md5: Optional[str] = Field(examples=["kjhsdfvsdlfk23knerknvk23"]) + checksum_md5: str = Field(examples=["kjhsdfvsdlfk23knerknvk23"]) """A valid MD5 checksum of the file.""" size_bytes: Optional[int] = Field(default=None) diff --git a/src/fmu/dataio/dataio.py b/src/fmu/dataio/dataio.py index 10908b544..7212d70ac 100644 --- a/src/fmu/dataio/dataio.py +++ b/src/fmu/dataio/dataio.py @@ -816,6 +816,7 @@ def _export_without_metadata(self, obj: types.Inferrable) -> str: filemeta = FileDataProvider( dataio=self, objdata=objdata, + obj=obj, runpath=fmudata.get_runpath() if fmudata else None, ).get_metadata() @@ -842,17 +843,12 @@ def generate_metadata( Args: obj: XTGeo instance, a Pandas Dataframe instance or other supported object. - compute_md5: If True, compute a MD5 checksum for the exported file. + compute_md5: Deprecated, an MD5 checksum will always be computed. **kwargs: Using other ExportData() input keys is now deprecated, input the arguments when initializing the ExportData() instance instead. Returns: A dictionary with all metadata. - - Note: - If the ``compute_md5`` key is False, the ``file.checksum_md5`` will be - empty. If true, the MD5 checksum will be generated based on export to - a temporary file, which may be time-consuming if the file is large. 
""" logger.info("Generate metadata...") @@ -865,6 +861,14 @@ def generate_metadata( FutureWarning, ) + if not compute_md5: + warnings.warn( + "Using the 'compute_md5=False' option to prevent an MD5 checksum " + "from being computed is now deprecated. This option has no longer " + "an effect and will be removed in the near future.", + UserWarning, + ) + self._update_check_settings(kwargs) if isinstance(obj, (str, Path)): @@ -879,7 +883,9 @@ def generate_metadata( fmudata = self._get_fmu_provider() if self._fmurun else None return generate_export_metadata( - obj, self, fmudata, compute_md5=compute_md5 + obj=obj, + dataio=self, + fmudata=fmudata, ).model_dump(mode="json", exclude_none=True, by_alias=True) def export( @@ -927,7 +933,7 @@ def export( self._update_check_settings(kwargs) return self._export_without_metadata(obj) - metadata = self.generate_metadata(obj, compute_md5=True, **kwargs) + metadata = self.generate_metadata(obj, **kwargs) outfile = Path(metadata["file"]["absolute_path"]) metafile = outfile.parent / f".{outfile.name}.yml" diff --git a/src/fmu/dataio/providers/_filedata.py b/src/fmu/dataio/providers/_filedata.py index e6e45d95b..ccaaeace7 100644 --- a/src/fmu/dataio/providers/_filedata.py +++ b/src/fmu/dataio/providers/_filedata.py @@ -11,7 +11,7 @@ from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import TYPE_CHECKING, Final, Optional +from typing import TYPE_CHECKING, Final from fmu.dataio._logging import null_logger from fmu.dataio._model import enums, fields @@ -49,9 +49,8 @@ class FileDataProvider(Provider): # input dataio: ExportData objdata: ObjectDataProvider + obj: types.Inferrable runpath: Path | None = None - obj: Optional[types.Inferrable] = None - compute_md5: bool = False @property def name(self) -> str: @@ -88,7 +87,7 @@ def get_metadata(self) -> fields.File: return fields.File( absolute_path=absolute_path.resolve(), relative_path=relative_path, - checksum_md5=self._compute_md5() if 
self.compute_md5 else None, + checksum_md5=self._compute_md5(), ) def _get_share_folders(self) -> Path: diff --git a/tests/test_units/test_filedataprovider_class.py b/tests/test_units/test_filedataprovider_class.py index 96cf7d725..eaee88286 100644 --- a/tests/test_units/test_filedataprovider_class.py +++ b/tests/test_units/test_filedataprovider_class.py @@ -112,10 +112,7 @@ def test_get_filestem( edataobj1.parent = parentname edataobj1.name = "" - fdata = FileDataProvider( - edataobj1, - objdata, - ) + fdata = FileDataProvider(edataobj1, objdata, regsurf) stem = fdata._get_filestem() assert stem == expected @@ -163,7 +160,7 @@ def test_get_filestem_shall_fail( edataobj1.parent = parentname edataobj1.name = "" - fdata = FileDataProvider(edataobj1, objdata) + fdata = FileDataProvider(edataobj1, objdata, regsurf) with pytest.raises(ValueError) as msg: _ = fdata._get_filestem() @@ -178,7 +175,7 @@ def test_get_share_folders(regsurf, globalconfig2): objdata = objectdata_provider_factory(regsurf, edataobj1) objdata.name = "some" - fdata = FileDataProvider(edataobj1, objdata) + fdata = FileDataProvider(edataobj1, objdata, regsurf) share_folders = fdata._get_share_folders() assert isinstance(share_folders, Path) assert share_folders == Path(f"share/results/{ExportFolder.maps.value}") @@ -200,7 +197,7 @@ def test_get_share_folders_with_subfolder(regsurf, globalconfig2): objdata = objectdata_provider_factory(regsurf, edataobj1) objdata.name = "some" - fdata = FileDataProvider(edataobj1, objdata) + fdata = FileDataProvider(edataobj1, objdata, regsurf) share_folders = fdata._get_share_folders() assert share_folders == Path("share/results/maps/sub") @@ -229,7 +226,7 @@ def test_filedata_provider(regsurf, tmp_path, globalconfig2): objdata.time0 = datetime.strptime(t1, "%Y%m%d") objdata.time1 = datetime.strptime(t2, "%Y%m%d") - fdata = FileDataProvider(cfg, objdata) + fdata = FileDataProvider(cfg, objdata, regsurf) filemeta = fdata.get_metadata() assert isinstance(filemeta, 
fields.File) @@ -250,6 +247,6 @@ def test_filedata_has_nonascii_letters(regsurf, tmp_path, globalconfig2): objdata = objectdata_provider_factory(regsurf, edataobj1) objdata.name = "anynõme" - fdata = FileDataProvider(edataobj1, objdata) + fdata = FileDataProvider(edataobj1, objdata, regsurf) with pytest.raises(ValueError, match="Path has non-ascii elements"): fdata.get_metadata()