From 586fc6d3aae7390ee26e74cc065e383234b4edb8 Mon Sep 17 00:00:00 2001 From: JB Lovland Date: Wed, 10 Jan 2024 13:41:01 +0100 Subject: [PATCH] Simplify export_file_compute_checksum_md5 function --- src/fmu/dataio/_metadata.py | 18 +++++++++--------- src/fmu/dataio/_utils.py | 21 ++++----------------- src/fmu/dataio/dataio.py | 15 +++++---------- 3 files changed, 18 insertions(+), 36 deletions(-) diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index 39857ca84..709f4b3c4 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -12,6 +12,7 @@ from dataclasses import dataclass, field from datetime import timezone from pathlib import Path +from tempfile import NamedTemporaryFile from typing import Any, Final from warnings import warn @@ -305,15 +306,14 @@ def _populate_meta_file(self) -> None: self.meta_file["absolute_path_symlink"] = fdata.absolute_path_symlink if self.compute_md5: - logger.info("Compute MD5 sum for tmp file...") - _, self.meta_file["checksum_md5"] = export_file_compute_checksum_md5( - self.obj, - "tmp", # type: ignore - # tmp = true given, this arg is not needed. - self.objdata.extension, - tmp=True, - flag=self.dataio._usefmtflag, - ) + with NamedTemporaryFile(buffering=0) as tf: + logger.info("Compute MD5 sum for tmp file...: %s", tf.name) + self.meta_file["checksum_md5"] = export_file_compute_checksum_md5( + self.obj, + Path(tf.name), + self.objdata.extension, + flag=self.dataio._usefmtflag, + ) else: logger.info("Do not compute MD5 sum at this stage!") self.meta_file["checksum_md5"] = None diff --git a/src/fmu/dataio/_utils.py b/src/fmu/dataio/_utils.py index 883147c7c..81fdd4642 100644 --- a/src/fmu/dataio/_utils.py +++ b/src/fmu/dataio/_utils.py @@ -7,7 +7,6 @@ import logging import os import shutil -import tempfile import uuid import warnings from copy import deepcopy @@ -174,22 +173,10 @@ def export_file_compute_checksum_md5( filename: Path, extension: str, flag: str | None = None, - tmp: bool = False, -) -> tuple[Path | None, str]: - """Export and compute checksum, with possibility to use a tmp file.""" - - usefile: Path | None = filename - if tmp: - tmpdir = tempfile.TemporaryDirectory() - usefile = Path(tmpdir.name) / "tmpfile" - - assert usefile is not None - export_file(obj, usefile, extension, flag=flag) - checksum = md5sum(usefile) - if tmp: - tmpdir.cleanup() - usefile = None - return usefile, checksum +) -> str: + """Export and compute checksum""" + export_file(obj, filename, extension, flag=flag) + return md5sum(filename) def create_symlink(source: str, target: str) -> None: diff --git a/src/fmu/dataio/dataio.py b/src/fmu/dataio/dataio.py index 00f19d2af..55b323968 100644 --- a/src/fmu/dataio/dataio.py +++ b/src/fmu/dataio/dataio.py @@ -906,17 +906,14 @@ def export( obj = self._check_obj_if_file(obj) logger.info("Export to file and compute MD5 sum, using flag: <%s>", useflag) - toutfile, md5 = export_file_compute_checksum_md5( + # inject md5 checksum in metadata + metadata["file"]["checksum_md5"] = export_file_compute_checksum_md5( obj, outfile, outfile.suffix, flag=useflag, # type: ignore # BUG(?): Looks buggy, if flag is bool export_file will blow up. ) - assert toutfile is not None - outfile = toutfile - # inject md5 checksum in metadata - metadata["file"]["checksum_md5"] = md5 logger.info("Actual file is: %s", outfile) if self._config_is_valid: @@ -1511,12 +1508,10 @@ def export(self, obj: object, **kwargs: object) -> str: metafile = outfile.parent / ("." + str(outfile.name) + ".yml") logger.info("Export to file and compute MD5 sum") - toutfile, md5 = export_file_compute_checksum_md5(obj, outfile, outfile.suffix) - assert toutfile is not None - outfile = Path(toutfile) - # inject the computed md5 checksum in metadata - metadata["file"]["checksum_md5"] = md5 + metadata["file"]["checksum_md5"] = export_file_compute_checksum_md5( + obj, outfile, outfile.suffix + ) export_metadata_file(metafile, metadata, savefmt=self.meta_format) logger.info("Actual file is: %s", outfile)