support extra FP outputs; add post_command for Gaussian (#256)

## Summary by CodeRabbit

- **New Features**
  - Introduced a new optional argument for additional output files in multiple functions, enhancing flexibility in file handling.
  - Added the capability to execute a post-command after the main task in the Gaussian workflow.
  - Enhanced output structures across various components to accommodate additional output data.
- **Bug Fixes**
  - Improved the handling of extra output files in various methods to ensure they are properly returned and logged.
- **Documentation**
  - Updated documentation to clarify the purpose and usage of new arguments related to extra output files and post-commands.

---------

Signed-off-by: zjgemi <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
deepmodeling · Aug 27, 2024 · 643e889 · 643e889
1 parent 899a76f
commit 643e889
Show file tree

Hide file tree

Showing 13 changed files with 83 additions and 5 deletions.
diff --git a/dpgen2/entrypoint/args.py b/dpgen2/entrypoint/args.py
@@ -459,6 +459,7 @@ def fp_args(inputs, run):
     doc_inputs_config = "Configuration for preparing vasp inputs"
     doc_run_config = "Configuration for running vasp tasks"
     doc_task_max = "Maximum number of vasp tasks for each iteration"
+    doc_extra_output_files = "Extra output file names, support wildcards"
 
     return [
         Argument(
@@ -476,6 +477,13 @@ def fp_args(inputs, run):
             doc=doc_run_config,
         ),
         Argument("task_max", int, optional=True, default=10, doc=doc_task_max),
+        Argument(
+            "extra_output_files",
+            list,
+            optional=True,
+            default=[],
+            doc=doc_extra_output_files,
+        ),
     ]
 
 

diff --git a/dpgen2/entrypoint/submit.py b/dpgen2/entrypoint/submit.py
@@ -564,6 +564,7 @@ def workflow_concurrent_learning(
 
     fp_config["inputs"] = fp_inputs
     fp_config["run"] = config["fp"]["run_config"]
+    fp_config["extra_output_files"] = config["fp"]["extra_output_files"]
     if fp_style == "deepmd":
         assert (
             "teacher_model_path" in fp_config["run"]

diff --git a/dpgen2/fp/abacus.py b/dpgen2/fp/abacus.py
@@ -166,6 +166,7 @@ def get_output_sign(cls):
             {
                 "log": Artifact(Path),
                 "labeled_data": Artifact(Path),
+                "extra_outputs": Artifact(List[Path]),
             }
         )
 
@@ -202,10 +203,15 @@ def execute(
         out_name = fp_default_out_data_name
         sys.to("deepmd/npy", workdir / out_name)
 
+        extra_outputs = []
+        for fname in ip["config"]["extra_output_files"]:
+            extra_outputs += list(workdir.glob(fname))
+
         return OPIO(
             {
                 "log": workdir / "log",
                 "labeled_data": workdir / out_name,
+                "extra_outputs": extra_outputs,
             }
         )
 

diff --git a/dpgen2/fp/cp2k.py b/dpgen2/fp/cp2k.py
@@ -126,6 +126,7 @@ def get_output_sign(cls):
             {
                 "log": Artifact(Path),
                 "labeled_data": Artifact(Path),
+                "extra_outputs": Artifact(List[Path]),
             }
         )
 
@@ -170,10 +171,15 @@ def execute(
         out_name = fp_default_out_data_name
         sys.to("deepmd/npy", workdir / out_name)
 
+        extra_outputs = []
+        for fname in ip["config"]["extra_output_files"]:
+            extra_outputs += list(workdir.glob(fname))
+
         return OPIO(
             {
                 "log": workdir / "output.log",
                 "labeled_data": workdir / out_name,
+                "extra_outputs": extra_outputs,
             }
         )
 

diff --git a/dpgen2/fp/gaussian.py b/dpgen2/fp/gaussian.py
@@ -3,6 +3,7 @@
 from typing import (
     Any,
     List,
+    Optional,
     Tuple,
 )
 
@@ -134,6 +135,7 @@ def run_task(
         self,
         command: str,
         out: str,
+        post_command: Optional[str] = None,
     ) -> Tuple[str, str]:
         r"""Defines how one FP task runs
 
@@ -170,6 +172,23 @@ def run_task(
                 )
             )
             raise TransientError("gaussian failed")
+        if post_command is not None:
+            ret, out, err = run_command(post_command, shell=True)
+            if ret != 0:
+                logging.error(
+                    "".join(
+                        (
+                            "gaussian postprocessing failed\n",
+                            "out msg: ",
+                            out,
+                            "\n",
+                            "err msg: ",
+                            err,
+                            "\n",
+                        )
+                    )
+                )
+                raise TransientError("gaussian postprocessing failed")
         # convert the output to deepmd/npy format
         sys = dpdata.LabeledSystem(gaussian_output_name, fmt="gaussian/log")
         sys.to("deepmd/npy", out_name)
@@ -187,6 +206,7 @@ def args() -> List[dargs.Argument]:
 
         doc_gaussian_cmd = "The command of Gaussian"
         doc_gaussian_out = "The output dir name of labeled data. In `deepmd/npy` format provided by `dpdata`."
+        doc_post_command = "The command after Gaussian"
         return [
             Argument(
                 "command", str, optional=True, default="g16", doc=doc_gaussian_cmd
@@ -198,4 +218,7 @@ def args() -> List[dargs.Argument]:
                 default=fp_default_out_data_name,
                 doc=doc_gaussian_out,
             ),
+            Argument(
+                "post_command", str, optional=True, default=None, doc=doc_post_command
+            ),
         ]
diff --git a/dpgen2/fp/run_fp.py b/dpgen2/fp/run_fp.py
@@ -58,6 +58,7 @@ def get_output_sign(cls):
             {
                 "log": Artifact(Path),
                 "labeled_data": Artifact(Path),
+                "extra_outputs": Artifact(List[Path]),
             }
         )
 
@@ -196,9 +197,14 @@ def execute(
                     Path(iname).symlink_to(ii)
             out_name, log_name = self.run_task(**config)
 
+        extra_outputs = []
+        for fname in ip["config"]["extra_output_files"]:
+            extra_outputs += list(work_dir.glob(fname))
+
         return OPIO(
             {
                 "log": work_dir / log_name,
                 "labeled_data": work_dir / out_name,
+                "extra_outputs": extra_outputs,
             }
         )
diff --git a/dpgen2/superop/prep_run_fp.py b/dpgen2/superop/prep_run_fp.py
@@ -70,6 +70,7 @@ def __init__(
         self._output_artifacts = {
             "logs": OutputArtifact(),
             "labeled_data": OutputArtifact(),
+            "extra_outputs": OutputArtifact(),
         }
 
         super().__init__(
@@ -170,7 +171,7 @@ def _prep_run_fp(
                 "int('{{item}}')",
                 input_parameter=["task_name"],
                 input_artifact=["task_path"],
-                output_artifact=["log", "labeled_data"],
+                output_artifact=["log", "labeled_data", "extra_outputs"],
                 **template_slice_config,
             ),
             python_packages=upload_python_packages,
@@ -200,5 +201,8 @@ def _prep_run_fp(
     prep_run_steps.outputs.artifacts["labeled_data"]._from = run_fp.outputs.artifacts[
         "labeled_data"
     ]
+    prep_run_steps.outputs.artifacts["extra_outputs"]._from = run_fp.outputs.artifacts[
+        "extra_outputs"
+    ]
 
     return prep_run_steps
diff --git a/dpgen2/utils/download_dpgen2_artifacts.py b/dpgen2/utils/download_dpgen2_artifacts.py
@@ -67,7 +67,8 @@ def add_output(
     "prep-run-fp": DownloadDefinition()
     .add_input("confs")
     .add_output("logs")
-    .add_output("labeled_data"),
+    .add_output("labeled_data")
+    .add_output("extra_outputs"),
     "collect-data": DownloadDefinition().add_output("iter_data"),
 }
 

diff --git a/tests/fp/test_abacus.py b/tests/fp/test_abacus.py
@@ -43,6 +43,7 @@ def test_abacus(self):
                 "command": "cp -r %s OUT.ABACUS && cat %s"
                 % (data_path / "OUT.ABACUS", data_path / "log"),
             },
+            "extra_output_files": [],
         }
         confs = [data_path / "sys-2"]
         type_map = ["Na"]

diff --git a/tests/fp/test_cp2k.py b/tests/fp/test_cp2k.py
@@ -48,6 +48,7 @@ def test_cp2k(self):
                 "command": "cp -r %s output.log && cat %s"
                 % (data_path / "output.log", data_path / "output.log"),
             },
+            "extra_output_files": [],
         }
         confs = [data_path / "sys-3"]
         type_map = ["Na"]

diff --git a/tests/fp/test_run_vasp.py b/tests/fp/test_run_vasp.py
@@ -81,7 +81,8 @@ def new_init(obj, foo):
                                     "command": "myvasp",
                                     "log": "foo.log",
                                     "out": "data",
-                                }
+                                },
+                                "extra_output_files": [],
                             },
                             "task_name": self.task_name,
                             "task_path": self.task_path,
@@ -128,7 +129,8 @@ def new_init(obj, foo):
                             "config": {
                                 "run": {
                                     "command": "myvasp",
-                                }
+                                },
+                                "extra_output_files": [],
                             },
                             "task_name": self.task_name,
                             "task_path": self.task_path,
@@ -163,7 +165,8 @@ def test_error(self, mocked_run):
                         "config": {
                             "run": {
                                 "command": "myvasp",
-                            }
+                            },
+                            "extra_output_files": [],
                         },
                         "task_name": self.task_name,
                         "task_path": self.task_path,

diff --git a/tests/mocked_ops.py b/tests/mocked_ops.py
@@ -521,6 +521,7 @@ def execute(
             {
                 "log": work_dir / log,
                 "labeled_data": work_dir / labeled_data,
+                "extra_outputs": [],
             }
         )
 
@@ -578,6 +579,7 @@ def execute(
             {
                 "log": work_dir / log,
                 "labeled_data": work_dir / labeled_data,
+                "extra_outputs": [],
             }
         )
 
@@ -633,6 +635,7 @@ def execute(
             {
                 "log": work_dir / log,
                 "labeled_data": work_dir / labeled_data,
+                "extra_outputs": [],
             }
         )
 

diff --git a/tests/utils/test_dl_dpgen2_arti.py b/tests/utils/test_dl_dpgen2_arti.py
@@ -144,6 +144,11 @@ def test_fp_download(self, mocked_dl):
                 path=Path("iter-000001/prep-run-fp/outputs"),
                 skip_exists=True,
             ),
+            mock.call(
+                "arti-extra_outputs",
+                path=Path("iter-000001/prep-run-fp/outputs"),
+                skip_exists=True,
+            ),
         ]
         self.assertEqual(len(mocked_dl.call_args_list), len(expected))
         for ii, jj in zip(mocked_dl.call_args_list, expected):
@@ -174,6 +179,11 @@ def test_fp_download_chkpnt(self, mocked_dl):
                 path=Path("iter-000001/prep-run-fp/outputs"),
                 skip_exists=True,
             ),
+            mock.call(
+                "arti-extra_outputs",
+                path=Path("iter-000001/prep-run-fp/outputs"),
+                skip_exists=True,
+            ),
         ]
         self.assertEqual(len(mocked_dl.call_args_list), len(expected))
         for ii, jj in zip(mocked_dl.call_args_list, expected):
@@ -200,6 +210,11 @@ def test_fp_download_chkpnt(self, mocked_dl):
                 path=Path("iter-000001/prep-run-fp/outputs"),
                 skip_exists=True,
             ),
+            mock.call(
+                "arti-extra_outputs",
+                path=Path("iter-000001/prep-run-fp/outputs"),
+                skip_exists=True,
+            ),
         ]
         self.assertEqual(len(mocked_dl.call_args_list), len(expected))
         for ii, jj in zip(mocked_dl.call_args_list, expected):