Skip to content

Commit

Permalink
Add HDF5 support for trajs and model_devis (#259)
Browse files Browse the repository at this point in the history
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
	- Introduced new optional arguments for improved data handling and multitasking capabilities.
	- Added support for HDF5 formatted data in various modules.
	- Enhanced flexibility in input handling for multiple data formats.

- **Bug Fixes**
	- Improved robustness in handling validation data structures.

- **Documentation**
	- Updated documentation to clarify new parameters and their intended use.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: zjgemi <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
zjgemi and pre-commit-ci[bot] authored Sep 10, 2024
1 parent ce4ab3e commit 3501db4
Show file tree
Hide file tree
Showing 14 changed files with 116 additions and 39 deletions.
8 changes: 8 additions & 0 deletions dpgen2/entrypoint/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ def run_diffcsp_args():
doc_gen_tasks = "Number of DiffCSP generation tasks"
doc_gen_command = "Command for DiffCSP generation"
doc_relax_group_size = "Group size for relaxation"
doc_use_hdf5 = "Use HDF5 to store trajs and model_devis"
return [
Argument(
"gen_tasks",
Expand All @@ -380,6 +381,13 @@ def run_diffcsp_args():
default=100,
doc=doc_relax_group_size,
),
Argument(
"use_hdf5",
bool,
optional=True,
default=False,
doc=doc_use_hdf5,
),
]


Expand Down
5 changes: 4 additions & 1 deletion dpgen2/entrypoint/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
RunDPTrain,
RunLmp,
RunRelax,
RunRelaxHDF5,
SelectConfs,
)
from dpgen2.op.caly_evo_step_merge import (
Expand Down Expand Up @@ -167,6 +168,7 @@ def make_concurrent_learning_op(
upload_python_packages: Optional[List[os.PathLike]] = None,
valid_data: Optional[S3Artifact] = None,
train_optional_files: Optional[List[str]] = None,
explore_config: Optional[dict] = None,
):
if train_style in ("dp", "dp-dist"):
prep_run_train_op = PrepRunDPTrain(
Expand Down Expand Up @@ -234,7 +236,7 @@ def make_concurrent_learning_op(
"prep-run-diffcsp",
DiffCSPGen,
PrepRelax,
RunRelax,
RunRelaxHDF5 if explore_config["use_hdf5"] else RunRelax, # type: ignore
prep_config=prep_explore_config,
run_config=run_explore_config,
upload_python_packages=upload_python_packages,
Expand Down Expand Up @@ -552,6 +554,7 @@ def workflow_concurrent_learning(
upload_python_packages=upload_python_packages,
valid_data=valid_data,
train_optional_files=train_optional_files,
explore_config=explore_config,
)
scheduler = make_naive_exploration_scheduler(config)

Expand Down
7 changes: 5 additions & 2 deletions dpgen2/exploration/render/traj_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

import dpdata
import numpy as np
from dflow.python.opio import (
HDF5Dataset,
)

from ..deviation import (
DeviManager,
Expand All @@ -30,7 +33,7 @@ class TrajRender(ABC):
@abstractmethod
def get_model_devi(
self,
files: List[Path],
files: Union[List[Path], List[HDF5Dataset]],
) -> DeviManager:
r"""Get model deviations from recording files.
Expand All @@ -48,7 +51,7 @@ def get_model_devi(
@abstractmethod
def get_confs(
self,
traj: List[Path],
traj: Union[List[Path], List[HDF5Dataset]],
id_selected: List[List[int]],
type_map: Optional[List[str]] = None,
conf_filters: Optional["ConfFilters"] = None,
Expand Down
21 changes: 17 additions & 4 deletions dpgen2/exploration/render/traj_render_lammps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import json
from io import (
StringIO,
)
from pathlib import (
Path,
)
Expand All @@ -12,6 +15,9 @@

import dpdata
import numpy as np
from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.utils import (
setup_ele_temp,
Expand Down Expand Up @@ -42,7 +48,7 @@ def __init__(

def get_model_devi(
self,
files: List[Path],
files: Union[List[Path], List[HDF5Dataset]],
) -> DeviManager:
ntraj = len(files)

Expand All @@ -53,7 +59,10 @@ def get_model_devi(
return model_devi

def _load_one_model_devi(self, fname, model_devi):
dd = np.loadtxt(fname)
if isinstance(fname, HDF5Dataset):
dd = fname.get_data()
else:
dd = np.loadtxt(fname)
if len(np.shape(dd)) == 1: # In case model-devi.out is 1-dimensional
dd = dd.reshape((1, len(dd)))

Expand Down Expand Up @@ -92,7 +101,7 @@ def set_ele_temp(self, system, ele_temp):

def get_confs(
self,
trajs: List[Path],
trajs: Union[List[Path], List[HDF5Dataset]],
id_selected: List[List[int]],
type_map: Optional[List[str]] = None,
conf_filters: Optional["ConfFilters"] = None,
Expand All @@ -108,7 +117,11 @@ def get_confs(
ms = dpdata.MultiSystems(type_map=type_map)
for ii in range(ntraj):
if len(id_selected[ii]) > 0:
ss = dpdata.System(trajs[ii], fmt=traj_fmt, type_map=type_map)
if isinstance(trajs[ii], HDF5Dataset):
traj = StringIO(trajs[ii].get_data()) # type: ignore
else:
traj = trajs[ii]
ss = dpdata.System(traj, fmt=traj_fmt, type_map=type_map)
ss.nopbc = self.nopbc
if ele_temp:
self.set_ele_temp(ss, ele_temp[ii])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@
List,
Optional,
Tuple,
Union,
)

from dflow.python import (
FatalError,
)
from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.exploration.report import (
ExplorationReport,
Expand Down Expand Up @@ -67,7 +71,7 @@ def reached_max_iteration(self):
def plan_next_iteration(
self,
report: Optional[ExplorationReport] = None,
trajs: Optional[List[Path]] = None,
trajs: Optional[Union[List[Path], List[HDF5Dataset]]] = None,
) -> Tuple[bool, Optional[BaseExplorationTaskGroup], Optional[ConfSelector]]:
if self.complete():
raise FatalError("Cannot plan because the stage has completed.")
Expand Down
8 changes: 6 additions & 2 deletions dpgen2/exploration/scheduler/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
List,
Optional,
Tuple,
Union,
)

import numpy as np
from dflow.python import (
FatalError,
)
from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.exploration.report import (
ExplorationReport,
Expand Down Expand Up @@ -110,7 +114,7 @@ def force_stage_complete(self):
def plan_next_iteration(
self,
report: Optional[ExplorationReport] = None,
trajs: Optional[List[Path]] = None,
trajs: Optional[Union[List[Path], List[HDF5Dataset]]] = None,
) -> Tuple[bool, Optional[ExplorationTaskGroup], Optional[ConfSelector]]:
"""
Make the plan for the next DPGEN iteration.
Expand All @@ -119,7 +123,7 @@ def plan_next_iteration(
----------
report : ExplorationReport
The exploration report of this iteration.
trajs : List[Path]
trajs : Union[List[Path], List[HDF5Dataset]]
A list of configurations generated during the exploration. May be used to generate new configurations for the next iteration.
Returns
Expand Down
11 changes: 7 additions & 4 deletions dpgen2/exploration/scheduler/stage_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
from typing import (
List,
Tuple,
Union,
)

from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.exploration.report import (
Expand Down Expand Up @@ -87,7 +92,7 @@ def get_reports(self) -> List[ExplorationReport]:
def plan_next_iteration(
self,
report: ExplorationReport,
trajs: List[Path],
trajs: Union[List[Path], List[HDF5Dataset]],
) -> Tuple[bool, ExplorationTaskGroup, ConfSelector]:
"""
Make the plan for the next iteration of the stage.
Expand All @@ -96,11 +101,9 @@ def plan_next_iteration(
Parameters
----------
hist_reports : List[ExplorationReport]
The historical exploration report of the stage. If this is the first iteration of the stage, this list is empty.
report : ExplorationReport
The exploration report of this iteration.
confs : List[Path]
trajs : Union[List[Path], List[HDF5Dataset]]
A list of configurations generated during the exploration. May be used to generate new configurations for the next iteration.
Returns
Expand Down
8 changes: 6 additions & 2 deletions dpgen2/exploration/selector/conf_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,13 @@
Optional,
Set,
Tuple,
Union,
)

import dpdata
from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.exploration.report import (
ExplorationReport,
Expand All @@ -29,8 +33,8 @@ class ConfSelector(ABC):
@abstractmethod
def select(
self,
trajs: List[Path],
model_devis: List[Path],
trajs: Union[List[Path], List[HDF5Dataset]],
model_devis: Union[List[Path], List[HDF5Dataset]],
type_map: Optional[List[str]] = None,
optional_outputs: Optional[List[Path]] = None,
) -> Tuple[List[Path], ExplorationReport]:
Expand Down
8 changes: 6 additions & 2 deletions dpgen2/exploration/selector/conf_selector_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,14 @@
List,
Optional,
Tuple,
Union,
)

import dpdata
import numpy as np
from dflow.python.opio import (
HDF5Dataset,
)

from dpgen2.exploration.render import (
TrajRender,
Expand Down Expand Up @@ -52,8 +56,8 @@ def __init__(

def select(
self,
trajs: List[Path],
model_devis: List[Path],
trajs: Union[List[Path], List[HDF5Dataset]],
model_devis: Union[List[Path], List[HDF5Dataset]],
type_map: Optional[List[str]] = None,
optional_outputs: Optional[List[Path]] = None,
) -> Tuple[List[Path], ExplorationReport]:
Expand Down
4 changes: 3 additions & 1 deletion dpgen2/flow/dpgen_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import (
List,
Optional,
Union,
)

import jsonpickle
Expand All @@ -35,6 +36,7 @@
OPIO,
Artifact,
BigParameter,
HDF5Datasets,
OPIOSign,
PythonOPTemplate,
Slices,
Expand Down Expand Up @@ -91,7 +93,7 @@ def get_input_sign(cls):
{
"exploration_scheduler": BigParameter(ExplorationScheduler),
"exploration_report": BigParameter(ExplorationReport),
"trajs": Artifact(List[Path]),
"trajs": Artifact(Union[List[Path], HDF5Datasets]),
}
)

Expand Down
1 change: 1 addition & 0 deletions dpgen2/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
)
from .run_relax import (
RunRelax,
RunRelaxHDF5,
)
from .select_confs import (
SelectConfs,
Expand Down
Loading

0 comments on commit 3501db4

Please sign in to comment.