Skip to content

Commit

Permalink
Merge pull request #18 from coltonbh/feature-terachem-optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
coltonbh authored Sep 13, 2024
2 parents ff80a44 + b26e0a4 commit 314394b
Show file tree
Hide file tree
Showing 12 changed files with 688 additions and 19 deletions.
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ repos:
- id: mypy
additional_dependencies:
[tokenize-rt==3.2.0, pydantic>=1.0.0, types-paramiko, types-toml, qcio>=0.11.8]
- repo: https://github.com/crate-ci/typos
rev: v1.24.5
hooks:
- id: typos
- repo: local
hooks:
- id: tests
Expand Down
2 changes: 2 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
"maxiter",
"natom",
"natoms",
"ndarray",
"nocuda",
"optim",
"pathconf",
"psutil",
"qcel",
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [unreleased]

### Added

- `parse_optimization_dir(...) -> OptimizationResults` for TeraChem.

## [0.6.2] - 2024-08-13

### Added
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,9 @@ init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true
warn_untyped_fields = true

[tool.typos]
# Exclude specific files or directories
files.extend-exclude = [
"tests/data/**", # All files under tests/data
]
17 changes: 16 additions & 1 deletion qcparse/encoders/terachem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
from qcparse.exceptions import EncoderError
from qcparse.models import NativeInput

SUPPORTED_CALCTYPES = {CalcType.energy, CalcType.gradient, CalcType.hessian}
# Calculation types the TeraChem encoder supports. In encode(), hessian is written
# as "frequencies", optimization as "minimize" and transition_state as "ts"; the
# remaining types are written using their own value.
SUPPORTED_CALCTYPES = {
CalcType.energy,
CalcType.gradient,
CalcType.hessian,
CalcType.optimization,
CalcType.transition_state,
}
XYZ_FILENAME = "geometry.xyz"
PADDING = 20 # padding between keyword and value in tc.in

Expand All @@ -21,6 +27,15 @@ def encode(inp_obj: ProgramInput) -> NativeInput:
# calctype
if inp_obj.calctype.value == CalcType.hessian:
calctype = "frequencies"
elif inp_obj.calctype.value == CalcType.optimization:
calctype = "minimize"
if not inp_obj.keywords.get("new_minimizer", "no") == "yes":
raise EncoderError(
"Only the new_minimizer is supported for optimizations. Add "
"'new_minimizer': 'yes' to the keywords."
)
elif inp_obj.calctype.value == CalcType.transition_state:
calctype = "ts"
else:
calctype = inp_obj.calctype.value

Expand Down
7 changes: 4 additions & 3 deletions qcparse/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import warnings
from importlib import import_module
from pathlib import Path
from typing import List, Union
from typing import List, Optional, Union

from qcio import ProgramInput, SinglePointResults
from qcio import CalcType, ProgramInput, SinglePointResults

from .exceptions import EncoderError, MatchNotFoundError, ParserError
from .models import NativeInput, ParserSpec, registry, single_point_results_namespace
Expand All @@ -20,6 +20,7 @@ def parse(
data_or_path: Union[str, bytes, Path],
program: str,
filetype: str = "stdout",
calctype: Optional[CalcType] = None,
) -> SinglePointResults:
"""Parse a file using the parsers registered for the given program.
Expand Down Expand Up @@ -53,7 +54,7 @@ def parse(

# Get the calctype if filetype is 'stdout'
if filetype == "stdout":
calctype = parsers.parse_calctype(file_content)
calctype = calctype if calctype else parsers.parse_calctype(file_content)

# Get all the parsers for the program, filetype, and calctype
parser_specs: List[ParserSpec] = registry.get_parsers(program, filetype, calctype)
Expand Down
130 changes: 116 additions & 14 deletions qcparse/parsers/terachem.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
"""Parsers for TeraChem output files."""

import re

from qcio import CalcType
from pathlib import Path
from typing import List, Optional, Union

from qcio import (
CalcType,
OptimizationResults,
ProgramInput,
ProgramOutput,
Provenance,
SinglePointResults,
Structure,
)

from qcparse.exceptions import MatchNotFoundError
from qcparse.models import FileType, ParsedDataCollector
Expand Down Expand Up @@ -38,21 +48,51 @@ def parse_energy(string: str, data_collector: ParsedDataCollector):
data_collector.energy = float(regex_search(regex, string).group(1))


@parser(only=[CalcType.gradient, CalcType.hessian])
def parse_gradient(string: str, data_collector: ParsedDataCollector):
"""Parse gradient from TeraChem stdout."""
def parse_gradients(string: str, all: bool = True) -> List[List[List[float]]]:
    """Parse gradients from TeraChem stdout.

    Args:
        string: The contents of the TeraChem stdout file.
        all: If True, return all gradients. If False, return only the first
            gradient. (The name shadows the builtin but is kept because callers
            pass it by keyword.)

    Returns:
        A list of gradients. Each gradient is a list of 3-element lists, where each
        3-element list is a gradient for an atom.

    Raises:
        MatchNotFoundError: If no gradient block is found in ``string``.
    """
    # This will match all floats after the dE/dX dE/dY dE/dZ header and stop at the
    # terminating -- or -= line that follows gradients or optimizations.
    regex = r"(?<=dE\/dX\s{12}dE\/dY\s{12}dE\/dZ\n)[\d\.\-\s]+(?=\n(?:--|-=))"

    # Normalise both modes to a list of matched strings so the rest of the function
    # has a single code path. The pattern has no capturing groups, so findall
    # returns the full text of every match.
    if all is True:
        grad_strings: List[str] = re.findall(regex, string)
    else:
        # re.search stops at the first hit instead of scanning the whole output.
        first_match = re.search(regex, string)
        grad_strings = [first_match.group()] if first_match else []

    if not grad_strings:
        raise MatchNotFoundError(regex, string)

    gradients = []
    for grad_string in grad_strings:
        # split string and cast to floats
        values = [float(val) for val in grad_string.split()]
        # arrange into N x 3 gradient
        gradients.append([values[i : i + 3] for i in range(0, len(values), 3)])

    return gradients


@parser(only=[CalcType.gradient, CalcType.hessian])
def parse_gradient(string: str, data_collector: ParsedDataCollector):
    """Store the first gradient found in TeraChem stdout on the data collector."""
    # all=False asks parse_gradients for only the first gradient block; it still
    # returns a list, so take its single element.
    data_collector.gradient = parse_gradients(string, all=False)[0]

Expand Down Expand Up @@ -137,3 +177,65 @@ def calculation_succeeded(string: str) -> bool:
# If any match for a failure regex is found, the calculation failed
return True
return False


def parse_optimization_dir(
    directory: Union[Path, str],
    stdout: str,
    *,
    inp_obj: ProgramInput,
) -> OptimizationResults:
    """Parse the output directory of a TeraChem optimization calculation.

    Args:
        directory: Path to the directory containing the TeraChem output files.
            The trajectory is read from ``optim.xyz`` inside it.
        stdout: The contents of the TeraChem stdout file.
        inp_obj: The input object used for the calculation. Its model and keywords
            are copied onto the input of every trajectory step.

    Returns:
        OptimizationResults object whose trajectory holds one ProgramOutput per
        (structure, gradient) pair.

    Raises:
        AssertionError: If ``optim.xyz`` does not load as a list of structures.
        MatchNotFoundError: If no gradient can be found in ``stdout``.
    """
    directory = Path(directory)

    # Parse the structures
    structures = Structure.open(directory / "optim.xyz")
    # Raised explicitly instead of via `assert` so the check still runs under
    # `python -O`; the exception type and message are unchanged for callers.
    if not isinstance(structures, list):
        raise AssertionError("Expected multiple structures in optim.xyz")

    # Parse Values
    # NOTE(review): imported inside the function, presumably to avoid a circular
    # import with the qcparse package -- confirm before moving to module level.
    from qcparse import parse

    # Parse all the values from the stdout file. These are shared by every step;
    # only the energy and gradient are overridden per step below.
    spr = parse(stdout, "terachem", "stdout", CalcType.energy)

    gradients = parse_gradients(stdout)
    program_version = parse_version_string(stdout)

    # Create the trajectory. zip() stops at the shorter of the two sequences, so a
    # structure without a matching gradient (or vice versa) is dropped silently.
    trajectory: List[ProgramOutput] = [
        ProgramOutput(
            input_data=ProgramInput(
                calctype=CalcType.gradient,
                structure=structure,
                model=inp_obj.model,
                keywords=inp_obj.keywords,
            ),
            results=SinglePointResults(
                **{
                    **spr.model_dump(),
                    # TeraChem places the energy as the first comment in the xyz file
                    "energy": structure.extras[Structure._xyz_comment_key][0],
                    # Will be coerced by Pydantic to np.ndarray
                    "gradient": gradient,  # type: ignore
                }
            ),
            success=True,
            provenance=Provenance(
                program="terachem",
                program_version=program_version,
                scratch_dir=directory.parent,
            ),
        )
        for structure, gradient in zip(structures, gradients)
    ]

    return OptimizationResults(trajectory=trajectory)
23 changes: 23 additions & 0 deletions tests/data/gradients.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,26 @@
[4.973e-06, -3.1333e-06, -7.764e-07],
[6.7704e-06, 2.1271e-06, -3.2742e-06],
]

water_opt = [
[
[0.0015991486, 0.0011623983, -0.0008220843],
[-0.0033943838, 0.0083169673, 0.0080418029],
[0.0017952356, -0.0094793661, -0.0072197183],
],
[
[0.0007735883, 0.0005355768, -0.0003640441],
[0.0001656072, -0.0021072255, -0.0013624922],
[-0.0009391978, 0.0015716465, 0.0017265376],
],
[
[0.0001367065, 7.05798e-05, -3.25781e-05],
[-5.57745e-05, -3.29821e-05, 4.5913e-05],
[-8.09322e-05, -3.7596e-05, -1.33348e-05],
],
[
[3.50513e-05, -5.6371e-06, 1.96736e-05],
[-4.546e-07, -7.6209e-06, 8.1239e-06],
[-3.4594e-05, 1.32576e-05, -2.78001e-05],
],
]
20 changes: 20 additions & 0 deletions tests/data/terachem_opt/optim.xyz
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
3
-7.6408224649875081e+01 frame 0 xyz file generated by TeraChem
O 0.0119228928 0.0093402642 -0.0064820105
H 0.2650769983 0.9093318768 0.2741564484
H 0.6072002355 -0.2640720907 -0.7301744291
3
-7.6408928705141307e+01 frame 1 xyz file generated by TeraChem
O -0.0028251031 -0.0013840791 0.0011984340
H 0.2798216509 0.8896108556 0.2484225203
H 0.6072035665 -0.2336267117 -0.7121209539
3
-7.6408947912944399e+01 frame 2 xyz file generated by TeraChem
O -0.0014561937 -0.0002263064 0.0002998534
H 0.2776781764 0.8928649961 0.2515321133
H 0.6079781950 -0.2380385656 -0.7143320023
3
-7.6408948109186298e+01 frame 3 xyz file generated by TeraChem
O -0.0017239244 -0.0002612159 0.0002482950
H 0.2774292998 0.8930138681 0.2510061731
H 0.6084948100 -0.2381525793 -0.7137545065
Loading

0 comments on commit 314394b

Please sign in to comment.