From e2c7b646babea7e453ed5a430e89d413932617d1 Mon Sep 17 00:00:00 2001 From: Colton Hicks Date: Mon, 12 Aug 2024 21:32:37 -0700 Subject: [PATCH] Added CREST encode and parser for conformer search directory parsing. --- .pre-commit-config.yaml | 2 +- .vscode/settings.json | 3 + CHANGELOG.md | 4 + CONTRIBUTING.md | 8 ++ docs/dev-decisions.md | 12 +- poetry.lock | 22 +++- pyproject.toml | 3 +- qcparse/encoders/crest.py | 83 +++++++++++++ qcparse/main.py | 3 +- qcparse/models.py | 2 +- qcparse/parsers/crest.py | 78 ++++++++++++ tests/data/crest_output/crest_conformers.xyz | 123 +++++++++++++++++++ tests/data/crest_output/crest_rotamers.xyz | 82 +++++++++++++ tests/test_crest.py | 108 +++++++++++++++- 14 files changed, 517 insertions(+), 16 deletions(-) create mode 100644 qcparse/encoders/crest.py create mode 100644 tests/data/crest_output/crest_conformers.xyz create mode 100644 tests/data/crest_output/crest_rotamers.xyz diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 185e0de..f735180 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,7 +33,7 @@ repos: hooks: - id: mypy additional_dependencies: - [tokenize-rt==3.2.0, pydantic>=1.0.0, types-paramiko, types-toml] + [tokenize-rt==3.2.0, pydantic>=1.0.0, types-paramiko, types-toml, qcio>=0.11.8] - repo: local hooks: - id: tests diff --git a/.vscode/settings.json b/.vscode/settings.json index 947e2a5..9e579e0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,9 +12,12 @@ "natoms", "nocuda", "pathconf", + "psutil", "qcel", "qcio", "qcparse", + "rotamer", + "rotamers", "spinmult", "tcin", "tcout", diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a76c97..11f1144 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [unreleased] +### Added + +- `CREST` encoder and directory parser for conformer search output directories. 
+ ## [0.6.1] - 2024-08-08 ### Added diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 94729dd..b7a156d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,3 +29,11 @@ See the `terachem.py` file for an overview. - The `ParsedDataCollector` object only allows setting a particular data attribute once. If a second attempt is made it raises an `AttributeError`. This provides a sanity check that multiple parsers aren't trying to write to the same field and overwriting each other. 3. `parse` looks up the parsers for the `program` in the `parser_registry`. Parsers are registered by wrapping them with the `@parser` decorator found in `qcparse.parsers.utils`. The `@parser` decorator registers a parser with the registry under the program name of the module in which it is found, verifying that the `filetype` for which it is registered is supported by the `program` by checking `SupportedFileTypes` in the parser's module. It also registers whether a parser `must_succeed` which means an exception will be raised if this value is not found when attempting to parse a file. In order for parsers to properly register they must be imported, so make sure they are hoisted into the `qcparse.parsers.__init__` file. 4. `parse` executes all parsers for the given `filetype` and converts the `ParsedDataCollector` object passed to all the parsers into a final `SinglePointResults` object. + +## Publish the package + +With all code merged to `master` and the latest code pulled down to your local machine, run: + +```sh +python scripts/release.py x.x.x +``` diff --git a/docs/dev-decisions.md b/docs/dev-decisions.md index 02dfe19..a1042f7 100644 --- a/docs/dev-decisions.md +++ b/docs/dev-decisions.md @@ -6,12 +6,10 @@ ## UPDATED DESIGN DECISION: -- I don't see a strong reason for making this package a standalone package that parses everything required for a `SinglePointOutput` object including input data, provenance data, xyz files, etc... 
While the original idea was to have a cli tool to run on TeraChem files, now that I've build my own data structures (`qcio`) and driver program (`qcop`), there's no reason to parse anything but `SinglePointResults` values because we should just be driving the programs with `qcop` and already have access to the input data. The code is far easier to maintain as only a results parser. The only downside would be walking in to someone else's old data and wanting to slurp it all in, but perhaps there's no reason to build for that use case now... Just go with SIMPLE and keep the code maintainable. +- I don't see a strong reason for making this package a standalone package that parses everything required for a `ProgramOutput` object including input data, provenance data, xyz files, etc... While the original idea was to have a cli tool to run on TeraChem files, now that I've built my own data structures (`qcio`) and driver program (`qcop`), there's no reason to parse anything but `SinglePointResults` values because we should just be driving the programs with `qcop` and already have access to the input data. The code is far easier to maintain as only a results parser. The only downside would be walking into someone else's old data and wanting to slurp it all in, but perhaps there's no reason to build for that use case now... Just go with SIMPLE and keep the code maintainable. -## Publishing Checklist +## Future Features -- Update `CHANGELOG.md` -- Bump version in `pyproject.toml` -- Tag commit with a version and GitHub Actions will publish it to pypi if tag is on `master` branch. -- `git push --tags` -- `git push` +- At some point it could be good to have a `parse_dir` function that parses the entire output directory of a program and returns the corresponding `Results` object. 
The `parse` function would still be used on individual files/output data; however, the `parse_dir` function would be the top-level function for collecting all results from a directory and turning them into structured data. Useful for: + - Parsing all CREST outputs, e.g., `crest_conformers.xyz` and `crest_rotamers.xyz` into a `ConformerSearchResults` object. + - Parsing data from other TeraChem output files besides just the `stdout`, e.g., converting the `c0` binary files into a `Wavefunction` object. diff --git a/poetry.lock b/poetry.lock index c46bccf..64950ee 100644 --- a/poetry.lock +++ b/poetry.lock @@ -634,13 +634,13 @@ files = [ [[package]] name = "qcio" -version = "0.10.0" +version = "0.11.8" description = "Beautiful and user friendly data structures for quantum chemistry." optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "qcio-0.10.0-py3-none-any.whl", hash = "sha256:00c24b0a63ba0867af5247fefac091999b33e4277748bf86c031ae158acdf3ad"}, - {file = "qcio-0.10.0.tar.gz", hash = "sha256:51d93fe6dbd6ce5c60a028fde4e164cb520c5f2602df84f7f21872deca48f820"}, + {file = "qcio-0.11.8-py3-none-any.whl", hash = "sha256:283fd23f41807cb4a7cce49bc31495f391583d42044afdc3502c1c0f618f2814"}, + {file = "qcio-0.11.8.tar.gz", hash = "sha256:6f5703de389f5f62c86c0ac3e7be1288cb69b89e1bcfe184d121a9a88c3526f1"}, ] [package.dependencies] @@ -651,7 +651,10 @@ toml = ">=0.10.2,<0.11.0" typing-extensions = ">=4.7.1,<5.0.0" [package.extras] +all = ["ipython (>=8.0.0)", "matplotlib (>=3.0.0)", "openbabel-wheel (>=3.1.1.19,<4.0.0.0)", "py3Dmol (>=2.2.1)", "rdkit (>=2022.3.3)"] +openbabel = ["openbabel-wheel (>=3.1.1.19,<4.0.0.0)"] rdkit = ["rdkit (>=2022.3.3)"] +view = ["ipython (>=8.0.0)", "matplotlib (>=3.0.0)", "py3Dmol (>=2.2.1)"] [[package]] name = "ruff" @@ -701,6 +704,17 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tomli-w" +version = "1.0.0" +description = "A lil' 
TOML writer" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli_w-1.0.0-py3-none-any.whl", hash = "sha256:9f2a07e8be30a0729e533ec968016807069991ae2fd921a78d42f429ae5f4463"}, + {file = "tomli_w-1.0.0.tar.gz", hash = "sha256:f463434305e0336248cac9c2dc8076b707d8a12d019dd349f5c1e382dd1ae1b9"}, +] + [[package]] name = "types-toml" version = "0.10.8.20240310" @@ -746,4 +760,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "c621a17fd99fa64cfc359a2b1a0e28f93351f27b6896eaca75a07c8e9cf6bb94" +content-hash = "e87c3d1b49d03f20a934eba4ac4433810bdd622a579357b87dd931771604924b" diff --git a/pyproject.toml b/pyproject.toml index af38ff7..2be5d18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,8 @@ homepage = "https://github.com/coltonbh/qcparse" [tool.poetry.dependencies] python = "^3.8" pydantic = ">=2.0.0" -qcio = ">=0.10.0" +qcio = "^0.11.8" +tomli-w = "^1.0.0" [tool.poetry.group.dev.dependencies] mypy = "^1.1.1" diff --git a/qcparse/encoders/crest.py b/qcparse/encoders/crest.py new file mode 100644 index 0000000..c329c60 --- /dev/null +++ b/qcparse/encoders/crest.py @@ -0,0 +1,83 @@ +import copy +import os +from typing import Any, Dict + +import tomli_w +from qcio import CalcType, ProgramInput + +from qcparse.exceptions import EncoderError +from qcparse.models import NativeInput + +SUPPORTED_CALCTYPES = {CalcType.conformer_search} + + +def encode(inp_obj: ProgramInput) -> NativeInput: + """Translate a ProgramInput into CREST input files. + + Args: + inp_obj: The qcio ProgramInput object for a computation. + + Returns: + NativeInput with .input_file being the crest.toml file and .geometry_file the + Structure's xyz file. 
+ """ + validate_input(inp_obj) + struct_filename = "structure.xyz" + + return NativeInput( + input_file=tomli_w.dumps(_to_toml_dict(inp_obj, struct_filename)), + geometry_file=inp_obj.structure.to_xyz(), + geometry_filename=struct_filename, + ) + + +def validate_input(inp_obj: ProgramInput): + """Validate the input for CREST. + + Args: + inp_obj: The qcio ProgramInput object for a computation. + + Raises: + EncoderError: If the input is invalid. + """ + # These values come from other parts of the ProgramInput and should not be set + # in the keywords. + non_allowed_keywords = ["charge", "uhf", "runtype"] + for keyword in non_allowed_keywords: + if keyword in inp_obj.keywords: + raise EncoderError( + f"{keyword} should not be set in keywords for CREST. It is already set " + "on the Structure or ProgramInput elsewhere.", + ) + + +def _to_toml_dict(inp_obj: ProgramInput, struct_filename: str) -> Dict[str, Any]: + """Convert a ProgramInput object to a dictionary in the CREST format of TOML. + + This function makes it easier to test for the correct TOML structure. 
+ """ + # Start with existing keywords + toml_dict = copy.deepcopy(inp_obj.keywords) + + # Top level keywords + # Logical cores was 10% faster than physical cores, so not using psutil + toml_dict.setdefault("threads", os.cpu_count()) + toml_dict["input"] = struct_filename + + # TODO: May need to deal with non-covalent mode at some point + toml_dict["runtype"] = "imtd-gc" + + # Calculation level keywords + calculation = toml_dict.pop("calculation", {}) + calculation_level = calculation.pop("level", []) + if len(calculation_level) == 0: + calculation_level.append({}) + for level_dict in calculation_level: + level_dict["method"] = inp_obj.model.method + level_dict["charge"] = inp_obj.structure.charge + level_dict["uhf"] = inp_obj.structure.multiplicity - 1 + + calculation["level"] = calculation_level + toml_dict["calculation"] = calculation + + return toml_dict diff --git a/qcparse/main.py b/qcparse/main.py index 105fe74..d396945 100644 --- a/qcparse/main.py +++ b/qcparse/main.py @@ -94,7 +94,8 @@ def encode(inp_data: ProgramInput, program: str) -> NativeInput: A NativeInput object with the encoded input. Raises: - EncoderError: If the calctype is not supported by the program's encoder. + EncoderError: If the calctype is not supported by the program's encoder or the + input is invalid. """ # Check that calctype is supported by the encoder encoder = import_module(f"qcparse.encoders.{program}") diff --git a/qcparse/models.py b/qcparse/models.py index 5ebdde7..55301ee 100644 --- a/qcparse/models.py +++ b/qcparse/models.py @@ -163,7 +163,7 @@ class NativeInput(BaseModel): """Native input file data. Writing these files to disk should produce a valid input. 
Attributes: - input: input file for the program + input_file: input file for the program geometry: xyz file or other geometry file required for the calculation geometry_filename: filename of the geometry file referenced in the input """ diff --git a/qcparse/parsers/crest.py b/qcparse/parsers/crest.py index 2c3fd14..6f0fa06 100644 --- a/qcparse/parsers/crest.py +++ b/qcparse/parsers/crest.py @@ -1,3 +1,9 @@ +from pathlib import Path +from typing import List, Optional, Union + +import numpy as np +from qcio import ConformerSearchResults, Structure + from .utils import regex_search @@ -9,3 +15,75 @@ def parse_version_string(string: str) -> str: regex = r"Version (\d+\.\d+\.\d+)," match = regex_search(regex, string) return match.group(1) + + +def parse_structures( + filename: Union[Path, str], + charge: Optional[int] = None, + multiplicity: Optional[int] = None, +) -> List[Structure]: + """Parse Structures from a CREST multi-structure xyz file. + + CREST places an energy value in the comment line of each structure. This function + collects all structures and their energies from the file into a list of Structure + objects. + + Args: + filename: The path to the multi-structure xyz file. + charge: The charge of the structures. + multiplicity: The multiplicity of the structures. + + Returns: + A list of Structure objects. + """ + try: + structures = Structure.open(filename, charge=charge, multiplicity=multiplicity) + if not isinstance(structures, list): # single structure + structures = [structures] + except FileNotFoundError: + structures = [] # No structures created + return structures + + +def parse_conformer_search_dir( + directory: Union[Path, str], + *, + charge: Optional[int] = None, + multiplicity: Optional[int] = None, + collect_rotamers: bool = True, +) -> ConformerSearchResults: + """Parse the output directory of a CREST conformer search calculation. + + Args: + directory: Path to the directory containing the CREST output files. 
+ charge: The charge of the structures. + multiplicity: The multiplicity of the structures. + collect_rotamers: Whether to parse rotamers as well as conformers. + + Returns: + The parsed conformers, rotamers, and their energies as a ConformerSearchResults + object. + """ + directory = Path(directory) + conformers = parse_structures( + directory / "crest_conformers.xyz", charge=charge, multiplicity=multiplicity + ) + + # CREST places the energy as the only value in the comment line + conf_energies = [conf.extras[Structure._xyz_comment_key][0] for conf in conformers] + + rotamers = [] + if collect_rotamers: + rotamers = parse_structures( + directory / "crest_rotamers.xyz", charge=charge, multiplicity=multiplicity + ) + + # CREST places the energy as the only value in the comment line + rotamer_energies = [rot.extras[Structure._xyz_comment_key][0] for rot in rotamers] + + return ConformerSearchResults( + conformers=conformers, + conformer_energies=np.array(conf_energies), + rotamers=rotamers, + rotamer_energies=np.array(rotamer_energies), + ) diff --git a/tests/data/crest_output/crest_conformers.xyz b/tests/data/crest_output/crest_conformers.xyz new file mode 100644 index 0000000..450817d --- /dev/null +++ b/tests/data/crest_output/crest_conformers.xyz @@ -0,0 +1,123 @@ + 39 + -107.04437987 + O 0.0059933208 1.1078020235 -0.3235600817 + C -0.0751801847 -0.0951571981 -0.1329286921 + N -1.1072082846 -0.9276293566 -0.0738458901 + C -2.3880810273 -0.5201880957 -0.0540415069 + C -2.8394857428 0.8002885351 0.1792176303 + C -4.1885428735 1.0786831876 0.2397556881 + C -4.6586556613 2.4824871838 0.4678184416 + F -5.0326634630 3.0999949195 -0.6775146753 + F -3.7461569434 3.2853648761 1.0327715610 + F -5.7460967463 2.5482664191 1.2690334434 + C -5.1551389169 0.0928479685 0.0623853939 + C -4.7156148535 -1.2069079283 -0.1716385636 + C -5.7490326748 -2.2770676715 -0.3335938636 + F -6.7707345440 -1.9101130229 -1.1358966326 + F -6.3314646847 -2.6228495854 0.8397042856 + F 
-5.2772891144 -3.4246113618 -0.8491175515 + C -3.3739510402 -1.5171668010 -0.2238359552 + N 1.1075278356 -0.8957257209 0.0215119745 + C 2.4102839025 -0.5436686687 0.0006380459 + C 2.8313125177 0.7868160971 -0.1830075658 + C 4.1769778488 1.0882123159 -0.1928767891 + C 4.6446190011 2.4979537482 -0.4055495656 + F 3.6785961979 3.4175214069 -0.3467906866 + F 5.2435244071 2.6590674374 -1.6073105178 + F 5.5728041202 2.8717320778 0.5021697181 + C 5.1560916863 0.1111023410 -0.0300678342 + C 4.7371981894 -1.2003831272 0.1477186824 + C 5.7639571271 -2.2715852571 0.3613267678 + F 6.9281346221 -2.0245014334 -0.2634799431 + F 6.0796430073 -2.4304353504 1.6662630879 + F 5.3662424442 -3.4839558525 -0.0624928289 + C 3.3958731129 -1.5326525294 0.1697385562 + H -2.1081733092 1.5824786545 0.3076241748 + H -6.2066911255 0.3265525454 0.1056036198 + H -3.0470278592 -2.5295084142 -0.4011087996 + H 0.8796397670 -1.8719507192 0.1516337344 + H 2.0762347641 1.5471707624 -0.3119107273 + H 6.2032694856 0.3658153173 -0.0444203699 + H 3.0977017760 -2.5610267253 0.3109293793 + 39 + -107.04429757 + O -0.0555841110 -0.0670495360 -1.1481511375 + C -0.0640661147 0.0098028332 0.0703106406 + N -1.0406753759 0.1020896043 0.9659690380 + C -2.3453271682 0.1380439790 0.6389463220 + C -3.2567793637 0.2273731653 1.7139718612 + C -4.6165720489 0.2792661369 1.4941844908 + C -5.5628095040 0.3438746991 2.6517173013 + F -5.0087765409 0.8057291664 3.7862121438 + F -6.0803598812 -0.8664092271 2.9726311411 + F -6.6328113994 1.1326053363 2.4219928304 + C -5.1478965415 0.2374373807 0.2090136299 + C -4.2552289547 0.1453700679 -0.8555814379 + C -4.8355931617 0.1274409176 -2.2363099513 + F -5.3270398418 1.3349485279 -2.6067228106 + F -5.8822138862 -0.7219521735 -2.3521430496 + F -3.9751353771 -0.2242599626 -3.1995287377 + C -2.8904524298 0.0984634638 -0.6665964737 + N 1.1661453166 0.0095497195 0.8117393932 + C 2.4443953014 -0.0726235509 0.3870028183 + C 2.7835915157 -0.1873730757 -0.9747169059 + C 4.1075635798 
-0.2697590578 -1.3499480664 + C 4.4920590807 -0.3858806200 -2.7955694427 + F 3.4627968800 -0.4976421842 -3.6378389491 + F 5.2843759241 -1.4560636154 -3.0278321480 + F 5.2091142724 0.6797288541 -3.2183485991 + C 5.1456713596 -0.2414761402 -0.4208142214 + C 4.8078844792 -0.1252866556 0.9195102197 + C 5.8773322807 -0.1097795516 1.9698973288 + F 5.7010500832 0.8675897369 2.8806783123 + F 5.9192397606 -1.2579524975 2.6808586523 + F 7.1151658275 0.0609170126 1.4805334654 + C 3.4889548523 -0.0448772080 1.3275588119 + H -2.8577127670 0.2586600233 2.7154553136 + H -6.2123575063 0.2757217628 0.0424395341 + H -2.2169540091 0.0276891008 -1.5061333647 + H 0.9972348420 0.0854307166 1.8055751491 + H 1.9838142407 -0.2085151935 -1.6991078027 + H 6.1740237181 -0.3055218248 -0.7365011737 + H 3.2572744477 0.0443818251 2.3788067441 + 39 + -107.04413699 + O 0.0131837214 1.1273930191 0.1306824288 + C -0.0732217755 -0.0880737086 0.0573512004 + N -1.1060478789 -0.9229806193 0.0294575995 + C -2.3880917629 -0.5155389919 0.0270806132 + C -2.8501214953 0.8198732821 -0.0286399897 + C -4.2026362109 1.0952150229 -0.0475929821 + C -4.6442494898 2.5238021249 -0.1417339052 + F -4.5335409263 3.0216023494 -1.3951946118 + F -3.9402655314 3.3582797760 0.6417616554 + F -5.9361509554 2.7128982214 0.1982847535 + C -5.1615297641 0.0886487923 -0.0168598523 + C -4.7114050523 -1.2279050828 0.0447836468 + C -5.7402685515 -2.3143060156 0.0639786899 + F -6.3775438485 -2.4523124643 -1.1230347160 + F -6.7232059478 -2.0948004918 0.9640358027 + F -5.2519868283 -3.5321861222 0.3514752393 + C -3.3681786934 -1.5326683818 0.0627504535 + N 1.1077689774 -0.9036164000 -0.0027822224 + C 2.4119279980 -0.5574687920 0.0129954161 + C 2.8403804150 0.7782506198 0.1096504986 + C 4.1894827622 1.0745589771 0.1226512536 + C 4.6027635394 2.5152864107 0.2071675140 + F 3.9621470747 3.1923271251 1.1730638321 + F 4.3677579221 3.1874520025 -0.9376903423 + F 5.9166135757 2.6780819674 0.4559559198 + C 5.1620922052 0.0846974641 0.0375492611 + 
C 4.7348691375 -1.2351860665 -0.0547965625 + C 5.7808870648 -2.3039633115 -0.1599007577 + F 6.4398478946 -2.2673614882 -1.3383949089 + F 6.7342973082 -2.1940668440 0.7863083225 + F 5.2970438071 -3.5512295499 -0.0516559290 + C 3.3939538590 -1.5624527072 -0.0699925467 + H -2.1256170892 1.6187966599 -0.0500559478 + H -6.2153607970 0.3145548143 -0.0320972164 + H -3.0336744970 -2.5569533834 0.1072407064 + H 0.8747654991 -1.8853450041 -0.0651826677 + H 2.0908859097 1.5518485965 0.1769367737 + H 6.2123011811 0.3258548374 0.0491047455 + H 3.0896417741 -2.5957929131 -0.1443112560 diff --git a/tests/data/crest_output/crest_rotamers.xyz b/tests/data/crest_output/crest_rotamers.xyz new file mode 100644 index 0000000..427b9ad --- /dev/null +++ b/tests/data/crest_output/crest_rotamers.xyz @@ -0,0 +1,82 @@ +39 + -107.044379870000 2.036733506981914E-003 ! + O 0.0059933208 1.1078020235 -0.3235600817 + C -0.0751801847 -0.0951571981 -0.1329286921 + N -1.1072082846 -0.9276293566 -0.0738458901 + C -2.3880810273 -0.5201880957 -0.0540415069 + C -2.8394857428 0.8002885351 0.1792176303 + C -4.1885428735 1.0786831876 0.2397556881 + C -4.6586556613 2.4824871838 0.4678184416 + F -5.0326634630 3.0999949195 -0.6775146753 + F -3.7461569434 3.2853648761 1.0327715610 + F -5.7460967463 2.5482664191 1.2690334434 + C -5.1551389169 0.0928479685 0.0623853939 + C -4.7156148535 -1.2069079283 -0.1716385636 + C -5.7490326748 -2.2770676715 -0.3335938636 + F -6.7707345440 -1.9101130229 -1.1358966326 + F -6.3314646847 -2.6228495854 0.8397042856 + F -5.2772891144 -3.4246113618 -0.8491175515 + C -3.3739510402 -1.5171668010 -0.2238359552 + N 1.1075278356 -0.8957257209 0.0215119745 + C 2.4102839025 -0.5436686687 0.0006380459 + C 2.8313125177 0.7868160971 -0.1830075658 + C 4.1769778488 1.0882123159 -0.1928767891 + C 4.6446190011 2.4979537482 -0.4055495656 + F 3.6785961979 3.4175214069 -0.3467906866 + F 5.2435244071 2.6590674374 -1.6073105178 + F 5.5728041202 2.8717320778 0.5021697181 + C 5.1560916863 0.1111023410 
-0.0300678342 + C 4.7371981894 -1.2003831272 0.1477186824 + C 5.7639571271 -2.2715852571 0.3613267678 + F 6.9281346221 -2.0245014334 -0.2634799431 + F 6.0796430073 -2.4304353504 1.6662630879 + F 5.3662424442 -3.4839558525 -0.0624928289 + C 3.3958731129 -1.5326525294 0.1697385562 + H -2.1081733092 1.5824786545 0.3076241748 + H -6.2066911255 0.3265525454 0.1056036198 + H -3.0470278592 -2.5295084142 -0.4011087996 + H 0.8796397670 -1.8719507192 0.1516337344 + H 2.0762347641 1.5471707624 -0.3119107273 + H 6.2032694856 0.3658153173 -0.0444203699 + H 3.0977017760 -2.5610267253 0.3109293793 + 39 + -107.044379830000 2.036647298363681E-003 ! + O 0.0144755365 1.1485637636 -0.1329602899 + C -0.0758671438 -0.0685795959 -0.1523392601 + N -1.1139477564 -0.8904019964 -0.2439632851 + C -2.3919873104 -0.4835063477 -0.1560724888 + C -3.3840583978 -1.4242896390 -0.5100488340 + C -4.7237709984 -1.1184128996 -0.4060895100 + C -5.7627612050 -2.1337830466 -0.7650598635 + F -5.3000257821 -3.1572956653 -1.5024423948 + F -6.7916987161 -1.6146275573 -1.4674651909 + F -6.3335151507 -2.7044290336 0.3231761386 + C -5.1550836165 0.1191408108 0.0625217723 + C -4.1822717331 1.0478587569 0.4216811547 + C -4.6444005393 2.3885235309 0.9040855166 + F -5.7280218470 2.3112067128 1.7096118827 + F -5.0207787865 3.2057739192 -0.1075349346 + F -3.7257133199 3.0715438381 1.6010772728 + C -2.8350259395 0.7749216919 0.3150856944 + N 1.1003579293 -0.8930246951 -0.1316702655 + C 2.4056592443 -0.5528748973 -0.0867669792 + C 2.8371063319 0.7860590542 -0.0399023056 + C 4.1849463068 1.0739667741 0.0070696072 + C 4.6642769076 2.4953809131 0.0392307086 + F 3.7038079685 3.4002753529 0.2413893669 + F 5.2784773422 2.8508735070 -1.1119660888 + F 5.5841552827 2.7034286586 1.0066050824 + C 5.1560021642 0.0755242894 0.0057588531 + C 4.7267871692 -1.2438216081 -0.0434327227 + C 5.7466723534 -2.3420108163 -0.0105283870 + F 6.0787839106 -2.6996855050 1.2502232079 + F 5.3338085550 -3.4730071983 -0.6084529599 + F 6.9045685873 
-2.0091993136 -0.6071451055 + C 3.3831024730 -1.5642121776 -0.0839173960 + H -3.0636462795 -2.3891376878 -0.8699932520 + H -6.2050520012 0.3492324007 0.1450760623 + H -2.0987999914 1.5144028486 0.5875015336 + H 0.8650175491 -1.8753201535 -0.1716102850 + H 2.0881939498 1.5632532841 -0.0408128523 + H 6.2050997534 0.3204266863 0.0390735165 + H 3.0765408891 -2.5991263886 -0.1203786390 \ No newline at end of file diff --git a/tests/test_crest.py b/tests/test_crest.py index be61752..7178adf 100644 --- a/tests/test_crest.py +++ b/tests/test_crest.py @@ -1,6 +1,112 @@ -from qcparse.parsers.crest import parse_version_string +import pytest +from qcio import ProgramInput, Structure +from qcio.utils import water + +from qcparse.encoders.crest import _to_toml_dict, validate_input +from qcparse.exceptions import EncoderError +from qcparse.parsers.crest import ( + parse_conformer_search_dir, + parse_structures, + parse_version_string, +) def test_parse_version_string(test_data_dir): text = (test_data_dir / "crest_stdout.txt").read_text() assert parse_version_string(text) == "3.0.1" + + +def test_validate_input(prog_inp): + inp_obj = prog_inp("conformer_search") + validate_input(inp_obj) + + with pytest.raises(EncoderError): + inp_obj.keywords["charge"] = -1 + validate_input(inp_obj) + + with pytest.raises(EncoderError): + inp_obj.keywords["uhf"] = 0 + validate_input(inp_obj) + + with pytest.raises(EncoderError): + inp_obj.keywords["runtype"] = "imtd-gc" + validate_input(inp_obj) + + +def test_toml_dict(): + """Test converting a ProgramInput object to a TOML dictionary for CREST.""" + + weird_water = water.model_copy(update={"charge": -1, "multiplicity": 2}) + inp_obj = ProgramInput( + structure=weird_water, + calctype="conformer_search", + model={"method": "gfn2"}, + keywords={"calculation": {"level": [{"alpb": "acetonitrile"}]}}, + ) + + toml_dict = _to_toml_dict(inp_obj, "struct.xyz") + + assert toml_dict["input"] == "struct.xyz" + assert toml_dict["runtype"] == "imtd-gc" + 
assert toml_dict.get("threads") is not None # added implicitly if not set + + # Adds values correctly to existing "calculation" key + assert toml_dict["calculation"]["level"][0]["method"] == "gfn2" + assert toml_dict["calculation"]["level"][0]["charge"] == -1 + assert toml_dict["calculation"]["level"][0]["uhf"] == 1 + assert toml_dict["calculation"]["level"][0]["alpb"] == "acetonitrile" + + # Respects explicitly set threads and handles no "calculation" key + inp_obj = ProgramInput( + structure=weird_water, + calctype="conformer_search", + model={"method": "gfn2"}, + keywords={"threads": 2}, + ) + + toml_dict = _to_toml_dict(inp_obj, "struct.xyz") + assert toml_dict["threads"] == 2 + assert toml_dict["calculation"]["level"][0]["method"] == "gfn2" + assert toml_dict["calculation"]["level"][0]["charge"] == -1 + assert toml_dict["calculation"]["level"][0]["uhf"] == 1 + + +def test_parse_structures(test_data_dir): + structs = parse_structures(test_data_dir / "crest_output" / "crest_conformers.xyz") + assert len(structs) == 3 + energies = [-107.04437987, -107.04429757, -107.04413699] + for i, struct in enumerate(structs): + assert float(struct.extras[Structure._xyz_comment_key][0]) == energies[i] + assert struct.formula == "C17H7F12N2O" + + +def test_parse_structure_no_file(): + assert parse_structures("no_file.xyz") == [] + + +def test_parse_conformer_search_dir(test_data_dir): + csr = parse_conformer_search_dir(test_data_dir / "crest_output") + assert len(csr.conformers) == 3 + assert len(csr.rotamers) == 2 + # Check conformer energies + conf_energies = [-107.04437987, -107.04429757, -107.04413699] + for i, struct in enumerate(csr.conformers): + assert csr.conformer_energies[i] == conf_energies[i] + assert struct.charge == 0 + assert struct.multiplicity == 1 + # Check rotamer energies + rot_energies = [-107.044379870000, -107.044379830000] + for i, struct in enumerate(csr.rotamers): + assert csr.rotamer_energies[i] == rot_energies[i] + assert struct.charge == 0 + 
assert struct.multiplicity == 1 + + +def test_parse_conformer_search_charge_multiplicity(test_data_dir): + csr = parse_conformer_search_dir( + test_data_dir / "crest_output", charge=-2, multiplicity=3 + ) + for struct_type in ["conformers", "rotamers"]: + for struct in getattr(csr, struct_type): + assert struct.charge == -2 + assert struct.multiplicity == 3