From f7c186027175edd4e90d59d936e81d3edbd1e15d Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Mon, 24 Jul 2023 09:58:11 -0500 Subject: [PATCH 01/10] rough draft refactor gather --- openfecli/commands/gather.py | 221 ++++++++++++++++++++--------------- 1 file changed, 129 insertions(+), 92 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 225226eba..f446cfd70 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -51,6 +51,116 @@ def legacy_get_type(res_fn): return 'complex' +def _get_ddgs(legs): + DDGs = [] + for ligpair, vals in sorted(legs.items()): + DDGbind = None + DDGhyd = None + bind_unc = None + hyd_unc = None + + if 'complex' in vals and 'solvent' in vals: + DG1_mag, DG1_unc = vals['complex'] + DG2_mag, DG2_unc = vals['solvent'] + if not ((DG1_mag is None) or (DG2_mag is None)): + # DDG(2,1)bind = DG(1->2)complex - DG(1->2)solvent + DDGbind = (DG1_mag - DG2_mag).m + bind_unc = np.sqrt(np.sum(np.square([DG1_unc.m, DG2_unc.m]))) + elif 'solvent' in vals and 'vacuum' in vals: + DG1_mag, DG1_unc = vals['solvent'] + DG2_mag, DG2_unc = vals['vacuum'] + if not ((DG1_mag is None) or (DG2_mag is None)): + DDGhyd = (DG1_mag - DG2_mag).m + hyd_unc = np.sqrt(np.sum(np.square([DG1_unc.m, DG2_unc.m]))) + else: # -no-cov- + raise RuntimeError(f"Unknown DDG type for {vals}") + + DDGs.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc)) + + return DDGs + +def _write_ddg(legs, output): + DDGs = _get_ddgs(legs) + for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs: + name = f"{ligB}, {ligA}" + if DDGbind is not None: + DDGbind, bind_unc = dp2(DDGbind), dp2(bind_unc) + output.write(f'DDGbind({name})\tRBFE\t{ligA}\t{ligB}' + f'\t{DDGbind}\t{bind_unc}\n') + if DDGhyd is not None: + DDGhyd, hyd_unc = dp2(DDGhyd), dp2(hyd_unc) + output.write(f'DDGhyd({name})\tRHFE\t{ligA}\t{ligB}\t' + f'{DDGhyd}\t{hyd_unc}\n') + + + ... + +def _write_raw_dg(legs, output): + for ligpair, vals in sorted(legs.items()): + name = ', '.join(ligpair) + for simtype, (m, u) in sorted(vals.items()): + if m is None: + m, u = 'NaN', 'NaN' + else: + m, u = dp2(m.m), dp2(u.m) + output.write(f'DG{simtype}({name})\t{simtype}\t{ligpair[0]}\t' + f'{ligpair[1]}\t{m}\t{u}\n') + +def _write_dg_mle(legs, output): + MLEs = [] + # 4b) perform MLE + g = nx.DiGraph() + nm_to_idx = {} + DDGbind_count = 0 + for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs: + if DDGbind is None: + continue + DDGbind_count += 1 + + # tl;dr this is super paranoid, but safer for now: + # cinnabar seems to rely on the ordering of values within the graph + # to correspond to the matrix that comes out from mle() + # internally they also convert the ligand names to ints, which I think + # has a side effect of giving the graph nodes a predictable order. + # fwiw this code didn't affect ordering locally + try: + idA = nm_to_idx[ligA] + except KeyError: + idA = len(nm_to_idx) + nm_to_idx[ligA] = idA + try: + idB = nm_to_idx[ligB] + except KeyError: + idB = len(nm_to_idx) + nm_to_idx[ligB] = idB + + g.add_edge( + idA, idB, calc_DDG=DDGbind, calc_dDDG=bind_unc, + ) + if DDGbind_count > 2: + idx_to_nm = {v: k for k, v in nm_to_idx.items()} + + f_i, df_i = mle(g, factor='calc_DDG') + df_i = np.diagonal(df_i) ** 0.5 + + for node, f, df in zip(g.nodes, f_i, df_i): + ligname = idx_to_nm[node] + MLEs.append((ligname, f, df)) + + for ligA, DG, unc_DG in MLEs: + DG, unc_DG = dp2(DG), dp2(unc_DG) + output.write(f'DGbind({ligA})\tDG(MLE)\tZero\t{ligA}\t{DG}\t{unc_DG}\n') + + +def dp2(v: float) -> str: + # turns 0.0012345 -> '0.0012', round() would get this wrong + return np.format_float_positional(v, precision=2, trim='0', + fractional=False) + + + + + @click.command( 'gather', short_help="Gather result jsons for network of RFE results into a TSV file" @@ -59,10 +169,20 @@ def legacy_get_type(res_fn): type=click.Path(dir_okay=True, file_okay=False, path_type=pathlib.Path), required=True) +@click.option( + '--report', + type=click.Choice(['dg', 'ddg', 'leg'], case_sensitive=False), + default="dg", show_default=True, + help=( + "What data to report. 'dg' gives maximum-likelihood estimate of " + "asbolute deltaG, 'ddg' gives delta-delta-G, and 'leg' gives the " + "raw result of the deltaG for a leg." + ) +) @click.option('output', '-o', type=click.File(mode='w'), default='-') -def gather(rootdir, output): +def gather(rootdir, output, report): """Gather simulation result jsons of relative calculations to a tsv file Will walk ROOTDIR recursively and find all results files ending in .json @@ -96,11 +216,6 @@ def gather(rootdir, output): import numpy as np from cinnabar.stats import mle - def dp2(v: float) -> str: - # turns 0.0012345 -> '0.0012', round() would get this wrong - return np.format_float_positional(v, precision=2, trim='0', - fractional=False) - # 1) find all possible jsons json_fns = glob.glob(str(rootdir) + '/**/*json', recursive=True) @@ -130,98 +245,20 @@ def dp2(v: float) -> str: legs[names][simtype] = result['estimate'], result['uncertainty'] # 4a for each ligand pair, resolve legs - DDGs = [] - for ligpair, vals in sorted(legs.items()): - DDGbind = None - DDGhyd = None - bind_unc = None - hyd_unc = None - - if 'complex' in vals and 'solvent' in vals: - DG1_mag, DG1_unc = vals['complex'] - DG2_mag, DG2_unc = vals['solvent'] - if not ((DG1_mag is None) or (DG2_mag is None)): - # DDG(2,1)bind = DG(1->2)complex - DG(1->2)solvent - DDGbind = (DG1_mag - DG2_mag).m - bind_unc = np.sqrt(np.sum(np.square([DG1_unc.m, DG2_unc.m]))) - if 'solvent' in vals and 'vacuum' in vals: - DG1_mag, DG1_unc = vals['solvent'] - DG2_mag, DG2_unc = vals['vacuum'] - if not ((DG1_mag is None) or (DG2_mag is None)): - DDGhyd = (DG1_mag - DG2_mag).m - hyd_unc = np.sqrt(np.sum(np.square([DG1_unc.m, DG2_unc.m]))) - - DDGs.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc)) - - MLEs = [] - # 4b) perform MLE - g = nx.DiGraph() - nm_to_idx = {} - DDGbind_count = 0 - for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs: - if DDGbind is None: - continue - DDGbind_count += 1 - - # tl;dr this is super paranoid, but safer for now: - # cinnabar seems to rely on the ordering of values within the graph - # to correspond to the matrix that comes out from mle() - # internally they also convert the ligand names to ints, which I think - # has a side effect of giving the graph nodes a predictable order. - # fwiw this code didn't affect ordering locally - try: - idA = nm_to_idx[ligA] - except KeyError: - idA = len(nm_to_idx) - nm_to_idx[ligA] = idA - try: - idB = nm_to_idx[ligB] - except KeyError: - idB = len(nm_to_idx) - nm_to_idx[ligB] = idB - - g.add_edge( - idA, idB, calc_DDG=DDGbind, calc_dDDG=bind_unc, - ) - if DDGbind_count > 2: - idx_to_nm = {v: k for k, v in nm_to_idx.items()} - - f_i, df_i = mle(g, factor='calc_DDG') - df_i = np.diagonal(df_i) ** 0.5 - - for node, f, df in zip(g.nodes, f_i, df_i): - ligname = idx_to_nm[node] - MLEs.append((ligname, f, df)) + DDGs = _get_ddgs(legs) output.write('measurement\ttype\tligand_i\tligand_j\testimate (kcal/mol)' '\tuncertainty (kcal/mol)\n') - # 5a) write out MLE values - for ligA, DG, unc_DG in MLEs: - DG, unc_DG = dp2(DG), dp2(unc_DG) - output.write(f'DGbind({ligA})\tDG(MLE)\tZero\t{ligA}\t{DG}\t{unc_DG}\n') + # 5a) write out MLE values # 5b) write out DDG values - for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs: - name = f"{ligB}, {ligA}" - if DDGbind is not None: - DDGbind, bind_unc = dp2(DDGbind), dp2(bind_unc) - output.write(f'DDGbind({name})\tRBFE\t{ligA}\t{ligB}' - f'\t{DDGbind}\t{bind_unc}\n') - if DDGhyd is not None: - DDGhyd, hyd_unc = dp2(DDGhyd), dp2(hyd_unc) - output.write(f'DDGhyd({name})\tRHFE\t{ligA}\t{ligB}\t' - f'{DDGhyd}\t{hyd_unc}\n') - # 5c) write out each leg - for ligpair, vals in sorted(legs.items()): - name = ', '.join(ligpair) - for simtype, (m, u) in sorted(vals.items()): - if m is None: - m, u = 'NaN', 'NaN' - else: - m, u = dp2(m.m), dp2(u.m) - output.write(f'DG{simtype}({name})\t{simtype}\t{ligpair[0]}\t' - f'{ligpair[1]}\t{m}\t{u}\n') + writing_func = { + 'dg': _write_dg_mle, + 'ddg': _write_ddg, + 'leg': _write_raw_dg, + }[report.lower()] + writing_func(legs, output) PLUGIN = OFECommandPlugin( From c368ca9bf7886e13235f4b6604cafd7f87b1fdcb Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Mon, 24 Jul 2023 10:10:17 -0500 Subject: [PATCH 02/10] version that seems to work now to update to make output cleaner --- openfecli/commands/gather.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index f446cfd70..9bb5d5edb 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -52,6 +52,7 @@ def legacy_get_type(res_fn): def _get_ddgs(legs): + import numpy as np DDGs = [] for ligpair, vals in sorted(legs.items()): DDGbind = None @@ -107,6 +108,10 @@ def _write_raw_dg(legs, output): f'{ligpair[1]}\t{m}\t{u}\n') def _write_dg_mle(legs, output): + import networkx as nx + import numpy as np + from cinnabar.stats import mle + DDGs = _get_ddgs(legs) MLEs = [] # 4b) perform MLE g = nx.DiGraph() @@ -154,6 +159,7 @@ def _write_dg_mle(legs, output): def dp2(v: float) -> str: # turns 0.0012345 -> '0.0012', round() would get this wrong + import numpy as np return np.format_float_positional(v, precision=2, trim='0', fractional=False) @@ -212,9 +218,6 @@ def gather(rootdir, output, report): """ from collections import defaultdict import glob - import networkx as nx - import numpy as np - from cinnabar.stats import mle # 1) find all possible jsons json_fns = glob.glob(str(rootdir) + '/**/*json', recursive=True) From 7d6887af25f15e18e1f9c0f3b52942b61228519b Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Mon, 24 Jul 2023 14:01:37 -0500 Subject: [PATCH 03/10] improve estimate/uncertainty rounding/reporting --- openfecli/commands/gather.py | 44 +++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 9bb5d5edb..c98a9f1e9 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -5,6 +5,39 @@ from openfecli import OFECommandPlugin import pathlib +from typing import Tuple + +def _get_column(val): + import numpy as np + if val == 0: + return 0 + + log10 = np.log10(val) + + if log10 >= 0.0: + col = np.floor(log10 + 1) + else: + col = np.floor(log10) + return int(col) + +def format_estimate_uncertainty( + est: float, + unc: float, + unc_prec: int = 2, +) -> Tuple[str, str]: + import numpy as np + # get the last column needed for uncertainty + unc_col = _get_column(unc) - (unc_prec - 1) + + if unc_col < 0: + est_str = f"{est:.{-unc_col}f}" + unc_str = f"{unc:.{-unc_col}f}" + else: + est_str = f"{np.round(est, -unc_col + 1)}" + unc_str = f"{np.round(unc, -unc_col + 1)}" + + return est_str, unc_str + def is_results_json(f): # sanity check on files before we try and deserialize @@ -85,7 +118,9 @@ def _write_ddg(legs, output): for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs: name = f"{ligB}, {ligA}" if DDGbind is not None: - DDGbind, bind_unc = dp2(DDGbind), dp2(bind_unc) + DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, + bind_unc) + # DDGbind, bind_unc = dp2(DDGbind), dp2(bind_unc) output.write(f'DDGbind({name})\tRBFE\t{ligA}\t{ligB}' f'\t{DDGbind}\t{bind_unc}\n') if DDGhyd is not None: @@ -103,7 +138,7 @@ def _write_raw_dg(legs, output): if m is None: m, u = 'NaN', 'NaN' else: - m, u = dp2(m.m), dp2(u.m) + m, u = format_estimate_uncertainty(m.m, u.m) output.write(f'DG{simtype}({name})\t{simtype}\t{ligpair[0]}\t' f'{ligpair[1]}\t{m}\t{u}\n') @@ -153,7 +188,7 @@ def _write_dg_mle(legs, output): MLEs.append((ligname, f, df)) for ligA, DG, unc_DG in MLEs: - DG, unc_DG = dp2(DG), dp2(unc_DG) + DG, unc_DG = format_estimate_uncertainty(DG, unc_DG) output.write(f'DGbind({ligA})\tDG(MLE)\tZero\t{ligA}\t{DG}\t{unc_DG}\n') @@ -269,3 +304,6 @@ def gather(rootdir, output, report): section='Quickrun Executor', requires_ofe=(0, 6), ) + +if __name__ == "__main__": + gather() From 5d181e5b2768071116e482e7ffeb8861280cbf81 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Mon, 24 Jul 2023 14:33:53 -0500 Subject: [PATCH 04/10] update to simplify output tables --- openfecli/commands/gather.py | 53 +++++++++++++++--------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index c98a9f1e9..ae05df4fa 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -113,25 +113,24 @@ def _get_ddgs(legs): return DDGs -def _write_ddg(legs, output): +def _write_ddg(legs, writer): DDGs = _get_ddgs(legs) + writer.writerow(["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)", + "uncertainty (kcal/mol)"]) for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs: name = f"{ligB}, {ligA}" if DDGbind is not None: DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, bind_unc) - # DDGbind, bind_unc = dp2(DDGbind), dp2(bind_unc) - output.write(f'DDGbind({name})\tRBFE\t{ligA}\t{ligB}' - f'\t{DDGbind}\t{bind_unc}\n') + writer.writerow([ligA, ligB, DDGbind, bind_unc]) if DDGhyd is not None: - DDGhyd, hyd_unc = dp2(DDGhyd), dp2(hyd_unc) - output.write(f'DDGhyd({name})\tRHFE\t{ligA}\t{ligB}\t' - f'{DDGhyd}\t{hyd_unc}\n') + DDGhyd, hyd_unc = format_estimate_uncertainty(DDGbind, + bind_unc) + writer.writerow([ligA, ligB, DDGhyd, hyd_unc]) - - ... - -def _write_raw_dg(legs, output): +def _write_raw_dg(legs, writer): + writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)", + "uncertainty (kcal/mol)"]) for ligpair, vals in sorted(legs.items()): name = ', '.join(ligpair) for simtype, (m, u) in sorted(vals.items()): @@ -139,10 +138,9 @@ def _write_raw_dg(legs, output): m, u = 'NaN', 'NaN' else: m, u = format_estimate_uncertainty(m.m, u.m) - output.write(f'DG{simtype}({name})\t{simtype}\t{ligpair[0]}\t' - f'{ligpair[1]}\t{m}\t{u}\n') + writer.writerow([simtype, *ligpair, m, u]) -def _write_dg_mle(legs, output): +def _write_dg_mle(legs, writer): import networkx as nx import numpy as np from cinnabar.stats import mle @@ -187,19 +185,11 @@ def _write_dg_mle(legs, output): ligname = idx_to_nm[node] MLEs.append((ligname, f, df)) + writer.writerow(["ligand", "DG(MLE) (kcal/mol)", + "uncertainty (kcal/mol)"]) for ligA, DG, unc_DG in MLEs: DG, unc_DG = format_estimate_uncertainty(DG, unc_DG) - output.write(f'DGbind({ligA})\tDG(MLE)\tZero\t{ligA}\t{DG}\t{unc_DG}\n') - - -def dp2(v: float) -> str: - # turns 0.0012345 -> '0.0012', round() would get this wrong - import numpy as np - return np.format_float_positional(v, precision=2, trim='0', - fractional=False) - - - + writer.writerow([ligA, DG, unc_DG]) @click.command( @@ -253,6 +243,7 @@ def gather(rootdir, output, report): """ from collections import defaultdict import glob + import csv # 1) find all possible jsons json_fns = glob.glob(str(rootdir) + '/**/*json', recursive=True) @@ -282,11 +273,11 @@ def gather(rootdir, output, report): legs[names][simtype] = result['estimate'], result['uncertainty'] - # 4a for each ligand pair, resolve legs - DDGs = _get_ddgs(legs) - - output.write('measurement\ttype\tligand_i\tligand_j\testimate (kcal/mol)' - '\tuncertainty (kcal/mol)\n') + writer = csv.writer( + output, + delimiter="\t", + lineterminator="\n", # to exactly reproduce previous, prefer "\r\n" + ) # 5a) write out MLE values # 5b) write out DDG values @@ -296,7 +287,7 @@ def gather(rootdir, output, report): 'ddg': _write_ddg, 'leg': _write_raw_dg, }[report.lower()] - writing_func(legs, output) + writing_func(legs, writer) PLUGIN = OFECommandPlugin( From b3591f2c0ebff6550ae4862f203e92487d0024a1 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Thu, 27 Jul 2023 14:17:08 -0500 Subject: [PATCH 05/10] use 1 digit in uncertainty; `leg` => `legs` --- openfecli/commands/gather.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index ae05df4fa..cfff0921a 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -23,7 +23,7 @@ def _get_column(val): def format_estimate_uncertainty( est: float, unc: float, - unc_prec: int = 2, + unc_prec: int = 1, ) -> Tuple[str, str]: import numpy as np # get the last column needed for uncertainty @@ -202,7 +202,7 @@ def _write_dg_mle(legs, writer): required=True) @click.option( '--report', - type=click.Choice(['dg', 'ddg', 'leg'], case_sensitive=False), + type=click.Choice(['dg', 'ddg', 'legs'], case_sensitive=False), default="dg", show_default=True, help=( "What data to report. 'dg' gives maximum-likelihood estimate of " @@ -285,7 +285,7 @@ def gather(rootdir, output, report): writing_func = { 'dg': _write_dg_mle, 'ddg': _write_ddg, - 'leg': _write_raw_dg, + 'legs': _write_raw_dg, }[report.lower()] writing_func(legs, writer) From 515ad62a451f3ede85113ebbe4563d5ad0266e2f Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Fri, 28 Jul 2023 13:18:20 -0500 Subject: [PATCH 06/10] Apply suggestions from code review Co-authored-by: Irfan Alibay --- openfecli/commands/gather.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index cfff0921a..c8bcfb322 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -20,6 +20,7 @@ def _get_column(val): col = np.floor(log10) return int(col) + def format_estimate_uncertainty( est: float, unc: float, @@ -113,6 +114,7 @@ def _get_ddgs(legs): return DDGs + def _write_ddg(legs, writer): DDGs = _get_ddgs(legs) writer.writerow(["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)", @@ -128,6 +130,7 @@ def _write_ddg(legs, writer): bind_unc) writer.writerow([ligA, ligB, DDGhyd, hyd_unc]) + def _write_raw_dg(legs, writer): writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)", "uncertainty (kcal/mol)"]) @@ -140,6 +143,7 @@ def _write_raw_dg(legs, writer): m, u = format_estimate_uncertainty(m.m, u.m) writer.writerow([simtype, *ligpair, m, u]) + def _write_dg_mle(legs, writer): import networkx as nx import numpy as np From 16b2a344fd44b8e360c5ee73d00e7625fd088ef8 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Mon, 31 Jul 2023 10:58:56 -0500 Subject: [PATCH 07/10] Tuple => tuple --- openfecli/commands/gather.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index cfff0921a..af7501600 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -5,7 +5,6 @@ from openfecli import OFECommandPlugin import pathlib -from typing import Tuple def _get_column(val): import numpy as np @@ -24,7 +23,7 @@ def format_estimate_uncertainty( est: float, unc: float, unc_prec: int = 1, -) -> Tuple[str, str]: +) -> tuple[str, str]: import numpy as np # get the last column needed for uncertainty unc_col = _get_column(unc) - (unc_prec - 1) From cc2fecae95fe9e853e63985b5bde5b2cb2f7188d Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Tue, 1 Aug 2023 11:27:41 -0500 Subject: [PATCH 08/10] add tests for uncertainty calc stuff --- openfecli/tests/commands/test_gather.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/openfecli/tests/commands/test_gather.py b/openfecli/tests/commands/test_gather.py index afafc4e29..68f7b6475 100644 --- a/openfecli/tests/commands/test_gather.py +++ b/openfecli/tests/commands/test_gather.py @@ -4,7 +4,24 @@ import tarfile import pytest -from openfecli.commands.gather import gather +from openfecli.commands.gather import ( + gather, format_estimate_uncertainty, _get_column +) + +@pytest.mark.parametrize('est,unc,unc_prec,est_str,unc_str', [ + (12.432, 0.111, 2, "12.43", "0.11"), + (0.9999, 0.01, 2, "1.000", "0.010"), + (1234, 100, 2, "1230", "100"), +]) +def test_format_estimate_uncertainty(est, unc, unc_prec, est_str, unc_str): + assert format_estimate_uncertainty(est, unc, unc_prec) == (est_str, unc_str) + +@pytest.mark.parametrize('val, col', [ + (1.0, 1), (0.1, -1), (-0.0, 0), (0.0, 0), (0.2, -1), (0.9, -1), + (0.011, -2), (9, 1), (10, 2), (15, 2), +]) +def test_get_column(val, col): + assert _get_column(val) == col @pytest.fixture From bf0661a21431c2de67e8229790a045bdfedf98a5 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Tue, 1 Aug 2023 12:13:44 -0500 Subject: [PATCH 09/10] Add HyphenAwareChoice --- openfecli/clicktypes/__init__.py | 1 + openfecli/clicktypes/hyphenchoice.py | 13 +++++++++++++ openfecli/commands/gather.py | 3 ++- .../tests/clicktypes/test_hyphenchoice.py | 19 +++++++++++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 openfecli/clicktypes/__init__.py create mode 100644 openfecli/clicktypes/hyphenchoice.py create mode 100644 openfecli/tests/clicktypes/test_hyphenchoice.py diff --git a/openfecli/clicktypes/__init__.py b/openfecli/clicktypes/__init__.py new file mode 100644 index 000000000..4d9120e0e --- /dev/null +++ b/openfecli/clicktypes/__init__.py @@ -0,0 +1 @@ +from hyphenchoice import HyphenAwareChoice diff --git a/openfecli/clicktypes/hyphenchoice.py b/openfecli/clicktypes/hyphenchoice.py new file mode 100644 index 000000000..70ac77501 --- /dev/null +++ b/openfecli/clicktypes/hyphenchoice.py @@ -0,0 +1,13 @@ +import click + +def _normalize_to_hyphen(string): + return string.replace("_", "-") + +class HyphenAwareChoice(click.Choice): + def __init__(self, choices, case_sensitive=True): + choices = [_normalize_to_hyphen(choice) for choice in choices] + super().__init__(choices, case_sensitive) + + def convert(self, value, param, ctx): + value = _normalize_to_hyphen(value) + return super().convert(value, param, ctx) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 6633783ac..ac3ee9ce7 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -3,6 +3,7 @@ import click from openfecli import OFECommandPlugin +from openfecli.clicktypes import HyphenAwareChoice import pathlib @@ -205,7 +206,7 @@ def _write_dg_mle(legs, writer): required=True) @click.option( '--report', - type=click.Choice(['dg', 'ddg', 'legs'], case_sensitive=False), + type=HyphenAwareChoice(['dg', 'ddg', 'dg-raw'], case_sensitive=False), default="dg", show_default=True, help=( "What data to report. 'dg' gives maximum-likelihood estimate of " diff --git a/openfecli/tests/clicktypes/test_hyphenchoice.py b/openfecli/tests/clicktypes/test_hyphenchoice.py new file mode 100644 index 000000000..c7c0bb5a2 --- /dev/null +++ b/openfecli/tests/clicktypes/test_hyphenchoice.py @@ -0,0 +1,19 @@ +import pytest +import click +from openfecli.clicktypes.hyphenchoice import HyphenAwareChoice + +class TestHyphenAwareChoice: + @pytest.mark.parametrize('value', [ + "foo_bar_baz", "foo_bar-baz", "foo-bar_baz", "foo-bar-baz" + ]) + def test_init(self, value): + ch = HyphenAwareChoice([value]) + assert ch.choices == ["foo-bar-baz"] + + @pytest.mark.parametrize('value', [ + "foo_bar_baz", "foo_bar-baz", "foo-bar_baz", "foo-bar-baz" + ]) + def test_convert(self, value): + ch = HyphenAwareChoice(['foo-bar-baz']) + # counting on __call__ to get to convert() + assert ch(value) == "foo-bar-baz" From d13e17530710d02a1b0ed8598fe464fffeb5a6db Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Tue, 1 Aug 2023 13:25:41 -0500 Subject: [PATCH 10/10] clean up a bit of gather --- openfecli/clicktypes/__init__.py | 2 +- openfecli/commands/gather.py | 44 ++++++++++++++------------------ 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/openfecli/clicktypes/__init__.py b/openfecli/clicktypes/__init__.py index 4d9120e0e..29b5b1291 100644 --- a/openfecli/clicktypes/__init__.py +++ b/openfecli/clicktypes/__init__.py @@ -1 +1 @@ -from hyphenchoice import HyphenAwareChoice +from .hyphenchoice import HyphenAwareChoice diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index ac3ee9ce7..7b9e466e0 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -131,7 +131,7 @@ def _write_ddg(legs, writer): writer.writerow([ligA, ligB, DDGhyd, hyd_unc]) -def _write_raw_dg(legs, writer): +def _write_dg_raw(legs, writer): writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)", "uncertainty (kcal/mol)"]) for ligpair, vals in sorted(legs.items()): @@ -210,8 +210,8 @@ def _write_dg_mle(legs, writer): default="dg", show_default=True, help=( "What data to report. 'dg' gives maximum-likelihood estimate of " - "asbolute deltaG, 'ddg' gives delta-delta-G, and 'leg' gives the " - "raw result of the deltaG for a leg." + "absolute deltaG, 'ddg' gives delta-delta-G, and 'dg-raw' gives " + "the raw result of the deltaG for a leg." ) ) @click.option('output', '-o', @@ -220,30 +220,24 @@ def _write_dg_mle(legs, writer): def gather(rootdir, output, report): """Gather simulation result jsons of relative calculations to a tsv file - Will walk ROOTDIR recursively and find all results files ending in .json - (i.e those produced by the quickrun command). Each of these contains the - results of a separate leg from a relative free energy thermodynamic cycle. + This walks ROOTDIR recursively and finds all result JSON files from the + quickrun command (these files must end in .json). Each of these contains + the results of a separate leg from a relative free energy thermodynamic + cycle. - Paired legs of simulations will be combined to give the DDG values between - two ligands in the corresponding phase, producing either binding ('DDGbind') - or hydration ('DDGhyd') relative free energies. These will be reported as - 'DDGbind(B,A)' meaning DGbind(B) - DGbind(A), the difference in free energy - of binding for ligand B relative to ligand A. - - Individual leg results will be also be written. These are reported as - either DGvacuum(A,B) DGsolvent(A,B) or DGcomplex(A,B) for the vacuum, - solvent or complex free energy of transmuting ligand A to ligand B. + The results reported depend on ``--report`` flag: \b - Will produce a **tab** separated file with 6 columns: - 1) a description of the measurement, for example DDGhyd(A, B) - 2) the type of this measurement, either RBFE or RHFE - 3) the identifier of the first ligand - 4) the identifier of the second ligand - 5) the estimated value (in kcal/mol) - 6) the uncertainty on the value (also kcal/mol) - - By default, outputs to stdout, use -o option to choose file. + * 'dg' (default) reports the ligand and the results are the maximum + likelihood estimate of its absolute free, and the uncertainty in + that. + * 'ddg' reports pairs of ligand_i and ligand_j, the calculated + relative free energy DDG(i->j) = DG(j) - DG(i) and its uncertainty + * 'dg-raw' reports the raw results, giving the leg (vacuum, solvent, or + complex), ligand_i, ligand_j, the raw DG(i->j) associated with it. + + The output is a table of **tab** separated values. By default, this + outputs to stdout, use the -o option to choose an output file. """ from collections import defaultdict import glob @@ -289,7 +283,7 @@ def gather(rootdir, output, report): writing_func = { 'dg': _write_dg_mle, 'ddg': _write_ddg, - 'legs': _write_raw_dg, + 'dg-raw': _write_dg_raw, }[report.lower()] writing_func(legs, writer)