From d0a83c731cc0f0d0bec9315175c40fb798c55bbf Mon Sep 17 00:00:00 2001 From: Rob Taylor Date: Mon, 20 May 2019 03:22:37 -0700 Subject: [PATCH 1/7] round bins in densify to prevent pd.reindex errors --- fast_carpenter/summary/binned_dataframe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fast_carpenter/summary/binned_dataframe.py b/fast_carpenter/summary/binned_dataframe.py index 39206e2..5a35698 100644 --- a/fast_carpenter/summary/binned_dataframe.py +++ b/fast_carpenter/summary/binned_dataframe.py @@ -3,6 +3,7 @@ """ import os import pandas as pd +import numpy as np from . import binning_config as cfg @@ -76,7 +77,7 @@ def densify_dataframe(in_df, binnings): if bins is None: index_values.append(in_index.unique(dim)) continue - index_values.append(pd.IntervalIndex.from_breaks(bins, closed="left")) + index_values.append(pd.IntervalIndex.from_breaks(np.round(bins, 3), closed="left")) out_index = pd.MultiIndex.from_product(index_values, names=in_index.names) out_df = in_df.reindex(index=out_index, copy=False) return out_df From 32d0b0870eeddcb6a0a0bfd57f3d9fb2edd10968 Mon Sep 17 00:00:00 2001 From: Rob Taylor Date: Mon, 20 May 2019 07:16:32 -0700 Subject: [PATCH 2/7] main now parses args given as an argument. 
Move creation of parser object to separate function to allow use in other scripts --- fast_carpenter/__main__.py | 45 +++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/fast_carpenter/__main__.py b/fast_carpenter/__main__.py index ea8e888..7396fdd 100644 --- a/fast_carpenter/__main__.py +++ b/fast_carpenter/__main__.py @@ -21,7 +21,7 @@ def collect(self, *args, **kwargs): pass -def process_args(args=None): +def create_parser(): from argparse import ArgumentParser, Action class StagesHelp(Action): @@ -56,11 +56,50 @@ def __call__(self, parser, namespace, values, option_string=None): help="Print help specific to the available stages") parser.add_argument("--help-stages-full", action=StagesHelp, metavar="stage", help="Print the full help specific to the available stages") - return parser.parse_args() + + return parser + + +def process_args(args=None): + from argparse import ArgumentParser, Action + + class StagesHelp(Action): + def __call__(self, parser, namespace, values, option_string=None): + full_output = option_string == "--help-stages-full" + help_stages(values, full_output=full_output) + sys.exit(0) + + parser = argumentparser(description=__doc__) + parser.add_argument("dataset_cfg", type=str, + help="dataset config to run over") + parser.add_argument("sequence_cfg", type=str, + help="config for how to process events") + parser.add_argument("--outdir", default="output", type=str, + help="where to save the results") + parser.add_argument("--mode", default="multiprocessing", type=str, + help="which mode to run in (multiprocessing, htcondor, sge)") + parser.add_argument("--ncores", default=0, type=int, + help="number of cores to run on") + parser.add_argument("--nblocks-per-dataset", default=-1, type=int, + help="number of blocks per dataset") + parser.add_argument("--nblocks-per-sample", default=-1, type=int, + help="number of blocks per sample") + parser.add_argument("--blocksize", default=1000000, type=int, + 
help="number of events per block") + parser.add_argument("--quiet", default=false, action='store_true', + help="keep progress report quiet") + parser.add_argument("--profile", default=false, action='store_true', + help="profile the code") + parser.add_argument("--help-stages", nargs="?", default=none, action=stageshelp, + metavar="stage-name-regex", + help="print help specific to the available stages") + parser.add_argument("--help-stages-full", action=stageshelp, metavar="stage", + help="print the full help specific to the available stages") + return parser.parse_args(args) def main(args=None): - args = process_args(args) + args = creater_parser().parse_args(args) sequence = fast_flow.read_sequence_yaml(args.sequence_cfg, output_dir=args.outdir) From 1504892656e93853eb6259cdec56059d7b573935 Mon Sep 17 00:00:00 2001 From: Rob Taylor Date: Mon, 20 May 2019 09:10:36 -0700 Subject: [PATCH 3/7] Move core of __main__ function to separate run_carpenter function --- fast_carpenter/__main__.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fast_carpenter/__main__.py b/fast_carpenter/__main__.py index 7396fdd..5f2a416 100644 --- a/fast_carpenter/__main__.py +++ b/fast_carpenter/__main__.py @@ -99,7 +99,7 @@ def __call__(self, parser, namespace, values, option_string=None): def main(args=None): - args = creater_parser().parse_args(args) + args = create_parser().parse_args(args) sequence = fast_flow.read_sequence_yaml(args.sequence_cfg, output_dir=args.outdir) @@ -107,6 +107,11 @@ def main(args=None): mkdir_p(args.outdir) + _, ret_val = run_carpenter(sequence, datasets, args) + print(ret_val) + return 0 + +def run_carpenter(sequence, datasets, args): process = atup.AtUproot(args.outdir, quiet=args.quiet, parallel_mode=args.mode, @@ -120,9 +125,7 @@ def main(args=None): sequence = [(s, s.collector() if hasattr(s, "collector") else DummyCollector()) for s in sequence] ret_val = process.run(datasets, sequence) - print(ret_val) - return 0 - + return 
sequence, ret_val if __name__ == "__main__": main() From 99586070dfb7b74daed43cbe72a79b20bd21d900 Mon Sep 17 00:00:00 2001 From: Rob Taylor Date: Wed, 22 May 2019 15:03:52 +0100 Subject: [PATCH 4/7] Remove defunct parse args function --- fast_carpenter/__main__.py | 39 -------------------------------------- 1 file changed, 39 deletions(-) diff --git a/fast_carpenter/__main__.py b/fast_carpenter/__main__.py index 5f2a416..234bab5 100644 --- a/fast_carpenter/__main__.py +++ b/fast_carpenter/__main__.py @@ -59,45 +59,6 @@ def __call__(self, parser, namespace, values, option_string=None): return parser - -def process_args(args=None): - from argparse import ArgumentParser, Action - - class StagesHelp(Action): - def __call__(self, parser, namespace, values, option_string=None): - full_output = option_string == "--help-stages-full" - help_stages(values, full_output=full_output) - sys.exit(0) - - parser = argumentparser(description=__doc__) - parser.add_argument("dataset_cfg", type=str, - help="dataset config to run over") - parser.add_argument("sequence_cfg", type=str, - help="config for how to process events") - parser.add_argument("--outdir", default="output", type=str, - help="where to save the results") - parser.add_argument("--mode", default="multiprocessing", type=str, - help="which mode to run in (multiprocessing, htcondor, sge)") - parser.add_argument("--ncores", default=0, type=int, - help="number of cores to run on") - parser.add_argument("--nblocks-per-dataset", default=-1, type=int, - help="number of blocks per dataset") - parser.add_argument("--nblocks-per-sample", default=-1, type=int, - help="number of blocks per sample") - parser.add_argument("--blocksize", default=1000000, type=int, - help="number of events per block") - parser.add_argument("--quiet", default=false, action='store_true', - help="keep progress report quiet") - parser.add_argument("--profile", default=false, action='store_true', - help="profile the code") - 
parser.add_argument("--help-stages", nargs="?", default=none, action=stageshelp, - metavar="stage-name-regex", - help="print help specific to the available stages") - parser.add_argument("--help-stages-full", action=stageshelp, metavar="stage", - help="print the full help specific to the available stages") - return parser.parse_args(args) - - def main(args=None): args = create_parser().parse_args(args) From 822326451d563fd0a392295819916b35498300cf Mon Sep 17 00:00:00 2001 From: Rob Taylor Date: Wed, 22 May 2019 09:43:05 -0700 Subject: [PATCH 5/7] Remove bin edge rounding --- fast_carpenter/summary/binned_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_carpenter/summary/binned_dataframe.py b/fast_carpenter/summary/binned_dataframe.py index 5a35698..f45d813 100644 --- a/fast_carpenter/summary/binned_dataframe.py +++ b/fast_carpenter/summary/binned_dataframe.py @@ -77,7 +77,7 @@ def densify_dataframe(in_df, binnings): if bins is None: index_values.append(in_index.unique(dim)) continue - index_values.append(pd.IntervalIndex.from_breaks(np.round(bins, 3), closed="left")) + index_values.append(pd.IntervalIndex.from_breaks(bins, closed="left")) out_index = pd.MultiIndex.from_product(index_values, names=in_index.names) out_df = in_df.reindex(index=out_index, copy=False) return out_df From 61e0caa00fadf8a1518e67691af309fe8ca635c0 Mon Sep 17 00:00:00 2001 From: Rob Taylor Date: Thu, 23 May 2019 10:15:33 +0100 Subject: [PATCH 6/7] Remove numpy import --- fast_carpenter/summary/binned_dataframe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fast_carpenter/summary/binned_dataframe.py b/fast_carpenter/summary/binned_dataframe.py index f45d813..39206e2 100644 --- a/fast_carpenter/summary/binned_dataframe.py +++ b/fast_carpenter/summary/binned_dataframe.py @@ -3,7 +3,6 @@ """ import os import pandas as pd -import numpy as np from . 
import binning_config as cfg From 523971b7d0ed388acb4e4a809825365a46f8544e Mon Sep 17 00:00:00 2001 From: Rob Taylor Date: Thu, 23 May 2019 10:17:01 +0100 Subject: [PATCH 7/7] Formatting changes for flake8 --- fast_carpenter/__main__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fast_carpenter/__main__.py b/fast_carpenter/__main__.py index 234bab5..5bdc577 100644 --- a/fast_carpenter/__main__.py +++ b/fast_carpenter/__main__.py @@ -59,6 +59,7 @@ def __call__(self, parser, namespace, values, option_string=None): return parser + def main(args=None): args = create_parser().parse_args(args) @@ -72,6 +73,7 @@ def main(args=None): print(ret_val) return 0 + def run_carpenter(sequence, datasets, args): process = atup.AtUproot(args.outdir, quiet=args.quiet, @@ -88,5 +90,6 @@ def run_carpenter(sequence, datasets, args): ret_val = process.run(datasets, sequence) return sequence, ret_val + if __name__ == "__main__": main()