Skip to content

Commit

Permalink
Merge pull request #148 from FAST-HEP/branch-v0.21.X
Browse files Browse the repository at this point in the history
merging 0.21.0 into master
  • Loading branch information
kreczko authored Apr 8, 2022
2 parents 33abb4b + 351fbaf commit b6f7e1b
Show file tree
Hide file tree
Showing 51 changed files with 2,291 additions and 857 deletions.
14 changes: 7 additions & 7 deletions .requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
flake8==3.5.0
twine==1.12.1

pytest==4.5.0
pytest-cov==2.7.1
pytest-runner==4.4
codecov==2.0.15
flake8==4.0.1
twine==3.8.0
pytest-lazy-fixture==0.6.3
pytest==6.2.5
pytest-cov==3.0.0
pytest-runner>=5.3.2
codecov==2.1.12
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
dist: xenial
dist: focal
language: python

python:
- "3.6"
- "3.7"
- "3.8"
- "3.9"
- "3.10"

install:
- pip install -r .requirements_dev.txt
Expand Down
5 changes: 5 additions & 0 deletions benchmarks/compare_carpenter_versions/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
venv_*
output
log.txt
HEPTutorial
fast_cms_public_tutorial*
5 changes: 5 additions & 0 deletions benchmarks/compare_carpenter_versions/clean_up.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Delete benchmark by-products in one pass: the contents of output/, every
# venv_* virtual environment and every fast_cms_public_tutorial_* checkout.
# -f keeps rm quiet when a pattern matches nothing.
rm -fr output/* venv_* fast_cms_public_tutorial_*
84 changes: 84 additions & 0 deletions benchmarks/compare_carpenter_versions/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Repository (host/path, without URL scheme) the fast-carpenter revisions are
# installed from.
REPO=github.com/kreczko/fast-carpenter.git

# virtualenv is provided through pipx.
python -m pip install pipx
pipx install virtualenv

# Fetch the input data only when no HEPTutorial directory exists yet; only the
# HEPTutorial/files/ member of the archive is unpacked, then the tarball is
# removed.
[ -d HEPTutorial ] || {
    wget http://opendata.cern.ch/record/212/files/HEPTutorial_0.tar
    tar -xf HEPTutorial_0.tar HEPTutorial/files/
    rm HEPTutorial_0.tar
}



# The two fast-carpenter revisions to compare, e.g. 1becc47 and 1b0912f.
# ${N:?message} aborts the script with the usage text when an argument is
# missing or empty; without this check the loops below would run over fewer
# revisions and the final diff would compare the wrong directories.
usage="usage: $0 <version1> <version2>"
version1=${1:?$usage}
version2=${2:?$usage}

# version_lt A B [...]
# Succeeds (exit status 0) when A is NOT the highest of the given version
# strings according to `sort -V`; with two arguments that means A < B.
function version_lt()
{
    local newest
    # One version per line, highest first; keep only the top entry.
    newest=$(echo "$@" | tr " " "\n" | sort -rV | head -n 1)
    [ "$newest" != "$1" ]
}

# Show which two revisions were requested on the command line.
echo "Comparing $version1 and $version2"

# set up versions
# For each revision: create (or reuse) a dedicated virtualenv, install that
# revision of fast-carpenter into it, pick the matching tutorial branch and
# prepare the output directory.
for version in $version1 $version2
do
    if [ ! -d "venv_${version}" ]
    then
        virtualenv -p python3 "venv_${version}"
    fi
    source "venv_${version}/bin/activate"
    # GitHub no longer serves the unauthenticated git:// protocol, so the
    # revision is installed over HTTPS instead.
    echo "installing git+https://${REPO}@${version}"
    pip install --quiet "git+https://${REPO}@${version}"

    # Second whitespace-separated field of `fast_carpenter --version`.
    fc_version=$(fast_carpenter --version | cut -d ' ' -f2)
    # Releases before 0.20.0 get the uproot3 tutorial branch, later ones uproot4.
    tutorial_version=uproot4
    if version_lt $fc_version "0.20.0"
    then
        tutorial_version=uproot3
        # TODO: this needs some extra setup e.g. for coffea to work
    fi
    tutorial_dir="fast_cms_public_tutorial_${tutorial_version}"
    if [ ! -d "${tutorial_dir}" ]
    then
        git clone \
            -b "kreczko-${tutorial_version}" \
            git@github.com:FAST-HEP/FAST_cms_public_tutorial.git \
            "${tutorial_dir}"
    fi
    # Install the tutorial requirements into EVERY virtualenv, not only the one
    # that happened to be active when the checkout was cloned: both revisions
    # may share the same tutorial branch, and the checkout survives re-runs.
    pip install --quiet -r "${tutorial_dir}/requirements.txt"

    mkdir -p "output/${version}"
done

# run versions
# Remember the caller's PYTHONPATH so that each revision starts from the same
# base. Prepending to the live value on every iteration would leave the
# previous revision's tutorial checkout importable during the next run.
base_pythonpath=${PYTHONPATH:-}
for version in $version1 $version2
do
    source "venv_${version}/bin/activate"
    # Second whitespace-separated field of `fast_carpenter --version`.
    fc_version=$(fast_carpenter --version | cut -d ' ' -f2)
    # Releases before 0.20.0 use the uproot3 tutorial branch, later ones uproot4.
    tutorial_version=uproot4
    if version_lt $fc_version "0.20.0"
    then
        tutorial_version=uproot3
    fi
    tutorial_dir="fast_cms_public_tutorial_${tutorial_version}"
    echo "Running with commit=$version, fast_carpenter=$fc_version, tutorial=$tutorial_version"
    export PYTHONPATH="${tutorial_dir}:${base_pythonpath}"

    # Process the tutorial datasets; stdout is shown and kept in log.txt.
    time fast_carpenter \
        --mode="coffea:local" \
        --outdir "output/${version}/" \
        "${tutorial_dir}/file_list.yml" \
        "${tutorial_dir}/sequence_cfg.yml" | tee "output/${version}/log.txt"

    # Plot the resulting tables; stdout is shown and kept in plotter_log.txt.
    time fast_plotter \
        -y log \
        -c "${tutorial_dir}/plot_config.yml" \
        -o "output/${version}/plotter" \
        "output/${version}"/tbl_dataset.*.csv | tee "output/${version}/plotter_log.txt"
done

# compare: recursively diff the two per-revision output directories; the
# report is printed and also saved to output/diff.txt
diff -r output/${version1}/ output/${version2} | tee output/diff.txt
50 changes: 50 additions & 0 deletions docs/UML.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
## Intro

The purpose of `fast_carpenter` is to process HEP data using standard tools.
As such, most of the code consists of bridges/adapters between data import tools,
data processing tools, data export tools, and various other tools for the tasks in between.





```mermaid
graph TD;
A-->B;
A-->C;
B-->D;
C-->D;
```


```mermaid
graph TD;
abc.MutableMapping-->TreeToDictAdaptor;
IndexProtocol-->IndexWithAliases;
IndexProtocol-->IndexDotTransform;
TreeLike-->TreeToDictAdaptor;
AdapterMethods-->Uproot3Methods;
AdapterMethods-->Uproot4Methods;
TreeToDictAdaptor-->TreeToDictAdaptorV0;
IndexDotTransform-->TreeToDictAdaptorV0;
IndexWithAliases-->TreeToDictAdaptorV0;
Uproot3Methods-->TreeToDictAdaptorV0;
TreeToDictAdaptor-->TreeToDictAdaptorV1;
IndexDotTransform-->TreeToDictAdaptorV1;
IndexWithAliases-->TreeToDictAdaptorV1;
Uproot4Methods-->TreeToDictAdaptorV1;
```

```mermaid
classDiagram
class TreeToDictAdaptor
TreeToDictAdaptor : arrays()
TreeToDictAdaptor : keys()
```





13 changes: 10 additions & 3 deletions fast_carpenter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
from fast_flow.help import argparse_help_stages
import fast_curator
import logging
from .backends import get_backend
from .backends import get_backend, KNOW_BACKENDS_NAMES
from .data_import import get_data_import_plugin
from .utils import mkdir_p
from .bookkeeping import write_booking
from .version import __version__
logging.getLogger(__name__).setLevel(logging.INFO)


def create_parser():
# TODO: replace with typer
from argparse import ArgumentParser

parser = ArgumentParser(description=__doc__)
Expand All @@ -26,7 +28,7 @@ def create_parser():
parser.add_argument("--outdir", default="output", type=str,
help="Where to save the results")
parser.add_argument("--mode", default="multiprocessing", type=str,
help="Which mode to run in (multiprocessing, htcondor, sge)")
help=f"Which mode to run in ({KNOW_BACKENDS_NAMES})")
parser.add_argument("--ncores", default=1, type=int,
help="Number of cores to run on")
parser.add_argument("--nblocks-per-dataset", default=-1, type=int,
Expand All @@ -53,6 +55,10 @@ def create_parser():
help="Enable creation of book-keeping tarball")
parser.add_argument("--no-bookkeeping", action='store_false', dest="bookkeeping",
help="Disable creation of book-keeping tarball")
parser.add_argument("--data-import-plugin", default="uproot4", type=str,
help="Which data import plugin to use (uproot3, uproot4, etc")
parser.add_argument("--data-import-plugin-cfg", default=None, type=str,
help="Configuration file for the data import plugin")

return parser

Expand All @@ -64,12 +70,13 @@ def main(args=None):
backend="fast_carpenter", return_cfg=True)
datasets = fast_curator.read.from_yaml(args.dataset_cfg)
backend = get_backend(args.mode)
data_import_plugin = get_data_import_plugin(args.data_import_plugin, args.data_import_plugin_cfg)

mkdir_p(args.outdir)
if args.bookkeeping:
book_keeping_file = os.path.join(args.outdir, "book-keeping.tar.gz")
write_booking(book_keeping_file, seq_cfg, datasets, cmd_line_args=args)
results, _ = backend.execute(sequence, datasets, args)
results, _ = backend.execute(sequence, datasets, args, plugins={'data_import': data_import_plugin})

print("Summary of results")
print(results)
Expand Down
30 changes: 15 additions & 15 deletions fast_carpenter/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,28 @@


def get_alphatwirl():
from . import alphatwirl
return alphatwirl
from . import _alphatwirl
return _alphatwirl


def get_coffea():
    """Return the ``coffea`` backend sub-module.

    The import is done inside the function, so it only runs when this
    function is called.
    """
    from . import coffea as coffea_backend
    return coffea_backend


known_backends = {"multiprocessing": get_alphatwirl,
"htcondor": get_alphatwirl,
"sge": get_alphatwirl,
"alphatwirl:multiprocessing": get_alphatwirl,
"alphatwirl:htcondor": get_alphatwirl,
"alphatwirl:sge": get_alphatwirl,
"coffea:local": get_coffea,
"coffea:parsl": get_coffea,
"coffea:dask": get_coffea,
}
KNOWN_BACKENDS = {
"multiprocessing": get_alphatwirl,
"htcondor": get_alphatwirl,
"sge": get_alphatwirl,
"coffea:local": get_coffea,
"coffea:parsl": get_coffea,
"coffea:dask": get_coffea,
}

KNOW_BACKENDS_NAMES = ", ".join(list(KNOWN_BACKENDS.keys()))


def get_backend(name):
if name not in known_backends:
raise ValueError("Unknown backend requested, '%s'" % name)
return known_backends[name]()
if name not in KNOWN_BACKENDS:
raise ValueError(f"Unknown backend requested, '{name}'. Known backends: {KNOW_BACKENDS_NAMES}")
return KNOWN_BACKENDS[name]()
Loading

0 comments on commit b6f7e1b

Please sign in to comment.