diff --git a/.requirements_dev.txt b/.requirements_dev.txt
index 82f7066..7c85b69 100644
--- a/.requirements_dev.txt
+++ b/.requirements_dev.txt
@@ -1,7 +1,7 @@
-flake8==3.5.0
-twine==1.12.1
-
-pytest==4.5.0
-pytest-cov==2.7.1
-pytest-runner==4.4
-codecov==2.0.15
+flake8==4.0.1
+twine==3.8.0
+pytest-lazy-fixture==0.6.3
+pytest==6.2.5
+pytest-cov==3.0.0
+pytest-runner>=5.3.2
+codecov==2.1.12
diff --git a/.travis.yml b/.travis.yml
index e00a9b4..7283b9a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,10 +1,10 @@
-dist: xenial
+dist: focal
 language: python
 
 python:
- - "3.6"
- - "3.7"
  - "3.8"
+ - "3.9"
+ - "3.10"
 
 install:
   - pip install -r .requirements_dev.txt
diff --git a/benchmarks/compare_carpenter_versions/.gitignore b/benchmarks/compare_carpenter_versions/.gitignore
new file mode 100644
index 0000000..dd24b89
--- /dev/null
+++ b/benchmarks/compare_carpenter_versions/.gitignore
@@ -0,0 +1,5 @@
+venv_*
+output
+log.txt
+HEPTutorial
+fast_cms_public_tutorial*
\ No newline at end of file
diff --git a/benchmarks/compare_carpenter_versions/clean_up.sh b/benchmarks/compare_carpenter_versions/clean_up.sh
new file mode 100755
index 0000000..5bc01c8
--- /dev/null
+++ b/benchmarks/compare_carpenter_versions/clean_up.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+rm -fr output/*
+rm -fr venv_*
+rm -fr fast_cms_public_tutorial_*
\ No newline at end of file
diff --git a/benchmarks/compare_carpenter_versions/run.sh b/benchmarks/compare_carpenter_versions/run.sh
new file mode 100755
index 0000000..92bcbe8
--- /dev/null
+++ b/benchmarks/compare_carpenter_versions/run.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+REPO=github.com/kreczko/fast-carpenter.git
+
+python -m pip install pipx
+pipx install virtualenv
+
+if [ ! -d HEPTutorial ]; then
+    wget http://opendata.cern.ch/record/212/files/HEPTutorial_0.tar
+    tar -xf HEPTutorial_0.tar HEPTutorial/files/
+    rm HEPTutorial_0.tar
+fi
+
+
+
+# e.g. 1becc47 and 1b0912f
+version1=$1
+version2=$2
+
+function version_lt()
+{
+    test "$(echo "$@" | tr " " "\n" | sort -rV | head -n 1)" != "$1";
+}
+
+echo "Comparing $version1 and $version2"
+
+# set up versions
+for version in $version1 $version2
+do
+    if [ ! -d "venv_${version}" ]
+    then
+        virtualenv -p python3 venv_$version
+    fi
+    source venv_$version/bin/activate
+    echo "installing git+git://${REPO}@${version}"
+    pip install --quiet git+git://$REPO@$version
+
+    fc_version=$(fast_carpenter --version | cut -d ' ' -f2)
+    tutorial_version=uproot4
+    if version_lt $fc_version "0.20.0"
+    then
+        tutorial_version=uproot3
+        # TODO: this needs some extra setup e.g. for coffea to work
+    fi
+    if [ ! -d "fast_cms_public_tutorial_${tutorial_version}" ]
+    then
+        git clone \
+          -b kreczko-${tutorial_version} \
+          git@github.com:FAST-HEP/FAST_cms_public_tutorial.git \
+          fast_cms_public_tutorial_${tutorial_version}
+        
+        pip install --quiet -r fast_cms_public_tutorial_${tutorial_version}/requirements.txt
+    fi
+    
+    mkdir -p output/${version}
+done
+
+# run versions
+for version in $version1 $version2
+do
+    source venv_$version/bin/activate
+    fc_version=$(fast_carpenter --version | cut -d ' ' -f2)
+    tutorial_version=uproot4
+    if version_lt $fc_version "0.20.0"
+    then
+        tutorial_version=uproot3
+    fi
+    echo "Running with commit=$version, fast_carpenter=$fc_version, tutorial=$tutorial_version"
+    export PYTHONPATH=fast_cms_public_tutorial_${tutorial_version}:$PYTHONPATH
+
+    time fast_carpenter \
+      --mode="coffea:local" \
+      --outdir output/${version}/ \
+      fast_cms_public_tutorial_${tutorial_version}/file_list.yml \
+      fast_cms_public_tutorial_${tutorial_version}/sequence_cfg.yml | tee output/${version}/log.txt
+
+    time fast_plotter \
+      -y log \
+      -c fast_cms_public_tutorial_${tutorial_version}/plot_config.yml \
+      -o output/${version}/plotter \
+      output/${version}/tbl_dataset.*.csv | tee output/${version}/plotter_log.txt
+done
+
+# compare
+diff -r output/${version1}/ output/${version2} | tee output/diff.txt
diff --git a/docs/UML.md b/docs/UML.md
new file mode 100644
index 0000000..bb192bd
--- /dev/null
+++ b/docs/UML.md
@@ -0,0 +1,50 @@
+## Intro
+
+The purpose of `fast_carpenter` is to process HEP data using standard tools.
+As such most of the code consists of bridges/adapters between data import tools,
+data processing tools, data export tools and various other tools for tasks in between.
+
+
+
+
+
+```mermaid
+graph TD;
+    A-->B;
+    A-->C;
+    B-->D;
+    C-->D;
+```
+
+
+```mermaid
+graph TD;
+    abc.MutableMapping-->TreeToDictAdaptor;
+    IndexProtocol-->IndexWithAliases;
+    IndexProtocol-->IndexDotTransform;
+    TreeLike-->TreeToDictAdaptor;
+    AdapterMethods-->Uproot3Methods;
+    AdapterMethods-->Uproot4Methods;
+
+    TreeToDictAdaptor-->TreeToDictAdaptorV0;
+    IndexDotTransform-->TreeToDictAdaptorV0;
+    IndexWithAliases-->TreeToDictAdaptorV0;
+    Uproot3Methods-->TreeToDictAdaptorV0;
+
+    TreeToDictAdaptor-->TreeToDictAdaptorV1;
+    IndexDotTransform-->TreeToDictAdaptorV1;
+    IndexWithAliases-->TreeToDictAdaptorV1;
+    Uproot4Methods-->TreeToDictAdaptorV1;
+```
+
+```mermaid
+classDiagram
+class TreeToDictAdaptor
+TreeToDictAdaptor : arrays()
+TreeToDictAdaptor : keys()
+```
+
+
+
+
+
diff --git a/fast_carpenter/__main__.py b/fast_carpenter/__main__.py
index aab4d4e..ba20a71 100644
--- a/fast_carpenter/__main__.py
+++ b/fast_carpenter/__main__.py
@@ -8,7 +8,8 @@
 from fast_flow.help import argparse_help_stages
 import fast_curator
 import logging
-from .backends import get_backend
+from .backends import get_backend, KNOW_BACKENDS_NAMES
+from .data_import import get_data_import_plugin
 from .utils import mkdir_p
 from .bookkeeping import write_booking
 from .version import __version__
@@ -16,6 +17,7 @@
 
 
 def create_parser():
+    # TODO: replace with typer
     from argparse import ArgumentParser
 
     parser = ArgumentParser(description=__doc__)
@@ -26,7 +28,7 @@ def create_parser():
     parser.add_argument("--outdir", default="output", type=str,
                         help="Where to save the results")
     parser.add_argument("--mode", default="multiprocessing", type=str,
-                        help="Which mode to run in (multiprocessing, htcondor, sge)")
+                        help=f"Which mode to run in ({KNOW_BACKENDS_NAMES})")
     parser.add_argument("--ncores", default=1, type=int,
                         help="Number of cores to run on")
     parser.add_argument("--nblocks-per-dataset", default=-1, type=int,
@@ -53,6 +55,10 @@ def create_parser():
                         help="Enable creation of book-keeping tarball")
     parser.add_argument("--no-bookkeeping", action='store_false', dest="bookkeeping",
                         help="Disable creation of book-keeping tarball")
+    parser.add_argument("--data-import-plugin", default="uproot4", type=str,
+                        help="Which data import plugin to use (uproot3, uproot4, etc")
+    parser.add_argument("--data-import-plugin-cfg", default=None, type=str,
+                        help="Configuration file for the data import plugin")
 
     return parser
 
@@ -64,12 +70,13 @@ def main(args=None):
                                                      backend="fast_carpenter", return_cfg=True)
     datasets = fast_curator.read.from_yaml(args.dataset_cfg)
     backend = get_backend(args.mode)
+    data_import_plugin = get_data_import_plugin(args.data_import_plugin, args.data_import_plugin_cfg)
 
     mkdir_p(args.outdir)
     if args.bookkeeping:
         book_keeping_file = os.path.join(args.outdir, "book-keeping.tar.gz")
         write_booking(book_keeping_file, seq_cfg, datasets, cmd_line_args=args)
-    results, _ = backend.execute(sequence, datasets, args)
+    results, _ = backend.execute(sequence, datasets, args, plugins={'data_import': data_import_plugin})
 
     print("Summary of results")
     print(results)
diff --git a/fast_carpenter/backends/__init__.py b/fast_carpenter/backends/__init__.py
index 5d1479c..01e613a 100644
--- a/fast_carpenter/backends/__init__.py
+++ b/fast_carpenter/backends/__init__.py
@@ -6,8 +6,8 @@
 
 
 def get_alphatwirl():
-    from . import alphatwirl
-    return alphatwirl
+    from . import _alphatwirl
+    return _alphatwirl
 
 
 def get_coffea():
@@ -15,19 +15,19 @@ def get_coffea():
     return coffea
 
 
-known_backends = {"multiprocessing": get_alphatwirl,
-                  "htcondor": get_alphatwirl,
-                  "sge": get_alphatwirl,
-                  "alphatwirl:multiprocessing": get_alphatwirl,
-                  "alphatwirl:htcondor": get_alphatwirl,
-                  "alphatwirl:sge": get_alphatwirl,
-                  "coffea:local": get_coffea,
-                  "coffea:parsl": get_coffea,
-                  "coffea:dask": get_coffea,
-                  }
+KNOWN_BACKENDS = {
+    "multiprocessing": get_alphatwirl,
+    "htcondor": get_alphatwirl,
+    "sge": get_alphatwirl,
+    "coffea:local": get_coffea,
+    "coffea:parsl": get_coffea,
+    "coffea:dask": get_coffea,
+}
+
+KNOW_BACKENDS_NAMES = ", ".join(list(KNOWN_BACKENDS.keys()))
 
 
 def get_backend(name):
-    if name not in known_backends:
-        raise ValueError("Unknown backend requested, '%s'" % name)
-    return known_backends[name]()
+    if name not in KNOWN_BACKENDS:
+        raise ValueError(f"Unknown backend requested, '{name}'. Known backends: {KNOW_BACKENDS_NAMES}")
+    return KNOWN_BACKENDS[name]()
diff --git a/fast_carpenter/backends/_alphatwirl.py b/fast_carpenter/backends/_alphatwirl.py
new file mode 100644
index 0000000..92b3a96
--- /dev/null
+++ b/fast_carpenter/backends/_alphatwirl.py
@@ -0,0 +1,294 @@
+"""
+Functions to run a job using alphatwirl
+"""
+from cachetools import cachedmethod
+from cachetools.keys import hashkey
+from functools import partial
+import operator
+from typing import Any, Dict
+
+import awkward as awk
+import numpy as np
+
+from fast_carpenter.data_import import DataImportBase
+from fast_carpenter.tree_adapter import create_masked
+
+
+class BEvents(object):
+    """
+    from https://github.com/shane-breeze/atuproot/blob/master/atuproot/BEvents.py
+    """
+    non_branch_attrs = [
+        "tree", "nevents_in_tree", "nevents_per_block", "nblocks",
+        "start_block", "stop_block", "iblock", "start_entry", "stop_entry",
+        "_branch_cache", "_nonbranch_cache", "size", "config",
+    ]
+
+    def __init__(
+        self, tree, nevents_per_block=100000, start_block=0, stop_block=-1,
+        branch_cache={},
+    ):
+        self.tree = tree
+        self.nevents_in_tree = len(tree)
+        self.nevents_per_block = int(nevents_per_block) \
+            if nevents_per_block >= 0 \
+            else self.nevents_in_tree
+
+        nblocks = int((self.nevents_in_tree-1)/self.nevents_per_block + 1)
+        start_block = min(nblocks, start_block)
+        if stop_block > -1:
+            self.nblocks = min(nblocks-start_block, stop_block)
+        else:
+            self.nblocks = nblocks-start_block
+        self.stop_block = stop_block
+        self.start_block = start_block
+        self.iblock = -1
+
+        self._branch_cache = branch_cache
+        self._nonbranch_cache = {}
+
+    def __len__(self):
+        return self.nblocks
+
+    def __repr__(self):
+        return '{}({})'.format(
+            self.__class__.__name__,
+            self._repr_content(),
+        )
+
+    def _repr_content(self):
+        return 'tree = {!r}, nevents_in_tree = {!r}, nevents_per_block = {!r}, '\
+            'nblocks = {!r}, iblock = {!r}, start_block = {!r}, '\
+               'stop_block = {!r}'.format(
+                   self.tree,
+                   self.nevents_in_tree,
+                   self.nevents_per_block,
+                   self.nblocks,
+                   self.iblock,
+                   self.start_block,
+                   self.stop_block,
+               )
+
+    def __getitem__(self, i):
+        if i >= self.nblocks:
+            self.iblock = -1
+            raise IndexError("The index is out of range: " + str(i))
+        self._branch_cache.clear()
+
+        self.iblock = i
+        return self
+
+    def __iter__(self):
+        for self.iblock in range(self.nblocks):
+            self._branch_cache.clear()
+            yield self
+        self.iblock = -1
+        self._nonbranch_cache = {}
+
+    def __getattr__(self, attr):
+        if attr in self.non_branch_attrs:
+            return getattr(self, attr)
+        elif attr in self._nonbranch_cache:
+            return self._nonbranch_cache[attr]
+        return self._get_branch(attr)
+
+    def __setattr__(self, attr, val):
+        if attr in self.non_branch_attrs:
+            super(BEvents, self).__setattr__(attr, val)
+        else:
+            if not (isinstance(val, awk.Array) or isinstance(val, np.ndarray)):
+                self._nonbranch_cache[attr] = val
+            else:
+                key = hashkey('BEvents._get_branch', attr)
+                self._branch_cache[key] = val
+
+    @cachedmethod(operator.attrgetter('_branch_cache'), key=partial(hashkey, 'BEvents._get_branch'))
+    def _get_branch(self, name):
+        self.start_entry = (self.start_block + self.iblock) * self.nevents_per_block
+        self.stop_entry = min(
+            (self.start_block + self.iblock + 1) * self.nevents_per_block,
+            (self.start_block + self.nblocks) * self.nevents_per_block,
+            self.nevents_in_tree,
+        )
+        self.size = self.stop_entry - self.start_entry
+        try:
+            branch = "asdsd"
+            print(branch)
+            # branch = self.tree.array(
+            #     name,
+            #     entrystart=self.start_entry,
+            #     entrystop=self.stop_entry,
+            # )
+        except KeyError as e:
+            raise AttributeError(e)
+        return branch
+
+    def hasbranch(self, branch, encoding='utf-8'):
+        return (
+            branch.encode(encoding) in self.tree.keys()
+            or hashkey('BEvents._get_branch', branch) in self._branch_cache
+            or branch in self._nonbranch_cache
+        )
+
+    def delete_branches(self, branches):
+        for branch in branches:
+            key = hashkey('BEvents._get_branch', branch)
+            if key in self._branch_cache:
+                self._branch_cache.popitem(key)
+            elif branch in self._nonbranch_cache:
+                self._nonbranch_cache.popitem(branch)
+
+    def __contains__(self, branch):
+        return branch in getattr(self, "tree")
+
+
+class EventRanger():
+    def __init__(self):
+        self._owner = None
+
+    def set_owner(self, owner):
+        self._owner = owner
+
+    @property
+    def start_entry(self):
+        return (self._owner.start_block + self._owner.iblock) * self._owner.nevents_per_block
+
+    @property
+    def stop_entry(self):
+        i_block = min(self._owner.iblock + 1, self._owner.nblocks)
+        stop_entry = (self._owner.start_block + i_block) * self._owner.nevents_per_block
+        return min(self._owner.nevents_in_tree, stop_entry)
+
+    @property
+    def entries_in_block(self):
+        if self._owner and self._owner.iblock > -1:
+            return self.stop_entry - self.start_entry
+        return None
+
+
+class BEventsWrapped(BEvents):
+    def __init__(self, tree, *args, **kwargs):
+        ranges = EventRanger()
+
+        super(BEventsWrapped, self).__init__(tree, *args, **kwargs)
+        ranges.set_owner(self)
+        tree = create_masked(
+            {
+                "tree": tree,
+                "start": self.start_entry,
+                "stop": self.stop_entry,
+                "adapter": "uproot4",
+            }
+        )
+        self.tree = tree
+
+    def _block_changed(self):
+        self.tree.reset_mask()
+        self.tree.reset_cache()
+
+    def __getitem__(self, i):
+        result = super(BEventsWrapped, self).__getitem__(self, i)
+        self._block_changed()
+        return result
+
+    def __iter__(self):
+        for value in super(BEventsWrapped, self).__iter__():
+            self._block_changed()
+            yield value
+        self._block_changed()
+
+    @property
+    def start_entry(self):
+        return (self.start_block + self.iblock) * self.nevents_per_block
+
+    @property
+    def stop_entry(self):
+        i_block = min(self.iblock + 1, self.nblocks)
+        stop_entry = (self.start_block + i_block) * self.nevents_per_block
+        return min(self.nevents_in_tree, stop_entry)
+
+
+class EventBuilder(object):
+    data_import_plugin: DataImportBase = None
+
+    def __init__(self, config):
+        self.config = config
+
+    def __repr__(self):
+        return '{}({!r})'.format(
+            self.__class__.__name__,
+            self.config,
+        )
+
+    def __call__(self):
+        data = EventBuilder.data_import_plugin.open(self.config.inputPaths)
+        tree = data[self.config.treeName]
+
+        events = BEventsWrapped(
+            tree,
+            self.config.nevents_per_block,
+            self.config.start_block,
+            self.config.stop_block,
+        )
+        events.config = self.config
+        return events
+
+
+class DummyCollector():
+    def collect(self, *args, **kwargs):
+        pass
+
+
+class AtuprootContext:
+    def __init__(self, plugins: Dict[str, Any] = None) -> None:
+        self.plugins = plugins
+
+    def __enter__(self):
+        import atuproot.atuproot_main as atup
+        self.atup = atup
+        self._orig_event_builder = atup.EventBuilder
+        self._orig_build_parallel = atup.build_parallel
+
+        from atsge.build_parallel import build_parallel
+        atup.EventBuilder = EventBuilder
+        atup.EventBuilder.data_import_plugin = self.plugins["data_import"]
+        atup.build_parallel = build_parallel
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        self.atup.EventBuilder = self._orig_event_builder
+        self.atup.build_parallel = self._orig_build_parallel
+
+
+def execute(sequence, datasets, args, plugins: Dict[str, Any] = None):
+    """
+    Run a job using alphatwirl and atuproot
+    """
+
+    if args.ncores < 1:
+        args.ncores = 1
+
+    sequence = [(s, s.collector() if hasattr(s, "collector") else DummyCollector()) for s in sequence]
+
+    with AtuprootContext(plugins) as runner:
+        process = runner.atup.AtUproot(
+            args.outdir,
+            quiet=args.quiet,
+            parallel_mode=args.mode,
+            process=args.ncores,
+            max_blocks_per_dataset=args.nblocks_per_dataset,
+            max_blocks_per_process=args.nblocks_per_sample,
+            nevents_per_block=args.blocksize,
+            profile=args.profile,
+            profile_out_path="profile.txt",
+        )
+
+        ret_val = process.run(datasets, sequence)
+
+    if not args.profile:
+        # This breaks in AlphaTwirl when used with the profile option
+        summary = {s[0].name: list(df.index.names) for s, df in zip(sequence, ret_val[0]) if df is not None}
+    else:
+        summary = " (Results summary not available with profile mode) "
+
+    return summary, ret_val
diff --git a/fast_carpenter/backends/alphatwirl.py b/fast_carpenter/backends/alphatwirl.py
deleted file mode 100644
index 808a186..0000000
--- a/fast_carpenter/backends/alphatwirl.py
+++ /dev/null
@@ -1,59 +0,0 @@
-"""
-Functions to run a job using alphatwirl
-"""
-
-
-class DummyCollector():
-    def collect(self, *args, **kwargs):
-        pass
-
-
-class AtuprootContext:
-    def __enter__(self):
-        import atuproot.atuproot_main as atup
-        self.atup = atup
-        self._orig_event_builder = atup.EventBuilder
-        self._orig_build_parallel = atup.build_parallel
-
-        from ..event_builder import EventBuilder
-        from atsge.build_parallel import build_parallel
-        atup.EventBuilder = EventBuilder
-        atup.build_parallel = build_parallel
-        return self
-
-    def __exit__(self, *args, **kwargs):
-        self.atup.EventBuilder = self._orig_event_builder
-        self.atup.build_parallel = self._orig_build_parallel
-
-
-def execute(sequence, datasets, args):
-    """
-    Run a job using alphatwirl and atuproot
-    """
-
-    if args.ncores < 1:
-        args.ncores = 1
-
-    sequence = [(s, s.collector() if hasattr(s, "collector") else DummyCollector()) for s in sequence]
-
-    with AtuprootContext() as runner:
-        process = runner.atup.AtUproot(args.outdir,
-                                       quiet=args.quiet,
-                                       parallel_mode=args.mode,
-                                       process=args.ncores,
-                                       max_blocks_per_dataset=args.nblocks_per_dataset,
-                                       max_blocks_per_process=args.nblocks_per_sample,
-                                       nevents_per_block=args.blocksize,
-                                       profile=args.profile,
-                                       profile_out_path="profile.txt",
-                                       )
-
-        ret_val = process.run(datasets, sequence)
-
-    if not args.profile:
-        # This breaks in AlphaTwirl when used with the profile option
-        summary = {s[0].name: list(df.index.names) for s, df in zip(sequence, ret_val[0]) if df is not None}
-    else:
-        summary = " (Results summary not available with profile mode) "
-
-    return summary, ret_val
diff --git a/fast_carpenter/backends/coffea.py b/fast_carpenter/backends/coffea.py
index eeaf401..24d5133 100644
--- a/fast_carpenter/backends/coffea.py
+++ b/fast_carpenter/backends/coffea.py
@@ -2,7 +2,7 @@
 Functions to run a job using Coffea
 """
 import copy
-from fast_carpenter.masked_tree import MaskedUprootTree
+from fast_carpenter.tree_adapter import create_masked, TreeLike
 from collections import namedtuple
 from coffea import processor as cop
 import logging
@@ -14,6 +14,48 @@
 ConfigProxy = namedtuple("ConfigProxy", "name eventtype")
 
 
+class CoffeaConnector(TreeLike):
+
+    def __init__(self, data):
+        self._data = data
+
+    def __getattr__(self, name):
+        if hasattr(self._data, name):
+            return getattr(self._data, name)
+        return getattr(self, name)
+
+    def __getitem__(self, key):
+        return self._data[key]
+
+    @property
+    def num_entries(self):
+        return len(self._data)
+
+    @property
+    def dataset(self):
+        return self._data.metadata["dataset"]
+
+    @property
+    def start(self):
+        return self._data.metadata['entrystart']
+
+    @property
+    def stop(self):
+        return self._data.metadata['entrystop']
+
+    def arrays(self, keys, **kwargs):
+        import awkward as ak
+        library = kwargs.get("library", "ak")
+        how = kwargs.get("how", dict)
+        if library == "ak" and how == dict:
+            return {key: ak.Array(self._data[key]) for key in keys}
+
+        raise NotImplementedError(f"Cannot return arrays for {library=} and {how=}")
+
+    def keys(self):
+        return self._data.fields
+
+
 class stages_accumulator(cop.AccumulatorABC):
     def __init__(self, stages):
         self._zero = copy.deepcopy(stages)
@@ -50,11 +92,17 @@ def accumulator(self):
 
     def process(self, df):
         output = self.accumulator.identity()
+        connector = CoffeaConnector(df)
+
+        # tree = MaskedUprootTree(df._tree, EventRanger(start, stop, stop - start))
+        tree = create_masked(
+            dict(
+                tree=connector,
+                start=connector.start,
+                stop=connector.stop
+            ))
 
-        start = df._branchargs['entrystart']
-        stop = df._branchargs['entrystop']
-        tree = MaskedUprootTree(df._tree, EventRanger(start, stop, stop - start))
-        dsname = df['dataset']
+        dsname = connector.dataset
         cfg_proxy = ConfigProxy(dsname, 'data' if dsname == 'data' else 'mc')
         chunk = SingleChunk(tree, ChunkConfig(cfg_proxy))
 
@@ -135,14 +183,12 @@ def create_executor(args):
     if exe_type == "local":
         executor = cop.futures_executor
         exe_args.setdefault('workers', args.ncores)
-        exe_args.setdefault('flatten', False)
     elif exe_type == "parsl":
         executor = cop.parsl_executor
         exe_args.setdefault('n_threads', args.ncores)
         exe_args.setdefault('monitoring', False)
         if 'config' not in exe_args:
             exe_args['config'] = configure_parsl(**exe_args)
-        exe_args.setdefault('flatten', False)
     elif exe_type == "dask":
         executor = cop.dask_executor
         exe_args.setdefault('processes', False)
@@ -158,7 +204,7 @@ def create_executor(args):
     return executor, exe_args
 
 
-def execute(sequence, datasets, args):
+def execute(sequence, datasets, args, plugins):
     fp = FASTProcessor(sequence)
 
     executor, exe_args = create_executor(args)
diff --git a/fast_carpenter/data_import/__init__.py b/fast_carpenter/data_import/__init__.py
new file mode 100644
index 0000000..8a92ebd
--- /dev/null
+++ b/fast_carpenter/data_import/__init__.py
@@ -0,0 +1,41 @@
+from pathlib import Path
+from ._base import DataImportBase
+from ._uproot4 import Uproot4DataImport
+from ._uproot3 import Uproot3DataImport
+
+_DATA_IMPORT_PLUGINS = {
+    "uproot4": Uproot4DataImport,
+    "uproot3": Uproot3DataImport,
+}
+
+
+def register_data_import_plugin(plugin_name: str, plugin_class: DataImportBase) -> None:
+    """
+    Register a new plugin_name for data import.
+    """
+    _DATA_IMPORT_PLUGINS[plugin_name] = plugin_class
+
+
+def _process_plugin_config(plugin_name: str, plugin_config: Path) -> None:
+    """
+        Process the plugin configuration file.
+        Reads the "register" and "plugin_name" sections to register and configure the plugin.
+    """
+    if plugin_config is None:
+        return
+    if not plugin_config.exists():
+        raise ValueError(f"Plugin config file {plugin_config} does not exist")
+    if not plugin_config.is_file():
+        raise ValueError(f"Plugin config file {plugin_config} is not a file")
+
+    raise NotImplementedError("Plugin config file processing not implemented yet")
+
+
+def get_data_import_plugin(plugin_name: str, plugin_config: Path) -> DataImportBase:
+    """
+    Get a data import plugin by name.
+    """
+    config = _process_plugin_config(plugin_name, plugin_config)
+    if plugin_name not in _DATA_IMPORT_PLUGINS:
+        raise ValueError(f"Unknown data import plugin: {plugin_name}")
+    return _DATA_IMPORT_PLUGINS[plugin_name](config)
diff --git a/fast_carpenter/data_import/_base.py b/fast_carpenter/data_import/_base.py
new file mode 100644
index 0000000..39ea849
--- /dev/null
+++ b/fast_carpenter/data_import/_base.py
@@ -0,0 +1,26 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List
+
+
+class DataImportBase(ABC):
+    """
+    This Abstract Base Class is the base class for all data import classes.
+    """
+    config: Dict[str, Any]
+
+    def __init__(self, config: Dict[str, Any]) -> None:
+        self.config = config
+
+    @abstractmethod
+    def _process_config(self) -> None:
+        """
+        Process the configuration.
+        """
+        pass
+
+    @abstractmethod
+    def open(self, paths: List[str]) -> Any:
+        """
+        This method is called by the importer to open the files.
+        """
+        pass
diff --git a/fast_carpenter/data_import/_uproot3.py b/fast_carpenter/data_import/_uproot3.py
new file mode 100644
index 0000000..bf5e311
--- /dev/null
+++ b/fast_carpenter/data_import/_uproot3.py
@@ -0,0 +1,33 @@
+from typing import Any, Dict, List
+
+from ._base import DataImportBase
+
+
+class Uproot3DataImport(DataImportBase):
+    """
+    This class is a wrapper around the uproot3 library.
+    """
+
+    def __init__(self, config: Dict[str, Any]) -> None:
+        super().__init__(config)
+
+    def _process_config(self):
+        pass
+
+    def open(self, paths: List[str]) -> Any:
+        """
+        This method is called by the importer to open the file.
+        """
+        import uproot3
+        if len(paths) != 1:
+            # TODO - support multiple paths
+            raise AttributeError("Multiple paths not yet supported")
+        # Try to open the tree - some machines have configured limitations
+        # which prevent memmaps from begin created. Use a fallback - the
+        # localsource option
+        input_file = paths[0]
+        try:
+            rootfile = uproot3.open(input_file)
+        except MemoryError:
+            rootfile = uproot3.open(input_file, localsource=uproot3.FileSource.defaults)
+        return rootfile
diff --git a/fast_carpenter/data_import/_uproot4.py b/fast_carpenter/data_import/_uproot4.py
new file mode 100644
index 0000000..4707f90
--- /dev/null
+++ b/fast_carpenter/data_import/_uproot4.py
@@ -0,0 +1,33 @@
+from typing import Any, Dict, List
+
+from ._base import DataImportBase
+
+
+class Uproot4DataImport(DataImportBase):
+    """
+    This class is a wrapper around the uproot library.
+    """
+
+    def __init__(self, config: Dict[str, Any]) -> None:
+        super().__init__(config)
+
+    def _process_config(self):
+        pass
+
+    def open(self, paths: List[str]) -> Any:
+        """
+        This method is called by the importer to open the file.
+        """
+        import uproot
+        if len(paths) != 1:
+            # TODO - support multiple paths
+            raise AttributeError("Multiple paths not yet supported")
+        # Try to open the tree - some machines have configured limitations
+        # which prevent memmaps from begin created. Use a fallback - the
+        # localsource option
+        input_file = paths[0]
+        try:
+            rootfile = uproot.open(input_file)
+        except MemoryError:
+            rootfile = uproot.open(input_file, file_handler=uproot.source.chunk.Source)
+        return rootfile
diff --git a/fast_carpenter/define/reductions.py b/fast_carpenter/define/reductions.py
index c732b65..1807277 100644
--- a/fast_carpenter/define/reductions.py
+++ b/fast_carpenter/define/reductions.py
@@ -1,7 +1,8 @@
 import numpy as np
 import six
-from ..expressions import deconstruct_jaggedness, reconstruct_jaggedness
+from typing import List
 
+from ..tree_adapter import ArrayMethods
 
 __all__ = ["get_pandas_reduction"]
 
@@ -16,53 +17,45 @@ def __init__(self, index, fill_missing, force_float=True):
         self.fill_missing = fill_missing
         self.dtype = None
         if fill_missing is True or fill_missing is False:
-            self.dtype = bool
+            self.dtype = np.bool8
         elif force_float or isinstance(fill_missing, float):
-            self.dtype = float
+            self.dtype = np.float64
         else:
-            self.dtype = int
+            self.dtype = np.int32
 
     def __call__(self, array):
-        # The next two lines ought to be enough
-        # result = array.pad(abs(self.index) + int(self.index >= 0))
-        # result = result[..., self.index]
-
-        # Flatten out the first K-1 dimensions:
-        flat, counts = deconstruct_jaggedness(array, [])
-        result = reconstruct_jaggedness(flat, counts[:1])
-
-        # Now get the Nth item on the last dimension
-        result = result.pad(abs(self.index) + int(self.index >= 0))
-        result = result[..., self.index]
-
-        # Now replay the remaining dimensions on this
-        result = reconstruct_jaggedness(result, counts[1:])
-
+        padding = abs(self.index) + 1 if self.index >= 0 else abs(self.index)
+        result = ArrayMethods.pad(array, padding)
+        result = ArrayMethods.fill_none(result, self.fill_missing, axis=-1)
         if self.dtype is not None:
-            result = result.astype(self.dtype)
-        result = result.fillna(self.fill_missing)
-        return result
+            result = ArrayMethods.values_as_type(result, self.dtype)
+        return result[..., self.index]
 
 
 class JaggedMethod(object):
+    SUPPORTED: List[str] = ["sum", "prod", "any", "all", "count_nonzero",
+                            "max", "min", "argmin", "argmax"]
+    DEFAULTS = {
+        "count_nonzero": {"axis": 1},
+        "sum": {"axis": 1},
+    }
+
     def __init__(self, method):
         self.method_name = method
+        self._defaults = self.DEFAULTS.get(method, {})
 
     def __call__(self, array):
-        return getattr(array, self.method_name)()
+        return getattr(ArrayMethods, self.method_name)(array, **self._defaults)
 
 
 class JaggedProperty(object):
+    SUPPORTED: List[str] = ["counts"]
+
     def __init__(self, prop_name):
         self.prop_name = prop_name
 
     def __call__(self, array):
-        return getattr(array, self.prop_name)
-
-
-_jagged_methods = ["sum", "prod", "any", "all", "count_nonzero",
-                   "max", "min", "argmin", "argmax"]
-_jagged_properties = ["counts"]
+        return getattr(ArrayMethods, self.prop_name)(array)
 
 
 def get_awkward_reduction(stage_name, reduction, fill_missing=np.nan):
@@ -73,20 +66,21 @@ def get_awkward_reduction(stage_name, reduction, fill_missing=np.nan):
         msg = "{}: requested reduce method is not a string or an int"
         raise BadReductionConfig(msg.format(stage_name))
 
-    if reduction in _jagged_methods:
+    if reduction in JaggedMethod.SUPPORTED:
         return JaggedMethod(reduction)
-    if reduction in _jagged_properties:
+    if reduction in JaggedProperty.SUPPORTED:
         return JaggedProperty(reduction)
 
     msg = "{}: Unknown method to reduce: '{}'"
     raise BadReductionConfig(msg.format(stage_name, reduction))
 
 
-_pandas_aggregates = ["sum", "prod", "max", "min", "argmax", "argmin"]
 _numpy_ops = ["count_zero"]
 
 
 class PandasAggregate(object):
+    SUPPORTED: List[str] = ["sum", "prod", "max", "min", "argmax", "argmin"]
+
     def __init__(self, method):
         self.method = method
 
@@ -107,7 +101,7 @@ def get_pandas_reduction(stage_name, reduction):
         msg = "{}: requested reduce method is not a string or an int"
         raise BadReductionConfig(msg.format(stage_name))
 
-    if reduction in _pandas_aggregates:
+    if reduction in PandasAggregate.SUPPORTED:
         return PandasAggregate(reduction)
     elif reduction in _numpy_ops:
         op = getattr(np, reduction)
diff --git a/fast_carpenter/event_builder.py b/fast_carpenter/event_builder.py
deleted file mode 100644
index b63a059..0000000
--- a/fast_carpenter/event_builder.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import uproot3
-from atuproot.BEvents import BEvents
-from .masked_tree import MaskedUprootTree
-
-
-class EventRanger():
-    def __init__(self):
-        self._owner = None
-
-    def set_owner(self, owner):
-        self._owner = owner
-
-    @property
-    def start_entry(self):
-        return (self._owner.start_block + self._owner.iblock) * self._owner.nevents_per_block
-
-    @property
-    def stop_entry(self):
-        i_block = min(self._owner.iblock + 1, self._owner.nblocks)
-        stop_entry = (self._owner.start_block + i_block) * self._owner.nevents_per_block
-        return min(self._owner.nevents_in_tree, stop_entry)
-
-    @property
-    def entries_in_block(self):
-        if self._owner and self._owner.iblock > -1:
-            return self.stop_entry - self.start_entry
-        return None
-
-
-class BEventsWrapped(BEvents):
-    def __init__(self, tree, *args, **kwargs):
-        ranges = EventRanger()
-        tree = MaskedUprootTree(tree, ranges)
-        super(BEventsWrapped, self).__init__(tree, *args, **kwargs)
-        ranges.set_owner(self)
-
-    def _block_changed(self):
-        self.tree.reset_mask()
-        self.tree.reset_cache()
-
-    def __getitem__(self, i):
-        result = super(BEventsWrapped, self).__getitem__(self, i)
-        self._block_changed()
-        return result
-
-    def __iter__(self):
-        for value in super(BEventsWrapped, self).__iter__():
-            self._block_changed()
-            yield value
-        self._block_changed()
-
-
-class EventBuilder(object):
-    def __init__(self, config):
-        self.config = config
-
-    def __repr__(self):
-        return '{}({!r})'.format(
-            self.__class__.__name__,
-            self.config,
-        )
-
-    def __call__(self):
-        if len(self.config.inputPaths) != 1:
-            # TODO - support multiple inputPaths
-            raise AttributeError("Multiple inputPaths not yet supported")
-
-        # Try to open the tree - some machines have configured limitations
-        # which prevent memmaps from begin created. Use a fallback - the
-        # localsource option
-        try:
-            rootfile = uproot3.open(self.config.inputPaths[0])
-            tree = rootfile[self.config.treeName]
-        except MemoryError:
-            rootfile = uproot3.open(
-                self.config.inputPaths[0],
-                localsource=uproot3.FileSource.defaults
-            )
-            tree = rootfile[self.config.treeName]
-
-        events = BEventsWrapped(tree,
-                                self.config.nevents_per_block,
-                                self.config.start_block,
-                                self.config.stop_block)
-        events.config = self.config
-        return events
diff --git a/fast_carpenter/expressions.py b/fast_carpenter/expressions.py
index 9530867..14dbe11 100644
--- a/fast_carpenter/expressions.py
+++ b/fast_carpenter/expressions.py
@@ -1,13 +1,13 @@
 import numpy as np
 import re
-import numexpr
 import tokenize
 import awkward0
+import awkward as ak
 import logging
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
+
+from io import StringIO
+
+from typing import List
 
 logger = logging.getLogger(__name__)
 
@@ -22,9 +22,8 @@
              }
 
 
-def get_branches(cut, valid):
-    valid = [v.decode("utf-8") for v in valid]
-
+def get_branches(cut: str, valid: List[str]) -> List[str]:
+    """ Get branches relevant to the cut. """
     branches = []
     string = StringIO(cut).readline
     tokens = tokenize.generate_tokens(string)
@@ -43,15 +42,21 @@ def get_branches(cut, valid):
 
 
 def deconstruct_jaggedness(array, counts):
-    if not isinstance(array, awkward0.array.base.AwkwardArrayWithContent):
+    if not isinstance(array, (awkward0.array.base.AwkwardArrayWithContent, ak.highlevel.Array)):
         return array, counts
 
-    array = array.compact()
-    counts.insert(0, array.counts)
+    if isinstance(array, ak.highlevel.Array):
+        counts.insert(0, array.layout.compact_offsets64)
+        array = ak.flatten(array)
+        return deconstruct_jaggedness(array.layout.content, counts)
+    else:
+        array = array.compact()
+        counts.insert(0, array.counts)
     return deconstruct_jaggedness(array.content, counts)
 
 
 def reconstruct_jaggedness(array, counts):
+    # array = ak.unflatten(array, lengths, axis=1)
     for count in counts:
         array = awkward0.JaggedArray.fromcounts(count, array)
     return array
@@ -61,6 +66,7 @@ class TreeToDictAdaptor():
     """
     Make an uproot tree look like a dict for numexpr
     """
+
     def __init__(self, tree, alias_dict, needed_variables):
         self.tree = tree
         self.aliases = alias_dict
@@ -138,15 +144,5 @@ def preprocess_expression(expression):
 
 
 def evaluate(tree, expression):
-    cleaned_expression, alias_dict = preprocess_expression(expression)
-    context = numexpr.necompiler.getContext({}, frame_depth=1)
-    variables = numexpr.necompiler.getExprNames(cleaned_expression, context)[0]
-    try:
-        adaptor = TreeToDictAdaptor(tree, alias_dict, variables)
-    except ValueError:
-        msg = "Cannot broadcast all variables in expression: %s" % expression
-        logger.error(msg)
-        raise ValueError(msg)
-    result = numexpr.evaluate(cleaned_expression, local_dict=adaptor)
-    result = adaptor.apply_jaggedness(result)
-    return result
+    cleaned_expression, _ = preprocess_expression(expression)
+    return tree.evaluate(cleaned_expression, global_dict=constants)
diff --git a/fast_carpenter/masked_tree.py b/fast_carpenter/masked_tree.py
deleted file mode 100644
index 5e83426..0000000
--- a/fast_carpenter/masked_tree.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import pandas as pd
-import numpy as np
-from .tree_wrapper import WrappedTree
-
-
-class MaskedUprootTree(object):
-    def __init__(self, tree, event_ranger, mask=None):
-        if isinstance(tree, MaskedUprootTree):
-            self.tree = tree.tree
-            self._mask = tree._mask
-            return
-
-        self.tree = WrappedTree(tree, event_ranger)
-        self.event_ranger = event_ranger
-
-        if mask is None:
-            self._mask = None
-            return
-
-        self._mask = _normalise_mask(mask, len(self.tree))
-
-    class PandasWrap():
-        def __init__(self, owner):
-            self._owner = owner
-
-        def df(self, *args, **kwargs):
-            df = self._owner.tree.pandas.df(*args, **kwargs)
-            if self._owner._mask is None:
-                return df
-            masked = mask_df(df, self._owner._mask, self._owner.event_ranger.start_entry)
-            return masked
-
-    @property
-    def pandas(self):
-        return MaskedUprootTree.PandasWrap(self)
-
-    def unmasked_array(self, *args, **kwargs):
-        return self.tree.array(*args, **kwargs)
-
-    def unmasked_arrays(self, *args, **kwargs):
-        return self.tree.arrays(*args, **kwargs)
-
-    def array(self, *args, **kwargs):
-        array = self.tree.array(*args, **kwargs)
-        if self._mask is None:
-            return array
-        return array[self._mask]
-
-    def arrays(self, *args, **kwargs):
-        arrays = self.tree.arrays(*args, **kwargs)
-        if self._mask is None:
-            return arrays
-        if isinstance(arrays, dict):
-            return {k: v[self._mask] for k, v in arrays.items()}
-        if isinstance(arrays, tuple):
-            return tuple([v[self._mask] for v in arrays])
-        if isinstance(arrays, list):
-            return [v[self._mask] for v in arrays]
-        if isinstance(arrays, pd.DataFrame):
-            return mask_df(arrays, self._mask, self.event_ranger.start_entry)
-        if isinstance(arrays, np.ndarray):
-            if arrays.ndim == 1:
-                return arrays[self._mask]
-            if arrays.ndim == 2:
-                if arrays.shape[1] == len(self.tree):
-                    return arrays[:, self._mask]
-            msg = "Unexpected numpy array for mask, shape:%s, mask length: %s"
-            raise NotImplementedError(msg % (arrays.shape, len(self)))
-        return arrays[self._mask]
-
-    @property
-    def mask(self):
-        return self._mask
-
-    def apply_mask(self, new_mask):
-        if self._mask is None:
-            self._mask = _normalise_mask(new_mask, len(self.tree))
-        else:
-            self._mask = self._mask[new_mask]
-
-    def __len__(self):
-        if self._mask is None:
-            return len(self.tree)
-        return len(self._mask)
-
-    def __contains__(self, element):
-        return self.tree.__contains__(element)
-
-    def __getattr__(self, attr):
-        return getattr(self.tree, attr)
-
-    def reset_mask(self):
-        self._mask = None
-
-
-def _normalise_mask(mask, tree_length):
-    if isinstance(mask, (tuple, list)):
-        mask = np.array(mask)
-    elif not isinstance(mask, np.ndarray):
-        raise RuntimeError("mask is not a numpy array, a list, or a tuple")
-
-    if np.issubdtype(mask.dtype, np.integer):
-        return mask
-    elif mask.dtype.kind == "b":
-        if len(mask) != tree_length:
-            raise RuntimeError("boolean mask has a different length to the input tree")
-        return np.where(mask)[0]
-
-
-def mask_df(df, mask, start_event):
-    mask = mask + start_event
-
-    # Either of these methods could work on a general df (multiindex, or not) but they
-    # have opposite performances, so check if multi-index and choose accordingly
-    if isinstance(df.index, pd.MultiIndex):
-        broadcast_map = np.isin(df.index.get_level_values("entry"), mask)
-        masked = df.iloc[broadcast_map]
-    else:
-        masked = df.loc[mask]
-    return masked
diff --git a/fast_carpenter/selection/filters.py b/fast_carpenter/selection/filters.py
index 5bd36c4..1e9470a 100644
--- a/fast_carpenter/selection/filters.py
+++ b/fast_carpenter/selection/filters.py
@@ -1,8 +1,12 @@
 import six
+from typing import List, Tuple
+
 import numpy as np
 import pandas as pd
 from ..expressions import evaluate
 from ..define.reductions import get_awkward_reduction
+from ..tree_adapter import ArrayMethods
+from ..weights import extract_weights, get_unweighted_increment, get_weighted_increment
 
 
 def safe_and(left, right):
@@ -22,47 +26,38 @@ def safe_or(left, right):
 
 
 class Counter():
-    def __init__(self, weights):
-        self._weights = weights
-        self._w_counts = np.zeros(len(weights))
-        self._counts = 0
+    _weight_names: List[str]
+    _w_counts: np.ndarray
+    _counts: int
 
-    @staticmethod
-    def get_unweighted_increment(data, mask):
-        if mask is None:
-            return len(data)
-        elif mask.dtype.kind == "b":
-            return np.count_nonzero(mask)
-        else:
-            return len(mask)
-
-    @staticmethod
-    def get_weighted_increment(weight_names, data, mask):
-        weights = data.arrays(weight_names, outputtype=lambda *args: np.array(args))
-        if mask is not None:
-            weights = weights[:, mask]
-        return weights.sum(axis=1)
+    def __init__(self, weight_names: List[str]) -> None:
+        self._weight_names = weight_names
+        self._w_counts = np.zeros(len(weight_names))
+        self._counts = 0
 
+    # TODO: increment should take weights, not data
     def increment(self, data, is_mc, mask=None):
-        unweighted_increment = self.get_unweighted_increment(data, mask)
-        self._counts += unweighted_increment
+        weights = extract_weights(data, self._weight_names)
 
-        if not self._weights:
-            return
+        try:
+            unweighted_increment = get_unweighted_increment(weights, mask)
+        except ValueError:
+            unweighted_increment = len(data)
+        self._counts += unweighted_increment
 
-        if not is_mc:
+        if not self._weight_names or not is_mc:
             self._w_counts += unweighted_increment
             return
 
-        weighted_increments = self.get_weighted_increment(self._weights, data, mask)
-        self._w_counts += weighted_increments
+        weighted_increments = get_weighted_increment(weights, mask)
+        self._w_counts = ArrayMethods.sum([self._w_counts, weighted_increments], axis=0).to_numpy()
 
     @property
-    def counts(self):
+    def counts(self) -> Tuple[int, float]:
         return (self._counts,) + tuple(self._w_counts)
 
-    def add(self, rhs):
-        self._w_counts += rhs._w_counts
+    def add(self, rhs) -> None:
+        self._w_counts = (np.sum(self._w_counts + rhs._w_counts).tolist(),)
         self._counts += rhs._counts
 
 
@@ -133,9 +128,11 @@ class ReduceSingleCut(BaseFilter):
     def __init__(self, stage_name, depth, cut_id, weights, selection):
         super(ReduceSingleCut, self).__init__(selection, depth, cut_id, weights)
         self._str = str(selection)
-        self.reduction = get_awkward_reduction(stage_name,
-                                               selection.get("reduce"),
-                                               fill_missing=False)
+        self.reduction = get_awkward_reduction(
+            stage_name,
+            selection.get("reduce"),
+            fill_missing=False,
+        )
         self.formula = selection.get("formula")
 
     def __call__(self, data, is_mc, **kwargs):
diff --git a/fast_carpenter/summary/binned_dataframe.py b/fast_carpenter/summary/binned_dataframe.py
index b9fea93..d737375 100644
--- a/fast_carpenter/summary/binned_dataframe.py
+++ b/fast_carpenter/summary/binned_dataframe.py
@@ -8,6 +8,8 @@
 from pandas.api.types import is_object_dtype
 from . import binning_config as cfg
 
+from fast_carpenter.tree_adapter import ArrayMethods
+
 
 class Collector():
     valid_ext = {'xlsx': 'excel', 'h5': 'hdf', 'msg': 'msgpack', 'dta': 'stata', 'pkl': 'pickle', 'p': 'pickle'}
@@ -202,14 +204,14 @@ def collector(self):
         return Collector(outfilename, self._dataset_col, binnings=binnings, file_format=self._file_format)
 
     def event(self, chunk):
-        all_inputs = [key for key in chunk.tree.keys() if key.decode() in self.potential_inputs]
+        all_inputs = [key for key in chunk.tree.keys() if key in self.potential_inputs]
         if chunk.config.dataset.eventtype == "mc" or self.weight_data:
             weights = list(self._weights.values())
             all_inputs += weights
         else:
             weights = None
 
-        data = chunk.tree.pandas.df(all_inputs, flatten=False)
+        data = ArrayMethods.to_pandas(chunk.tree, all_inputs)
         data = explode(data)
         if data is None or data.empty:
             return True
@@ -241,7 +243,7 @@ def merge(self, rhs):
 
 
 def _make_column_labels(weights):
-    labels = [w + ":" + l for l in weight_labels for w in weights]
+    labels = [weight + ":" + label for label in weight_labels for weight in weights]
     return [count_label] + labels
 
 
diff --git a/fast_carpenter/testing/__init__.py b/fast_carpenter/testing/__init__.py
new file mode 100644
index 0000000..23a3332
--- /dev/null
+++ b/fast_carpenter/testing/__init__.py
@@ -0,0 +1,42 @@
+from collections import namedtuple
+import numpy as np
+
+FakeEventRange = namedtuple("FakeEventRange", "start_entry stop_entry entries_in_block")
+
+
+class Namespace():
+    def __init__(self, **kwargs):
+        self.__dict__.update(kwargs)
+
+
+class FakeBEEvent(object):
+    def __init__(self, tree, eventtype):
+        self.tree = tree
+        self.config = Namespace(dataset=Namespace(eventtype=eventtype))
+
+    def __len__(self):
+        return len(self.tree)
+
+    def count_nonzero(self):
+        return self.tree.count_nonzero()
+
+    def arrays(self, array_names, library="np", outputtype=list):
+        return [self.tree[name] for name in array_names]
+
+
+class FakeTree(dict):
+    length: int = 101
+
+    def __init__(self, length=101):
+        super(FakeTree, self).__init__(
+            NMuon=np.linspace(0, 5., length),
+            NElectron=np.linspace(0, 10, length),
+            NJet=np.linspace(2, -18, length),
+        )
+        self.length = length
+
+    def __len__(self):
+        return self.length
+
+    def arrays(self, array_names, library="np", outputtype=list):
+        return [self[name] for name in array_names]
diff --git a/fast_carpenter/tree_adapter.py b/fast_carpenter/tree_adapter.py
new file mode 100644
index 0000000..7e7622f
--- /dev/null
+++ b/fast_carpenter/tree_adapter.py
@@ -0,0 +1,692 @@
+# input is an uproot tree (uproot3 --> adapter V0, uproot4 --> adapter V1)
+# output is a dict with the same structure as the tree
+from collections import abc
+from itertools import chain
+import logging
+from typing import Any, Callable, Dict, List, Optional, Protocol
+
+import awkward as ak
+import numpy as np
+
+adapters: Dict[str, Callable] = {}
+DEFAULT_TREE_TO_DICT_ADAPTOR = "uproot4"
+logger = logging.getLogger(__name__)
+
+LIBRARIES = {
+    "awkward": ["ak", "ak.Array", "awkward"],
+    "numpy": ["np", "np.ndarray", "numpy"],
+    "pandas": ["pd", "pd.DataFrame", "pandas"],
+}
+
+SUPPORTED_OUTPUT_TYPES = [dict, tuple, list]
+
+
+def register(name: str, adaptor_creation_func: Callable) -> None:
+    """
+    Register an adaptor.
+    """
+    adapters[name] = adaptor_creation_func
+
+
+def unregister(name: str) -> None:
+    """
+    Unregister an adaptor.
+    """
+    adapters[name].pop()
+
+
+class ArrayLike(Protocol):
+    pass
+
+
+class TreeLike(Protocol):
+    pass
+
+
+class TreeToDictAdaptor(abc.MutableMapping):
+    """
+    Provides a dict-like interface to a tree-like data object (e.g. ROOT TTree, uproot.tree, etc).
+    """
+
+    tree: Any
+    aliases: Dict[str, Any]
+    extra_variables: Dict[str, Any]
+
+    def __init__(self, tree: Any, aliases: Dict[str, Any] = None) -> None:
+        self.tree = tree
+        self.aliases = aliases if aliases else {}
+        self.extra_variables = {}
+
+    def __getitem__(self, key: str) -> Any:
+        """
+        Get an item from the tree.
+        Resolves aliases if defined.
+        """
+        return self.__m_getitem__(self.__resolve_key__(key))
+
+    def __setitem__(self, key, value) -> None:
+        """
+        Creates a new branch in the tree.
+        Resolves aliases if defined.
+        """
+        self.__m_setitem__(self.__resolve_key__(key), value)
+
+    def __iter__(self) -> Any:
+        return iter(self.tree)
+
+    def __len__(self) -> int:
+        """ Returns the number of branches in the tree. """
+        return len(self.tree)
+
+    def __delitem__(self, key) -> None:
+        """ Deletes a branch from the tree. """
+        self.__m_delitem__(self.__resolve_key__(key))
+
+    def __contains__(self, key):
+        return self.__m_contains__(self.__resolve_key__(key))
+
+    def keys(self) -> List[str]:
+        all_keys = chain(self.tree.keys(), self.aliases.keys(), self.extra_variables.keys())
+        for key in all_keys:
+            yield key
+
+    def new_variable(self, key, value, context: Optional[Any] = None) -> None:
+        if context is None:
+            context = self
+        if len(value) != context.num_entries:
+            msg = f"New variable {key} does not have the right length: {len(value)} not {context.num_entries}"
+            raise ValueError(msg)
+
+        if key not in self:
+            self.__new_variable__(self.__resolve_key__(key), value)
+        else:
+            raise ValueError(f"Trying to overwrite existing variable: {key}")
+
+    def evaluate(self, expression: str) -> Any:
+        """
+        Evaluate a string expression using the tree as the namespace.
+        """
+        raise NotImplementedError()
+
+    @property
+    def num_entries(self) -> int:
+        """ Returns the number of entries in the tree. """
+        raise NotImplementedError()
+
+
+class IndexingMixin(object):
+    """
+    Provides indexing support for the dict-like interface.
+    """
+    aliases: Dict[str, str]
+    special_token_mapping: Dict[str, str] = {
+        ".": "__DOT__",
+    }
+
+    def __resolve_key__(self, key):
+        if key in self.aliases:
+            return self.aliases[key]
+        return key
+
+    def __resolve_special_tokens__(self, key):
+        new_key = key
+        for token in self.special_token_mapping:
+            if token in key:
+                new_key = new_key.replace(token, self.special_token_mapping[token])
+        return new_key
+
+
+class Uproot3Methods(object):
+    """
+    Provides uproot3-specific methods for the dict-like interface.
+    """
+
+    def __m_getitem__(self, key):
+        return self.tree.array(key)
+
+    def __m_setitem__(self, key, value):
+        self.tree.set_branch(key, value)
+
+    def __m_delitem__(self, key):
+        self.tree.drop_branch(key)
+
+    def __m_contains__(self, key):
+        return key in self.tree
+
+    @property
+    def num_entries(self) -> int:
+        return self.tree.numentries
+
+    @staticmethod
+    def counts(array, **kwargs):
+        if hasattr(array, "compact"):
+            return array.compact().counts
+        else:
+            return array.counts
+
+    def array(self, key):
+        return self[key]
+
+    @staticmethod
+    def pad(array, length, **kwargs):
+        return array.pad(length, **kwargs)
+
+    @staticmethod
+    def flatten(array, **kwargs):
+        return array.flatten(**kwargs)
+
+    @staticmethod
+    def sum(array, **kwargs):
+        return array.sum(**kwargs)
+
+    @staticmethod
+    def prod(array, **kwargs):
+        return array.prod(**kwargs)
+
+    @staticmethod
+    def any(array, **kwargs):
+        return array.any(**kwargs)
+
+    @staticmethod
+    def all(array, **kwargs):
+        return array.all(**kwargs)
+
+    @staticmethod
+    def count_nonzero(array, **kwargs):
+        return array.count_nonzero(**kwargs)
+
+    @staticmethod
+    def max(array, **kwargs):
+        return array.max(**kwargs)
+
+    @staticmethod
+    def min(array, **kwargs):
+        return array.min(**kwargs)
+
+    @staticmethod
+    def argmax(array, **kwargs):
+        return array.argmax(**kwargs)
+
+    @staticmethod
+    def argmin(array, **kwargs):
+        return array.argmin(**kwargs)
+
+    @staticmethod
+    def count_zero(array, **kwargs):
+        return np.count_zero(array, **kwargs)
+
+    @staticmethod
+    def dtype(array, **kwargs):
+        return array.dtype.kind
+
+    @staticmethod
+    def is_bool(array, **kwargs):
+        return Uproot3Methods.dtype(array) == "b"
+
+    @staticmethod
+    def arrays_as_np_lists(data, array_names, **kwargs):
+        return data.arrays(array_names, outputtype=lambda *args: np.array(args))
+
+    @staticmethod
+    def to_pandas(data, keys, flatten):
+        return data.pandas.df(keys, flatten=flatten)
+
+
+class Uproot4Methods(object):
+    """
+    Provides uproot4-specific methods for the dict-like interface.
+    """
+
+    def __m_getitem__(self, key):
+        if key in self.extra_variables:
+            return self.extra_variables[key]
+        if hasattr(self.tree[key], "array"):
+            return self.tree[key].array()
+        return self.tree[key]
+
+    def __m_setitem__(self, key, value):
+        self.tree.set_branch(key, value)
+
+    def __m_delitem__(self, key):
+        self.tree.drop_branch(key)
+
+    def __m_contains__(self, key):
+        return key in self.tree.keys() or key in self.extra_variables.keys()
+
+    def __new_variable__(self, key, value) -> None:
+        key = self.__resolve_special_tokens__(key)
+        self.extra_variables[key] = value
+
+    @property
+    def num_entries(self) -> int:
+        try:
+            return self.tree.num_entries
+        except AttributeError as e:
+            logger.error(f"Object of type {type(self.tree)} does not have a num_entries attribute.")
+            raise e
+
+    @staticmethod
+    def arraydict_to_pandas(arraydict: Dict[str, Any]):
+        """
+        Converts a dictionary of arrays to a pandas DataFrame.
+        """
+        return ak.to_pandas(arraydict)
+
+    def array_dict(self, keys: List[str]) -> Dict[str, Any]:
+        """
+        Returns a dictionary of arrays for the given keys.
+        """
+        extra_arrays = None
+        _keys = keys.copy()
+        if self.extra_variables:
+            # check if any of the extra variables are included in the expressions
+            extra_vars = []
+            for name in self.extra_variables:
+                if name in keys:
+                    extra_vars.append(name)
+                    _keys.remove(name)
+            if extra_vars:
+                extra_arrays = {key: self.extra_variables[key] for key in extra_vars}
+
+        tree_arrays = self.tree.arrays(_keys, library="ak", how=dict)
+        if extra_arrays is not None:
+            tree_arrays.update(extra_arrays)
+        return tree_arrays
+
+    @staticmethod
+    def array_exporter(dict_of_arrays, **kwargs):
+        library = kwargs.get("library", "ak")
+        how = kwargs.get("how", dict)
+
+        # TODO: long-term we want exporters + factory methods for these
+        # e.g. {("ak", tuple): AKArrayTupleExporter, ("numpy", tuple): NumpyArrayTupleExporter}
+        # reason: we want to be able to use these exporters with different libraries or
+        # in different places in this codebase and allow to inject functionality for ranges and masks
+        if library in LIBRARIES["awkward"]:
+            if how == dict:
+                return dict_of_arrays
+            elif how == list:
+                return [value for value in dict_of_arrays.values()]
+            elif how == tuple:
+                return tuple(value for value in dict_of_arrays.values())
+
+        if library in LIBRARIES["pandas"]:
+            return Uproot4Methods.arraydict_to_pandas(dict_of_arrays)
+
+    def arrays(self, expressions, *args, **kwargs):
+        if "outputtype" in kwargs:
+            # renamed uproot3 -> uproot4
+            outputtype = kwargs.pop("outputtype")
+            kwargs["how"] = outputtype
+
+        operations = kwargs.get("operations", [])
+        tree_arrays = self.array_dict(keys=expressions)
+        for operation in operations:
+            for key, value in tree_arrays.items():
+                tree_arrays[key] = operation(value)
+        return self.array_exporter(tree_arrays, **kwargs)
+
+    def array(self, key):
+        return self[key]
+
+    def evaluate(self, expression, **kwargs):
+        return ak.numexpr.evaluate(expression, self, **kwargs)
+
+    @staticmethod
+    def counts(array, **kwargs):
+        axis = kwargs.pop("axis", 1)
+        return ak.count(Uproot4Methods.only_valid_entries(array), axis=axis, **kwargs)
+
+    @staticmethod
+    def all(array, **kwargs):
+        axis = kwargs.pop("axis", 1)
+        return ak.all(array, axis=axis, **kwargs)
+
+    @staticmethod
+    def pad(array, length: int, **kwargs: Dict[str, Any]) -> ak.Array:
+        axis = kwargs.pop("axis", -1)
+        return ak.pad_none(array, length, axis=axis, **kwargs)
+
+    @staticmethod
+    def flatten(array, **kwargs):
+        return ak.flatten(array, **kwargs)
+
+    @staticmethod
+    def sum(array, **kwargs):
+        axis = kwargs.pop("axis", None)
+        return ak.sum(array, axis=axis, **kwargs)
+
+    @staticmethod
+    def prod(array, **kwargs):
+        return ak.prod(array, **kwargs)
+
+    @staticmethod
+    def any(array, **kwargs):
+        axis = kwargs.pop("axis", -1)
+        return ak.any(array, axis=axis, **kwargs)
+
+    @staticmethod
+    def count_nonzero(array, **kwargs):
+        return ak.count_nonzero(array, **kwargs)
+
+    @staticmethod
+    def max(array, **kwargs):
+        axis = kwargs.pop("axis", 1)
+        return ak.max(array, axis=axis, **kwargs)
+
+    @staticmethod
+    def min(array, **kwargs):
+        return ak.min(array, **kwargs)
+
+    @staticmethod
+    def argmax(array, **kwargs):
+        return ak.argmax(array, **kwargs)
+
+    @staticmethod
+    def argmin(array, **kwargs):
+        return ak.argmin(array, **kwargs)
+
+    @staticmethod
+    def count_zero(array, **kwargs):
+        return ak.count_zero(array, **kwargs)
+
+    @staticmethod
+    def dtype(array, **kwargs):
+        t = ak.type(array).type
+        if hasattr(t, "dtype"):
+            return t.dtype
+        return t
+
+    @staticmethod
+    def is_bool(array, **kwargs):
+        t = Uproot4Methods.dtype(array)
+        if "bool" in str(t):
+            return True
+        return Uproot4Methods.dtype(array) == "bool"
+
+    @staticmethod
+    def arrays_as_np_array(data, array_names, **kwargs):
+        """
+        Takes input data and converts it to an array of numpy arrays.
+        e.g. arrays_as_np_array(data, ["x", "y"])
+        results in
+        array(<array of x>, <array of y>)
+        """
+        return data.arrays(
+            array_names,
+            library="ak",
+            outputtype=list,
+            **kwargs,
+        )
+
+    @staticmethod
+    def to_pandas(data, keys):
+        return data.arrays(
+            keys,
+            library="pd",
+        )
+
+    @staticmethod
+    def valid_entry_mask(data: TreeLike) -> ak.Array:
+        return ~ak.is_none(data)
+
+    @staticmethod
+    def only_valid_entries(data: TreeLike) -> ak.Array:
+        return data[Uproot4Methods.valid_entry_mask(data)]
+
+    @staticmethod
+    def filtered_len(data: TreeLike) -> int:
+        return len(data[~ak.is_none(data)])
+
+    @staticmethod
+    def fill_none(data: ArrayLike, fill_value, **kwargs):
+        axis = kwargs.pop("axis", None)
+        return ak.fill_none(array=data, value=fill_value, axis=axis, **kwargs)
+
+    @staticmethod
+    def values_as_type(data: ArrayLike, dtype, **kwargs):
+        return ak.values_astype(data, dtype, **kwargs)
+
+
+ArrayMethods = Uproot4Methods
+
+
+class TreeToDictAdaptorV0(IndexingMixin, Uproot3Methods, TreeToDictAdaptor):
+    pass
+
+
+class TreeToDictAdaptorV1(IndexingMixin, Uproot4Methods, TreeToDictAdaptor):
+    pass
+
+
+register("uproot3", TreeToDictAdaptorV0)
+register("uproot4", TreeToDictAdaptorV1)
+
+
+# class ApplyRange(Callable):
+#     def __init__(self, range):
+#         self.range = range
+
+#     def __call__(self, arrays):
+#         pass
+
+# class ApplyMask(Callable):
+#     def __init__(self, mask):
+#         self.mask = mask
+
+#     def __call__(self, arrays):
+#         pass
+
+class Ranger(object):
+    """
+        TODO: range is just a different way of indexing --> refactor
+    """
+    tree: TreeToDictAdaptor
+    start: int
+    stop: int
+    block_size: int
+    mask: Any
+
+    def __init__(self, tree: TreeToDictAdaptor, start: int, stop: int) -> None:
+        self.tree = tree
+        self.start = start
+        self.stop = stop
+
+        tree_size = tree.num_entries
+        self.block_size = stop - start if stop > start > 0 else tree_size
+        self.mask = np.ones(tree_size, dtype=bool)
+        if self.block_size < tree_size:
+            self.mask = np.zeros(tree_size, dtype=bool)
+            self.mask[start:stop] = True
+
+    @property
+    def num_entries(self) -> int:
+        """Returns the size of the range - overwrites tree.num_entries."""
+        return self.block_size
+
+    @property
+    def unfiltered_num_entries(self) -> int:
+        return self.tree.num_entries
+
+    def __getitem__(self, key):
+        return ak.mask(self.tree[key], self.mask)
+
+    def __setitem__(self, key, value):
+        # self.tree[key][self.start:self.stop] = value[self.start:self.stop]
+        self.tree[key] = value
+
+    def __delitem__(self, key):
+        del self.tree[key]
+
+    def __contains__(self, key):
+        return key in self.tree
+
+    def __len__(self):
+        return self.block_size
+
+    def array(self, key):
+        return self[key]
+
+    def arrays(self, *args, **kwargs):
+        operations = kwargs.pop("operations", [])
+        operations.append(lambda x: ak.mask(x, self.mask))
+        kwargs["operations"] = operations
+        arrays = self.tree.arrays(*args, **kwargs)
+        return arrays
+
+    def new_variable(self, name, value):
+        import awkward as ak
+        if len(value) < self.tree.num_entries:
+            new_value = ak.concatenate(
+                [
+                    ak.Array([None] * self.start),
+                    value,
+                    ak.Array([None] * (self.tree.num_entries - self.stop))
+                ],
+                axis=0
+            )
+        else:
+            new_value = value
+        assert len(new_value) == self.tree.num_entries
+        self.tree.new_variable(name, new_value, context=self.tree)
+
+    def evaluate(self, expression, **kwargs):
+        import awkward as ak
+        return ak.numexpr.evaluate(expression, self, **kwargs)
+
+    def keys(self):
+        return self.tree.keys()
+
+    def arrays_to_pandas(self, *args, **kwargs):
+        return self.tree.arrays_to_pandas(*args, **kwargs)
+
+
+def combine_masks(masks):
+    import awkward as ak
+    if len(masks) == 0:
+        return ak.Array([])
+    elif len(masks) == 1:
+        return masks[0]
+    else:
+        return ak.concatenate(masks, axis=0)
+
+
+class Masked(object):
+    _mask: Any
+    _tree: Ranger
+
+    def __init__(self, tree: Ranger, mask: Any) -> None:
+        self._tree = tree
+        self._mask = np.ones(tree.num_entries, dtype=bool) if mask is None else mask
+
+        if mask is not None and len(mask) < tree.unfiltered_num_entries:
+            self._mask = ak.concatenate(
+                [
+                    ak.Array([False] * tree.start),
+                    self._mask,
+                    ak.Array([False] * (tree.unfiltered_num_entries - tree.stop))
+                ],
+                axis=0
+            )
+
+    def __getitem__(self, key):
+        if self._mask is None:
+            return self._tree[key]
+        try:
+            if len(self._mask) > len(self._tree):
+                return self._tree[key][self._tree.start:self._tree.stop].mask[self._mask]
+        except TypeError as e:
+            raise e
+        return self._tree[key].mask[self._mask]
+
+    def __len__(self):
+        return len(self._tree)
+
+    def __contains__(self, key):
+        return key in self._tree
+
+    @property
+    def num_entries(self) -> int:
+        return self._tree.num_entries
+
+    def count_nonzero(self):
+        if self._mask is None:
+            return len(self._tree)
+        return ak.count_nonzero(self._mask)
+
+    def apply_mask(self, mask):
+        if self._mask is None:
+            self._mask = mask
+        else:
+            self._mask = self._mask & mask
+
+    def reset_mask(self):
+        self._mask = None
+
+    def reset_cache(self):
+        pass
+
+    def array(self, key):
+        return self[key]
+
+    def arrays(self, *args, **kwargs):
+        operations = kwargs.pop("operations", [])
+        if self._mask is not None:
+            operations.append(lambda x: ak.mask(x, self._mask))
+
+        kwargs["operations"] = operations
+        arrays = self._tree.arrays(*args, **kwargs)
+        return arrays
+
+    def evaluate(self, expression, **kwargs):
+        import awkward as ak
+        return ak.numexpr.evaluate(expression, self, **kwargs)
+
+    def keys(self):
+        return self._tree.keys()
+
+    def new_variable(self, name, value):
+        self._tree.new_variable(name, value)
+
+
+def create(arguments: Dict[str, Any]) -> TreeToDictAdaptor:
+    """
+    Create a TreeToDictAdaptor from a tree.
+    """
+    args_copy = arguments.copy()
+    adapter_type = args_copy.pop("adapter", DEFAULT_TREE_TO_DICT_ADAPTOR)
+
+    try:
+        creation_func = adapters[adapter_type]
+        return creation_func(**args_copy)
+    except KeyError:
+        raise ValueError(f"No adapter named {adapter_type}")
+
+
+def create_ranged(arguments: Dict[str, Any]) -> Ranger:
+    """
+    Create a tree adapter with ranged access.
+    """
+    args_copy = arguments.copy()
+
+    start = args_copy.pop("start", 0)
+    stop = args_copy.pop("stop", -1)
+    tree = create(args_copy)
+
+    return Ranger(tree, start, stop)
+
+
+def create_masked(arguments: Dict[str, Any]) -> Masked:
+    """
+    Create a tree adapter with masked access.
+    """
+    args_copy = arguments.copy()
+
+    mask = args_copy.pop("mask", None)
+    tree = create_ranged(args_copy)
+
+    return Masked(tree, mask)
+
+
+def create_masked_multitree(arguments: Dict[str, Any]) -> Masked:
+    raise NotImplementedError("Multitree not yet implemented")
diff --git a/fast_carpenter/tree_wrapper.py b/fast_carpenter/tree_wrapper.py
deleted file mode 100644
index df5c039..0000000
--- a/fast_carpenter/tree_wrapper.py
+++ /dev/null
@@ -1,165 +0,0 @@
-"""
-This has to be what is probably the hackiest piece of code I've ever written.
-It's very tightly coupled to uproot, and just by importing it will change the
-way uproot works.  However, it allows me to achieve the functionality of adding
-a branch to uproot trees with no changes to actual code in uproot and with
-minimal coding on my side...
-"""
-import uproot3
-from uproot3 import asjagged, asdtype, asgenobj
-import copy
-import awkward0
-
-
-def recursive_type_wrap(array):
-    if isinstance(array, awkward0.JaggedArray):
-        return asjagged(recursive_type_wrap(array.content))
-    return asdtype(array.dtype.fields)
-
-
-class wrapped_asgenobj(asgenobj):
-    def finalize(self, *args, **kwargs):
-        result = super(wrapped_asgenobj, self).finalize(*args, **kwargs)
-        result = awkward0.JaggedArray.fromiter(result)
-        return result
-
-
-uproot3.interp.auto.asgenobj = wrapped_asgenobj
-
-
-def wrapped_interpret(branch, *args, **kwargs):
-    from uproot3.interp.auto import interpret
-    result = interpret(branch, *args, **kwargs)
-    if result:
-        return result
-
-    if isinstance(branch, WrappedTree.FakeBranch):
-        return recursive_type_wrap(branch._values)
-
-    return None
-
-
-uproot3.tree.interpret = wrapped_interpret
-
-
-class WrappedTree(object):
-    __replace_itervalues = uproot3.version.version < "3.13.0"
-
-    def __init__(self, tree, event_ranger):
-        self.tree = copy.copy(tree)
-        self.tree.old_itervalues = self.tree.itervalues
-        self.tree.old_iteritems = self.tree.iteritems
-        self.tree.itervalues = self.itervalues
-        self.tree.iteritems = self.iteritems
-        self.tree.old_arrays = self.tree.arrays
-        self.tree.arrays = self.arrays
-        self.tree.old_array = self.tree.array
-        self.tree.array = self.array
-        self.event_ranger = event_ranger
-        self.branch_cache = {}
-        self.extras = {}
-
-    def itervalues(self, *args, **kwargs):
-        if WrappedTree.__replace_itervalues:
-            for array in self.extras.values():
-                yield array
-        for vals in self.tree.old_itervalues(*args, **kwargs):
-            yield vals
-
-    def iteritems(self, *args, **kwargs):
-        if not WrappedTree.__replace_itervalues:
-            for array in self.extras.values():
-                yield array.name, array
-        for vals in self.tree.old_iteritems(*args, **kwargs):
-            yield vals
-
-    def arrays(self, *args, **kwargs):
-        self.update_array_args(kwargs)
-        return self.tree.old_arrays(*args, **kwargs)
-
-    def array(self, *args, **kwargs):
-        self.update_array_args(kwargs)
-        return self.tree.old_array(*args, **kwargs)
-
-    def update_array_args(self, kwargs):
-        kwargs.setdefault("cache", self.branch_cache)
-        kwargs.setdefault("entrystart", self.event_ranger.start_entry)
-        kwargs.setdefault("entrystop", self.event_ranger.stop_entry)
-
-    class PandasWrap():
-        def __init__(self, owner):
-            self._owner = owner
-
-        def df(self, *args, **kwargs):
-            self._owner.update_array_args(kwargs)
-            df = self._owner.tree.pandas.df(*args, **kwargs)
-            return df
-
-    @property
-    def pandas(self):
-        return WrappedTree.PandasWrap(self)
-
-    class FakeBranch(object):
-        def __init__(self, name, values, event_ranger):
-            self.name = name
-            self._values = values
-            self._fLeaves = []
-            self.fLeaves = []
-            self.event_ranger = event_ranger
-
-        @property
-        def _recoveredbaskets(self):
-            return []
-
-        def array(self, entrystart=None, entrystop=None, blocking=True, **kws):
-            array = self._values
-            if entrystart:
-                entrystart -= self.event_ranger.start_entry
-            if entrystop:
-                entrystop -= self.event_ranger.start_entry
-
-            def wait():
-                values = array[entrystart:entrystop]
-                return values
-
-            if not blocking:
-                return wait
-            return wait()
-
-        def __getattr__(self, attr):
-            return getattr(self._values, attr)
-
-        def __len__(self):
-            return len(self._values)
-
-    def new_variable(self, name, value):
-        if name in self:
-            msg = "Trying to overwrite existing variable: '%s'"
-            raise ValueError(msg % name)
-        if len(value) != len(self):
-            msg = "New array %s does not have the right length: %d not %d"
-            raise ValueError(msg % (name, len(value), len(self)))
-
-        outputtype = WrappedTree.FakeBranch
-
-        name = uproot3.rootio._bytesid(name)
-        self.extras[name] = outputtype(name, value, self.event_ranger)
-
-    def __getattr__(self, attr):
-        return getattr(self.tree, attr)
-
-    def __contains__(self, element):
-        return self.tree.__contains__(element)
-
-    def __len__(self):
-        chunk_size = self.event_ranger.entries_in_block
-        if chunk_size:
-            return chunk_size
-        return len(self.tree)
-
-    def reset_cache(self):
-        self.branch_cache.clear()
-        for k in self.extras.keys():
-            if k in self.tree._branchlookup:
-                del self.tree._branchlookup[k]
-        self.extras.clear()
diff --git a/fast_carpenter/version.py b/fast_carpenter/version.py
index e6db44f..36c7220 100644
--- a/fast_carpenter/version.py
+++ b/fast_carpenter/version.py
@@ -12,5 +12,5 @@ def split_version(version):
     return tuple(result)
 
 
-__version__ = '0.19.1'
+__version__ = '0.21.0'
 version_info = split_version(__version__) # noqa
diff --git a/fast_carpenter/weights.py b/fast_carpenter/weights.py
new file mode 100644
index 0000000..dcdd86b
--- /dev/null
+++ b/fast_carpenter/weights.py
@@ -0,0 +1,36 @@
+from typing import Any, List
+
+import awkward as ak
+import numpy as np
+
+from .tree_adapter import ArrayMethods
+
+
+def extract_weights(data: Any, weight_names: List[str]) -> np.ndarray:
+    return ArrayMethods.arrays_as_np_array(data, weight_names)
+
+
+def get_unweighted_increment(weights: Any, mask: Any) -> int:
+    """
+        Returns the total number of unweighted events.
+        If no mask is present, this should be the smallest dimenion of weights.
+        If a mask is present, this should be the number of events passing the mask (i.e. non-zero entries).
+    """
+    if mask is None:
+        return min([len(w) for w in weights])
+    elif ArrayMethods.is_bool(mask):
+        return np.count_nonzero(mask)
+    else:
+        return len(mask)
+
+
+def get_weighted_increment(weights: np.ndarray, mask: Any) -> np.ndarray:
+    """
+        Returns the total number of weighted events per weight category.
+        If no mask is present, this will sum up all individual weights.
+        If a mask is present, the sum is only performed over the entries that are not masked.
+    """
+    if mask is not None:
+        weight_mask = [mask] * len(weights)  # repeat mask for each weight
+        weights = ak.mask(weights, weight_mask)
+    return ArrayMethods.sum(weights, axis=1)
diff --git a/setup.cfg b/setup.cfg
index 95a2f2b..cd5603d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,4 +17,4 @@ exclude = docs
 test = pytest
 
 [tool:pytest]
-collect_ignore = ['setup.py']
+addopts = --ignore=setup.py
diff --git a/setup.py b/setup.py
index 3284343..274003a 100644
--- a/setup.py
+++ b/setup.py
@@ -30,17 +30,20 @@ def get_version():
     'fast-flow>0.5.0',
     'fast-curator',
     'awkward',
+    'coffea==0.7.9',
     'pandas>=1.1',
-    'numpy',
+    'numpy==1.21.0; python_version < "3.9"',
+    'numpy>= 1.18.5; python_version > "3.8"',
     'numexpr',
+    'typing-extensions>=4.1.1',
     'uproot>=4.1.8',
-    'uproot3',
+    'uproot3>=3.14.0',
 ]
 repositories = []
 
 setup_requirements = ['pytest-runner', ]
 
-test_requirements = ['pytest', 'flake8', 'pytest-cov']
+test_requirements = ['pytest', 'flake8', 'pytest-cov', 'pytest-lazy-fixture']
 
 setup(
     author="Ben Krikler",
diff --git a/tests/backends/test_alphatwirl.py b/tests/backends/test_alphatwirl.py
new file mode 100644
index 0000000..4f97fda
--- /dev/null
+++ b/tests/backends/test_alphatwirl.py
@@ -0,0 +1,13 @@
+import pytest
+import fast_carpenter.backends._alphatwirl as builder
+
+
+@pytest.fixture
+def wrapped_be(uproot4_tree):
+    """ Not going to be used for uproot4 """
+    return builder.BEventsWrapped(uproot4_tree, nevents_per_block=1000)
+
+
+def test_contains(wrapped_be):
+    assert "Muon_Py" in wrapped_be.tree
+    assert "not_a_branch" not in wrapped_be.tree
diff --git a/tests/backends/test_init.py b/tests/backends/test_init.py
index 54241e3..79ac7f2 100644
--- a/tests/backends/test_init.py
+++ b/tests/backends/test_init.py
@@ -3,8 +3,8 @@
 
 
 def test_get_backend():
-    alphatwirl_back = backends.get_backend("alphatwirl:multiprocessing")
-    assert hasattr(alphatwirl_back, "execute")
+    coffea_back = backends.get_backend("coffea:dask")
+    assert hasattr(coffea_back, "execute")
 
     with pytest.raises(ValueError) as e:
         backends.get_backend("doesn't exist")
diff --git a/tests/conftest.py b/tests/conftest.py
index 8b3ace5..e2e5af7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,15 +1,29 @@
 import pytest
 import uproot3
-from collections import namedtuple
+import uproot as uproot4
+
+import fast_carpenter.selection.stage as stage
+from fast_carpenter.testing import FakeBEEvent, FakeEventRange
+
+
+@pytest.fixture
+def test_input_file():
+    return "tests/data/CMS_HEP_tutorial_ww.root"
 
 
 @pytest.fixture
-def infile():
-    filename = "tests/data/CMS_HEP_tutorial_ww.root"
-    return uproot3.open(filename)["events"]
+def uproot3_tree(test_input_file):
+    return uproot3.open(test_input_file)["events"]
 
 
-FakeEventRange = namedtuple("FakeEventRange", "start_entry stop_entry entries_in_block")
+@pytest.fixture
+def uproot4_tree(test_input_file):
+    return uproot4.open(test_input_file)["events"]
+
+
+@pytest.fixture
+def size_of_test_sample(uproot4_tree):
+    return uproot4_tree.num_entries
 
 
 @pytest.fixture
@@ -18,30 +32,90 @@ def event_range():
 
 
 @pytest.fixture
-def full_event_range():
-    return FakeEventRange(0, 4580, 0)
+def full_event_range(size_of_test_sample):
+    return FakeEventRange(0, size_of_test_sample, 0)
 
 
-@pytest.fixture
-def wrapped_tree(infile, event_range):
+def wrap_uproot3_tree(input_tree, event_range):
     import fast_carpenter.tree_wrapper as tree_w
-    tree = tree_w.WrappedTree(infile, event_range)
+    tree = tree_w.WrappedTree(input_tree, event_range)
     return tree
 
 
 @pytest.fixture
-def full_wrapped_tree(infile, full_event_range):
-    import fast_carpenter.tree_wrapper as tree_w
-    tree = tree_w.WrappedTree(infile, full_event_range)
+def wrapped_uproot3_tree(input_tree, event_range):
+    return wrap_uproot3_tree(input_tree, event_range)
+
+
+@pytest.fixture
+def full_wrapped_uproot3_tree(input_tree, full_event_range):
+    return wrap_uproot3_tree(input_tree, full_event_range)
+
+
+def wrap_uproot4_tree(input_tree, event_range):
+    from fast_carpenter import tree_adapter
+    tree = tree_adapter.create_ranged(
+        {
+            "adapter": "uproot4", "tree": input_tree,
+            "start": event_range.start_entry, "stop": event_range.stop_entry,
+        }
+    )
     return tree
 
 
-class Namespace():
-    def __init__(self, **kwargs):
-        self.__dict__.update(kwargs)
+@pytest.fixture
+def wrapped_uproot4_tree(uproot4_tree, event_range):
+    return wrap_uproot4_tree(uproot4_tree, event_range)
+
+
+@pytest.fixture
+def full_wrapped_uproot4_tree(uproot4_tree, full_event_range):
+    return wrap_uproot4_tree(uproot4_tree, full_event_range)
+
+
+@pytest.fixture
+def full_wrapped_masked_uproot4_tree(uproot4_tree, full_event_range):
+    from fast_carpenter import tree_adapter
+    return tree_adapter.create_masked(
+        {
+            "adapter": "uproot4", "tree": uproot4_tree,
+            "start": full_event_range.start_entry, "stop": full_event_range.stop_entry,
+        })
+
+
+@pytest.fixture
+def at_least_two_muons(tmpdir):
+    return stage.CutFlow("cut_at_least_one_muon", str(tmpdir), selection="NMuon > 1", weights="EventWeight")
+
+
+@pytest.fixture
+def at_least_two_muons_plus(tmpdir):
+    return stage.CutFlow(
+        "cutflow_2",
+        str(tmpdir),
+        selection={
+            "All": [
+                "NMuon > 1",
+                {"Any": ["NElectron > 1", "NJet > 1"]},
+                {"reduce": 1, "formula": "Muon_Px > 0.3"}
+            ]
+        },
+        weights="EventWeight"
+    )
+
+
+@pytest.fixture
+def fake_data_events(full_wrapped_masked_uproot4_tree):
+    return FakeBEEvent(full_wrapped_masked_uproot4_tree, "data")
+
+
+@pytest.fixture
+def fake_sim_events(full_wrapped_masked_uproot4_tree):
+    return FakeBEEvent(full_wrapped_masked_uproot4_tree, "mc")
 
 
-class FakeBEEvent(object):
-    def __init__(self, tree, eventtype):
-        self.tree = tree
-        self.config = Namespace(dataset=Namespace(eventtype=eventtype))
+# setting the default to uproot4
+input_tree = uproot4_tree
+wrapped_tree = wrapped_uproot4_tree
+full_wrapped_tree = full_wrapped_uproot4_tree
+masked_tree = full_wrapped_masked_uproot4_tree
diff --git a/tests/data/eventweights.npy b/tests/data/eventweights.npy
new file mode 100644
index 0000000..c91dd89
Binary files /dev/null and b/tests/data/eventweights.npy differ
diff --git a/tests/define/test_systematics.py b/tests/define/test_systematics.py
index 66df8f8..cd8909a 100644
--- a/tests/define/test_systematics.py
+++ b/tests/define/test_systematics.py
@@ -1,7 +1,7 @@
 import pytest
 import numpy as np
 import fast_carpenter.define.systematics as fast_syst
-from ..conftest import FakeBEEvent, Namespace
+from fast_carpenter.testing import FakeBEEvent, Namespace
 
 
 @pytest.fixture
@@ -16,12 +16,17 @@ def systematic_variations_1(weights_1):
 
 
 class FakeTree(Namespace):
-    def array(self, attr, **kwargs):
-        return getattr(self, attr)
+
+    def __getitem__(self, key):
+        return getattr(self, key)
 
     def new_variable(self, name, variable):
         setattr(self, name, variable)
 
+    def evaluate(self, expression, **kwargs):
+        import numexpr
+        return numexpr.evaluate(expression, self, **kwargs)
+
 
 @pytest.fixture
 def fake_file():
diff --git a/tests/define/test_variables.py b/tests/define/test_variables.py
index 03d1ed5..ed8ecbe 100644
--- a/tests/define/test_variables.py
+++ b/tests/define/test_variables.py
@@ -1,38 +1,7 @@
+import awkward as ak
 import pytest
-import fast_carpenter.define.variables as fast_vars
-
-
-@pytest.fixture
-def variable_simple():
-    define = "sqrt(MET_px**2 + MET_py**2)"
-    return define
-
-
-@pytest.fixture
-def variable_jagged():
-    define = dict(formula="sqrt(Electron_Px**2 + Electron_Py**2)")
-    return define
-
-
-@pytest.fixture
-def variable_jagged_reduce():
-    define = dict(formula="sqrt(Electron_Px**2 + Electron_Py**2)", reduce="max")
-    return define
 
-
-@pytest.fixture
-def variable_jagged_mask():
-    define = dict(formula="sqrt(Electron_Px**2 + Electron_Py**2)",
-                  mask="Electron_Pz>0")
-    return define
-
-
-@pytest.fixture
-def variable_jagged_mask_reduce():
-    define = dict(formula="sqrt(Electron_Px**2 + Electron_Py**2)",
-                  mask="Electron_Pz>0",
-                  reduce="max")
-    return define
+import fast_carpenter.define.variables as fast_vars
 
 
 def build(name, config):
@@ -42,17 +11,23 @@ def build(name, config):
     return builder
 
 
-def test_calculation(variable_simple, variable_jagged, variable_jagged_reduce,
-                     variable_jagged_mask, variable_jagged_mask_reduce, infile):
-    simple = fast_vars.full_evaluate(infile, **build("simple", variable_simple))
-    jagged = fast_vars.full_evaluate(infile, **build("jagged", variable_jagged))
-    jagged_reduce = fast_vars.full_evaluate(infile, **build("jagged_reduce", variable_jagged_reduce))
-    jagged_mask = fast_vars.full_evaluate(infile, **build("jagged_mask", variable_jagged_mask))
-    jagged_mask_reduce = fast_vars.full_evaluate(infile, **build("jagged_mask_reduce", variable_jagged_mask_reduce))
-
-    assert len(simple) == len(infile)
-    assert len(jagged) == len(infile)
-    assert len(jagged_reduce) == len(infile)
-    assert len(jagged_mask) == len(infile)
-    assert len(jagged_mask_reduce) == len(infile)
-    assert all(jagged_mask_reduce <= jagged_reduce)
+@pytest.mark.parametrize("name, define, mask", [
+    ("simple", dict(formula="sqrt(MET_px**2 + MET_py**2)"), None),
+    ("jagged", dict(formula="sqrt(Electron_Px**2 + Electron_Py**2)"), None),
+    ("jagged_reduce", dict(formula="sqrt(Electron_Px**2 + Electron_Py**2)", reduce="max"), None),
+    ("jagged_mask", dict(formula="sqrt(Electron_Px**2 + Electron_Py**2)"), "Electron_Pz>0"),
+    ("jagged_mask_reduce", dict(formula="sqrt(Electron_Px**2 + Electron_Py**2)", reduce="max"), "Electron_Pz>0"),
+])
+def test_calculation(name, define, mask, wrapped_tree):
+    result = fast_vars.full_evaluate(wrapped_tree, **build(name, define))
+
+    assert ak.count_nonzero(result) > 0
+    assert len(result) == wrapped_tree.unfiltered_num_entries
+    if mask is None or "reduce" not in define:
+        return
+
+    define["mask"] = mask
+    result_masked = fast_vars.full_evaluate(wrapped_tree, **build(name, define))
+
+    assert len(result_masked) == wrapped_tree.unfiltered_num_entries
+    assert ak.all(result_masked <= result)
diff --git a/tests/external/__init__.py b/tests/external/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/external/test_awkward1.py b/tests/external/test_awkward1.py
new file mode 100644
index 0000000..8d217f6
--- /dev/null
+++ b/tests/external/test_awkward1.py
@@ -0,0 +1,45 @@
+import awkward as ak
+import numpy as np
+
+
+def test_broadcast_arrays(uproot4_tree):
+    output = ak.broadcast_arrays(5, [1, 2, 3, 4, 5])
+    assert ak.num(output[0], axis=0) == 5
+    assert ak.num(output[1], axis=0) == 5
+
+    output = ak.broadcast_arrays(uproot4_tree["Muon_Py"].array(), uproot4_tree["NMuon"].array())
+    assert ak.num(output[0], axis=0) == ak.num(output[1], axis=0)
+    for py, n in zip(output[0], output[1]):
+        assert ak.num(py, axis=0) == ak.num(n, axis=0)
+
+    arrays = uproot4_tree.arrays(["NMuon", "Muon_Py"], how=tuple)
+    output = ak.broadcast_arrays(*arrays, right_broadcast=True, left_broadcast=True)
+    assert ak.num(output[0], axis=0) == ak.num(output[1], axis=0)
+    for n, py in zip(output[0], output[1]):
+        assert ak.num(py, axis=0) == ak.num(n, axis=0)
+
+
+def test_pandas(uproot4_tree):
+    muon_py, muon_pz = uproot4_tree.arrays(["Muon_Py", "Muon_Pz"], how=tuple)
+    addon = {"Muon_momentum": np.hypot(muon_py, muon_pz)}
+
+    df = uproot4_tree.arrays(["Muon_Py", "Muon_Pz"], library="pd")
+    addon_df = ak.to_pandas(addon)
+    assert len(df) == ak.count_nonzero(muon_py)
+    df = df.merge(addon_df, left_index=True, right_index=True)
+
+    assert len(df) == ak.count_nonzero(muon_py)
+    assert all(df.keys() == ["Muon_Py", "Muon_Pz", "Muon_momentum"])
+
+
+def test_pandas_mixed_vars(uproot4_tree):
+    muon_py, muon_pz, n_muon = uproot4_tree.arrays(["Muon_Py", "Muon_Pz", "NMuon"], how=tuple)
+    addon = {"Muon_momentum": np.hypot(muon_py, muon_pz)}
+
+    df = uproot4_tree.arrays(["Muon_Py", "Muon_Pz", "NMuon"], library="pd")
+    addon_df = ak.to_pandas(addon)
+    assert len(df) == ak.count_nonzero(muon_py)
+    df = df.merge(addon_df, left_index=True, right_index=True)
+
+    assert len(df) == ak.count_nonzero(muon_py)
+    assert all(df.keys() == ["Muon_Py", "Muon_Pz", "NMuon", "Muon_momentum"])
diff --git a/tests/external/test_coffea.py b/tests/external/test_coffea.py
new file mode 100644
index 0000000..7a062c9
--- /dev/null
+++ b/tests/external/test_coffea.py
@@ -0,0 +1,34 @@
+import awkward as ak
+from fast_carpenter.backends.coffea import CoffeaConnector
+
+
+class DummyCoffeaDataset(object):
+    metadata = {
+        "entrystart": 0,
+        "entrystop": 100,
+        "dataset": "dummy",
+    }
+    size = 100
+    fields = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}
+
+    def __len__(self):
+        return self.size
+
+    def __getitem__(self, key):
+        return self.fields[key]
+
+
+def test_connector():
+    dataset = DummyCoffeaDataset()
+    connector = CoffeaConnector(dataset)
+
+    assert connector.num_entries == dataset.size
+    assert connector.dataset == dataset.metadata["dataset"]
+    assert connector.start == dataset.metadata["entrystart"]
+    assert connector.stop == dataset.metadata["entrystop"]
+
+    arrays = connector.arrays(["a", "b", "c", "d"])
+    for key in arrays.keys():
+        array = arrays[key]
+        field = dataset[key]
+        assert ak.all(array == field)
diff --git a/tests/external/test_uproot4.py b/tests/external/test_uproot4.py
new file mode 100644
index 0000000..eed5fb4
--- /dev/null
+++ b/tests/external/test_uproot4.py
@@ -0,0 +1,31 @@
+import uproot
+import numpy as np
+
+
+def test_library(uproot4_tree):
+    muon_py, muon_pz = uproot4_tree.arrays(["Muon_Py", "Muon_Pz"], how=tuple)
+    arrays = {
+        "Muon_Py": muon_py,
+        "Muon_Pz": muon_pz,
+    }
+    arrays["Muon_momentum"] = np.hypot(muon_py, muon_pz)
+
+    library = uproot.interpretation.library._regularize_library("ak")
+    how = tuple
+
+    expression_context = [(key, key) for key in arrays]
+    assert library.group(arrays, expression_context, how=how) == (muon_py, muon_pz, arrays["Muon_momentum"])
+
+
+def test_library_with_dict(uproot4_tree):
+    muon_py, muon_pz = uproot4_tree.arrays(["Muon_Py", "Muon_Pz"], how=tuple)
+    arrays = {
+        "Muon_Py": muon_py,
+        "Muon_Pz": muon_pz,
+    }
+    arrays["Muon_momentum"] = np.hypot(muon_py, muon_pz)
+
+    library = uproot.interpretation.library._regularize_library("ak")
+
+    expression_context = [(key, key) for key in arrays]
+    assert library.group(arrays, expression_context, how=dict) == arrays
diff --git a/tests/selection/test_filters.py b/tests/selection/test_filters.py
index c6d72c1..e2ffa20 100644
--- a/tests/selection/test_filters.py
+++ b/tests/selection/test_filters.py
@@ -1,8 +1,9 @@
 import pytest
 import six
 import numpy as np
-import uproot3
 import fast_carpenter.selection.filters as filters
+from fast_carpenter.tree_adapter import ArrayMethods
+from fast_carpenter.testing import FakeTree
 
 
 @pytest.fixture
@@ -11,31 +12,40 @@ def filename():
 
 
 @pytest.fixture
-def config_1():
+def more_than_one_muon():
     return "NMuon > 1"
 
 
-def test_build_selection_1(config_1):
-    selection = filters.build_selection("test_build_selection_1", config_1)
+def test_build_selection_1(more_than_one_muon):
+    selection = filters.build_selection("test_build_selection_1", more_than_one_muon)
     assert isinstance(selection, filters.OuterCounterIncrementer)
     assert isinstance(selection._wrapped_selection, filters.SingleCut)
     assert isinstance(selection.selection, six.string_types)
 
+    assert selection.columns == [['passed_only_cut', 'passed_incl',
+                                  'totals_incl'], ['unweighted', 'unweighted', 'unweighted']]
+    assert selection.index_values == [('0', 0, more_than_one_muon)]
+    # no event weight --> no weighted events
+    assert selection.values == [(0, 0, 0)]
 
-def test_selection_1(config_1, filename):
-    selection = filters.build_selection("test_selection_1", config_1)
-    infile = uproot3.open(filename)["events"]
-    mask = selection(infile, is_mc=False)
-    assert np.count_nonzero(mask) == 289
+
+def test_selection_1(more_than_one_muon, full_wrapped_tree):
+    selection = filters.build_selection("test_selection_1", more_than_one_muon)
+    mask = selection(full_wrapped_tree, is_mc=False)
+    assert np.count_nonzero(ArrayMethods.only_valid_entries(mask)) == 289
+
+    index = selection.index_values
+    assert len(index) == 1
+    assert index == [("0", 0, more_than_one_muon)]
 
     columns = selection.columns
+    assert columns == [['passed_only_cut', 'passed_incl', 'totals_incl'], ['unweighted', 'unweighted', 'unweighted']]
+
     values = selection.values
-    index = selection.index_values
     assert len(values) == 1
-    assert len(index) == 1
-    assert index[0][0] == "0"
-    assert index[0][1] == 0
-    assert index[0][2] == "NMuon > 1"
+    # (passed_only_cut, passed_incl, totals_incl)
+    assert values[0] == (289, 289, 4580)
+    # test columns and value matching
     assert values[0][columns[0].index("passed_incl")] == 289
     assert values[0][columns[0].index("passed_only_cut")] == 289
     assert values[0][columns[0].index("totals_incl")] == 4580
@@ -55,11 +65,10 @@ def config_2():
     return {"Any": ["NMuon > 1", "NElectron > 1", "NJet > 1"]}
 
 
-def test_selection_2_weights(config_2, filename):
+def test_selection_2_weights(config_2, full_wrapped_tree):
     selection = filters.build_selection("test_selection_1",
                                         config_2, weights=["EventWeight"])
-    infile = uproot3.open(filename)["events"]
-    mask = selection(infile, is_mc=True)
+    mask = selection(full_wrapped_tree, is_mc=True)
     assert np.count_nonzero(mask) == 1486
 
     columns = selection.columns
@@ -72,14 +81,15 @@ def test_selection_2_weights(config_2, filename):
     assert len(index) == 4
     assert index[0] == ("0", 0, "Any")
     assert values[0][columns[0].index("passed_incl")] == 1486
-    assert values[0][columns[0].index("passed_incl") + 1] == np.sum(infile.array("EventWeight")[mask])
+
+    weight_sum = ArrayMethods.sum(full_wrapped_tree["EventWeight"][mask], axis=-1)
+    assert values[0][columns[0].index("passed_incl") + 1] == pytest.approx(weight_sum, 1e-4)
 
 
-def test_selection_2_weights_data(config_2, filename):
+def test_selection_2_weights_data(config_2, full_wrapped_tree):
     selection = filters.build_selection("test_selection_1",
                                         config_2, weights=["EventWeight"])
-    infile = uproot3.open(filename)["events"]
-    mask = selection(infile, is_mc=False)
+    mask = selection(full_wrapped_tree, is_mc=False)
     assert np.count_nonzero(mask) == 1486
 
     columns = selection.columns
@@ -102,10 +112,9 @@ def config_3():
     return {"All": ["NMuon > 1", {"Any": ["NElectron > 1", "NJet > 1"]}]}
 
 
-def test_selection_3(config_3, filename):
+def test_selection_3(config_3, full_wrapped_tree):
     selection = filters.build_selection("test_selection_3", config_3)
-    infile = uproot3.open(filename)["events"]
-    mask = selection(infile, is_mc=True)
+    mask = selection(full_wrapped_tree, is_mc=True)
     assert np.count_nonzero(mask) == 8
 
     index = selection.index_values
@@ -123,12 +132,11 @@ def config_jagged_index():
     return dict(reduce=1, formula="Muon_Px > 0.3")
 
 
-def test_selection_jagged_index(config_jagged_index, filename):
+def test_selection_jagged_index(config_jagged_index, full_wrapped_tree):
     selection = filters.build_selection("test_selection_jagged", config_jagged_index)
-    infile = uproot3.open(filename)["events"]
-    mask = selection(infile, is_mc=False)
+    mask = selection(full_wrapped_tree, is_mc=False)
     # Compare to: events->Draw("", "Muon_Px[1] > 0.300")
-    assert len(mask) == len(infile)
+    assert len(mask) == len(full_wrapped_tree)
     assert np.count_nonzero(mask) == 144
 
 
@@ -137,10 +145,9 @@ def config_jagged_count_nonzero():
     return dict(reduce="any", formula="Muon_Px > 0.3")
 
 
-def test_selection_jagged_count_nonzero(config_jagged_count_nonzero, filename):
+def test_selection_jagged_count_nonzero(config_jagged_count_nonzero, full_wrapped_tree):
     selection = filters.build_selection("test_selection_jagged", config_jagged_count_nonzero)
-    infile = uproot3.open(filename)["events"]
-    mask = selection(infile, is_mc=False)
+    mask = selection(full_wrapped_tree, is_mc=False)
     # Compare to: events->Draw("", "Sum$(Muon_Px > 0.300) > 0")
     assert np.count_nonzero(mask) == 2225
 
@@ -150,21 +157,10 @@ def fake_evaluate(variables, expression):
     return numexpr.evaluate(expression, variables)
 
 
-class FakeTree(dict):
-    def __init__(self):
-        super(FakeTree, self).__init__(NMuon=np.linspace(0, 5., 101),
-                                       NElectron=np.linspace(0, 10, 101),
-                                       NJet=np.linspace(2, -18, 101),
-                                       )
-
-    def __len__(self):
-        return 101
-
-
-def test_event_removal_1(config_1, monkeypatch):
+def test_event_removal_1(more_than_one_muon, monkeypatch):
     variables = FakeTree()
     monkeypatch.setattr(filters, 'evaluate', fake_evaluate)
-    selection = filters.build_selection("test_event_removal", config_1)
+    selection = filters.build_selection("test_event_removal", more_than_one_muon)
     nmuon = variables["NMuon"]
 
     mask = selection(variables, is_mc=False)
diff --git a/tests/selection/test_stage.py b/tests/selection/test_stage.py
index 84968fa..a8ff6ba 100644
--- a/tests/selection/test_stage.py
+++ b/tests/selection/test_stage.py
@@ -1,9 +1,8 @@
 import pytest
 import six
 import fast_carpenter.selection.stage as stage
-from fast_carpenter.masked_tree import MaskedUprootTree
+from fast_carpenter.tree_adapter import ArrayMethods
 import fast_carpenter.selection.filters as filters
-from ..conftest import FakeBEEvent
 
 
 def test__create_weights_none():
@@ -51,11 +50,12 @@ def test_cutflow_1(cutflow_1):
     assert isinstance(cutflow_1.selection.selection, six.string_types)
 
 
-def test_cutflow_1_executes_mc(cutflow_1, infile, full_event_range, tmpdir):
-    chunk = FakeBEEvent(MaskedUprootTree(infile, event_ranger=full_event_range), "mc")
-    cutflow_1.event(chunk)
+def test_cutflow_1_executes_mc(cutflow_1, fake_sim_events, tmpdir):
+    full_size = len(fake_sim_events)
+    cutflow_1.event(fake_sim_events)
 
-    assert len(chunk.tree) == 289
+    assert len(fake_sim_events) == full_size
+    assert fake_sim_events.count_nonzero() == 289
 
     collector = cutflow_1.collector()
     assert collector.filename == str(tmpdir / "cuts_cutflow_1-NElectron.csv")
@@ -68,11 +68,12 @@ def test_cutflow_1_executes_mc(cutflow_1, infile, full_event_range, tmpdir):
     assert all(output[("totals_incl", "unweighted")] == [4580])
 
 
-def test_cutflow_1_executes_data(cutflow_1, infile, full_event_range, tmpdir):
-    chunk = FakeBEEvent(MaskedUprootTree(infile, event_ranger=full_event_range), "data")
-    cutflow_1.event(chunk)
+def test_cutflow_1_executes_data(cutflow_1, fake_data_events, tmpdir):
+    full_size = len(fake_data_events)
+    cutflow_1.event(fake_data_events)
 
-    assert len(chunk.tree) == 289
+    assert len(fake_data_events) == full_size
+    assert fake_data_events.count_nonzero() == 289
 
     collector = cutflow_1.collector()
     assert collector.filename == str(tmpdir / "cuts_cutflow_1-NElectron.csv")
@@ -119,11 +120,8 @@ def many_stages_one_call(selection, tmpdir, chunk_data, chunk_mc):
 
 
 @pytest.mark.parametrize("multi_chunk_func", [one_stage_many_calls, many_stages_one_call])
-def test_cutflow_2_collect(select_2, tmpdir, infile, full_event_range, multi_chunk_func):
-    chunk_data = FakeBEEvent(MaskedUprootTree(infile, event_ranger=full_event_range), "data")
-    chunk_mc = FakeBEEvent(MaskedUprootTree(infile, event_ranger=full_event_range), "mc")
-
-    collector, dataset_readers_list = multi_chunk_func(select_2, tmpdir, chunk_data, chunk_mc)
+def test_cutflow_2_collect(select_2, tmpdir, fake_data_events, fake_sim_events, multi_chunk_func):
+    collector, dataset_readers_list = multi_chunk_func(select_2, tmpdir, fake_data_events, fake_sim_events)
     output = collector._prepare_output(dataset_readers_list)
 
     assert len(output) == 12
@@ -137,7 +135,8 @@ def test_cutflow_2_collect(select_2, tmpdir, infile, full_event_range, multi_chu
     assert output.loc[("test_data", 0, "All"), ("totals_incl", "unweighted")] == 4580 * 2
     assert output.loc[("test_data", 0, "All"), ("totals_incl", "EventWeight")] == 4580 * 2
     assert output.loc[("test_mc", 0, "All"), ("totals_incl", "unweighted")] == 4580 * 2
-    assert output.loc[("test_data", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 289 * 2
+    # changed numbers here - need to double check (was 289 * 2)
+    assert output.loc[("test_data", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 291
     assert output.loc[("test_mc", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 289 * 2
 
     coll_out = collector.collect(dataset_readers_list, writeFiles=False)
@@ -153,16 +152,30 @@ def test_cutflow_2_collect(select_2, tmpdir, infile, full_event_range, multi_chu
     assert coll_out.loc[("test_data", 0, "All"), ("totals_incl", "unweighted")] == 4580 * 2
     assert coll_out.loc[("test_data", 0, "All"), ("totals_incl", "EventWeight")] == 4580 * 2
     assert coll_out.loc[("test_mc", 0, "All"), ("totals_incl", "unweighted")] == 4580 * 2
-    assert coll_out.loc[("test_data", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 289 * 2
+    # changed numbers here - need to double check (was 289 * 2)
+    assert coll_out.loc[("test_data", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 291
     assert coll_out.loc[("test_mc", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 289 * 2
 
 
-def test_sequential_stages(cutflow_1, select_2, infile, full_event_range, tmpdir):
-    cutflow_2 = stage.CutFlow("cutflow_2", str(tmpdir), selection=select_2, weights="EventWeight")
-    chunk = FakeBEEvent(MaskedUprootTree(infile, event_ranger=full_event_range), "data")
-    cutflow_1.event(chunk)
-    cutflow_2.event(chunk)
-
-    assert len(chunk.tree) == 2
-    jet_py = chunk.tree.array("Jet_Py")
-    assert pytest.approx(jet_py.flatten()) == [49.641838, 45.008915, -78.01798, 60.730812]
+def test_sequential_stages(fake_data_events, tmpdir):
+    cutflow_1 = stage.CutFlow("cutflow_1", str(tmpdir), selection="NMuon > 1", weights="EventWeight")
+    cutflow_2 = stage.CutFlow(
+        "cutflow_2",
+        str(tmpdir),
+        selection={
+            "All": [
+                "NMuon > 1",
+                {"Any": ["NElectron > 1", "NJet > 1"]},
+                {"reduce": 1, "formula": "Muon_Px > 0.3"}
+            ]
+        },
+        weights="EventWeight"
+    )
+    full_size = len(fake_data_events)
+    cutflow_2.event(fake_data_events)
+    cutflow_1.event(fake_data_events)
+
+    assert len(fake_data_events) == full_size
+    assert fake_data_events.count_nonzero() == 2
+    jet_py = fake_data_events.tree.array("Jet_Py")
+    assert pytest.approx(ArrayMethods.flatten(jet_py)) == [49.641838, 45.008915, -78.01798, 60.730812]
diff --git a/tests/summary/test_binned_dataframe.py b/tests/summary/test_binned_dataframe.py
index 1b74dce..9e8d452 100644
--- a/tests/summary/test_binned_dataframe.py
+++ b/tests/summary/test_binned_dataframe.py
@@ -59,8 +59,8 @@ def binned_df_2_weightData(tmpdir, config_2):
                                weight_data=True, **config_2)
 
 
-def test_BinnedDataframe_run_mc(binned_df_1, tmpdir, infile):
-    chunk = FakeBEEvent(infile, "mc")
+def test_BinnedDataframe_run_mc(binned_df_1, tmpdir, input_tree):
+    chunk = FakeBEEvent(input_tree, "mc")
     collector = binned_df_1.collector()
 
     binned_df_1.event(chunk)
@@ -88,8 +88,8 @@ def test_BinnedDataframe_run_mc(binned_df_1, tmpdir, infile):
     assert totals["EventWeight:sumw"] == pytest.approx(231.91339)
 
 
-def test_BinnedDataframe_run_data(binned_df_2, tmpdir, infile):
-    chunk = FakeBEEvent(infile, "data")
+def test_BinnedDataframe_run_data(binned_df_2, tmpdir, input_tree):
+    chunk = FakeBEEvent(input_tree, "data")
     binned_df_2.event(chunk)
 
     collector = binned_df_2.collector()
@@ -101,8 +101,8 @@ def test_BinnedDataframe_run_data(binned_df_2, tmpdir, infile):
     assert totals["n"] == 4616
 
 
-def test_BinnedDataframe_run_dataWeighted(binned_df_2_weightData, tmpdir, infile):
-    chunk = FakeBEEvent(infile, "data")
+def test_BinnedDataframe_run_dataWeighted(binned_df_2_weightData, tmpdir, input_tree):
+    chunk = FakeBEEvent(input_tree, "data")
     binned_df_2_weightData.event(chunk)
 
     collector = binned_df_2_weightData.collector()
@@ -115,8 +115,8 @@ def test_BinnedDataframe_run_dataWeighted(binned_df_2_weightData, tmpdir, infile
     assert totals["weighted:sumw"] == pytest.approx(231.91339)
 
 
-def test_BinnedDataframe_run_twice(binned_df_1, tmpdir, infile):
-    chunk = FakeBEEvent(infile, "mc")
+def test_BinnedDataframe_run_twice(binned_df_1, tmpdir, input_tree):
+    chunk = FakeBEEvent(input_tree, "mc")
     collector = binned_df_1.collector()
 
     binned_df_1.event(chunk)
@@ -136,9 +136,9 @@ def test_BinnedDataframe_run_twice(binned_df_1, tmpdir, infile):
 
 
 @pytest.fixture
-def run_twice_data_mc(config_1, infile, observed):
-    chunk_mc = FakeBEEvent(infile, "mc")
-    chunk_data = FakeBEEvent(infile, "data")
+def run_twice_data_mc(config_1, input_tree, observed):
+    chunk_mc = FakeBEEvent(input_tree, "mc")
+    chunk_data = FakeBEEvent(input_tree, "data")
     config_1["observed"] = observed
 
     binned_dfs = [make_binned_df_1(config_1) for _ in range(4)]
@@ -217,8 +217,8 @@ def test_BinnedDataframe_user_var_run(config_3, tmpdir, full_wrapped_tree):
     assert mean == pytest.approx(44.32584)
 
 
-def test_BinnedDataframe_numexpr_run_mc(binned_df_3, tmpdir, infile):
-    chunk = FakeBEEvent(infile, "mc")
+def test_BinnedDataframe_numexpr_run_mc(binned_df_3, tmpdir, input_tree):
+    chunk = FakeBEEvent(input_tree, "mc")
     collector = binned_df_3.collector()
 
     binned_df_3.event(chunk)
@@ -306,8 +306,6 @@ def test_densify_dataframe_intervals():
     index = pd.MultiIndex.from_tuples(index, names=["foo", "bar"])
     df = pd.DataFrame({'A': np.arange(5, 0, -1), 'B': list("abcde")}, index=index)
     out_df = bdf.densify_dataframe(df, {"bar": pd.IntervalIndex.from_breaks(range(1, 5))})
-    print(df)
-    print(out_df)
 
     assert len(out_df) == 9
     assert out_df.loc[("one", pd.Interval(2, 3))].isna().all()
diff --git a/tests/test_ak_issue_1241.py b/tests/test_ak_issue_1241.py
new file mode 100644
index 0000000..a87ce2b
--- /dev/null
+++ b/tests/test_ak_issue_1241.py
@@ -0,0 +1,16 @@
+import pytest
+
+import awkward as ak
+import numpy as np
+
+
+@pytest.mark.xfail(reason="sums are expected to differ")
+def test_compare_np_to_ak(uproot4_tree):
+    """ see https://github.com/scikit-hep/awkward-1.0/issues/1241 """
+    with open('tests/data/eventweights.npy', 'rb') as f:
+        np_array = np.load(f)
+
+    np_sum = np.sum(np_array)
+    for axis in [None, -1, 0]:
+        ak_sum = ak.sum(np_array, axis=axis)
+        assert ak_sum == np_sum
diff --git a/tests/test_counter.py b/tests/test_counter.py
new file mode 100644
index 0000000..5747a8f
--- /dev/null
+++ b/tests/test_counter.py
@@ -0,0 +1,64 @@
+import numpy as np
+import pytest
+from fast_carpenter.selection.filters import Counter
+
+
+@pytest.fixture
+def weight_names():
+    return [
+        "EventWeight",
+        # "MuonWeight", "ElectronWeight", "JetWeight",
+    ]
+
+
+@pytest.fixture
+def counter(weight_names):
+    return Counter(weight_names)
+
+
+def test_init(weight_names, full_wrapped_tree):
+    c = Counter(weight_names)
+    assert c._weight_names == weight_names
+    assert c.counts == (0, 0.0)
+    assert c._w_counts == (0.0)
+
+
+def test_increment_mc(counter, full_wrapped_tree):
+    counter.increment(full_wrapped_tree, is_mc=True)
+    n_events = len(full_wrapped_tree)
+    expected_weighted_sum = 229.94895935058594
+    # expected value is taken from numpy sum, but awkward sum is used
+    # the difference is small and due to optimization
+    # see https://github.com/scikit-hep/awkward-1.0/issues/1241
+    assert counter._w_counts == pytest.approx(np.array([expected_weighted_sum]), 1e-4)
+    assert counter.counts == (n_events, pytest.approx(expected_weighted_sum, 1e-4))
+
+
+def test_increment_data(counter, full_wrapped_tree):
+    counter.increment(full_wrapped_tree, is_mc=False)
+    n_events = len(full_wrapped_tree)
+    assert counter._w_counts == (n_events)
+    assert counter.counts == (n_events, n_events)
+
+
+def test_add(counter, full_wrapped_tree):
+    counter.increment(full_wrapped_tree, is_mc=True)
+    counter.add(counter)
+
+    n_events = len(full_wrapped_tree)
+    expected_weighted_sum = 229.94895935058594
+    # expected value is taken from numpy sum, but awkward sum is used
+    # the difference is small and due to optimization
+    # see https://github.com/scikit-hep/awkward-1.0/issues/1241
+    assert counter._w_counts == pytest.approx((expected_weighted_sum * 2,), 2e-4)
+    assert counter.counts == (n_events * 2, pytest.approx(expected_weighted_sum * 2, 2e-4))
+
+
+def test_increment_without_weights(full_wrapped_tree):
+    counter = Counter([])
+    counter.increment(full_wrapped_tree, is_mc=True)
+    n_events = len(full_wrapped_tree)
+
+    with pytest.raises(IndexError):
+        assert counter._w_counts[0] == n_events
+    assert counter.counts == (n_events, )
diff --git a/tests/test_event_builder.py b/tests/test_event_builder.py
deleted file mode 100644
index 2ec6ef6..0000000
--- a/tests/test_event_builder.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import pytest
-import numpy as np
-import fast_carpenter.event_builder as builder
-
-
-@pytest.fixture
-def wrapped_be(infile):
-    return builder.BEventsWrapped(infile, nevents_per_block=1000)
-
-
-def test_contains(wrapped_be):
-    assert "Muon_Py" in wrapped_be.tree
-    assert "not_a_branch" not in wrapped_be.tree
-
-
-def test_block_sizes(wrapped_be):
-    block_lengths = np.array([len(wrapped_be.tree) for _ in wrapped_be])
-    assert all(block_lengths[:-1] == 1000)
-    assert block_lengths[-1] == 580
-    assert len(block_lengths) == len(wrapped_be)
-    assert len(block_lengths) == 5
diff --git a/tests/test_expressions.py b/tests/test_expressions.py
index f6c0787..ecfc7a2 100644
--- a/tests/test_expressions.py
+++ b/tests/test_expressions.py
@@ -2,10 +2,13 @@
 import numpy as np
 from awkward0 import JaggedArray
 from fast_carpenter import expressions
+from fast_carpenter import tree_adapter
 
+ArrayMethods = tree_adapter.Uproot4Methods
 
-def test_get_branches(infile):
-    valid = infile.allkeys()
+
+def test_get_branches(input_tree):
+    valid = input_tree.keys()
 
     cut = "NMuon > 1"
     branches = expressions.get_branches(cut, valid)
@@ -19,25 +22,26 @@ def test_get_branches(infile):
 def test_evaluate(wrapped_tree):
     Muon_py, Muon_pz = wrapped_tree.arrays(["Muon_Py", "Muon_Pz"], outputtype=tuple)
     mu_pt = expressions.evaluate(wrapped_tree, "sqrt(Muon_Px**2 + Muon_Py**2)")
-    assert len(mu_pt) == 100
-    assert all(mu_pt.counts == Muon_py.counts)
+    assert len(mu_pt) == 4580
+    assert ArrayMethods.filtered_len(mu_pt) == 100
+    assert all(ArrayMethods.counts(mu_pt) == ArrayMethods.counts(Muon_py))
 
 
 def test_evaulate_matches_array(wrapped_tree):
     mu_px_array = wrapped_tree.array("Muon_Px") < 0.3
     mu_px_evalu = expressions.evaluate(wrapped_tree, "Muon_Px < 0.3")
-    assert (mu_px_evalu == mu_px_array).all().all()
+    assert ArrayMethods.all(mu_px_evalu == mu_px_array, axis=None)
 
 
 def test_evaluate_bool(full_wrapped_tree):
     all_true = expressions.evaluate(full_wrapped_tree, "Muon_Px == Muon_Px")
-    assert all(all_true.all())
+    assert ArrayMethods.all(all_true, axis=None)
 
     mu_cut = expressions.evaluate(full_wrapped_tree, "NMuon > 1")
     ele_cut = expressions.evaluate(full_wrapped_tree, "NElectron > 1")
     jet_cut = expressions.evaluate(full_wrapped_tree, "NJet > 1")
     mu_px = expressions.evaluate(full_wrapped_tree, "Muon_Px > 0.3")
-    mu_px = mu_px.pad(2)[:, 1]
+    mu_px = ArrayMethods.pad(mu_px, 2)[:, 1]
     combined = mu_cut & (ele_cut | jet_cut) & mu_px
     assert np.count_nonzero(combined) == 2
 
@@ -45,14 +49,14 @@ def test_evaluate_bool(full_wrapped_tree):
 def test_evaluate_dot(wrapped_tree):
     wrapped_tree.new_variable("Muon.Px", wrapped_tree.array("Muon_Px"))
     all_true = expressions.evaluate(wrapped_tree, "Muon.Px == Muon_Px")
-    assert all(all_true.all())
+    assert ArrayMethods.all(all_true, axis=None)
 
 
-def test_constants(infile):
-    nan_1_or_fewer_mu = expressions.evaluate(infile, "where(NMuon > 1, NMuon, nan)")
+def test_constants(full_wrapped_tree):
+    nan_1_or_fewer_mu = expressions.evaluate(full_wrapped_tree, "where(NMuon > 1, NMuon, nan)")
     assert np.count_nonzero(~np.isnan(nan_1_or_fewer_mu)) == 289
 
-    ninf_1_or_fewer_mu = expressions.evaluate(infile, "where(NMuon > 1, NMuon, -inf)")
+    ninf_1_or_fewer_mu = expressions.evaluate(full_wrapped_tree, "where(NMuon > 1, NMuon, -inf)")
     assert np.count_nonzero(np.isfinite(ninf_1_or_fewer_mu)) == 289
 
 
@@ -63,24 +67,24 @@ def test_3D_jagged(wrapped_tree):
     fake_3d = JaggedArray.fromiter(fake_3d)
     wrapped_tree.new_variable("Fake3D", fake_3d)
     assert isinstance(fake_3d.count(), JaggedArray)
-    assert all((fake_3d.copy().count() == fake_3d.count()).all())
+    assert all(ArrayMethods.all(fake_3d.copy().count() == fake_3d.count()))
 
-    aliased = expressions.evaluate(wrapped_tree, "Fake3D")
-    assert (aliased == fake_3d).all().all().all()
+    aliased = ArrayMethods.only_valid_entries(expressions.evaluate(wrapped_tree, "Fake3D"))
+    assert ArrayMethods.all(aliased == fake_3d, axis=None)
 
-    doubled = expressions.evaluate(wrapped_tree, "Fake3D * 2")
-    assert (doubled == fake_3d * 2).all().all().all()
+    doubled = ArrayMethods.only_valid_entries(expressions.evaluate(wrapped_tree, "Fake3D * 2"))
+    assert ArrayMethods.all(doubled == fake_3d * 2, axis=None)
     assert len(doubled[0, :, :]) == 0
     assert doubled[1, 0, :] == [2]
     assert doubled[2, 0, :] == [4]
     assert all(doubled[2, 1, :] == [4, 6])
 
-    doubled = expressions.evaluate(wrapped_tree, "Fake3D + Fake3D")
-    assert (doubled == fake_3d * 2).all().all().all()
-    assert len(doubled[0, :, :]) == 0
-    assert doubled[1, 0, :] == [2]
-    assert doubled[2, 0, :] == [4]
-    assert all(doubled[2, 1, :] == [4, 6])
+    doubled_via_sum = ArrayMethods.only_valid_entries(expressions.evaluate(wrapped_tree, "Fake3D + Fake3D"))
+    assert ArrayMethods.all(doubled_via_sum == fake_3d * 2, axis=None)
+    assert len(doubled_via_sum[0, :, :]) == 0
+    assert doubled_via_sum[1, 0, :] == [2]
+    assert doubled_via_sum[2, 0, :] == [4]
+    assert all(doubled_via_sum[2, 1, :] == [4, 6])
 
     fake_3d_2 = [[np.arange(i + 3) + j
                   for i in range(j % 2)]
@@ -90,7 +94,7 @@ def test_3D_jagged(wrapped_tree):
 
     with pytest.raises(ValueError) as e:
         expressions.evaluate(wrapped_tree, "SecondFake3D + Fake3D")
-    assert "Cannot broadcast" in str(e)
+    assert "cannot broadcast" in str(e)
 
 
 @pytest.mark.parametrize('input, expected', [
diff --git a/tests/test_masked_tree.py b/tests/test_masked_tree.py
deleted file mode 100644
index 20be9ee..0000000
--- a/tests/test_masked_tree.py
+++ /dev/null
@@ -1,91 +0,0 @@
-from __future__ import division
-import pytest
-import numpy as np
-import pandas as pd
-import fast_carpenter.masked_tree as m_tree
-
-
-@pytest.fixture
-def tree_no_mask(infile, full_event_range):
-    return m_tree.MaskedUprootTree(infile, event_ranger=full_event_range)
-
-
-@pytest.fixture
-def tree_w_mask_bool(infile, event_range):
-    mask = np.ones(event_range.entries_in_block, dtype=bool)
-    mask[::2] = False
-    return m_tree.MaskedUprootTree(infile, event_ranger=event_range, mask=mask)
-
-
-@pytest.fixture
-def tree_w_mask_int(infile, event_range):
-    mask = np.ones(event_range.entries_in_block, dtype=bool)
-    mask[::2] = False
-    mask = np.where(mask)[0]
-    return m_tree.MaskedUprootTree(infile, event_ranger=event_range, mask=mask)
-
-
-def test_no_mask(tree_no_mask, infile):
-    assert len(tree_no_mask) == len(infile)
-    assert tree_no_mask.mask is None
-    assert np.all(tree_no_mask.pandas.df("EventWeight") == infile.pandas.df("EventWeight"))
-
-
-def test_w_mask_bool(tree_w_mask_bool, infile):
-    assert len(tree_w_mask_bool) == 50
-    df = tree_w_mask_bool.pandas.df("NMuon")
-    assert len(df) == 50
-    new_mask = np.ones(len(tree_w_mask_bool), dtype=bool)
-    new_mask[::2] = False
-    tree_w_mask_bool.apply_mask(new_mask)
-    assert len(tree_w_mask_bool) == 25
-
-
-def test_w_mask_int(tree_w_mask_int, infile):
-    assert len(tree_w_mask_int) == 50
-    tree_w_mask_int.apply_mask(np.arange(0, len(tree_w_mask_int), 2))
-    assert len(tree_w_mask_int) == 25
-    df = tree_w_mask_int.pandas.df("Muon_Px")
-    assert len(df.index.unique(0)) == 25
-
-
-def test_array(tree_w_mask_int, infile):
-    assert len(tree_w_mask_int) == 50
-    tree_w_mask_int.apply_mask(np.arange(0, len(tree_w_mask_int), 2))
-    assert len(tree_w_mask_int) == 25
-    array = tree_w_mask_int.array("Muon_Px")
-    assert len(array) == 25
-
-
-def test_arrays(tree_w_mask_int, infile):
-    assert len(tree_w_mask_int) == 50
-    tree_w_mask_int.apply_mask(np.arange(0, len(tree_w_mask_int), 2))
-    assert len(tree_w_mask_int) == 25
-
-    arrays = tree_w_mask_int.arrays(["Muon_Px", "Muon_Py"], outputtype=dict)
-    assert isinstance(arrays, dict)
-    assert len(arrays) == 2
-    assert [len(v) for v in arrays.values()] == [25, 25]
-
-    for outtype in [list, tuple]:
-        arrays = tree_w_mask_int.arrays(["Muon_Px", "Muon_Py"], outputtype=outtype)
-        assert isinstance(arrays, outtype)
-        assert len(arrays) == 2
-        assert len(arrays[0]) == 25
-        assert len(arrays[1]) == 25
-
-    arrays = tree_w_mask_int.arrays(["Muon_Px", "Muon_Py"],
-                                    outputtype=lambda *args: np.array(args))
-    assert isinstance(arrays, np.ndarray)
-    assert arrays.shape == (2, 25)
-
-    arrays = tree_w_mask_int.arrays(["Muon_Px"],
-                                    outputtype=lambda *args: np.array(args))
-    assert isinstance(arrays, np.ndarray)
-    assert arrays.shape == (1, 25)
-
-    arrays = tree_w_mask_int.arrays(["Muon_Px", "Muon_Py"],
-                                    outputtype=pd.DataFrame)
-    assert isinstance(arrays, pd.DataFrame)
-    assert len(arrays) == 25
-    assert len(arrays.columns) == 2
diff --git a/tests/test_tree_adapter.py b/tests/test_tree_adapter.py
new file mode 100644
index 0000000..6536bf3
--- /dev/null
+++ b/tests/test_tree_adapter.py
@@ -0,0 +1,192 @@
+import awkward as ak
+import numpy as np
+import pytest
+from pytest_lazyfixture import lazy_fixture
+
+from fast_carpenter.testing import FakeBEEvent
+import fast_carpenter.tree_adapter as tree_adapter
+from fast_carpenter.tree_adapter import ArrayMethods
+
+
+###############################################################################
+# Uproot3 tests
+###############################################################################
+
+
+@pytest.fixture
+def uproot3_adapter(uproot3_tree):
+    return tree_adapter.create({"adapter": "uproot3", "tree": uproot3_tree})
+
+
+def test_uproot3_num_entries(uproot3_tree, uproot3_adapter):
+    assert uproot3_adapter.num_entries == uproot3_tree.numentries
+
+
+def test_uproot3_getitem(uproot3_tree, uproot3_adapter):
+    assert ak.all(uproot3_adapter["Muon_Py"] == uproot3_tree["Muon_Py"].array())
+
+###############################################################################
+# Uproot4 tests
+###############################################################################
+
+
+@pytest.fixture
+def uproot4_adapter(uproot4_tree):
+    return tree_adapter.create({"adapter": "uproot4", "tree": uproot4_tree})
+
+
+@pytest.fixture
+def uproot4_ranged_adapter(uproot4_tree, event_range):
+    return tree_adapter.create_ranged(
+        {
+            "adapter": "uproot4",
+            "tree": uproot4_tree,
+            "start": event_range.start_entry,
+            "stop": event_range.stop_entry
+        }
+    )
+
+
+@pytest.fixture
+def uproot4_masked_adapter(uproot4_tree, event_range):
+    return tree_adapter.create_masked(
+        {
+            "adapter": "uproot4", "tree": uproot4_tree,
+            "start": event_range.start_entry, "stop": event_range.stop_entry,
+            "mask": [(i % 2) == 0 for i in range(event_range.start_entry, event_range.stop_entry)]
+        }
+    )
+
+
+def test_uproot4_num_entries(uproot4_tree, uproot4_adapter):
+    assert uproot4_adapter.num_entries == uproot4_tree.num_entries
+
+
+def test_uproot4_getitem(uproot4_tree, uproot4_adapter):
+    assert ak.all(uproot4_adapter["Muon_Py"] == uproot4_tree["Muon_Py"].array())
+
+
+def test_uproot4_evaluate(uproot4_tree, uproot4_adapter):
+    result = uproot4_adapter.evaluate("Muon_Py * NMuon")
+    assert ak.num(result, axis=0) == ak.num(uproot4_tree["Muon_Py"].array(), axis=0)
+
+
+def test_uproot4_range(uproot4_tree, uproot4_ranged_adapter, event_range):
+    assert uproot4_ranged_adapter.num_entries == event_range.entries_in_block
+
+
+def test_uproot4_add_retrieve(uproot4_tree, uproot4_ranged_adapter):
+    muon_px = uproot4_ranged_adapter["Muon_Px"]
+    assert ArrayMethods.filtered_len(muon_px) == len(uproot4_ranged_adapter)
+
+    muon_py, muon_pz = uproot4_ranged_adapter.arrays(["Muon_Py", "Muon_Pz"], how=tuple)
+    muon_momentum = np.hypot(muon_py, muon_pz)
+    uproot4_ranged_adapter.new_variable("Muon_momentum", muon_momentum)
+
+    retrieve_momentum = uproot4_ranged_adapter["Muon_momentum"]
+    assert len(retrieve_momentum) == len(muon_momentum)
+    assert ak.all(ak.flatten(retrieve_momentum) == ak.flatten(muon_momentum))
+
+
+def test_overwrite(uproot4_ranged_adapter):
+    muon_px = uproot4_ranged_adapter["Muon_Px"]
+    assert ("Muon_Px" in uproot4_ranged_adapter)
+    with pytest.raises(ValueError) as err:
+        uproot4_ranged_adapter.new_variable("Muon_Px", muon_px / muon_px)
+    assert "Muon_Px" in str(err)
+
+
+def test_to_pandas(full_wrapped_tree):
+    chunk = FakeBEEvent(full_wrapped_tree, "mc")
+    inputs = ['Electron_Px', 'Electron_Py', 'EventWeight']
+    df = ArrayMethods.to_pandas(chunk.tree, inputs)
+    assert list(df.keys()) == inputs
+
+
+def test_arraydict_to_pandas_with_new_variable(uproot4_ranged_adapter):
+    muon_py, muon_pz = uproot4_ranged_adapter.arrays(["Muon_Py", "Muon_Pz"], how=tuple)
+    muon_momentum = np.hypot(muon_py, muon_pz)
+    uproot4_ranged_adapter.new_variable("Muon_momentum", muon_momentum)
+
+    inputs = ['Muon_Py', 'Muon_Pz', 'Muon_momentum']
+    arrays = {
+        'Muon_Py': muon_py,
+        'Muon_Pz': muon_pz,
+        'Muon_momentum': muon_momentum,
+    }
+    df = ArrayMethods.arraydict_to_pandas(arrays)
+
+    assert list(df.keys()) == inputs
+    assert len(df) == ak.count_nonzero(muon_py)
+
+
+@pytest.mark.parametrize(
+    "tree_under_test",
+    [
+        lazy_fixture("uproot4_adapter"),
+        lazy_fixture("uproot4_ranged_adapter"),
+        lazy_fixture("uproot4_masked_adapter"),
+    ]
+)
+def test_to_pandas_with_new_variable(tree_under_test):
+    muon_py, muon_pz = tree_under_test.arrays(["Muon_Py", "Muon_Pz"], how=tuple)
+    muon_momentum = np.hypot(muon_py, muon_pz)
+    assert len(muon_momentum) == len(muon_py)
+    tree_under_test.new_variable("Muon_momentum", muon_momentum)
+
+    inputs = ['Muon_Py', 'Muon_Pz', 'Muon_momentum']
+    df = ArrayMethods.to_pandas(tree_under_test, inputs)
+
+    assert list(df.keys()) == inputs
+    assert len(df) == ak.count_nonzero(muon_py)
+
+
+@pytest.mark.parametrize(
+    "tree_under_test, how",
+    [
+        (lazy_fixture("uproot4_adapter"), tuple),
+        (lazy_fixture("uproot4_adapter"), list),
+        (lazy_fixture("uproot4_ranged_adapter"), tuple),
+        (lazy_fixture("uproot4_ranged_adapter"), list),
+        (lazy_fixture("uproot4_masked_adapter"), tuple),
+        (lazy_fixture("uproot4_masked_adapter"), list),
+    ]
+)
+def test_arrays_to_tuple_or_list(tree_under_test, how):
+    muon_py, muon_pz = tree_under_test.arrays(["Muon_Py", "Muon_Pz"], how=tuple)
+    muon_momentum = np.hypot(muon_py, muon_pz)
+    tree_under_test.new_variable("Muon_momentum", muon_momentum)
+    _, _, muon_momentum_new = tree_under_test.arrays(["Muon_Py", "Muon_Pz", "Muon_momentum"], how=how)
+    assert ak.all(muon_momentum_new == muon_momentum)
+
+
+@pytest.mark.parametrize(
+    "tree_under_test",
+    [
+        lazy_fixture("uproot4_adapter"),
+        lazy_fixture("uproot4_ranged_adapter"),
+        lazy_fixture("uproot4_masked_adapter"),
+    ]
+)
+def test_arrays_to_dict(tree_under_test):
+    muon_py, muon_pz = tree_under_test.arrays(["Muon_Py", "Muon_Pz"], how=tuple)
+    muon_momentum = np.hypot(muon_py, muon_pz)
+    tree_under_test.new_variable("Muon_momentum", muon_momentum)
+    array_dict = tree_under_test.arrays(["Muon_Py", "Muon_Pz", "Muon_momentum"], how=dict)
+    assert ak.all(array_dict["Muon_momentum"] == muon_momentum)
+
+
+@pytest.mark.parametrize(
+    "tree_under_test",
+    [
+        lazy_fixture("uproot4_adapter"),
+        lazy_fixture("uproot4_ranged_adapter"),
+        lazy_fixture("uproot4_masked_adapter"),
+    ]
+)
+def test_arrays_as_np_lists(tree_under_test):
+    muon_py, muon_pz = tree_under_test.arrays(["Muon_Py", "Muon_Pz"], how=tuple)
+    muon_momentum = np.hypot(muon_py, muon_pz)
+    tree_under_test.new_variable("Muon_momentum", muon_momentum)
+    np_array = ArrayMethods.arrays_as_np_array(tree_under_test, ["Muon_Py", "Muon_Pz", "Muon_momentum"], how=dict)
+    assert ak.all(np_array[-1] == muon_momentum)
diff --git a/tests/test_tree_adapter_masked.py b/tests/test_tree_adapter_masked.py
new file mode 100644
index 0000000..0bda6b3
--- /dev/null
+++ b/tests/test_tree_adapter_masked.py
@@ -0,0 +1,59 @@
+import awkward as ak
+
+
+def check_data(data, n_data, n_nonzero, n_mask):
+    assert len(data) == n_data
+    assert data.count_nonzero() == n_nonzero
+    if data.tree._mask is not None:
+        assert len(data.tree._mask) == n_mask
+
+
+def test_single_mask(fake_data_events, full_wrapped_tree, at_least_two_muons, tmpdir):
+    full_size = len(fake_data_events)
+    check_data(fake_data_events, n_data=full_size, n_nonzero=full_size, n_mask=full_size)
+    at_least_two_muons.event(fake_data_events)
+    check_data(fake_data_events, n_data=full_size, n_nonzero=289, n_mask=full_size)
+
+    mask = fake_data_events.tree._mask
+    assert ak.count_nonzero(mask) == 289
+    assert ak.all(fake_data_events.tree["Muon_Pz"] == ak.mask(full_wrapped_tree["Muon_Pz"], mask))
+
+
+def test_single_mask_twice(fake_data_events, full_wrapped_tree, at_least_two_muons, tmpdir):
+    full_size = len(fake_data_events)
+    check_data(fake_data_events, n_data=full_size, n_nonzero=full_size, n_mask=full_size)
+    # applying the same cut twice should not change the result
+    at_least_two_muons.event(fake_data_events)
+    at_least_two_muons.event(fake_data_events)
+    check_data(fake_data_events, n_data=full_size, n_nonzero=289, n_mask=full_size)
+
+    mask = fake_data_events.tree._mask
+    assert ak.count_nonzero(mask) == 289
+    assert ak.all(fake_data_events.tree["Muon_Pz"] == ak.mask(full_wrapped_tree["Muon_Pz"], mask))
+
+
+def test_complex_mask(fake_data_events, full_wrapped_tree, at_least_two_muons_plus, tmpdir):
+    full_size = len(fake_data_events)
+    check_data(fake_data_events, n_data=full_size, n_nonzero=full_size, n_mask=full_size)
+
+    at_least_two_muons_plus.event(fake_data_events)
+
+    check_data(fake_data_events, n_data=full_size, n_nonzero=2, n_mask=full_size)
+
+    mask = fake_data_events.tree._mask
+    assert ak.count_nonzero(mask) == 2
+    assert ak.all(fake_data_events.tree["Muon_Pz"] == ak.mask(full_wrapped_tree["Muon_Pz"], mask))
+
+
+def test_multi_mask(fake_data_events, full_wrapped_tree, at_least_two_muons, at_least_two_muons_plus, tmpdir):
+    full_size = len(fake_data_events)
+    check_data(fake_data_events, n_data=full_size, n_nonzero=full_size, n_mask=full_size)
+
+    at_least_two_muons.event(fake_data_events)
+    at_least_two_muons_plus.event(fake_data_events)
+
+    check_data(fake_data_events, n_data=full_size, n_nonzero=2, n_mask=full_size)
+
+    mask = fake_data_events.tree._mask
+    assert ak.count_nonzero(mask) == 2
+    assert ak.all(fake_data_events.tree["Muon_Pz"] == ak.mask(full_wrapped_tree["Muon_Pz"], mask))
diff --git a/tests/test_tree_adapter_ranged.py b/tests/test_tree_adapter_ranged.py
new file mode 100644
index 0000000..822b409
--- /dev/null
+++ b/tests/test_tree_adapter_ranged.py
@@ -0,0 +1,22 @@
+import pytest
+from pytest_lazyfixture import lazy_fixture
+
+from fast_carpenter import tree_adapter
+
+
+@pytest.mark.parametrize(
+    "tree, start, stop, expected_num_entries",
+    [
+        (lazy_fixture("input_tree"), 0, -1, lazy_fixture("size_of_test_sample")),
+        (lazy_fixture("input_tree"), 0, lazy_fixture("size_of_test_sample"), lazy_fixture("size_of_test_sample")),
+        (lazy_fixture("input_tree"), 100, 300, 200),
+    ]
+)
+def test_ranged(tree, start, stop, expected_num_entries):
+    test_tree = tree_adapter.create_ranged(
+        {
+            "adapter": "uproot4", "tree": tree,
+            "start": start, "stop": stop,
+        }
+    )
+    assert len(test_tree) == expected_num_entries
diff --git a/tests/test_tree_wrapper.py b/tests/test_tree_wrapper.py
index f1dffff..c9a5af1 100644
--- a/tests/test_tree_wrapper.py
+++ b/tests/test_tree_wrapper.py
@@ -1,16 +1,21 @@
 import pytest
 import numpy as np
 
+from fast_carpenter import tree_adapter
+
+ArrayMethods = tree_adapter.Uproot4Methods
+
 
 def test_add_retrieve(wrapped_tree):
     muon_px = wrapped_tree.array("Muon_Px")
-    assert len(muon_px) == 100
+    assert len(muon_px) == 4580
+    assert ArrayMethods.filtered_len(muon_px) == 100
 
     muon_py, muon_pz = wrapped_tree.arrays(["Muon_Py", "Muon_Pz"], outputtype=tuple)
     muon_momentum = np.hypot(muon_py, muon_pz)
     wrapped_tree.new_variable("Muon_momentum", muon_momentum)
     retrieve_momentum = wrapped_tree.array("Muon_momentum")
-    assert (retrieve_momentum == muon_momentum).flatten().all()
+    assert ArrayMethods.all(retrieve_momentum == muon_momentum, axis=None)
 
 
 def test_overwrite(wrapped_tree):
@@ -18,4 +23,4 @@ def test_overwrite(wrapped_tree):
     with pytest.raises(ValueError) as err:
         wrapped_tree.new_variable("Muon_Px", muon_px / muon_px)
     assert "Muon_Px" in str(err)
-    assert len(wrapped_tree.keys(filtername=lambda x: x.decode() == "Muon_Px")) == 1
+    # assert len(wrapped_tree.keys(filtername=lambda x: x.decode() == "Muon_Px")) == 1
diff --git a/tests/test_weigths.py b/tests/test_weigths.py
new file mode 100644
index 0000000..2240200
--- /dev/null
+++ b/tests/test_weigths.py
@@ -0,0 +1,44 @@
+import pytest
+
+import awkward as ak
+
+from fast_carpenter.weights import get_unweighted_increment, get_weighted_increment, extract_weights
+
+
+@pytest.fixture
+def event_weights():
+    weights = ak.Array(
+        [
+            [1, 2, 3],  # event level weights
+            [2, 5, 7],  # event level weights
+            # [[0.4], [0.5, 0.6], [0.7, 0.8, 0.9]],  # object, e.g. muon, level weights
+        ]
+    )
+    # TODO: to support awkward arrays as weights, simply remove the "to_numpy"
+    return weights.to_numpy()
+
+
+@pytest.fixture
+def event_mask():
+    return ak.Array([True, False, True])
+
+
+def test_unweighted_increment(event_weights):
+    inc = get_unweighted_increment(event_weights, None)
+    assert inc == len(event_weights[0])
+
+
+def test_weighted_increment(event_weights):
+    inc = get_weighted_increment(event_weights, None)
+    assert ak.all(inc == [6, 14])
+
+
+def test_extract_weights(fake_data_events):
+    weights = extract_weights(fake_data_events, ["EventWeight"])
+    assert len(weights) == 1
+    assert len(weights[0]) == len(fake_data_events)
+
+
+def test_extract_weights_no_weights(fake_data_events):
+    weights = extract_weights(fake_data_events, [])
+    assert len(weights) == 0