Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PFNano additions for NanoEvents #427

Merged
merged 6 commits into from
Apr 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion coffea/nanoevents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@

"""
from coffea.nanoevents.factory import NanoEventsFactory
from coffea.nanoevents.schemas import BaseSchema, NanoAODSchema, TreeMakerSchema
from coffea.nanoevents.schemas import (
BaseSchema,
NanoAODSchema,
PFNanoAODSchema,
TreeMakerSchema,
)

__all__ = [
"NanoEventsFactory",
"BaseSchema",
"NanoAODSchema",
"PFNanoAODSchema",
"TreeMakerSchema",
]
18 changes: 12 additions & 6 deletions coffea/nanoevents/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ def from_root(
)
uuidpfn = {partition_key[0]: tree.file.file_path}
mapping = UprootSourceMapping(
TrivialUprootOpener(uuidpfn, uproot_options), cache={}, access_log=access_log
TrivialUprootOpener(uuidpfn, uproot_options),
cache={},
access_log=access_log,
)
mapping.preload_column_source(partition_key[0], partition_key[1], tree)

Expand Down Expand Up @@ -208,9 +210,11 @@ def from_parquet(
TrivialParquetOpener(uuidpfn, parquet_options), access_log=access_log
)

format_ = 'parquet'
if 'ceph_config_path' in rados_parquet_options:
format_ = ds.RadosParquetFileFormat(rados_parquet_options['ceph_config_path'].encode())
format_ = "parquet"
if "ceph_config_path" in rados_parquet_options:
format_ = ds.RadosParquetFileFormat(
rados_parquet_options["ceph_config_path"].encode()
)

dataset = ds.dataset(file, schema=table_file.schema_arrow, format=format_)

Expand Down Expand Up @@ -340,13 +344,15 @@ def _from_mapping(
Arbitrary metadata to add to the `base.NanoEvents` object

"""
if not issubclass(schemaclass, BaseSchema):
raise RuntimeError("Invalid schema type")
if persistent_cache is not None:
mapping = CachedMapping(persistent_cache, mapping)
if metadata is not None:
base_form["parameters"]["metadata"] = metadata
if not callable(schemaclass):
raise ValueError("Invalid schemaclass type")
schema = schemaclass(base_form)
if not isinstance(schema, BaseSchema):
raise RuntimeError("Invalid schema type")
return cls(schema, mapping, tuple_to_key(partition_key), cache=runtime_cache)

def __len__(self):
Expand Down
4 changes: 4 additions & 0 deletions coffea/nanoevents/methods/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ class NanoCollection:
and other advanced mixin types.
"""

def _collection_name(self):
    """Return the name of this collection.

    The name is the field under events where the collection is found. It is
    read from the ``"collection_name"`` parameter of this array's layout via
    ``purelist_parameter``.
    """
    layout = self.layout
    return layout.purelist_parameter("collection_name")

def _getlistarray(self):
"""Do some digging to find the initial listarray"""

Expand Down
110 changes: 110 additions & 0 deletions coffea/nanoevents/methods/nanoaod.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,12 @@ def matched_muons(self):
def matched_gen(self):
return self._events().GenJet._apply_global_index(self.genJetIdxG)

@property
def constituents(self):
    """Entries of the events' ``JetPFCands`` collection linked to this jet.

    The link is resolved by applying the global index ``pFCandsIdxG`` to
    ``JetPFCands``.

    Raises
    ------
    RuntimeError
        If this array has no ``pFCandsIdxG`` field.
    """
    if "pFCandsIdxG" in self.fields:
        pf_links = self._events().JetPFCands
        return pf_links._apply_global_index(self.pFCandsIdxG)
    raise RuntimeError("PF candidates are only available for PFNano")


_set_repr_name("Jet")

Expand Down Expand Up @@ -336,6 +342,12 @@ def subjets(self):
def matched_gen(self):
return self._events().GenJetAK8._apply_global_index(self.genJetAK8IdxG)

@property
def constituents(self):
    """Entries of the events' ``FatJetPFCands`` collection linked to this jet.

    The link is resolved by applying the global index ``pFCandsIdxG`` to
    ``FatJetPFCands``.

    Raises
    ------
    RuntimeError
        If this array has no ``pFCandsIdxG`` field.
    """
    if "pFCandsIdxG" in self.fields:
        pf_links = self._events().FatJetPFCands
        return pf_links._apply_global_index(self.pFCandsIdxG)
    raise RuntimeError("PF candidates are only available for PFNano")


_set_repr_name("FatJet")

Expand All @@ -351,6 +363,99 @@ def r(self):

_set_repr_name("MissingET")


@awkward.mixin_class(behavior)
class Vertex(vector.ThreeVector, base.NanoCollection):
    """NanoAOD vertex object.

    Combines ``vector.ThreeVector`` with ``base.NanoCollection`` and defines
    no members of its own.
    """


# Presumably registers the short repr name for this mixin; the helper is
# defined outside this view.
_set_repr_name("Vertex")


@awkward.mixin_class(behavior)
class SecondaryVertex(Vertex):
    """NanoAOD secondary vertex object"""

    @property
    def p4(self):
        """4-momentum vector of tracks associated to this SV.

        Built by zipping this array's ``pt``, ``eta``, ``phi`` and ``mass``
        fields into a record named ``PtEtaPhiMLorentzVector``.
        """
        # Field order is preserved: pt, eta, phi, mass.
        components = {name: self[name] for name in ("pt", "eta", "phi", "mass")}
        return awkward.zip(components, with_name="PtEtaPhiMLorentzVector")


# Presumably registers the short repr name for this mixin; the helper is
# defined outside this view.
_set_repr_name("SecondaryVertex")


@awkward.mixin_class(behavior)
class AssociatedPFCand(base.NanoCollection):
    """PFNano PF candidate to jet association object"""

    # Maps the name of an association collection to the pair
    # (jet collection name, candidate collection name) it links together.
    collection_map = {
        "JetPFCands": ("Jet", "PFCands"),
        "FatJetPFCands": ("FatJet", "PFCands"),
        "GenJetCands": ("GenJet", "GenCands"),
        "GenFatJetCands": ("GenJetAK8", "GenCands"),
    }

    @property
    def jet(self):
        """Jet side of the association, resolved through ``jetIdxG``."""
        events = self._events()
        jet_collection_name = self.collection_map[self._collection_name()][0]
        return events[jet_collection_name]._apply_global_index(self.jetIdxG)

    @property
    def pf(self):
        """Candidate side of the association, resolved through ``pFCandsIdxG``."""
        events = self._events()
        cand_collection_name = self.collection_map[self._collection_name()][1]
        return events[cand_collection_name]._apply_global_index(self.pFCandsIdxG)


# Presumably registers the short repr name for this mixin; the helper is
# defined outside this view.
_set_repr_name("AssociatedPFCand")


@awkward.mixin_class(behavior)
class AssociatedSV(base.NanoCollection):
    """PFNano secondary vertex to jet association object"""

    # Maps the name of an association collection to the pair
    # (jet collection name, secondary-vertex collection name) it links.
    collection_map = {
        "JetSVs": ("Jet", "SV"),
        "FatJetSVs": ("FatJet", "SV"),
        # NOTE: the correct targets for these two entries are unclear
        "GenJetSVs": ("GenJet", "SV"),
        "GenFatJetSVs": ("GenJetAK8", "SV"),
    }

    @property
    def jet(self):
        """Jet side of the association, resolved through ``jetIdxG``."""
        events = self._events()
        jet_collection_name = self.collection_map[self._collection_name()][0]
        return events[jet_collection_name]._apply_global_index(self.jetIdxG)

    @property
    def sv(self):
        """Secondary-vertex side of the association, resolved through ``sVIdxG``."""
        events = self._events()
        sv_collection_name = self.collection_map[self._collection_name()][1]
        return events[sv_collection_name]._apply_global_index(self.sVIdxG)


# Presumably registers the short repr name for this mixin; the helper is
# defined outside this view.
_set_repr_name("AssociatedSV")


@awkward.mixin_class(behavior)
class PFCand(candidate.PtEtaPhiMCandidate, base.NanoCollection):
    """PFNano particle flow candidate object.

    Combines ``candidate.PtEtaPhiMCandidate`` with ``base.NanoCollection``
    and defines no members of its own.
    """


# Presumably registers the short repr name for this mixin; the helper is
# defined outside this view.
_set_repr_name("PFCand")


__all__ = [
"PtEtaPhiMCollection",
"GenParticle",
Expand All @@ -363,4 +468,9 @@ def r(self):
"Jet",
"FatJet",
"MissingET",
"Vertex",
"SecondaryVertex",
"AssociatedPFCand",
"AssociatedSV",
"PFCand",
]
4 changes: 2 additions & 2 deletions coffea/nanoevents/schemas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .base import BaseSchema
from .nanoaod import NanoAODSchema
from .nanoaod import NanoAODSchema, PFNanoAODSchema
from .treemaker import TreeMakerSchema

__all__ = ["BaseSchema", "NanoAODSchema", "TreeMakerSchema"]
__all__ = ["BaseSchema", "NanoAODSchema", "PFNanoAODSchema", "TreeMakerSchema"]
Loading