Skip to content

Commit

Permalink
Merge pull request #99 from FAST-HEP/BK_issue_87_ND-jagged-reductions
Browse files Browse the repository at this point in the history
Handle JaggedNth reductions for ND jagged arrays
  • Loading branch information
benkrikler authored Nov 1, 2019
2 parents 5f7654d + 99e9209 commit 65f5530
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 11 deletions.
6 changes: 4 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## unreleased
## Unreleased
### Added
### Changed
- Added support for variables with multiple dots in the name (nested branches). Issue #95, PR #97 [@kreczko](https://github.com/kreczko)
- Add protection against multiple dimensions using the same output name in a BinnedDataframe stage, Issue #92. [@benkrikler](https://github.com/benkrikler)
- Fix JaggedNth to work with arbitrary-depth jagged arrays, Issue #87, PR #99 [@benkrikler](https://github.com/benkrikler)
- Add protection against multiple dimensions using the same output name in a BinnedDataframe stage, Issue #92, PR #100 [@benkrikler](https://github.com/benkrikler)

## [0.15.0] - 2019-10-27
### Added
Expand Down
35 changes: 26 additions & 9 deletions fast_carpenter/define/reductions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import six
from ..expressions import deconstruct_jaggedness, reconstruct_jaggedness


__all__ = ["get_pandas_reduction"]
Expand All @@ -14,17 +15,33 @@ def __init__(self, index, fill_missing, force_float=True):
self.index = index
self.fill_missing = fill_missing
self.dtype = None
if force_float and isinstance(fill_missing, int):
if fill_missing is True or fill_missing is False:
self.dtype = bool
else:
self.dtype = float
if fill_missing is True or fill_missing is False:
self.dtype = bool
elif force_float or isinstance(fill_missing, float):
self.dtype = float
else:
self.dtype = int

def __call__(self, array):
mask = array.counts > abs(self.index) - int(self.index < 0)
output = np.full(len(array), self.fill_missing, dtype=self.dtype)
output[mask] = array[mask, self.index]
return output
# In principle the next two lines alone ought to be enough, without the flatten/rebuild steps further down:
# result = array.pad(abs(self.index) + int(self.index >= 0))
# result = result[..., self.index]

# Flatten out the first K-1 dimensions:
flat, counts = deconstruct_jaggedness(array, [])
result = reconstruct_jaggedness(flat, counts[:1])

# Now get the Nth item on the last dimension
result = result.pad(abs(self.index) + int(self.index >= 0))
result = result[..., self.index]

# Now replay the remaining dimensions on this
result = reconstruct_jaggedness(result, counts[1:])

if self.dtype is not None:
result = result.astype(self.dtype)
result = result.fillna(self.fill_missing)
return result


class JaggedMethod(object):
Expand Down
30 changes: 30 additions & 0 deletions tests/define/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,36 @@ def test_jagged_nth(jagged_1):
assert np.isnan(reduced[5])


def test_jagged_nth_3D(jagged_1):
    """Check JaggedNth on a doubly-jagged array built from nested lists.

    Outer entry ``j`` holds ``j % 3`` inner arrays and inner array ``i`` is
    ``np.arange(i + 1) + j``, giving: [], [[1]], [[2], [2, 3]], [], [[4]].
    The ``jagged_1`` fixture is requested but not used in the body.
    """
    nested_lists = []
    for outer in range(5):
        nested_lists.append([np.arange(inner + 1) + outer
                             for inner in range(outer % 3)])
    jagged_3d = JaggedArray.fromiter(nested_lists)

    # The reduction is expected to keep the outer jaggedness, so both
    # reductions should yield these per-entry lengths.
    expected_lengths = (0, 1, 2, 0, 1)

    # Second item (index 1) of every innermost array, with NaN as the fill
    # value for inner arrays that are too short.
    seconds = reductions.JaggedNth(1, np.nan)(jagged_3d)
    for position, expected in enumerate(expected_lengths):
        assert len(seconds[position]) == expected
    assert np.isnan(seconds[1])
    assert np.isnan(seconds[2][0])
    assert seconds[2][1] == 3
    assert np.isnan(seconds[4])

    # First item (index 0) of every innermost array; every inner array has
    # at least one element, so no fill values are expected here.
    firsts = reductions.JaggedNth(0, np.nan)(jagged_3d)
    for position, expected in enumerate(expected_lengths):
        assert len(firsts[position]) == expected
    assert firsts[1][0] == 1
    assert firsts[2][0] == 2
    assert firsts[2][1] == 2
    assert firsts[4] == 4


def test_jagged_nth_negative(jagged_1):
get_first_second = reductions.JaggedNth(-1, np.nan)
reduced = get_first_second(jagged_1)
Expand Down

0 comments on commit 65f5530

Please sign in to comment.