Skip to content

Commit

Permalink
Merge pull request #352 from lsst/tickets/OPSIM-1065
Browse files Browse the repository at this point in the history
Tickets/opsim 1065
  • Loading branch information
yoachim authored Aug 23, 2023
2 parents 794a546 + f1e73d8 commit f20f5cb
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 38 deletions.
30 changes: 15 additions & 15 deletions rubin_sim/maf/slicers/one_d_slicer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# oneDSlicer - slices based on values in one data column in sim_data.

__all__ = ("OneDSlicer",)

import warnings
Expand Down Expand Up @@ -102,14 +100,14 @@ def setup_slicer(self, sim_data, maps=None):
"A safer choice is to use a separate OneDSlicer for each MetricBundle."
)
warnings.warn(warning_msg)
sliceCol = sim_data[self.slice_col_name]
slice_col = sim_data[self.slice_col_name]
# Set bins from data or specified values, if they were previously defined.
if self.bins is None:
# Set bin min/max values (could have been set in __init__)
if self.bin_min is None:
self.bin_min = np.nanmin(sliceCol)
self.bin_min = np.nanmin(slice_col)
if self.bin_max is None:
self.bin_max = np.nanmax(sliceCol)
self.bin_max = np.nanmax(slice_col)
# Give warning if bin_min = bin_max, and do something at least slightly reasonable.
if self.bin_min == self.bin_max:
warnings.warn(
Expand All @@ -121,7 +119,7 @@ def setup_slicer(self, sim_data, maps=None):
else:
self.bin_max = self.bin_max + 1
if self.bin_size is None:
bins = optimal_bins(sliceCol, self.bin_min, self.bin_max)
bins = optimal_bins(slice_col, self.bin_min, self.bin_max)
nbins = np.round(bins)
self.bin_size = (self.bin_max - self.bin_min) / float(nbins)
# Set bins
Expand All @@ -136,20 +134,22 @@ def setup_slicer(self, sim_data, maps=None):
self.slice_points["bins"] = self.bins
# Add metadata from map if needed.
self._run_maps(maps)
# Set up data slicing.
self.sim_idxs = np.argsort(sim_data[self.slice_col_name])
simFieldsSorted = np.sort(sim_data[self.slice_col_name])
# "left" values are location where simdata[i-1] < bins <= simdata[i]
self.left = np.searchsorted(simFieldsSorted, self.bins, "left")
# "right" values are locations where simdata[i-1] <= bins < simdata[i]
# This lets us have a closed final bin
self.right = np.searchsorted(simFieldsSorted, self.bins, "right")

indxs = np.argsort(sim_data[self.slice_col_name])
data_sorted = sim_data[self.slice_col_name][indxs]

# Setting up slices such that left_edge <= data < right_edge
# in each slice.
left = np.searchsorted(data_sorted, self.bins[0:-1], "left")
right = np.searchsorted(data_sorted, self.bins[1:], "left")

self.sim_idxs = [indxs[le:ri] for le, ri in zip(left, right)]

# Set up _slice_sim_data method for this class.
@wraps(self._slice_sim_data)
def _slice_sim_data(islice):
"""Slice sim_data on oneD sliceCol, to return relevant indexes for slice_point."""
idxs = self.sim_idxs[self.left[islice] : self.right[islice + 1]]
idxs = self.sim_idxs[islice]
bin_left = self.bins[islice]
bin_right = self.bins[islice + 1]
return {
Expand Down
44 changes: 21 additions & 23 deletions tests/maf/test_onedslicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
from rubin_sim.maf.slicers import OneDSlicer, UniSlicer


def make_data_values(size=100, min=0.0, max=1.0, random=-1):
def make_data_values(size=100, min_val=0.0, max_val=1.0, random=-1):
"""Generate a simple array of numbers, evenly arranged between min/max, but (optional) random order."""
datavalues = np.arange(0, size, dtype="float")
datavalues *= (float(max) - float(min)) / (datavalues.max() - datavalues.min())
datavalues += min
datavalues *= (float(max_val) - float(min_val)) / (datavalues.max() - datavalues.min())
datavalues += min_val
if random > 0:
rng = np.random.RandomState(random)
randorder = rng.rand(size)
Expand Down Expand Up @@ -206,28 +206,8 @@ def test_slicing(self):
dvmax = 1
stepsize = 0.01
bins = np.arange(dvmin, dvmax + stepsize / 2, stepsize)
nbins = len(bins) - 1
# Test that testbinner raises appropriate error before it's set up (first time)
self.assertRaises(NotImplementedError, self.testslicer._slice_sim_data, 0)
# test that random values are split equally into all bins (when bins and data cover same range)
for nvalues in (1000, 10000, 100000):
dv = make_data_values(nvalues, dvmin, dvmax, random=560)
self.testslicer = OneDSlicer(slice_col_name="testdata", bins=bins)
self.testslicer.setup_slicer(dv)
sum = 0
for i, s in enumerate(self.testslicer):
idxs = s["idxs"]
data_slice = dv["testdata"][idxs]
sum += len(idxs)
if len(data_slice) > 0:
self.assertEqual(len(data_slice), nvalues / float(nbins))
else:
self.assertGreater(
len(data_slice),
0,
msg="Data in test case expected to always be > 0 len after slicing",
)
self.assertTrue(sum, nvalues)
# test that we're not dumping extra visits into the 'last bin' (that it's closed both sides)
nvalues = 1000
dv = make_data_values(nvalues, 0, 2, random=560)
Expand All @@ -238,6 +218,24 @@ def test_slicing(self):
for i, s in enumerate(self.testslicer):
self.assertEqual(len(s["idxs"]), nvalues / float(nbins) / 2.0)

def test_night_slicing(self):
"""Test slicing with 'night' column"""
numval = 1000
vals = np.arange(0, numval, 1) / (numval / 50)
nights = np.floor(vals)
np.random.shuffle(nights)
# If you were doing by-hand comparison, this can be useful
# bins = np.arange(0, 50.5, 1)
# vals, bb = np.histogram(nights, bins=bins)
vals = make_data_values(len(nights), random=10)
vals = np.array(list(zip(nights, vals["testdata"])), dtype=[("night", "int"), ("testdata", "float")])
test_slicer = OneDSlicer(slice_col_name="night", bins=np.arange(0, 50.5, 1))
test_slicer.setup_slicer(vals)
for s in test_slicer:
self.assertTrue(vals[s["idxs"]]["night"].min() >= s["slice_point"]["bin_left"])
self.assertTrue(vals[s["idxs"]]["night"].max() < s["slice_point"]["bin_right"])
self.assertEqual(len(s["idxs"]), numval / 50)


if __name__ == "__main__":
unittest.main()

0 comments on commit f20f5cb

Please sign in to comment.