diff --git a/CHANGELOG.md b/CHANGELOG.md index 5db5e8e..fba2700 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ # Changelog All significant changes to the software will be documented here. +## [0.3.25] 30/07/2021 + +### Changed +- Improvements to analyte handling in filt_obj to play nicely with custom denominators. +- Improved robustness of `sample_stats` and `export_traces` at all stages. +- Added tests for `sample_stats`, `export_traces` and `minimal_export`. + + ## [0.3.24] 29/07/2021 ### Changed @@ -13,6 +21,7 @@ All significant changes to the software will be documented here. ### Changed - Fixes to handling of SRMs with missing analytes. +- Moved tests to GitHub Workflows. ## [0.3.22] 27/05/2021 diff --git a/latools/D_obj.py b/latools/D_obj.py index 7ae354d..9c05f7d 100644 --- a/latools/D_obj.py +++ b/latools/D_obj.py @@ -166,9 +166,9 @@ def __init__(self, data_file=None, dataformat=None, errorhunt=False, cmap=None, self.bkgrng = np.array([]).reshape(0, 2) self.sigrng = np.array([]).reshape(0, 2) - # set up filtering environment + # set up blank filtering object + self._init_filts() # self.filt = filt(self.Time.size, self.analytes) - self.filt = None if errorhunt: print(' -> OK') @@ -184,7 +184,7 @@ def _analyte_checker(self, analytes=None, check_ratios=True, single=False, focus def analytes_sorted(self, analytes=None, check_ratios=True, single=False, focus_stage=None): return sorted(self._analyte_checker(analytes=analytes, check_ratios=check_ratios, single=single, focus_stage=focus_stage), key=analyte_sort_fn) - def _init_filts(self, analytes): + def _init_filts(self, analytes=None): self.filt = filt(self.Time.size, analytes) @_log @@ -592,9 +592,10 @@ def calibrate(self, calib_ps, analyte_ratios=None): c = 0 self.data['calibrated'][a] = self.data['ratios'][a] * m + c - + self.filt.add_to_table(a) + # initialise filtering framework - self._init_filts(self.analyte_ratios) + # self._init_filts(self.analyte_ratios) self.setfocus('calibrated') return 
@@ -666,6 +667,8 @@ def sample_stats(self, analytes=None, filt=True, for n, f in stat_fns.items(): self.stats[n] = [] for a in analytes: + if a not in self.data[focus_stage]: + continue ind = self.filt.grab_filt(filt, a) dat = nominal_values(self.data[focus_stage][a]) if eachtrace: diff --git a/latools/__init__.py b/latools/__init__.py index 4b0fb8a..41f3c1a 100644 --- a/latools/__init__.py +++ b/latools/__init__.py @@ -17,7 +17,7 @@ from .helpers import chemistry from . import preprocessing -__version__ = '0.3.24' +__version__ = '0.3.25' def cite(output='text'): """ diff --git a/latools/filtering/filt_obj.py b/latools/filtering/filt_obj.py index 1316535..4942dac 100644 --- a/latools/filtering/filt_obj.py +++ b/latools/filtering/filt_obj.py @@ -53,12 +53,12 @@ class filt(object): def __init__(self, size, analytes): self.size = size - self.analytes = analytes self.maxset = -1 findex = pd.MultiIndex(levels=[[], []], codes=[[], []], names=['N', 'filter']) self.fnames = [] - self.filter_table = pd.DataFrame(index=findex, columns=self.analytes) + self.filter_table = pd.DataFrame(index=findex, columns=analytes) + self.analytes = self.filter_table.columns self.filter_components = pd.DataFrame(index=np.arange(size), columns=findex) self.param = Bunch() @@ -67,6 +67,58 @@ def __init__(self, size, analytes): self.N = 0 + def check_analytes(self, analytes=None, single=False, allow_multiples=False): + """ + Checks analyte name and matches it to correct filter. + + Necessary because of distinction between analyte and ratio names. + + Parameters + ========== + analytes : str or array-like + The analyte(s) to check. + single : bool + If true a single analyte is returned as a string + allow_multiples : bool + If True, analytes that link to multiple possible filter + analytes are allowed. 
+ + Returns + ======= + set : containing valid analytes + """ + if analytes is None: + return set(self.analytes.values) + + if isinstance(analytes, str): + analytes = [analytes] + + valid = set() + + for analyte in analytes: + if analyte in self.analytes: + valid.update([analyte]) + + if '_' not in analyte: + candidates = set() + for a in self.analytes: + if analyte in a: + candidates.update([a]) + if len(candidates) == 1: + valid.update([candidates.pop()]) + elif len(candidates) > 1: + if allow_multiples: + valid.update(candidates) + # valid.update([analyte]) + else: + raise ValueError(f'{analyte} matches more than one analyte name: {candidates}. Please be more specific.') + if single: + if len(valid) == 0: + return analytes[0] + return min(valid) + else: + return valid + def add(self, name, filt, info='', params=(), setn=None): """ Add filter. @@ -129,6 +181,7 @@ def add_to_table(self, analyte, mode='all'): self.filter_table.loc[:, analyte] = True else: self.filter_table.loc[:, analyte] = False + self.analytes = self.filter_table.columns def clear(self): """ @@ -155,10 +208,7 @@ def on(self, analyte=None, filt=None): ------- None """ - if isinstance(analyte, str): - analyte = [analyte] - if analyte is None: - analyte = self.analytes + analyte = self.check_analytes(analyte) if isinstance(filt, str): # find filter name @@ -182,10 +232,7 @@ def off(self, analyte=None, filt=None): ------- None """ - if isinstance(analyte, str): - analyte = [analyte] - if analyte is None: - analyte = self.analytes + analyte = self.check_analytes(analyte) if isinstance(filt, str): # find filter name @@ -238,10 +285,7 @@ def make_analyte(self, analyte): array_like boolean filter """ - if isinstance(analyte, str): - analyte = [analyte] - elif analyte is None: - analyte = self.analytes + analyte = self.check_analytes(analyte) key = [] for n, f in self.filter_table[analyte].index[self.filter_table[analyte].any(1)]: @@ -296,10 +340,7 @@ def make_keydict(self, analyte=None): dict 
containing the logical filter expression for each analyte. """ - if isinstance(analyte, str): - analyte = [analyte] - elif analyte is None: - analyte = self.analytes + analyte = self.check_analytes(analyte) for a in analyte: key = [] @@ -307,13 +348,13 @@ def make_keydict(self, analyte=None): key.append(f'{n}:{f}') self.keydict[a] = ' & '.join(key) - def grab_filt(self, filt, analyte=None): + def grab_filt(self, filt, analyte=None, allow_multiples=True): """ Flexible access to specific filter using any key format. Parameters ---------- - f : str, dict or bool + filt : str, dict or bool either logical filter expression, dict of expressions, or a boolean analyte : str @@ -324,6 +365,15 @@ def grab_filt(self, filt, analyte=None): array_like boolean filter """ + analyte = self.check_analytes(analyte, single=True, allow_multiples=allow_multiples) + + if len(analyte) == 0: + return np.ones(self.size, dtype=bool) + + if analyte not in self.analytes: + return np.ones(self.size, dtype=bool) + # print(f'Warning: {analyte} is not in filter table. No filters applied.') + if isinstance(filt, str): if filt in self.fnames: fkey = self.fuzzmatch(filt) @@ -348,7 +398,7 @@ def grab_filt(self, filt, analyte=None): elif filt: ind = self.make_analyte(analyte) else: - ind = ~np.zeros(self.size, dtype=bool) + ind = np.ones(self.size, dtype=bool) return ind def get_components(self, analyte): diff --git a/latools/latools.py b/latools/latools.py index bfd0106..74d9dba 100644 --- a/latools/latools.py +++ b/latools/latools.py @@ -3833,6 +3833,9 @@ def sample_stats(self, analytes=None, filt=True, Adds dict to analyse object containing samples, analytes and functions and data. 
""" + if 'autorange' not in self.stages_complete: + raise RuntimeError('Cannot calculate statistics until autorange has been run.') + analytes = self.analytes_sorted(analytes, focus_stage=focus_stage) if focus_stage is None: @@ -4154,17 +4157,20 @@ def export_traces(self, outdir=None, focus_stage=None, analytes=None, 'despiked': 'counts', 'bkgsub': 'background corrected counts', 'ratios': 'counts/count', - 'calibrated': 'mol/mol'} + 'calibrated': 'mol/mol', + 'mass_fraction': 'mass fraction'} if not os.path.isdir(outdir): os.mkdir(outdir) for s in samples: d = self.data[s].data[focus_stage] - ind = self.data[s].filt.grab_filt(filt) out = Bunch() for a in analytes: + if a not in d: + continue + ind = self.data[s].filt.grab_filt(filt, a) out[a] = nominal_values(d[a][ind]) if focus_stage not in ['rawdata', 'despiked']: out[a + '_std'] = std_devs(d[a][ind]) diff --git a/tests/test_latools.py b/tests/test_latools.py index d39cd4b..fadf552 100644 --- a/tests/test_latools.py +++ b/tests/test_latools.py @@ -3,6 +3,16 @@ import pandas as pd import latools as la +def test_export_fns(d, stage=''): + for fs in d.stages_complete: + if fs == 'autorange': + continue + try: + d.sample_stats(focus_stage=fs) + except Exception as e: + raise Exception(f"sample_stats failed after {stage} at focus_stage={fs}") from e + d.export_traces(focus_stage=fs) + d.minimal_export() class test_latools(unittest.TestCase): """ @@ -20,6 +30,9 @@ class test_latools(unittest.TestCase): d.autorange(on_mult=[1.5, 0.8], off_mult=[0.8, 1.5]) + # test export functions + test_export_fns(d, 'applying autorange') + # trace plotting d.trace_plots(ranges=True) @@ -32,15 +45,24 @@ class test_latools(unittest.TestCase): # subtract background d.bkg_subtract() + # test export functions + test_export_fns(d, 'background subtraction') + # ratio d.ratio() + # test export functions + test_export_fns(d, 'calculating ratios') + # calibrate d.calibrate(drift_correct=False, n_min=10, srms_used=['NIST610', 'NIST612', 
'NIST614']) # calibration plot fig, axs = d.calibration_plot() + # test export functions + test_export_fns(d, 'calibration') + # crossplot fig, axs = d.crossplot(save=True) @@ -55,9 +77,15 @@ class test_latools(unittest.TestCase): d.filter_on('Albelow') + # test export functions + test_export_fns(d, 'applying filters') + # test custom denominator d.ratio('Ba137', 'Ba138') + # test export functions + test_export_fns(d, 'calculating a custom ratio') + # calculate stats d.sample_stats(stats=['mean', 'std', 'se', 'H15_mean', 'H15_std', 'H15_se'], filt=True) s = d.getstats() @@ -67,6 +95,9 @@ class test_latools(unittest.TestCase): d.internal_standard_concs = pd.DataFrame(0.6, index=d.samples, columns=['int_stand_massfrac']) d.calculate_mass_fraction() + # test export functions + test_export_fns(d, 'calculating mass fractions') + # minimal export d.minimal_export()