Skip to content

Commit

Permalink
Merge pull request #70 from oscarbranson/dev
Browse files Browse the repository at this point in the history
0.3.25 - filtering and export fixes
  • Loading branch information
oscarbranson authored Jul 30, 2021
2 parents d6125df + 85a0810 commit ccdaa9b
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 29 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# Changelog
All significant changes to the software will be documented here.

## [0.3.25] - 30/07/2021

### Changed
- Improvements to analyte handling in filt_obj to play nicely with custom denominators.
- Improved robustness of `sample_stats` and `export_traces` at all processing stages.
- Added tests for `sample_stats`, `export_traces` and `minimal_export`


## [0.3.24] 29/07/2021

### Changed
Expand All @@ -13,6 +21,7 @@ All significant changes to the software will be documented here.

### Changed
- Fixes to handling of SRMs with missing analytes.
- Moved tests to GitHub Workflows.

## [0.3.22] 27/05/2021

Expand Down
13 changes: 8 additions & 5 deletions latools/D_obj.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,9 @@ def __init__(self, data_file=None, dataformat=None, errorhunt=False, cmap=None,
self.bkgrng = np.array([]).reshape(0, 2)
self.sigrng = np.array([]).reshape(0, 2)

# set up filtering environment
# set up blank filtering object
self._init_filts()
# self.filt = filt(self.Time.size, self.analytes)
self.filt = None

if errorhunt:
print(' -> OK')
Expand All @@ -184,7 +184,7 @@ def _analyte_checker(self, analytes=None, check_ratios=True, single=False, focus
def analytes_sorted(self, analytes=None, check_ratios=True, single=False, focus_stage=None):
return sorted(self._analyte_checker(analytes=analytes, check_ratios=check_ratios, single=single, focus_stage=focus_stage), key=analyte_sort_fn)

def _init_filts(self, analytes):
def _init_filts(self, analytes=None):
self.filt = filt(self.Time.size, analytes)

@_log
Expand Down Expand Up @@ -592,9 +592,10 @@ def calibrate(self, calib_ps, analyte_ratios=None):
c = 0

self.data['calibrated'][a] = self.data['ratios'][a] * m + c

self.filt.add_to_table(a)

# initialise filtering framework
self._init_filts(self.analyte_ratios)
# self._init_filts(self.analyte_ratios)

self.setfocus('calibrated')
return
Expand Down Expand Up @@ -666,6 +667,8 @@ def sample_stats(self, analytes=None, filt=True,
for n, f in stat_fns.items():
self.stats[n] = []
for a in analytes:
if a not in self.data[focus_stage]:
continue
ind = self.filt.grab_filt(filt, a)
dat = nominal_values(self.data[focus_stage][a])
if eachtrace:
Expand Down
2 changes: 1 addition & 1 deletion latools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .helpers import chemistry
from . import preprocessing

__version__ = '0.3.24'
__version__ = '0.3.25'

def cite(output='text'):
"""
Expand Down
92 changes: 71 additions & 21 deletions latools/filtering/filt_obj.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ class filt(object):

def __init__(self, size, analytes):
self.size = size
self.analytes = analytes
self.maxset = -1

findex = pd.MultiIndex(levels=[[], []], codes=[[], []], names=['N', 'filter'])
self.fnames = []
self.filter_table = pd.DataFrame(index=findex, columns=self.analytes)
self.filter_table = pd.DataFrame(index=findex, columns=analytes)
self.analytes = self.filter_table.columns
self.filter_components = pd.DataFrame(index=np.arange(size), columns=findex)

self.param = Bunch()
Expand All @@ -67,6 +67,58 @@ def __init__(self, size, analytes):

self.N = 0

def check_analytes(self, analytes=None, single=False, allow_multiples=False):
    """
    Resolve analyte name(s) against the names held in `self.analytes`.

    Parameters
    ----------
    analytes : str or array-like, optional
        The name(s) to check. A single string is treated as a
        one-element list. If None, every name in `self.analytes`
        is returned as a set, regardless of `single`.
    single : bool
        If True, one name is returned as a string instead of a set.
        When no name could be validated, the first requested name is
        handed back unchanged. When several names were validated, an
        arbitrary one is returned (set order is not defined).
    allow_multiples : bool
        If True, a name that produces more than one candidate
        contributes all of its candidates. If False, that situation
        raises a ValueError.

    Returns
    -------
    set or str
        Set of validated names, or a single name if `single` is True.

    Raises
    ------
    ValueError
        If a name produces more than one candidate and
        `allow_multiples` is False.
    """
    if analytes is None:
        return set(self.analytes.values)

    if isinstance(analytes, str):
        analytes = [analytes]

    valid = set()

    for analyte in analytes:
        # exact match against a known name
        if analyte in self.analytes:
            valid.add(analyte)

        # names containing '_' are only ever matched exactly
        if '_' in analyte:
            continue

        # substring match: the bare name appears inside a known name
        # NOTE(review): the queried name itself (not the known name that
        # contains it) is collected, so this set holds at most one
        # element and the multi-candidate handling below cannot trigger
        # as written. Confirm whether the matching name was intended.
        candidates = {analyte for column in self.analytes if analyte in column}

        if len(candidates) == 1:
            valid.add(candidates.pop())
        elif len(candidates) > 1:
            if not allow_multiples:
                raise ValueError(f'{analyte} matches one than one analyte name: {candidates}. Please be more specific.')
            valid.update(candidates)

    if not single:
        return valid

    if len(valid) == 0:
        # nothing validated: hand back the first requested name as-is
        return analytes[0]
    return valid.pop()

def add(self, name, filt, info='', params=(), setn=None):
"""
Add filter.
Expand Down Expand Up @@ -129,6 +181,7 @@ def add_to_table(self, analyte, mode='all'):
self.filter_table.loc[:, analyte] = True
else:
self.filter_table.loc[:, analyte] = False
self.analytes = self.filter_table.columns

def clear(self):
"""
Expand All @@ -155,10 +208,7 @@ def on(self, analyte=None, filt=None):
-------
None
"""
if isinstance(analyte, str):
analyte = [analyte]
if analyte is None:
analyte = self.analytes
analyte = self.check_analytes(analyte)

if isinstance(filt, str):
# find filter name
Expand All @@ -182,10 +232,7 @@ def off(self, analyte=None, filt=None):
-------
None
"""
if isinstance(analyte, str):
analyte = [analyte]
if analyte is None:
analyte = self.analytes
analyte = self.check_analytes(analyte)

if isinstance(filt, str):
# find filter name
Expand Down Expand Up @@ -238,10 +285,7 @@ def make_analyte(self, analyte):
array_like
boolean filter
"""
if isinstance(analyte, str):
analyte = [analyte]
elif analyte is None:
analyte = self.analytes
analyte = self.check_analytes(analyte)

key = []
for n, f in self.filter_table[analyte].index[self.filter_table[analyte].any(1)]:
Expand Down Expand Up @@ -296,24 +340,21 @@ def make_keydict(self, analyte=None):
dict
containing the logical filter expression for each analyte.
"""
if isinstance(analyte, str):
analyte = [analyte]
elif analyte is None:
analyte = self.analytes
analyte = self.check_analytes(analyte)

for a in analyte:
key = []
for n, f in self.filter_table[a].index[self.filter_table[a]]:
key.append(f'{n}:{f}')
self.keydict[a] = ' & '.join(key)

def grab_filt(self, filt, analyte=None):
def grab_filt(self, filt, analyte=None, allow_multiples=True):
"""
Flexible access to specific filter using any key format.
Parameters
----------
f : str, dict or bool
filt : str, dict or bool
either logical filter expression, dict of expressions,
or a boolean
analyte : str
Expand All @@ -324,6 +365,15 @@ def grab_filt(self, filt, analyte=None):
array_like
boolean filter
"""
analyte = self.check_analytes(analyte, single=True)

if len(analyte) == 0:
return np.ones(self.size, dtype=bool)

if analyte not in self.analytes:
return np.ones(self.size, dtype=bool)
# print(f'Warning: {analyte} is not in filter table. No filters applied.')

if isinstance(filt, str):
if filt in self.fnames:
fkey = self.fuzzmatch(filt)
Expand All @@ -348,7 +398,7 @@ def grab_filt(self, filt, analyte=None):
elif filt:
ind = self.make_analyte(analyte)
else:
ind = ~np.zeros(self.size, dtype=bool)
ind = np.ones(self.size, dtype=bool)
return ind

def get_components(self, analyte):
Expand Down
10 changes: 8 additions & 2 deletions latools/latools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3833,6 +3833,9 @@ def sample_stats(self, analytes=None, filt=True,
Adds dict to analyse object containing samples, analytes and
functions and data.
"""
if 'autorange' not in self.stages_complete:
raise RuntimeError('Cannot calculate statistics until autorange has been run.')

analytes = self.analytes_sorted(analytes, focus_stage=focus_stage)

if focus_stage is None:
Expand Down Expand Up @@ -4154,17 +4157,20 @@ def export_traces(self, outdir=None, focus_stage=None, analytes=None,
'despiked': 'counts',
'bkgsub': 'background corrected counts',
'ratios': 'counts/count',
'calibrated': 'mol/mol'}
'calibrated': 'mol/mol',
'mass_fraction': 'mass fraction'}

if not os.path.isdir(outdir):
os.mkdir(outdir)

for s in samples:
d = self.data[s].data[focus_stage]
ind = self.data[s].filt.grab_filt(filt)
out = Bunch()

for a in analytes:
if a not in d:
continue
ind = self.data[s].filt.grab_filt(filt, a)
out[a] = nominal_values(d[a][ind])
if focus_stage not in ['rawdata', 'despiked']:
out[a + '_std'] = std_devs(d[a][ind])
Expand Down
31 changes: 31 additions & 0 deletions tests/test_latools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@
import pandas as pd
import latools as la

def test_export_fns(d, stage=''):
    """
    Exercise the statistics and export functions of an analysis object.

    For every entry in `d.stages_complete` except 'autorange', calls
    `d.sample_stats` and `d.export_traces` with that entry as
    `focus_stage`, then calls `d.minimal_export` once.

    Parameters
    ----------
    d : object
        Analysis object providing `stages_complete`, `sample_stats`,
        `export_traces` and `minimal_export`.
    stage : str
        Description of the processing step that has just been run.
        Only used to give context in failure messages.

    Raises
    ------
    Exception
        If any of the exercised functions fails. The original error is
        chained as the cause.
    """
    for fs in d.stages_complete:
        if fs == 'autorange':
            continue
        # report which function failed, and at which stage, for both calls
        for label, export_fn in (('sample_stats', d.sample_stats),
                                 ('export_traces', d.export_traces)):
            try:
                export_fn(focus_stage=fs)
            except Exception as e:
                raise Exception(f"{label} failed after {stage} at focus_stage={fs}") from e

    # minimal_export takes no focus_stage, so it is run once per call
    try:
        d.minimal_export()
    except Exception as e:
        raise Exception(f"minimal_export failed after {stage}") from e


# This is a helper called from the test case, not a test itself. Its name
# matches pytest's `test_*` pattern, so opt out of collection explicitly;
# otherwise pytest would try to resolve `d` as a fixture and error.
test_export_fns.__test__ = False

class test_latools(unittest.TestCase):
"""
Expand All @@ -20,6 +30,9 @@ class test_latools(unittest.TestCase):
d.autorange(on_mult=[1.5, 0.8],
off_mult=[0.8, 1.5])

# test export functions
test_export_fns(d, 'applying autorange')

# trace plotting
d.trace_plots(ranges=True)

Expand All @@ -32,15 +45,24 @@ class test_latools(unittest.TestCase):
# subtract background
d.bkg_subtract()

# test export functions
test_export_fns(d, 'background subtraction')

# ratio
d.ratio()

# test export functions
test_export_fns(d, 'calculating ratios')

# calibrate
d.calibrate(drift_correct=False, n_min=10,
srms_used=['NIST610', 'NIST612', 'NIST614'])
# calibration plot
fig, axs = d.calibration_plot()

# test export functions
test_export_fns(d, 'calibration')

# crossplot
fig, axs = d.crossplot(save=True)

Expand All @@ -55,9 +77,15 @@ class test_latools(unittest.TestCase):

d.filter_on('Albelow')

# test export functions
test_export_fns(d, 'applying filters')

# test custom denominator
d.ratio('Ba137', 'Ba138')

# test export functions
test_export_fns(d, 'calculating a custom ratio')

# calculate stats
d.sample_stats(stats=['mean', 'std', 'se', 'H15_mean', 'H15_std', 'H15_se'], filt=True)
s = d.getstats()
Expand All @@ -67,6 +95,9 @@ class test_latools(unittest.TestCase):
d.internal_standard_concs = pd.DataFrame(0.6, index=d.samples, columns=['int_stand_massfrac'])
d.calculate_mass_fraction()

# test export functions
test_export_fns(d, 'calculating mass fractions')

# minimal export
d.minimal_export()

Expand Down

0 comments on commit ccdaa9b

Please sign in to comment.