Local score low pass filtering #151

Open · wants to merge 8 commits into base: main
11 changes: 11 additions & 0 deletions PyHa/IsoAutio.py
@@ -3,6 +3,7 @@
from .microfaune_package.microfaune import audio
from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
from .tweetynet_package.tweetynet.Load_data_functions import compute_features, predictions_to_kaleidoscope
from .dsp_tools import local_score_filtering
import os
import torch
import librosa
@@ -179,6 +180,16 @@ def isolate(
# local_scores[ndx] = local_scores[ndx] / local_scores_max
# initializing the output dataframe that will contain labels across a
# single clip

# Filtering the local score arrays if desired
if "filter_local_scores" in isolation_parameters:
    assert isinstance(isolation_parameters["filter_local_scores"], tuple)
    assert len(isolation_parameters["filter_local_scores"]) == 2
    normalized_cutoff_freq, order = isolation_parameters["filter_local_scores"]
    local_scores = local_score_filtering(local_scores, normalized_cutoff_freq, order)


isolation_df = pd.DataFrame()

# deciding which isolation technique to deploy for a given clip based on
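To see the new option end to end, here is a minimal sketch of an isolation_parameters dictionary. Only "filter_local_scores" comes from this PR; the other key names mirror the PyHa tutorial, and the cutoff, order, and audio directory values are illustrative assumptions.

from PyHa.IsoAutio import generate_automated_labels

# The tuple is (normalized_cutoff_freq, order): keep the lowest 25% of the
# normalized frequency range using a 4th-order Butterworth low-pass filter.
isolation_parameters = {
    "model": "microfaune",
    "technique": "steinberg",
    "threshold_type": "median",
    "threshold_const": 2.0,
    "threshold_min": 0.0,
    "window_size": 2.0,
    "chunk_size": 5.0,
    "filter_local_scores": (0.25, 4),
}

automated_df = generate_automated_labels("./TEST/", isolation_parameters)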
4 changes: 3 additions & 1 deletion PyHa/annotation_post_processing.py
@@ -24,7 +24,7 @@ def annotation_chunker(kaleidoscope_df, chunk_length):

#Init list of clips to cycle through and output dataframe
clips = kaleidoscope_df["IN FILE"].unique()
-df_columns = {'IN FILE' :'str', 'CLIP LENGTH' : 'float64', 'CHANNEL' : 'int64', 'OFFSET' : 'float64',
+df_columns = {'FOLDER': 'str','IN FILE' :'str', 'CLIP LENGTH' : 'float64', 'CHANNEL' : 'int64', 'OFFSET' : 'float64',
'DURATION' : 'float64', 'SAMPLE RATE' : 'int64','MANUAL ID' : 'str'}
output_df = pd.DataFrame({c: pd.Series(dtype=t) for c, t in df_columns.items()})

@@ -34,6 +34,7 @@ def annotation_chunker(kaleidoscope_df, chunk_length):
birds = clip_df["MANUAL ID"].unique()
sr = clip_df["SAMPLE RATE"].unique()[0]
clip_len = clip_df["CLIP LENGTH"].unique()[0]
folder = clip_df["FOLDER"].unique()[0]

# quick data sanitization to remove very short clips
# do not consider any chunk that is less than chunk_length
@@ -68,6 +69,7 @@ def annotation_chunker(kaleidoscope_df, chunk_length):
row = pd.DataFrame(index = [0])
annotation_start = chunk_start / 1000
#updating the dictionary
row["FOLDER"] = folder
row["IN FILE"] = clip
row["CLIP LENGTH"] = clip_len
row["OFFSET"] = annotation_start
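With the new FOLDER column, input dataframes must now carry it. A minimal sketch follows; the values are illustrative, and the column set mirrors the df_columns dictionary above.

import pandas as pd
from PyHa.annotation_post_processing import annotation_chunker

# Kaleidoscope-style input; FOLDER is now required alongside the old columns.
kaleidoscope_df = pd.DataFrame({
    "FOLDER": ["./TEST/"],
    "IN FILE": ["clip1.wav"],
    "CHANNEL": [0],
    "CLIP LENGTH": [60.0],
    "SAMPLE RATE": [44100],
    "OFFSET": [12.3],
    "DURATION": [4.2],
    "MANUAL ID": ["bird"],
})

# Each chunk overlapping an annotation becomes one output row,
# and FOLDER is carried through to that row.
chunked_df = annotation_chunker(kaleidoscope_df, chunk_length=3)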
105 changes: 105 additions & 0 deletions PyHa/dsp_tools.py
@@ -0,0 +1,105 @@
from scipy.signal import butter, filtfilt
from scipy.fft import fft
import matplotlib.pyplot as plt
import numpy as np


def build_low_pass_filter(normalized_cutoff, order):
    """
    Wrapper around scipy.signal.butter that generates low-pass filter
    coefficients used to remove the high-frequency noise observed in
    CNN-RNN local score arrays.

    Args:
        normalized_cutoff (float):
            - Cutoff frequency as a fraction of the Nyquist frequency,
              i.e., the portion of the frequency domain kept in the passband

        order (int):
            - Filter order. A higher order yields a sharper cutoff and more
              effective filtering, at the cost of more computation

    Returns:
        - Numerator and denominator coefficients of the low-pass filter (ndarray)
    """
    assert isinstance(normalized_cutoff, float)
    assert 0.0 < normalized_cutoff < 1.0
    assert isinstance(order, int)

    b, a = butter(order, normalized_cutoff, btype='low', analog=False)
    return b, a


def filter_data(local_score_arr, b, a):
    """
    Wrapper around scipy.signal.filtfilt that applies the filter forwards and
    backwards (zero-phase), guaranteeing that the output is the same length
    as the local score array passed in, using the coefficients produced by
    build_low_pass_filter.

    Args:
        local_score_arr (list or np.ndarray):
            - Audio timestep classifications that are the usual output of a
              CNN-RNN model

        b (np.ndarray):
            - Numerator coefficients of the low-pass filter

        a (np.ndarray):
            - Denominator coefficients of the low-pass filter

    Returns:
        - Local score array that has been filtered by the low-pass filter
    """
    assert isinstance(local_score_arr, (np.ndarray, list))
    assert isinstance(b, np.ndarray)
    assert isinstance(a, np.ndarray)

    return filtfilt(b, a, local_score_arr)


def local_score_filtering(local_score_arr, normalized_cutoff, order):
    """
    Convenience wrapper around build_low_pass_filter() and filter_data()
    for users without a DSP background.

    Args:
        local_score_arr (list or np.ndarray):
            - Audio timestep classifications that are the usual output of a
              CNN-RNN model

        normalized_cutoff (float):
            - Cutoff frequency as a fraction of the Nyquist frequency,
              i.e., the portion of the frequency domain kept in the passband

        order (int):
            - Filter order. A higher order yields a sharper cutoff and more
              effective filtering, at the cost of more computation

    Returns:
        - Local score array that has been filtered by a low-pass filter
    """
    assert isinstance(local_score_arr, (np.ndarray, list))
    assert isinstance(normalized_cutoff, float)
    assert 0.0 < normalized_cutoff < 1.0
    b, a = build_low_pass_filter(normalized_cutoff=normalized_cutoff, order=order)

    return filter_data(b=b, a=a, local_score_arr=local_score_arr)


# Helper function that can help people understand the frequency domain of
# their local score arrays.
#def local_score_freq_domain(local_scores, save_fig=False, fig_name=None, a=None, b=None):
#    if a is not None and b is not None:
#        local_scores = filter_data(local_scores, b, a)
#
#    local_score_freq = fft(local_scores)
#    plt.subplot(2, 1, 1)
#    plt.plot(local_scores)
#    plt.title("Local Score Array")
#    plt.xlabel("20ms timestep count")
#    plt.ylabel("Timestep Score")
#    plt.subplot(2, 1, 2)
#    plt.plot(np.log(np.abs(local_score_freq[0:int(len(local_score_freq) / 2)])))
#    plt.title("Local Score Array Frequency Representation")
#    plt.ylabel("Log Power")
#    plt.xlabel("Frequency bin")
#    plt.grid()
#    plt.tight_layout()
#    if save_fig and fig_name is not None:
#        plt.savefig(fig_name)
#    else:
#        plt.show()
#    plt.clf()
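As a sanity check of the three functions above, here is a minimal self-contained sketch on synthetic data (the cutoff and order values are illustrative): a slow sinusoidal envelope plus high-frequency jitter is smoothed without changing its length.

import numpy as np
from PyHa.dsp_tools import local_score_filtering

# Synthetic "local score" array: a slow envelope (worth keeping) plus
# high-frequency jitter (the noise the low-pass filter should remove).
rng = np.random.default_rng(0)
t = np.linspace(0, 10, 500)
local_scores = 0.5 + 0.4 * np.sin(2 * np.pi * 0.2 * t) + 0.1 * rng.standard_normal(500)

# Keep the lowest 10% of the normalized frequency range (relative to
# Nyquist) with a 4th-order Butterworth filter.
smoothed = local_score_filtering(local_scores, normalized_cutoff=0.1, order=4)

assert len(smoothed) == len(local_scores)  # filtfilt preserves length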
2 changes: 1 addition & 1 deletion PyHa/statistics.py
@@ -281,7 +281,7 @@ def automated_labeling_statistics(
print("Processed", num_processed, "clips in", int((time.time() - start_time) * 10) / 10.0, 'seconds')
start_time = time.time()
if num_errors > 0:
checkVerbose("Something went wrong with" + num_errors + "clips out of" + str(len(clips)) + "clips", verbose)
checkVerbose("Something went wrong with " + str(num_errors) + " clips out of " + str(len(clips)) + " clips", verbose)
statistics_df.reset_index(inplace=True, drop=True)
return statistics_df

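The corrected line converts num_errors with str() and restores the missing spaces. An equivalent f-string, offered only as a style suggestion and not part of this PR, would be:

checkVerbose(f"Something went wrong with {num_errors} clips out of {len(clips)} clips", verbose)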
9 changes: 8 additions & 1 deletion PyHa/visualizations.py
@@ -2,6 +2,7 @@
from .microfaune_package.microfaune import audio
from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
from .tweetynet_package.tweetynet.Load_data_functions import compute_features
from .dsp_tools import local_score_filtering
import torch
import librosa
import matplotlib.pyplot as plt
@@ -185,7 +186,7 @@ def local_line_graph(
None
"""

-assert isinstance(local_scores,list)
+assert isinstance(local_scores,list) or isinstance(local_scores,np.ndarray)
assert isinstance(clip_name,str)
assert isinstance(sample_rate,int)
assert sample_rate > 0
@@ -453,6 +454,12 @@ def spectrogram_visualization(

# If local scores were generated, plot them AND spectrogram
if (local_scores is not None):
if "filter_local_scores" in dict.fromkeys(isolation_parameters):
assert isinstance(isolation_parameters["filter_local_scores"],tuple)
assert len(isolation_parameters["filter_local_scores"]) == 2
normalized_cutoff_freq = isolation_parameters["filter_local_scores"][0]
order = isolation_parameters["filter_local_scores"][1]
local_scores = local_score_filtering(local_scores,normalized_cutoff_freq,order)
local_line_graph(
local_scores,
clip_path,
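A minimal sketch of how the filtering reaches the plot, assuming the tutorial-style call signature of spectrogram_visualization; the clip path and parameter values are illustrative assumptions:

from PyHa.visualizations import spectrogram_visualization

isolation_parameters = {
    "model": "microfaune",
    "technique": "simple",
    "threshold_type": "median",
    "threshold_const": 2.0,
    "chunk_size": 5.0,
    # Low-pass filter the local score curve before it is drawn.
    "filter_local_scores": (0.25, 4),
}

spectrogram_visualization(
    "./TEST/ScreamingPiha2.wav",
    automated_df=True,
    isolation_parameters=isolation_parameters,
)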
535 changes: 247 additions & 288 deletions PyHa_Tutorial.ipynb

Large diffs are not rendered by default.