Local score low pass filtering #151

Open · wants to merge 8 commits into base: main
11 changes: 11 additions & 0 deletions PyHa/IsoAutio.py
@@ -3,6 +3,7 @@
from .microfaune_package.microfaune import audio
from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
from .tweetynet_package.tweetynet.Load_data_functions import compute_features, predictions_to_kaleidoscope
from .dsp_tools import local_score_filtering
import os
import torch
import librosa
@@ -179,6 +180,16 @@ def isolate(
# local_scores[ndx] = local_scores[ndx] / local_scores_max
# initializing the output dataframe that will contain labels across a
# single clip

# Filtering the local score arrays if desired
if "filter_local_scores" in isolation_parameters:
    assert isinstance(isolation_parameters["filter_local_scores"], tuple)
    assert len(isolation_parameters["filter_local_scores"]) == 2
    normalized_cutoff_freq, order = isolation_parameters["filter_local_scores"]
    local_scores = local_score_filtering(local_scores, normalized_cutoff_freq, order)


isolation_df = pd.DataFrame()

# deciding which isolation technique to deploy for a given clip based on
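To see the new option end to end, here is a minimal sketch of an isolation_parameters dictionary. Only "filter_local_scores" comes from this PR; the other key names mirror the PyHa tutorial, and the cutoff, order, and audio directory values are illustrative assumptions.

from PyHa.IsoAutio import generate_automated_labels

# The tuple is (normalized_cutoff_freq, order): keep the lowest 25% of the
# normalized frequency range using a 4th-order Butterworth low-pass filter.
isolation_parameters = {
    "model": "microfaune",
    "technique": "steinberg",
    "threshold_type": "median",
    "threshold_const": 2.0,
    "threshold_min": 0.0,
    "window_size": 2.0,
    "chunk_size": 5.0,
    "filter_local_scores": (0.25, 4),
}

automated_df = generate_automated_labels("./TEST/", isolation_parameters)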
4 changes: 3 additions & 1 deletion PyHa/annotation_post_processing.py
@@ -24,7 +24,7 @@ def annotation_chunker(kaleidoscope_df, chunk_length):

#Init list of clips to cycle through and output dataframe
clips = kaleidoscope_df["IN FILE"].unique()
-df_columns = {'IN FILE' :'str', 'CLIP LENGTH' : 'float64', 'CHANNEL' : 'int64', 'OFFSET' : 'float64',
+df_columns = {'FOLDER': 'str','IN FILE' :'str', 'CLIP LENGTH' : 'float64', 'CHANNEL' : 'int64', 'OFFSET' : 'float64',
'DURATION' : 'float64', 'SAMPLE RATE' : 'int64','MANUAL ID' : 'str'}
output_df = pd.DataFrame({c: pd.Series(dtype=t) for c, t in df_columns.items()})

@@ -34,6 +34,7 @@ def annotation_chunker(kaleidoscope_df, chunk_length):
birds = clip_df["MANUAL ID"].unique()
sr = clip_df["SAMPLE RATE"].unique()[0]
clip_len = clip_df["CLIP LENGTH"].unique()[0]
folder = clip_df["FOLDER"].unique()[0]

# quick data sanitization to remove very short clips
# do not consider any chunk that is less than chunk_length
@@ -68,6 +69,7 @@ def annotation_chunker(kaleidoscope_df, chunk_length):
row = pd.DataFrame(index = [0])
annotation_start = chunk_start / 1000
#updating the dictionary
row["FOLDER"] = folder
row["IN FILE"] = clip
row["CLIP LENGTH"] = clip_len
row["OFFSET"] = annotation_start
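With the new FOLDER column, input dataframes must now carry it. A minimal sketch follows; the values are illustrative, and the column set mirrors the df_columns dictionary above.

import pandas as pd
from PyHa.annotation_post_processing import annotation_chunker

# Kaleidoscope-style input; FOLDER is now required alongside the old columns.
kaleidoscope_df = pd.DataFrame({
    "FOLDER": ["./TEST/"],
    "IN FILE": ["clip1.wav"],
    "CHANNEL": [0],
    "CLIP LENGTH": [60.0],
    "SAMPLE RATE": [44100],
    "OFFSET": [12.3],
    "DURATION": [4.2],
    "MANUAL ID": ["bird"],
})

# Each chunk overlapping an annotation becomes one output row,
# and FOLDER is carried through to that row.
chunked_df = annotation_chunker(kaleidoscope_df, chunk_length=3)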
105 changes: 105 additions & 0 deletions PyHa/dsp_tools.py
@@ -0,0 +1,105 @@
from scipy.signal import butter, filtfilt
from scipy.fft import fft
import matplotlib.pyplot as plt
import numpy as np


def build_low_pass_filter(normalized_cutoff, order):
    """
    Wrapper around scipy.signal.butter that generates low-pass filter
    coefficients used to remove the high-frequency noise observed in
    CNN-RNN local score arrays.

    Args:
        normalized_cutoff (float):
            - Cutoff frequency as a fraction of the Nyquist frequency,
              i.e., the portion of the frequency domain kept in the passband

        order (int):
            - Filter order. A higher order yields a sharper cutoff and more
              effective filtering, at the cost of more computation

    Returns:
        - Numerator and denominator coefficients of the low-pass filter (ndarray)
    """
    assert isinstance(normalized_cutoff, float)
    assert 0.0 < normalized_cutoff < 1.0
    assert isinstance(order, int)

    b, a = butter(order, normalized_cutoff, btype='low', analog=False)
    return b, a


def filter_data(local_score_arr, b, a):
    """
    Wrapper around scipy.signal.filtfilt that applies the filter forwards and
    backwards (zero-phase), guaranteeing that the output is the same length
    as the local score array passed in, using the coefficients produced by
    build_low_pass_filter.

    Args:
        local_score_arr (list or np.ndarray):
            - Audio timestep classifications that are the usual output of a
              CNN-RNN model

        b (np.ndarray):
            - Numerator coefficients of the low-pass filter

        a (np.ndarray):
            - Denominator coefficients of the low-pass filter

    Returns:
        - Local score array that has been filtered by the low-pass filter
    """
    assert isinstance(local_score_arr, (np.ndarray, list))
    assert isinstance(b, np.ndarray)
    assert isinstance(a, np.ndarray)

    return filtfilt(b, a, local_score_arr)


def local_score_filtering(local_score_arr, normalized_cutoff, order):
    """
    Convenience wrapper around build_low_pass_filter() and filter_data()
    for users without a DSP background.

    Args:
        local_score_arr (list or np.ndarray):
            - Audio timestep classifications that are the usual output of a
              CNN-RNN model

        normalized_cutoff (float):
            - Cutoff frequency as a fraction of the Nyquist frequency,
              i.e., the portion of the frequency domain kept in the passband

        order (int):
            - Filter order. A higher order yields a sharper cutoff and more
              effective filtering, at the cost of more computation

    Returns:
        - Local score array that has been filtered by a low-pass filter
    """
    assert isinstance(local_score_arr, (np.ndarray, list))
    assert isinstance(normalized_cutoff, float)
    assert 0.0 < normalized_cutoff < 1.0
    b, a = build_low_pass_filter(normalized_cutoff=normalized_cutoff, order=order)

    return filter_data(b=b, a=a, local_score_arr=local_score_arr)


# Helper function that can help people understand the frequency domain of
# their local score arrays.
#def local_score_freq_domain(local_scores, save_fig=False, fig_name=None, a=None, b=None):
#    if a is not None and b is not None:
#        local_scores = filter_data(local_scores, b, a)
#
#    local_score_freq = fft(local_scores)
#    plt.subplot(2, 1, 1)
#    plt.plot(local_scores)
#    plt.title("Local Score Array")
#    plt.xlabel("20ms timestep count")
#    plt.ylabel("Timestep Score")
#    plt.subplot(2, 1, 2)
#    plt.plot(np.log(np.abs(local_score_freq[0:int(len(local_score_freq) / 2)])))
#    plt.title("Local Score Array Frequency Representation")
#    plt.ylabel("Log Power")
#    plt.xlabel("Frequency bin")
#    plt.grid()
#    plt.tight_layout()
#    if save_fig and fig_name is not None:
#        plt.savefig(fig_name)
#    else:
#        plt.show()
#    plt.clf()
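As a sanity check of the three functions above, here is a minimal self-contained sketch on synthetic data (the cutoff and order values are illustrative): a slow sinusoidal envelope plus high-frequency jitter is smoothed without changing its length.

import numpy as np
from PyHa.dsp_tools import local_score_filtering

# Synthetic "local score" array: a slow envelope (worth keeping) plus
# high-frequency jitter (the noise the low-pass filter should remove).
rng = np.random.default_rng(0)
t = np.linspace(0, 10, 500)
local_scores = 0.5 + 0.4 * np.sin(2 * np.pi * 0.2 * t) + 0.1 * rng.standard_normal(500)

# Keep the lowest 10% of the normalized frequency range (relative to
# Nyquist) with a 4th-order Butterworth filter.
smoothed = local_score_filtering(local_scores, normalized_cutoff=0.1, order=4)

assert len(smoothed) == len(local_scores)  # filtfilt preserves length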
2 changes: 1 addition & 1 deletion PyHa/statistics.py
@@ -281,7 +281,7 @@ def automated_labeling_statistics(
print("Processed", num_processed, "clips in", int((time.time() - start_time) * 10) / 10.0, 'seconds')
start_time = time.time()
if num_errors > 0:
checkVerbose("Something went wrong with" + num_errors + "clips out of" + str(len(clips)) + "clips", verbose)
checkVerbose("Something went wrong with " + str(num_errors) + " clips out of " + str(len(clips)) + " clips", verbose)
statistics_df.reset_index(inplace=True, drop=True)
return statistics_df

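The corrected line converts num_errors with str() and restores the missing spaces. An equivalent f-string, offered only as a style suggestion and not part of this PR, would be:

checkVerbose(f"Something went wrong with {num_errors} clips out of {len(clips)} clips", verbose)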
9 changes: 8 additions & 1 deletion PyHa/visualizations.py
@@ -2,6 +2,7 @@
from .microfaune_package.microfaune import audio
from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
from .tweetynet_package.tweetynet.Load_data_functions import compute_features
from .dsp_tools import local_score_filtering
import torch
import librosa
import matplotlib.pyplot as plt
@@ -185,7 +186,7 @@ def local_line_graph(
None
"""

-assert isinstance(local_scores,list)
+assert isinstance(local_scores,list) or isinstance(local_scores,np.ndarray)
assert isinstance(clip_name,str)
assert isinstance(sample_rate,int)
assert sample_rate > 0
@@ -453,6 +454,12 @@ def spectrogram_visualization(

# If local scores were generated, plot them AND spectrogram
if (local_scores is not None):
if "filter_local_scores" in dict.fromkeys(isolation_parameters):
assert isinstance(isolation_parameters["filter_local_scores"],tuple)
assert len(isolation_parameters["filter_local_scores"]) == 2
normalized_cutoff_freq = isolation_parameters["filter_local_scores"][0]
order = isolation_parameters["filter_local_scores"][1]
local_scores = local_score_filtering(local_scores,normalized_cutoff_freq,order)
local_line_graph(
local_scores,
clip_path,
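A minimal sketch of how the filtering reaches the plot, assuming the tutorial-style call signature of spectrogram_visualization; the clip path and parameter values are illustrative assumptions:

from PyHa.visualizations import spectrogram_visualization

isolation_parameters = {
    "model": "microfaune",
    "technique": "simple",
    "threshold_type": "median",
    "threshold_const": 2.0,
    "chunk_size": 5.0,
    # Low-pass filter the local score curve before it is drawn.
    "filter_local_scores": (0.25, 4),
}

spectrogram_visualization(
    "./TEST/ScreamingPiha2.wav",
    automated_df=True,
    isolation_parameters=isolation_parameters,
)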
535 changes: 247 additions & 288 deletions PyHa_Tutorial.ipynb

Large diffs are not rendered by default.