NVIDIA · tango4j · Dec 21, 2024 · Dec 23, 2024 · Dec 24, 2024 · Dec 24, 2024
diff --git a/...peaker_tasks/diarization/conf/post_processing/sortformer_diar_4spk-v1_callhome-part1.yaml b/...peaker_tasks/diarization/conf/post_processing/sortformer_diar_4spk-v1_callhome-part1.yaml
@@ -5,9 +5,9 @@
 # These parameters were optimized on CallHome Dataset from the NIST SRE 2000 Disc8, especially from the part1 (callhome1) specified in: Kaldi, “Kaldi x-vector recipe v2,” https://github.com/kaldi-asr/kaldi/blob/master/egs/callhome_diarization/v2/run.sh
 # Trial 24682 finished with value: 0.10257785779242055 and parameters: {'onset': 0.53, 'offset': 0.49, 'pad_onset': 0.23, 'pad_offset': 0.01, 'min_duration_on': 0.42, 'min_duration_off': 0.34}. Best is trial 24682 with value: 0.10257785779242055.
 parameters: 
-  onset: 0.53  # Onset threshold for detecting the beginning and end of a speech
-  offset: 0.49  # Offset threshold for detecting the end of a speech
-  pad_onset: 0.23  # Adding durations before each speech segment
-  pad_offset: 0.01  # Adding durations after each speech segment
-  min_duration_on: 0.42  # Threshold for small non-speech deletion
-  min_duration_off: 0.34  # Threshold for short speech segment deletion
+  onset: 0.53  # Onset threshold for detecting the beginning of a speech segment
+  offset: 0.49  # Offset threshold for detecting the end of a speech segment
+  pad_onset: 0.23  # Adds the specified duration at the beginning of each speech segment
+  pad_offset: 0.01  # Adds the specified duration at the end of each speech segment
+  min_duration_on: 0.42  # Removes short speech segments if the duration is less than the specified minimum duration
+  min_duration_off: 0.34  # Removes short silences if the duration is less than the specified minimum duration
diff --git a/...s/speaker_tasks/diarization/conf/post_processing/sortformer_diar_4spk-v1_dihard3-dev.yaml b/...s/speaker_tasks/diarization/conf/post_processing/sortformer_diar_4spk-v1_dihard3-dev.yaml
@@ -5,9 +5,9 @@
 # These parameters were optimized on the development split of DIHARD3 dataset (See https://arxiv.org/pdf/2012.01477).
 # Trial 732 finished with value: 0.12171946949255649 and parameters: {'onset': 0.64, 'offset': 0.74, 'pad_onset': 0.06, 'pad_offset': 0.0, 'min_duration_on': 0.1, 'min_duration_off': 0.15}. Best is trial 732 with value: 0.12171946949255649. 
 parameters: 
-  onset: 0.64  # Onset threshold for detecting the beginning and end of a speech
-  offset: 0.74  # Offset threshold for detecting the end of a speech
-  pad_onset: 0.06  # Adding durations before each speech segment
-  pad_offset: 0.0  # Adding durations after each speech segment
-  min_duration_on: 0.1  # Threshold for small non-speech deletion
-  min_duration_off: 0.15  # Threshold for short speech segment deletion
+  onset: 0.64  # Onset threshold for detecting the beginning of a speech segment
+  offset: 0.74  # Offset threshold for detecting the end of a speech segment
+  pad_onset: 0.06  # Adds the specified duration at the beginning of each speech segment
+  pad_offset: 0.0  # Adds the specified duration at the end of each speech segment
+  min_duration_on: 0.1  # Removes short speech segments if the duration is less than the specified minimum duration
+  min_duration_off: 0.15  # Removes short silences if the duration is less than the specified minimum duration