forked from srvk/srvk-eesen-offline-transcriber
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run-scored.sh
executable file
·46 lines (35 loc) · 1.33 KB
/
run-scored.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash
#
# Shell script to run eesen offline transcriber on TEDLIUM test data
# and produce scoring data. Takes as input a pair of corpus files, e.g.
# GaryFlake_2010.stm and GaryFlake_2010.sph
#
# Usage: run-scored.sh <file>
#   <file>  audio file (.sph, .wav, .mp3, ...). A transcript with the same
#           base name and a .stm extension must sit in the same directory.
#
# Must be started from the directory that holds path.sh, glm and the
# Makefile: every path below (build/..., glm, path.sh) is relative to the
# current working directory.
#
# Exit status: 0 on success, 1 on a usage error or as soon as any step fails.

# Print a message on stderr and abort the run.
die() {
  printf 'run-scored.sh: %s\n' "$*" >&2
  exit 1
}

if [[ $# -ne 1 ]]; then
  {
    echo "Usage: run-scored.sh <file>"
    echo "where <file> may have extension like .sph .wav .mp3"
    echo
    echo "./run-scored.sh /vagrant/GaryFlake_2010.wav"
  } >&2
  exit 1
fi

input=$1

# NOTE: these variable names are kept as they were because they are set
# before path.sh is sourced; the sourced file could read them.
filename=$(basename -- "$input")
dirname=$(dirname -- "$input")
extension="${filename##*.}"
basename="${filename%.*}"

stm="$dirname/$basename.stm"
audio_dir=build/audio/base
wav="$audio_dir/$basename.wav"

# Fail fast on missing inputs instead of producing an empty segment file
# and only failing at the final copy.
[[ -e "$input" ]] || die "input file not found: $input"
[[ -f "$stm" ]] || die "transcript not found: $stm"

# Not checked on purpose: the exit status of a sourced file is that of its
# last command, which says nothing reliable about whether setup succeeded.
. path.sh

mkdir -p "$audio_dir" || die "cannot create $audio_dir"

# un-shorten-ify SPH files
if [[ "$extension" == "sph" ]]; then
  unshortened="$audio_dir/$basename.unshorten"
  sph2pipe "$input" > "$unshortened" || die "sph2pipe failed on $input"
  # Convert to single-channel audio resampled to 16k.
  sox "$unshortened" -c 1 "$wav" rate -v 16k \
    || die "sox failed on $unshortened"
else
  sox "$input" -c 1 "$wav" rate -v 16k || die "sox failed on $input"
fi

# 8k
# sox $1 -c 1 -e signed-integer build/audio/base/$basename.wav rate -v 8k

seg_dir="build/diarization/$basename"
mkdir -p "$seg_dir" || die "cannot create $seg_dir"

# make segments from the .stm transcript
# Lines mentioning inter_segment_gap or ignore_time_segment_in_scoring are
# dropped. For the rest, fields 4 and 5 are multiplied by 100 and printed as
# a start value and a length (presumably start/end times in seconds turned
# into centiseconds -- confirm against the STM format). OFMT="%.0f" makes
# awk print non-integer results rounded to whole numbers.
grep -v -e "inter_segment_gap" -e "ignore_time_segment_in_scoring" -- "$stm" \
  | awk '{OFMT = "%.0f"; print $1,$2,$4*100,($5-$4)*100,"M S U",$2}' \
    > "$seg_dir/show.seg" \
  || die "cannot build $seg_dir/show.seg"

trans_dir="build/trans/$basename"
make SEGMENTS=show.seg "$trans_dir/wav.scp" \
  || die "make failed for $trans_dir/wav.scp"

# Reference transcript and glm file are placed next to the decoding data.
cp -- "$stm" "$trans_dir/stm" || die "cannot copy $stm to $trans_dir/stm"
# Trailing slash: fail rather than create a plain file named like the
# directory if make did not create it.
cp glm "$trans_dir/" || die "cannot copy glm to $trans_dir/"

# Brace expansion yields one make target per output format.
make "build/output/$basename".{txt,trs,ctm,sbv,srt,labels} \
  || die "make failed for build/output/$basename outputs"