-
Notifications
You must be signed in to change notification settings - Fork 18
/
_natlink_save_audio.py
140 lines (119 loc) · 4.42 KB
/
_natlink_save_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# Adapted by Danesprite from natlink grammar example file
# "_samplehypothesis.py" and the original dragonfly-save-audio patch by dwks.
# For more info, see https://github.com/dwks/dragonfly-save-audio
#
# Natlink grammar module to save audio and words from natlink into a
# specified directory. This module generates .txt files with the words and
# recognition type. For example:
#
# Words: hello world
# Type: dictation
#
# 'SAVE_DIR' must be set to an existing absolute directory, otherwise the
# grammar is not loaded and this module prints "Not saving audio.".
#
# There are four recognition types:
#
# 1. "dictation" - speech that used dictation words only.
# 2. "grammar" - speech that used grammar words only.
# 3. "mixed" - speech that used grammar and dictation words.
# 4. "reject" - speech data recognised as noise.
#
# Saving rejects is disabled by default because incomplete utterances for
# grammar rules will also be included, which may not be desirable.
#
# This grammar's rules can be used to start and stop recording audio or
# noise.
#
# TODO Remove Dragon's formatting from dictation output.
# E.g. "\cap\cap" -> "cap"
import os
import time
import natlink
from natlinkutils import GrammarBase
import _dragonfly_local as local
class SaveAudioGrammar(GrammarBase):
    """Grammar that saves recognition audio and words into SAVE_DIR.

    For each saved recognition a "rec-<timestamp>.wav" file with the audio
    and a "rec-<timestamp>.txt" file with the words and recognition type
    are written. The grammar's own rules toggle saving of audio and of
    rejects at runtime.
    """

    gramSpec = """
        <RecordState> exported = (start|stop) saving audio;
        <NoiseState> exported = (start|stop) saving (noise|rejects);
    """

    def initialize(self):
        """Load and activate the grammar and set the default save state."""
        # allResults=1 is requested so that gotResultsObject also sees
        # recognitions that do not belong to this grammar.
        self.load(self.gramSpec, hypothesis=1, allResults=1)
        self.activateAll()
        self.enabled = True  # Start saving audio by default.
        self.saveRejects = False

    @classmethod
    def getResultType(cls, details, resObj):
        """Classify a recognition.

        :param details: recognition kind passed to gotResultsObject.
        :param resObj: result object providing getResults(0), a sequence
            of (word, rule number) pairs.
        :returns: "reject", "grammar", "mixed" or "dictation".
        """
        if details == "reject":
            return "reject"

        # Check the rules for each word.
        # Anything between 1 and 1000000 (exclusive) is a grammar word.
        # 0 is used as the rule number for free-form dictation.
        # NOTE(review): 1000000 appears to mark dictation words embedded in
        # a grammar rule - confirm against the natlink documentation.
        rules = [r for _, r in resObj.getResults(0)]
        if any(0 < r < 1000000 for r in rules):
            return "mixed" if 1000000 in rules else "grammar"
        return "dictation"

    def handleSelfResults(self, resObj):
        """Apply a start/stop command recognised by this grammar.

        :param resObj: result object for a recognition of this grammar.
        """
        # Handle results for this grammar using the first word and rule ID.
        word, ruleId = resObj.getResults(0)[0]

        # Start/stop recording audio/rejects.
        # NOTE(review): rule IDs 1 and 2 presumably correspond to
        # <RecordState> and <NoiseState> in declaration order - confirm.
        value = word == "start"
        if ruleId == 1:
            self.enabled = value
        elif ruleId == 2:
            self.saveRejects = value
        print(" ".join(resObj.getWords(0)))

    def gotResultsObject(self, details, resObj):
        """Save the audio and words of a recognition, if saving is enabled.

        :param details: recognition kind; "reject" and "self" are handled
            specially here.
        :param resObj: result object providing getWords(0) and getWave().
        """
        # Get any words from the result object.
        try:
            if details == "reject":
                words = "<???>"
            else:
                words = " ".join(resObj.getWords(0))
        except (natlink.OutOfRange, IndexError):
            # No usable words: treat the recognition as a reject.
            details = "reject"
            words = "<???>"

        # Get the audio from the result object if there is any.
        try:
            wav = resObj.getWave()
        except natlink.DataMissing:
            wav = []

        # Save audio if there is any and if SAVE_DIR exists.
        # Only save rejects if specified.
        isReject = words == "<???>"
        shouldSave = (
            len(wav) > 0 and self.enabled and os.path.isdir(SAVE_DIR)
            and (not isReject or self.saveRejects)
        )
        if shouldSave:
            name = "rec-%.03f" % time.time()

            # Context managers close the files even if a write fails.
            with open(os.path.join(SAVE_DIR, name + ".wav"), "wb") as f:
                f.write(wav)

            with open(os.path.join(SAVE_DIR, name + ".txt"), "w") as f:
                f.write("Words: %s\n" % str(words))
                f.write("Type: %s\n" % self.getResultType(details, resObj))

        # Handle this grammar's control rules after recording.
        if details == "self":
            self.handleSelfResults(resObj)
# Destination directory for the generated .wav and .txt files.
# The grammar is only loaded when this is an existing, absolute directory.
SAVE_DIR = local.SAVE_AUDIO_DIR
if os.path.isabs(SAVE_DIR) and os.path.isdir(SAVE_DIR):
    # Valid directory: create the grammar and load it.
    grammar = SaveAudioGrammar()
    grammar.initialize()
    print("Saving audio.")
else:
    # Unusable directory: leave the grammar unloaded.
    grammar = None
    print("Not saving audio.")
def unload():
    """Unload the module-level grammar, if any, and clear the reference."""
    global grammar
    loaded = grammar
    if loaded:
        loaded.unload()
    # Always reset the reference, whether or not a grammar was loaded.
    grammar = None