Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

修正: 話速によって前後の無音時間が変わらないようにする #1321

Open
wants to merge 9 commits into
base: master
Choose a base branch
from

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 12 additions & 9 deletions test/unit/tts_pipeline/test_wave_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,23 +258,26 @@ def test_query_to_decoder_feature() -> None:
# Expects
# frame_per_phoneme
# Pre k o N pau h i h O Pst
true_frame_per_phoneme = [1, 1, 2, 2, 2, 1, 2, 2, 1, 3]
true_frame_per_phoneme = [2, 1, 2, 2, 2, 1, 2, 2, 1, 6]
n_frame = sum(true_frame_per_phoneme)
# phoneme
# Pr k o o N N pau pau h i i h h O Pt Pt Pt
frame_phoneme_idxs = [0, 23, 30, 30, 4, 4, 0, 0, 19, 21, 21, 19, 19, 5, 0, 0, 0]
# Pr Pr k o o N N pau pau
frame_phoneme_idxs = [0, 0, 23, 30, 30, 4, 4, 0, 0]
# h i i h h O Pt Pt Pt Pt Pt Pt
frame_phoneme_idxs += [19, 21, 21, 19, 19, 5, 0, 0, 0, 0, 0, 0]

true_phoneme = np.zeros([n_frame, TRUE_NUM_PHONEME], dtype=np.float32)
for frame_idx, phoneme_idx in enumerate(frame_phoneme_idxs):
true_phoneme[frame_idx, phoneme_idx] = 1.0
# Pitch
# paw ko N pau hi hO paw
# frame_per_vowel = [1, 3, 2, 1, 3, 3, 3]
# pau ko ko ko N N
true1_f0 = [0.0, 22.0, 22.0, 22.0, 22.0, 22.0]
# pau pau hi hi hi
# frame_per_vowel = [2, 3, 2, 2, 3, 3, 6]
# pau pau ko ko ko N N
true1_f0 = [0.0, 0.0, 22.0, 22.0, 22.0, 22.0, 22.0]
# pau pau hi hi hi
true2_f0 = [0.0, 0.0, 28.0, 28.0, 28.0]
# hO hO hO paw paw paw
true3_f0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
# hO hO hO paw paw paw paw paw paw
true3_f0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
true_f0 = np.array(true1_f0 + true2_f0 + true3_f0, dtype=np.float32)

# Outputs
Expand Down
2 changes: 1 addition & 1 deletion voicevox_engine/tts_pipeline/tts_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,10 @@ def _query_to_decoder_feature(
moras = to_flatten_moras(query.accent_phrases)

# 設定を適用する
moras = _apply_prepost_silence(moras, query)
moras = _apply_pause_length(moras, query)
moras = _apply_pause_length_scale(moras, query)
moras = _apply_speed_scale(moras, query)
moras = _apply_prepost_silence(moras, query)
moras = _apply_pitch_scale(moras, query)
moras = _apply_intonation_scale(moras, query)

Expand Down