Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

整理: length 無音付加のCoreAdapter 移植 #1001

Merged
merged 2 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions voicevox_engine/core_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,22 @@ def is_initialized_style_id_synthesis(self, style_id: StyleId) -> bool:
def safe_yukarin_s_forward(
self, phoneme_list_s: NDArray[np.int64], style_id: StyleId
) -> NDArray[np.float32]:
# 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
# 「指定スタイルを初期化」「mutexによる安全性」「コア仕様に従う無音付加」「系列長・データ型に関するアダプター」を提供する
self.initialize_style_id_synthesis(style_id, skip_reinit=True)

# 前後無音を付加する(詳細: voicevox_engine#924)
phoneme_list_s = np.r_[0, phoneme_list_s, 0]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

今気づいたのですが、リテラルの 0 を足すというよりは、pau の音素 ID(`Phoneme("pau").phoneme_id`)を足すのが処理として正しそうな気がしました・・・!

core_adapter から Phoneme を import するのが微妙であれば、CoreAdapter に pau の音素 ID を DI(依存性注入)する手もあるかも・・・?
(ちょっと自信ないです 🙇 )


with self.mutex:
phoneme_length = self.core.yukarin_s_forward(
length=len(phoneme_list_s),
phoneme_list=phoneme_list_s,
style_id=np.array(style_id, dtype=np.int64).reshape(-1),
)

# 前後無音に相当する領域を破棄する
phoneme_length = phoneme_length[1:-1]

return phoneme_length

def safe_yukarin_sa_forward(
Expand All @@ -89,7 +97,7 @@ def safe_yukarin_sa_forward(
end_accent_phrase_list: NDArray[np.int64],
style_id: StyleId,
) -> NDArray[np.float32]:
# 「指定スタイルを初期化」「mutexによる安全性」「コア仕様に従う無音自動付加」「系列長・データ型に関するアダプター」を提供する
# 「指定スタイルを初期化」「mutexによる安全性」「コア仕様に従う無音付加」「系列長・データ型に関するアダプター」を提供する
self.initialize_style_id_synthesis(style_id, skip_reinit=True)

# 前後無音を付加する(詳細: voicevox_engine#924)
Expand Down
7 changes: 3 additions & 4 deletions voicevox_engine/tts_pipeline/tts_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,8 @@ def update_length(
# モーラ系列を抽出する
moras = to_flatten_moras(accent_phrases)

# 音素系列を抽出し前後無音を付加する
# 音素系列を抽出する
phonemes = to_flatten_phonemes(moras)
phonemes = [Phoneme("pau")] + phonemes + [Phoneme("pau")]

# 音素クラスから音素IDスカラへ表現を変換する
phoneme_ids = np.array([p.phoneme_id for p in phonemes], dtype=np.int64)
Expand All @@ -278,8 +277,8 @@ def update_length(
if mora.consonant is None:
mora.consonant_length = None
else:
mora.consonant_length = phoneme_lengths[vowel_indexes[i + 1] - 1]
mora.vowel_length = phoneme_lengths[vowel_indexes[i + 1]]
mora.consonant_length = phoneme_lengths[vowel_indexes[i] - 1]
mora.vowel_length = phoneme_lengths[vowel_indexes[i]]

return accent_phrases

Expand Down