From 5a58a274926c39d991d49b19d0d0a4fddc5992a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=AD=90?= <54951765+kslz@users.noreply.github.com> Date: Tue, 23 Aug 2022 12:55:18 +0800 Subject: [PATCH] =?UTF-8?q?[TTS]=E6=8C=87=E5=AE=9AG2PW=E7=9A=84=E4=BC=A0?= =?UTF-8?q?=E5=85=A5=E6=95=B0=E6=8D=AE=E7=B1=BB=E5=9E=8B=20,=20test=3Dtts?= =?UTF-8?q?=20(#2288)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix ONNXRuntimeError Specify data type (int64),test=tts * Tactron2→Tacotron2 ,test=doc --- docs/source/released_model.md | 2 +- docs/source/tts/quick_start.md | 4 ++-- docs/source/tts/quick_start_cn.md | 4 ++-- docs/tutorial/tts/tts_tutorial.ipynb | 2 +- examples/aishell3/README.md | 4 ++-- examples/csmsc/README.md | 2 +- examples/ljspeech/README.md | 2 +- examples/vctk/README.md | 2 +- paddlespeech/t2s/frontend/g2pw/dataset.py | 10 +++++----- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index a1e3eb87955..8d0ff1d47cd 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -67,7 +67,7 @@ WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tr Model Type | Dataset| Example Link | Pretrained Models :-------------:| :------------:| :-----: | :-----: | GE2E| AISHELL-3, etc. |[ge2e](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/ge2e)|[ge2e_ckpt_0.3.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip) -GE2E + Tactron2| AISHELL-3 |[ge2e-tactron2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc0)|[tacotron2_aishell3_ckpt_vc0_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_aishell3_ckpt_vc0_0.2.0.zip) +GE2E + Tacotron2| AISHELL-3 |[ge2e-Tacotron2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc0)|[tacotron2_aishell3_ckpt_vc0_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_aishell3_ckpt_vc0_0.2.0.zip) GE2E + FastSpeech2 | AISHELL-3 |[ge2e-fastspeech2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc1)|[fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip) diff --git a/docs/source/tts/quick_start.md b/docs/source/tts/quick_start.md index bddee778663..d8dbc646ca3 100644 --- a/docs/source/tts/quick_start.md +++ b/docs/source/tts/quick_start.md @@ -7,7 +7,7 @@ The examples in PaddleSpeech are mainly classified by datasets, the TTS datasets * VCTK (English multiple speakers) The models in PaddleSpeech TTS have the following mapping relationship: -* tts0 - Tactron2 +* tts0 - Tacotron2 * tts1 - TransformerTTS * tts2 - SpeedySpeech * tts3 - FastSpeech2 @@ -17,7 +17,7 @@ The models in PaddleSpeech TTS have the following mapping relationship: * voc3 - MultiBand MelGAN * voc4 - Style MelGAN * voc5 - HiFiGAN -* vc0 - Tactron2 Voice Clone with GE2E +* vc0 - Tacotron2 Voice Clone with GE2E * vc1 - FastSpeech2 Voice Clone with GE2E ## Quick Start diff --git a/docs/source/tts/quick_start_cn.md b/docs/source/tts/quick_start_cn.md index 37246e84e9b..c56d9bb4592 100644 --- a/docs/source/tts/quick_start_cn.md +++ b/docs/source/tts/quick_start_cn.md @@ -9,7 +9,7 @@ PaddleSpeech 的 TTS 模型具有以下映射关系: -* tts0 - Tactron2 +* tts0 - Tacotron2 * tts1 - TransformerTTS * tts2 - SpeedySpeech * tts3 - FastSpeech2 @@ -19,7 +19,7 @@ PaddleSpeech 的 TTS 模型具有以下映射关系: * voc3 - MultiBand MelGAN * voc4 - Style MelGAN * voc5 - HiFiGAN -* vc0 - Tactron2 Voice Clone with GE2E +* vc0 - Tacotron2 Voice Clone with GE2E * vc1 - FastSpeech2 Voice Clone with GE2E ## 快速开始 diff --git a/docs/tutorial/tts/tts_tutorial.ipynb b/docs/tutorial/tts/tts_tutorial.ipynb index 81f713efa91..583adb01470 100644 --- a/docs/tutorial/tts/tts_tutorial.ipynb +++ b/docs/tutorial/tts/tts_tutorial.ipynb @@ -769,7 +769,7 @@ "```\n", "我们在每个数据集的 README.md 介绍了子目录和模型的对应关系, 在 TTS 中有如下对应关系:\n", "```text\n", - "tts0 - Tactron2\n", + "tts0 - Tacotron2\n", "tts1 - TransformerTTS\n", "tts2 - SpeedySpeech\n", "tts3 - FastSpeech2\n", diff --git a/examples/aishell3/README.md b/examples/aishell3/README.md index 273f488e454..191974dec66 100644 --- a/examples/aishell3/README.md +++ b/examples/aishell3/README.md @@ -1,6 +1,6 @@ # Aishell3 -* tts0 - Tactron2 +* tts0 - Tacotron2 * tts1 - TransformerTTS * tts2 - SpeedySpeech * tts3 - FastSpeech2 @@ -8,5 +8,5 @@ * voc1 - Parallel WaveGAN * voc2 - MelGAN * voc3 - MultiBand MelGAN -* vc0 - Tactron2 Voice Cloning with GE2E +* vc0 - Tacotron2 Voice Cloning with GE2E * vc1 - FastSpeech2 Voice Cloning with GE2E diff --git a/examples/csmsc/README.md b/examples/csmsc/README.md index 2aad609cbb4..77375faa816 100644 --- a/examples/csmsc/README.md +++ b/examples/csmsc/README.md @@ -1,7 +1,7 @@ # CSMSC -* tts0 - Tactron2 +* tts0 - Tacotron2 * tts1 - TransformerTTS * tts2 - SpeedySpeech * tts3 - FastSpeech2 diff --git a/examples/ljspeech/README.md b/examples/ljspeech/README.md index 67b1bf47315..ccafdb141b2 100644 --- a/examples/ljspeech/README.md +++ b/examples/ljspeech/README.md @@ -1,7 +1,7 @@ # LJSpeech -* tts0 - Tactron2 +* tts0 - Tacotron2 * tts1 - TransformerTTS * tts2 - SpeedySpeech * tts3 - FastSpeech2 diff --git a/examples/vctk/README.md b/examples/vctk/README.md index 4007c0319f4..ac5fd24f878 100644 --- a/examples/vctk/README.md +++ b/examples/vctk/README.md @@ -1,7 +1,7 @@ # VCTK -* tts0 - Tactron2 +* tts0 - Tacotron2 * tts1 - TransformerTTS * tts2 - SpeedySpeech * tts3 - FastSpeech2 diff --git a/paddlespeech/t2s/frontend/g2pw/dataset.py b/paddlespeech/t2s/frontend/g2pw/dataset.py index ab715dc361c..98af5f46324 100644 --- a/paddlespeech/t2s/frontend/g2pw/dataset.py +++ b/paddlespeech/t2s/frontend/g2pw/dataset.py @@ -81,12 +81,12 @@ def prepare_onnx_input(tokenizer, position_ids.append(position_id) outputs = { - 'input_ids': np.array(input_ids), - 'token_type_ids': np.array(token_type_ids), - 'attention_masks': np.array(attention_masks), + 'input_ids': np.array(input_ids).astype(np.int64), + 'token_type_ids': np.array(token_type_ids).astype(np.int64), + 'attention_masks': np.array(attention_masks).astype(np.int64), 'phoneme_masks': np.array(phoneme_masks).astype(np.float32), - 'char_ids': np.array(char_ids), - 'position_ids': np.array(position_ids), + 'char_ids': np.array(char_ids).astype(np.int64), + 'position_ids': np.array(position_ids).astype(np.int64), } return outputs