Skip to content

Commit

Permalink
improve output formats support (#39)
Browse files Browse the repository at this point in the history
-  improve support for output formats
-  add command line flag to list supported formats
-  streamline command line parsing for voices and formats
  • Loading branch information
pviotti authored Apr 6, 2020
1 parent 87ed773 commit da2f50f
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 66 deletions.
33 changes: 21 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,43 +21,52 @@ and as CLR binary artifact.

## Usage

```bash
```
$ ./sayit --help
USAGE: sayit [--help] [--setup] [--version] [--listvoices] [--voice <de|en|es|fr|hi|it|ja|pt|ru|zh>] [--output <output>] <input>
USAGE: sayit [--help] [--version] [--setup] [--list-voices] [--list-formats]
[--voice <voice>] [--format <format>] [--output <output>] <input>
INPUT:
<input> the text to be pronounced
OPTIONS:
--setup setup the configuration file
--version print sayit version
--listvoices, -lv list available voice shorthands, with their corresponding voice ids
--voice, -v <de|en|es|fr|hi|it|ja|pt|ru|zh>
the voice shorthand, which maps to one of the available voice ids (see https://aka.ms/speech/tts-languages)
--setup setup the configuration file
--list-voices, -lv list available voice shorthands, with their
corresponding voice ids
--list-formats, -lf list available output format shorthands, with their
corresponding output format ids
--voice, -v <voice> the voice shorthand, which maps to one of the
available voice ids (see `sayit -lv` for details)
--format, -f <format> the format shorthand of the audio output, which maps
to one of the available format ids (see `sayit -lf`
for details)
--output, -o <output> the path of the output file
--help display this list of options.
```
At the first use you're required to run the setup wizard (`./sayit --setup`)
and enter the configuration parameters of your Azure Cognitive Services resource,
which are the subscription key (which you can find in the Azure portal)
such as the subscription key (which you can find in the Azure portal)
and the region identifier (see [here][region-ids]).
SayIt will store these parameters in the configuration folder of the current
user (e.g. `~/.config/` in Linux) as an [App Setting XML file][appsetting].

Currently, SayIt supports these settings:
- languages (*voices*): English, Italian, French, German, Spanish, Hindi, Portuguese, Russian, Japanese and Chinese (Mandarin).
SayIt supports these settings:
- [languages][voices]: English, Italian, French, German, Spanish, Hindi, Portuguese, Russian, Japanese and Chinese (Mandarin).
- [output formats][output-formats]:
`audio-16khz-32kbitrate-mono-mp3`, `audio-16khz-64kbitrate-mono-mp3`, `audio-16khz-128kbitrate-mono-mp3`, `audio-24khz-96kbitrate-mono-mp3`, `audio-24khz-160kbitrate-mono-mp3`, `audio-24khz-48kbitrate-mono-mp3`, `riff-8khz-16bit-mono-pcm`, `riff-16khz-16bit-mono-pcm`, `riff-24khz-16bit-mono-pcm`.

> NB: some neural voices might not be supported by your Azure Cognitive Services resource,
> NB: some languages and output formats might not be supported by your Azure Cognitive Services resource,
depending on its region (see [here][region-voices]).

- audio export formats: MP3 16Khz 32KB/s mono

[az-sub]: https://azure.microsoft.com/en-us/free/
[az-cs]: https://azure.microsoft.com/en-us/services/cognitive-services/speech-services/
[az-cs-price]: https://azure.microsoft.com/en-us/pricing/details/cognitive-services/speech-services/
[release]: https://github.com/pviotti/sayit/releases
[appsetting]: https://docs.microsoft.com/en-us/dotnet/framework/configure-apps/file-schema/appsettings/
[region-ids]: https://aka.ms/speech/sdkregion#speech-to-text-text-to-speech-and-translation
[region-voices]: https://aka.ms/speech/sdkregion#text-to-speech
[voices]: https://aka.ms/speech/tts-languages
[output-formats]: https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs
48 changes: 34 additions & 14 deletions SayIt/Config.fs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ open System.Reflection
open Argu

open SayIt.Voices
open SayIt.Formats

let VERSION = Assembly.GetExecutingAssembly().GetName().Version.ToString()
let PROGRAM_NAME = "sayit"
Expand All @@ -15,21 +16,25 @@ let CONFIG_FILE = "sayit.config"
type Env = Environment

type Args =
| [<NoAppSettings>] Setup
| [<NoAppSettings>] Version
| [<AltCommandLine("-lv"); NoAppSettings>] ListVoices
| [<AltCommandLine("-v")>] Voice of VoiceType
| [<NoAppSettings>] Setup
| [<AltCommandLine("-lv"); NoAppSettings>] List_Voices
| [<AltCommandLine("-lf"); NoAppSettings>] List_Formats
| [<AltCommandLine("-v")>] Voice of voice: string
| [<AltCommandLine("-f")>] Format of format: string
| [<AltCommandLine("-o"); NoAppSettings>] Output of output: string
| [<NoCommandLine; Mandatory>] Key of key: string
| [<NoCommandLine; Mandatory>] Region of region: string
| [<MainCommand; Mandatory; NoAppSettings>] Input of input: string
interface IArgParserTemplate with
member s.Usage =
match s with
| Setup _ -> "setup the configuration file"
| Version _ -> "print sayit version"
| ListVoices _ -> "list available voice shorthands, with their corresponding voice ids"
| Voice _ -> "the voice shorthand, which maps to one of the available voice ids (see https://aka.ms/speech/tts-languages)"
| Setup _ -> "setup the configuration file"
| List_Voices _ -> "list available voice shorthands, with their corresponding voice ids"
| List_Formats _ -> "list available output format shorthands, with their corresponding output format ids"
| Voice _ -> "the voice shorthand, which maps to one of the available voice ids (see `sayit -lv` for details)"
| Format _ -> "the format shorthand of the audio output, which maps to one fo the available format ids (see `sayit -lf` for details)"
| Output _ -> "the path of the output file"
| Input _ -> "the text to be pronounced"
| Key _ -> "the subscription key of your Azure Cognitive Services resource"
Expand All @@ -45,14 +50,15 @@ let getConfigFilePath() =
Env.GetFolderPath(Env.SpecialFolder.ApplicationData, Env.SpecialFolderOption.Create)
+ string Path.DirectorySeparatorChar + CONFIG_FILE

let writeConfig (key: string, region: string, voice: VoiceType) =
let writeConfig (key: string, region: string, voice: VoiceType, format: FormatType) =
let parser = ArgumentParser.Create<Args>()

let xml =
parser.PrintAppSettingsArguments
[ Key key
Region region
Voice voice ]
Voice (voice.ToString())
Format (format.ToString()) ]
File.WriteAllText(getConfigFilePath(), xml, Text.Encoding.UTF8)

let configWizard() =
Expand All @@ -65,11 +71,22 @@ let configWizard() =
let subReg = ask "Region identifier: "

let voice =
match VoiceType.FromString(ask "Default voice [en]: ") with
| Some x -> x
| None -> En

writeConfig (subId, subReg, voice)
try
VoiceType.FromString(ask "Default voice [en]: ")
with
| Failure _ ->
Console.WriteLine "Voice defaulted to \"en\"."
En

let format =
try
FormatType.FromString(ask "Default output format [mp324khz96kbps]: ")
with
| Failure _ ->
Console.WriteLine "Output format defaulted to \"mp324khz96kbps\"."
Mp324khz96kbps

writeConfig (subId, subReg, voice, format)
("The configuration has been written to " + getConfigFilePath()) |> Console.WriteLine

let getConfiguration argv =
Expand All @@ -82,9 +99,12 @@ let getConfiguration argv =
elif config.Contains Setup then
configWizard()
ReturnVal 0
elif config.Contains ListVoices then
elif config.Contains List_Voices then
listVoices()
ReturnVal 0
elif config.Contains List_Formats then
listFormats()
ReturnVal 0
elif File.Exists(getConfigFilePath()) then
let confReader = ConfigurationReader.FromAppSettingsFile(getConfigFilePath())
Config (parser.Parse(argv, confReader, ignoreMissing = true))
Expand Down
37 changes: 37 additions & 0 deletions SayIt/Formats.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
module SayIt.Formats

open Microsoft.CognitiveServices.Speech
open Microsoft.FSharp.Reflection
open SayIt.Utils

/// Shorthand identifiers for the audio output formats selectable by the user.
/// Case names are what the command line and the configuration file refer to
/// (in lowercase form); `getFormatId` translates them to SDK values.
type FormatType =
    // MP3 shorthands
    | Mp316khz32kbps
    | Mp316khz64kbps
    | Mp316khz128kbps
    | Mp324khz48kbps
    | Mp324khz96kbps
    | Mp324khz160kbps
    // PCM shorthands
    | Pcm8khz16b
    | Pcm16khz16b
    | Pcm24khz16b

    /// Textual form of the case, produced by the `toString` helper of SayIt.Utils.
    override format.ToString() = toString format

    /// Builds a `FormatType` from its textual form by delegating to the
    /// `fromString` helper of SayIt.Utils.
    static member FromString (s: string) = fromString<FormatType> s

/// Translates a `FormatType` shorthand into the corresponding
/// `SpeechSynthesisOutputFormat` value of the Speech SDK.
let getFormatId (format: FormatType) : SpeechSynthesisOutputFormat =
    match format with
    // RIFF / PCM outputs
    | Pcm8khz16b -> SpeechSynthesisOutputFormat.Riff8Khz16BitMonoPcm
    | Pcm16khz16b -> SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm
    | Pcm24khz16b -> SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm
    // MP3 outputs, 16 kHz
    | Mp316khz32kbps -> SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
    | Mp316khz64kbps -> SpeechSynthesisOutputFormat.Audio16Khz64KBitRateMonoMp3
    | Mp316khz128kbps -> SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3
    // MP3 outputs, 24 kHz
    | Mp324khz48kbps -> SpeechSynthesisOutputFormat.Audio24Khz48KBitRateMonoMp3
    | Mp324khz96kbps -> SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3
    | Mp324khz160kbps -> SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3

/// Prints a header line followed by one "shorthand -> id" line for every
/// case of `FormatType`, in declaration order.
let listFormats() =
    let cases = FSharpType.GetUnionCases typeof<FormatType>
    printfn "Shorthand -> Id pairs for supported output formats (see https://bit.ly/2UOjVpg):"
    cases
    |> Array.iter (fun case ->
        // The lowercase case name is both what gets printed and what
        // FromString is asked to resolve.
        let shorthand = case.Name.ToLower()
        let formatId = getFormatId (FormatType.FromString(shorthand))
        printfn "%s -> %A" shorthand formatId)
18 changes: 10 additions & 8 deletions SayIt/Program.fs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module SayIt.Program

open SayIt.Config
open SayIt.Voices
open SayIt.Formats

open System.Threading.Tasks
open Microsoft.CognitiveServices.Speech
Expand All @@ -27,7 +28,8 @@ let handleSynthesisResult (task: Task<SpeechSynthesisResult>) =
let performSpeechSynthesis (config: Argu.ParseResults<Args>, speechConfig: SpeechConfig) =
if config.Contains Output then
let output = config.GetResult Output
speechConfig.SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
let outputFormat = getFormatId( config.PostProcessResult (Format, FormatType.FromString))
speechConfig.SetSpeechSynthesisOutputFormat(outputFormat)
use fileOutput = AudioConfig.FromWavFileOutput(output)
use synthetizer = new SpeechSynthesizer(speechConfig, fileOutput)
handleSynthesisResult (synthetizer.SpeakTextAsync(config.GetResult Input))
Expand All @@ -37,14 +39,14 @@ let performSpeechSynthesis (config: Argu.ParseResults<Args>, speechConfig: Speec

[<EntryPoint>]
let main argv =
match Config.getConfiguration (argv) with
match getConfiguration (argv) with
| Config config ->
let key = config.GetResult Key
let region = config.GetResult Region
let voice = getVoiceId (config.GetResult Voice)
let key = config.GetResult Key
let region = config.GetResult Region
let voice = getVoiceId (config.PostProcessResult (Voice, VoiceType.FromString))

let speechConfig = SpeechConfig.FromSubscription(key, region)
speechConfig.SpeechSynthesisVoiceName <- voice
let speechConfig = SpeechConfig.FromSubscription(key, region)
speechConfig.SpeechSynthesisVoiceName <- voice

performSpeechSynthesis (config, speechConfig)
performSpeechSynthesis (config, speechConfig)
| ReturnVal ret -> ret
38 changes: 18 additions & 20 deletions SayIt/SayIt.fsproj
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp3.1</TargetFramework>
<Version>0.3.0</Version>
</PropertyGroup>

<ItemGroup>
<Compile Include="Voices.fs" />
<Compile Include="Config.fs" />
<Compile Include="Program.fs" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="Argu" Version="6.0.0" />
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.11.0" />
</ItemGroup>

</Project>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp3.1</TargetFramework>
<Version>0.3.0</Version>
</PropertyGroup>
<ItemGroup>
<Compile Include="Utils.fs" />
<Compile Include="Formats.fs" />
<Compile Include="Voices.fs" />
<Compile Include="Config.fs" />
<Compile Include="Program.fs" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Argu" Version="6.0.0" />
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.11.0" />
</ItemGroup>
</Project>
13 changes: 13 additions & 0 deletions SayIt/Utils.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module SayIt.Utils

open Microsoft.FSharp.Reflection

// Create discriminated unions from string - http://fssnip.net/9l

/// Returns the lowercase name of the union case of `x`.
/// The name is lowercased with the invariant culture so that the result does
/// not depend on the machine's locale (e.g. the case `It` always yields "it",
/// never the Turkish dotless "ıt").
let toString (x: 'a) =
    let case, _ = FSharpValue.GetUnionFields(x, typeof<'a>)
    case.Name.ToLowerInvariant()

/// Builds the field-less union case of `'a` whose name matches `s`.
/// Matching ignores letter case, so "en", "En" and "EN" all select the
/// case `En`.
/// Raises `Failure` (via `failwith`) when `s` does not identify exactly one case.
let fromString<'a> (s: string) =
    let isMatch (case: UnionCaseInfo) =
        // Ordinal, case-insensitive comparison: locale independent and tolerant
        // of the casing typed by the user.
        System.String.Equals(case.Name, s, System.StringComparison.OrdinalIgnoreCase)
        // Legacy comparison kept so that values produced with the
        // culture-sensitive `Name.ToLower()` are still recognised.
        || case.Name.ToLower() = s
    match FSharpType.GetUnionCases typeof<'a> |> Array.filter isMatch with
    | [| case |] -> FSharpValue.MakeUnion(case, [||]) :?> 'a
    | _ -> failwith (s + " not recognized as a valid parameter.")
16 changes: 4 additions & 12 deletions SayIt/Voices.fs
Original file line number Diff line number Diff line change
@@ -1,16 +1,7 @@
module SayIt.Voices

open Microsoft.FSharp.Reflection

// Create discriminated unions from string - http://fssnip.net/9l
let toString (x: 'a) =
match FSharpValue.GetUnionFields(x, typeof<'a>) with
| case, _ -> case.Name

let fromString<'a> (s: string) =
match FSharpType.GetUnionCases typeof<'a> |> Array.filter (fun case -> case.Name = s) with
| [| case |] -> Some(FSharpValue.MakeUnion(case, [||]) :?> 'a)
| _ -> None
open SayIt.Utils

type VoiceType =
| De
Expand Down Expand Up @@ -41,6 +32,7 @@ let getVoiceId (voice: VoiceType) =

let listVoices() =
let types = FSharpType.GetUnionCases typeof<VoiceType>
printfn "Shorthand -> Id pairs for supported voices (see https://aka.ms/speech/tts-languages):"
for t in types do
let id = getVoiceId (VoiceType.FromString(t.Name).Value)
printfn "%s - %s" (t.Name.ToLower()) id
let id = getVoiceId (VoiceType.FromString(t.Name.ToLower()))
printfn "%s -> %s" (t.Name.ToLower()) id

0 comments on commit da2f50f

Please sign in to comment.