-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: do text-speech-text and signalR.
- Loading branch information
seilerch
committed
Aug 25, 2023
1 parent
5927f70
commit 47082dd
Showing
20 changed files
with
256 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
namespace Backend2023.Cognitive; | ||
|
||
/// <summary>
/// Connection settings for the Azure Speech resource.
/// Both values are handed to <c>SpeechConfig.FromSubscription</c> by <c>SpeechServiceProvider</c>.
/// </summary>
public class AzureConfiguration
{
    /// <summary>
    /// Region identifier of the Speech resource (the tests in this repository use <c>westeurope</c>).
    /// Starts out as <c>null</c> until set through an object initializer.
    /// </summary>
    public string ServiceRegion { get; init; } = default!;

    /// <summary>
    /// Subscription key of the Speech resource.
    /// Starts out as <c>null</c> until set through an object initializer.
    /// </summary>
    public string SubscriptionKey { get; init; } = default!;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>net7.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.31.0" /> | ||
</ItemGroup> | ||
|
||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
namespace Backend2023.Cognitive; | ||
|
||
/// <summary>
/// Settings shared by every speech request.
/// </summary>
/// <param name="Language">
/// Locale applied as both the synthesis and the recognition language of the speech configuration.
/// </param>
/// <param name="Voice">Name applied as the synthesis voice of the speech configuration.</param>
public abstract record SpeechRequest(
    string Language,
    string Voice);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
using Microsoft.CognitiveServices.Speech; | ||
using Microsoft.CognitiveServices.Speech.Audio; | ||
|
||
namespace Backend2023.Cognitive; | ||
|
||
/// <summary>
/// Thin wrapper around the Azure Speech SDK offering text-to-speech synthesis
/// and speech-to-text recognition.
/// </summary>
public class SpeechServiceProvider
{
    private readonly AzureConfiguration _azureConfiguration;

    /// <summary>
    /// Creates a provider that authenticates with the given subscription key and region.
    /// </summary>
    /// <param name="azureConfiguration">Subscription key and service region.</param>
    /// <exception cref="ArgumentNullException"><paramref name="azureConfiguration"/> is <c>null</c>.</exception>
    public SpeechServiceProvider(AzureConfiguration azureConfiguration)
    {
        ArgumentNullException.ThrowIfNull(azureConfiguration);
        _azureConfiguration = azureConfiguration;
    }

    /// <summary>
    /// Synthesizes the request text (plain text or SSML) and returns the produced audio bytes.
    /// </summary>
    /// <param name="textToSpeedRequest">Text, language, voice and SSML flag to synthesize with.</param>
    /// <returns>The audio data reported by the synthesis result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="textToSpeedRequest"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">The synthesis was canceled by the service or SDK.</exception>
    public async Task<byte[]> TextToAudioByteArray(TextToSpeedRequest textToSpeedRequest)
    {
        ArgumentNullException.ThrowIfNull(textToSpeedRequest);

        // The synthesizer owns native resources; keep it alive until the result has been read, then release it.
        using SpeechSynthesizer synthesizer = CreateSynthesizer(textToSpeedRequest);
        using SpeechSynthesisResult result = await Synthesize(synthesizer, textToSpeedRequest);

        if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
            throw new InvalidOperationException(
                $"Cancelled(Error:{cancellation.ErrorCode},Details:{cancellation.ErrorDetails})");
        }

        return result.AudioData;
    }

    /// <summary>
    /// Runs a single-shot recognition and returns the recognized text.
    /// </summary>
    /// <param name="request">Language settings for the recognizer.</param>
    /// <returns>The text of the recognition result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    /// <remarks>
    /// NOTE(review): the audio is currently read from the file <c>output.wav</c> in the working directory;
    /// <c>request.AudioStream</c> is not consumed yet. A canceled recognition is only written to the
    /// console and the (possibly empty) result text is still returned.
    /// </remarks>
    public async Task<string> AudioToText(SpeechToTextRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        // Both the audio input and the recognizer hold native handles and must be disposed.
        using AudioConfig audioConfig = AudioConfig.FromWavFileInput("output.wav");
        using SpeechRecognizer recognizer = CreateRecognizer(request, audioConfig);
        SpeechRecognitionResult result = await recognizer.RecognizeOnceAsync();

        if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = CancellationDetails.FromResult(result);
            Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

            if (cancellation.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
            }
        }

        return result.Text;
    }

    /// <summary>
    /// Builds the SDK configuration from the subscription settings and the request's language and voice.
    /// </summary>
    private SpeechConfig CreateSpeechConfig(SpeechRequest speechRequest)
    {
        SpeechConfig speechConfig = SpeechConfig.FromSubscription(_azureConfiguration.SubscriptionKey, _azureConfiguration.ServiceRegion);

        // NOTE(review): all traffic is routed through a proxy on localhost:3128; this looks like a
        // development setting and should probably come from configuration - confirm before deploying.
        speechConfig.SetProxy("localhost", 3128);
        speechConfig.SpeechSynthesisLanguage = speechRequest.Language;
        speechConfig.SpeechRecognitionLanguage = speechRequest.Language;
        speechConfig.SpeechSynthesisVoiceName = speechRequest.Voice;

        return speechConfig;
    }

    /// <summary>
    /// Creates a synthesizer without an audio output device, so the audio is only returned in the
    /// result instead of also being played on the default speaker of the host.
    /// </summary>
    private SpeechSynthesizer CreateSynthesizer(TextToSpeedRequest request)
        => new(CreateSpeechConfig(request), (AudioConfig?)null);

    /// <summary>Creates a recognizer reading from the given audio input. The caller disposes it.</summary>
    private SpeechRecognizer CreateRecognizer(SpeechToTextRequest request, AudioConfig audioConfig)
        => new(CreateSpeechConfig(request), audioConfig);

    /// <summary>
    /// Starts the synthesis, interpreting the text as SSML when the request says so and as plain text otherwise.
    /// </summary>
    private static Task<SpeechSynthesisResult> Synthesize(SpeechSynthesizer synthesizer, TextToSpeedRequest textToSpeedRequest)
        => textToSpeedRequest.IsSpeechSynthesisMarkupLanguage
            ? synthesizer.SpeakSsmlAsync(textToSpeedRequest.Text)
            : synthesizer.SpeakTextAsync(textToSpeedRequest.Text);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
namespace Backend2023.Cognitive; | ||
|
||
/// <summary>
/// Request to turn spoken audio into text.
/// </summary>
/// <param name="AudioStream">
/// Audio to recognize. NOTE(review): no code visible alongside this record reads the stream yet - confirm the intended format.
/// </param>
/// <param name="Language">Recognition locale; defaults to <c>de-CH</c>.</param>
/// <param name="Voice">Voice name forwarded to the shared speech settings; defaults to <c>de-CH-LeniNeural</c>.</param>
public record SpeechToTextRequest(
    Stream AudioStream,
    string Language = "de-CH",
    string Voice = "de-CH-LeniNeural")
    : SpeechRequest(Language, Voice);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
namespace Backend2023.Cognitive; | ||
|
||
/// <summary>
/// Request to synthesize speech from text.
/// </summary>
/// <param name="Text">Plain text or SSML document to speak.</param>
/// <param name="Language">Synthesis locale; defaults to <c>de-CH</c>.</param>
/// <param name="Voice">Synthesis voice name; defaults to <c>de-CH-LeniNeural</c>.</param>
/// <param name="IsSpeechSynthesisMarkupLanguage">
/// <c>true</c> when <paramref name="Text"/> is SSML, <c>false</c> (the default) when it is plain text.
/// </param>
public record TextToSpeedRequest(
    string Text,
    string Language = "de-CH",
    string Voice = "de-CH-LeniNeural",
    bool IsSpeechSynthesisMarkupLanguage = false)
    : SpeechRequest(Language, Voice);
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
using Microsoft.AspNetCore.SignalR; | ||
|
||
namespace Backend2023.Hubs; | ||
|
||
/// <summary>
/// SignalR hub that collects audio uploaded in chunks, one buffer per client connection.
/// </summary>
public class AudioHub : Hub
{
    private const int WAVHeaderSize = 44;

    // Audio received so far, keyed by connection id. The map is static and therefore shared by all
    // hub invocations, which may run in parallel for different connections - hence the concurrent map.
    private static readonly System.Collections.Concurrent.ConcurrentDictionary<string, MemoryStream> AudioData = new();

    /// <summary>
    /// Stream uploaded audio chunks with a single WAV header to a memory stream.
    /// The first chunk of a connection is stored as-is; for every later chunk the leading
    /// <see cref="WAVHeaderSize"/> bytes are dropped before appending.
    /// </summary>
    /// <param name="audioDataChunk">Audio chunk with a WAV Header of <see cref="WAVHeaderSize"/>.</param>
    /// <returns>Upload Task.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="audioDataChunk"/> is <c>null</c>.</exception>
    /// <exception cref="HubException">A follow-up chunk is shorter than the WAV header.</exception>
    public async Task TransmitUserAudio(byte[] audioDataChunk)
    {
        ArgumentNullException.ThrowIfNull(audioDataChunk);

        var connectionId = Context.ConnectionId;
        if (!AudioData.TryGetValue(connectionId, out MemoryStream? buffer))
        {
            // First chunk of this connection: keep its header so the buffer forms one WAV file.
            buffer = new MemoryStream();
            AudioData[connectionId] = buffer;
            await buffer.WriteAsync(audioDataChunk);
            return;
        }

        // A shorter chunk would yield a negative byte count below; report it clearly instead.
        if (audioDataChunk.Length < WAVHeaderSize)
        {
            throw new HubException(
                $"Audio chunk of {audioDataChunk.Length} bytes is shorter than the {WAVHeaderSize} byte WAV header.");
        }

        await buffer.WriteAsync(audioDataChunk, WAVHeaderSize, audioDataChunk.Length - WAVHeaderSize);
    }

    /// <summary>
    /// Reports the number of bytes buffered for the calling connection (0 when nothing was uploaded)
    /// through the <c>CloseAudioStreamResponse</c> client method.
    /// </summary>
    /// <returns>Completion Task.</returns>
    public async Task CloseAudioStream()
    {
        var connectionId = Context.ConnectionId;
        long bufferedBytes = AudioData.TryGetValue(connectionId, out MemoryStream? buffer)
            ? buffer.Length
            : 0;

        await Clients.Client(connectionId).SendAsync("CloseAudioStreamResponse", bufferedBytes);

        // TODO: Generate Response
    }

    /// <summary>
    /// Releases the audio buffered for a connection once it disconnects, so the static map
    /// does not grow with every client that ever connected.
    /// </summary>
    /// <param name="exception">The error that caused the disconnect, if any.</param>
    /// <returns>Completion Task.</returns>
    public override async Task OnDisconnectedAsync(Exception? exception)
    {
        if (AudioData.TryRemove(Context.ConnectionId, out MemoryStream? buffer))
        {
            await buffer.DisposeAsync();
        }

        await base.OnDisconnectedAsync(exception);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
29 changes: 29 additions & 0 deletions
29
tests/Backend2023.Cognitive.Tests/Backend2023.Cognitive.Tests.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>net7.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
|
||
<IsPackable>false</IsPackable> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="FluentAssertions" Version="6.12.0" /> | ||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.2" /> | ||
<PackageReference Include="xunit" Version="2.4.2" /> | ||
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5"> | ||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> | ||
<PrivateAssets>all</PrivateAssets> | ||
</PackageReference> | ||
<PackageReference Include="coverlet.collector" Version="3.1.2"> | ||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> | ||
<PrivateAssets>all</PrivateAssets> | ||
</PackageReference> | ||
</ItemGroup> | ||
|
||
<ItemGroup> | ||
<ProjectReference Include="..\..\src\Backend2023.Cognitive\Backend2023.Cognitive.csproj" /> | ||
</ItemGroup> | ||
|
||
</Project> |
47 changes: 47 additions & 0 deletions
47
tests/Backend2023.Cognitive.Tests/TextToSpeechServiceProviderFixture.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
using FluentAssertions; | ||
|
||
namespace Backend2023.Cognitive.Tests
{
    /// <summary>
    /// Integration tests for <see cref="SpeechServiceProvider"/>. They talk to the real speech service,
    /// so credentials must be supplied through environment variables and never be committed.
    /// </summary>
    public class TextToSpeechServiceProviderFixture
    {
        // Environment variables holding the credentials of the Speech resource.
        private const string SubscriptionKeyVariable = "SPEECH_KEY";
        private const string ServiceRegionVariable = "SPEECH_REGION";

        // Region used when SPEECH_REGION is not set.
        private const string DefaultServiceRegion = "westeurope";

        private readonly SpeechServiceProvider _speechServiceProvider;

        /// <summary>
        /// Builds the provider under test from the environment.
        /// </summary>
        /// <exception cref="InvalidOperationException">The subscription key variable is missing or blank.</exception>
        public TextToSpeechServiceProviderFixture()
        {
            string? subscriptionKey = Environment.GetEnvironmentVariable(SubscriptionKeyVariable);
            if (string.IsNullOrWhiteSpace(subscriptionKey))
            {
                throw new InvalidOperationException(
                    $"Set the {SubscriptionKeyVariable} environment variable to run the speech integration tests.");
            }

            string? serviceRegion = Environment.GetEnvironmentVariable(ServiceRegionVariable);

            var azureConfiguration = new AzureConfiguration
            {
                ServiceRegion = string.IsNullOrWhiteSpace(serviceRegion) ? DefaultServiceRegion : serviceRegion,
                SubscriptionKey = subscriptionKey
            };

            _speechServiceProvider = new SpeechServiceProvider(azureConfiguration);
        }

        [Fact]
        public async Task Should_GenerateSpeech()
        {
            // Arrange
            TextToSpeedRequest request = new TextToSpeedRequest("Ich bin die Leni, und ich mag es durch den Regen zu tanzen.");

            // Act
            byte[] result = await _speechServiceProvider.TextToAudioByteArray(request);

            // Assert
            // An empty array would mean no audio was produced, so non-null alone is not enough.
            result.Should().NotBeNullOrEmpty();
        }

        [Fact]
        public async Task Should_GenerateText()
        {
            // Arrange
            // NOTE(review): the provider reads "output.wav" from the working directory and ignores the
            // request stream, hence the null stream here - confirm the file is provided for test runs.

            // Act
            string result = await _speechServiceProvider.AudioToText(new SpeechToTextRequest(null!));

            // Assert
            result.Should().NotBeNull();
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
global using Xunit; |