feat: do text-speech-text and signalR.

baernhaeckt · Aug 25, 2023 · 47082dd · 47082dd
1 parent 5927f70
commit 47082dd
Show file tree

Hide file tree

Showing 20 changed files with 256 additions and 4 deletions.
diff --git a/Backend2023.sln b/Backend2023.sln
@@ -3,7 +3,13 @@ Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio Version 17
 VisualStudioVersion = 17.7.34018.315
 MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Backend2023", "Backend2023.csproj", "{668AA7FE-483A-4F6C-9688-DC87CDC9162E}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Backend2023", "src\Backend2023\Backend2023.csproj", "{668AA7FE-483A-4F6C-9688-DC87CDC9162E}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Backend2023.Cognitive", "src\Backend2023.Cognitive\Backend2023.Cognitive.csproj", "{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{ACFF960F-A8F9-447E-8AB9-5477D6819795}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Backend2023.Cognitive.Tests", "tests\Backend2023.Cognitive.Tests\Backend2023.Cognitive.Tests.csproj", "{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -15,10 +21,21 @@ Global
 		{668AA7FE-483A-4F6C-9688-DC87CDC9162E}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{668AA7FE-483A-4F6C-9688-DC87CDC9162E}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{668AA7FE-483A-4F6C-9688-DC87CDC9162E}.Release|Any CPU.Build.0 = Release|Any CPU
+		{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}.Release|Any CPU.Build.0 = Release|Any CPU
+		{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
+	GlobalSection(NestedProjects) = preSolution
+		{5D56ED88-9E95-47AF-9ED8-2E01A988D01D} = {ACFF960F-A8F9-447E-8AB9-5477D6819795}
+	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {BD6F36C8-DEB2-494D-A29E-F7D0866F76AF}
 	EndGlobalSection

diff --git a/src/Backend2023.Cognitive/AzureConfiguration.cs b/src/Backend2023.Cognitive/AzureConfiguration.cs
@@ -0,0 +1,8 @@
+namespace Backend2023.Cognitive;
+
+/// <summary>
+///     Connection settings for the Azure Speech resource used by <see cref="SpeechServiceProvider"/>.
+/// </summary>
+/// <remarks>
+///     Both values are handed to <c>SpeechConfig.FromSubscription</c>; they are initialised with
+///     <c>null!</c> and must be assigned through an object initializer before use.
+/// </remarks>
+public class AzureConfiguration
+{
+    /// <summary>
+    ///     Subscription key of the Azure Speech resource.
+    /// </summary>
+    public string SubscriptionKey { get; init; } = null!;
+
+    /// <summary>
+    ///     Region identifier of the Azure Speech resource.
+    /// </summary>
+    public string ServiceRegion { get; init; } = null!;
+}
diff --git a/src/Backend2023.Cognitive/Backend2023.Cognitive.csproj b/src/Backend2023.Cognitive/Backend2023.Cognitive.csproj
@@ -0,0 +1,13 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>net7.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.31.0" />
+  </ItemGroup>
+
+</Project>
diff --git a/src/Backend2023.Cognitive/SpeechRequest.cs b/src/Backend2023.Cognitive/SpeechRequest.cs
@@ -0,0 +1,3 @@
+namespace Backend2023.Cognitive;
+
+/// <summary>
+///     Base type for the requests accepted by <see cref="SpeechServiceProvider"/>.
+/// </summary>
+/// <param name="Language">Locale applied as both the synthesis and the recognition language.</param>
+/// <param name="Voice">Name of the voice applied as the synthesis voice.</param>
+public abstract record SpeechRequest(
+    string Language,
+    string Voice);
diff --git a/src/Backend2023.Cognitive/SpeechServiceProvider.cs b/src/Backend2023.Cognitive/SpeechServiceProvider.cs
@@ -0,0 +1,70 @@
+using Microsoft.CognitiveServices.Speech;
+using Microsoft.CognitiveServices.Speech.Audio;
+
+namespace Backend2023.Cognitive;
+
+/// <summary>
+///     Text-to-speech and speech-to-text on top of the Azure Cognitive Services Speech SDK.
+/// </summary>
+public class SpeechServiceProvider
+{
+    // Read only when a request carries no audio stream, which keeps callers that relied on the
+    // previously hard-coded input file working.
+    private const string FallbackWavFile = "output.wav";
+
+    private readonly AzureConfiguration _azureConfiguration;
+    private readonly string? _proxyHost;
+    private readonly int _proxyPort;
+
+    /// <summary>
+    ///     Creates a provider for the given Azure Speech resource.
+    /// </summary>
+    /// <param name="azureConfiguration">Subscription key and region of the Speech resource.</param>
+    /// <param name="proxyHost">
+    ///     Host of the HTTP proxy used to reach the service. Pass <c>null</c> or an empty string to connect
+    ///     directly. The default keeps the proxy that used to be hard-coded.
+    /// </param>
+    /// <param name="proxyPort">Port of the HTTP proxy; ignored when no proxy host is given.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="azureConfiguration"/> is <c>null</c>.</exception>
+    public SpeechServiceProvider(AzureConfiguration azureConfiguration, string? proxyHost = "localhost", int proxyPort = 3128)
+    {
+        ArgumentNullException.ThrowIfNull(azureConfiguration);
+
+        _azureConfiguration = azureConfiguration;
+        _proxyHost = proxyHost;
+        _proxyPort = proxyPort;
+    }
+
+    /// <summary>
+    ///     Synthesizes the request text (plain text or SSML) and returns the audio bytes of the result.
+    /// </summary>
+    /// <param name="textToSpeedRequest">Text, language, voice and the SSML flag.</param>
+    /// <returns>The audio data reported by the synthesis result.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="textToSpeedRequest"/> is <c>null</c>.</exception>
+    /// <exception cref="InvalidOperationException">The service cancelled the synthesis.</exception>
+    public async Task<byte[]> TextToAudioByteArray(TextToSpeedRequest textToSpeedRequest)
+    {
+        ArgumentNullException.ThrowIfNull(textToSpeedRequest);
+
+        // An explicit null audio config keeps the audio in memory instead of sending it to the
+        // default output device of the machine running this code. The typed local selects the
+        // (SpeechConfig, AudioConfig) constructor overload unambiguously.
+        AudioConfig? noAudioOutput = null;
+        using var synthesizer = new SpeechSynthesizer(CreateSpeechConfig(textToSpeedRequest), noAudioOutput);
+
+        using SpeechSynthesisResult result = textToSpeedRequest.IsSpeechSynthesisMarkupLanguage
+            ? await synthesizer.SpeakSsmlAsync(textToSpeedRequest.Text)
+            : await synthesizer.SpeakTextAsync(textToSpeedRequest.Text);
+
+        if (result.Reason == ResultReason.Canceled)
+        {
+            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
+            throw new InvalidOperationException($"Cancelled(Error:{cancellation.ErrorCode},Details:{cancellation.ErrorDetails})");
+        }
+
+        return result.AudioData;
+    }
+
+    /// <summary>
+    ///     Recognizes speech in the WAV audio of the request and returns the recognized text.
+    /// </summary>
+    /// <remarks>
+    ///     The stream is copied, from its current position, into a temporary WAV file that is handed to the
+    ///     SDK and deleted afterwards. Recognition uses a single <c>RecognizeOnceAsync</c> call.
+    /// </remarks>
+    /// <param name="request">Audio stream and recognition language.</param>
+    /// <returns>The text of the recognition result; may be empty when nothing was recognized.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
+    /// <exception cref="InvalidOperationException">The service cancelled the recognition with an error.</exception>
+    public async Task<string> AudioToText(SpeechToTextRequest request)
+    {
+        ArgumentNullException.ThrowIfNull(request);
+
+        // The stream is declared non-nullable, but callers written against the file-based behaviour pass null.
+        if (request.AudioStream is null)
+        {
+            return await RecognizeWavFile(request, FallbackWavFile);
+        }
+
+        string tempFile = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.wav");
+        try
+        {
+            await using (FileStream file = File.Create(tempFile))
+            {
+                await request.AudioStream.CopyToAsync(file);
+            }
+
+            return await RecognizeWavFile(request, tempFile);
+        }
+        finally
+        {
+            TryDelete(tempFile);
+        }
+    }
+
+    // Runs one recognition pass over a WAV file and releases the SDK objects before returning.
+    private async Task<string> RecognizeWavFile(SpeechRequest request, string wavFilePath)
+    {
+        using AudioConfig audioInput = AudioConfig.FromWavFileInput(wavFilePath);
+        using var recognizer = new SpeechRecognizer(CreateSpeechConfig(request), audioInput);
+
+        SpeechRecognitionResult result = await recognizer.RecognizeOnceAsync();
+
+        if (result.Reason == ResultReason.Canceled)
+        {
+            var cancellation = CancellationDetails.FromResult(result);
+
+            // Only real errors are raised; other cancellation reasons fall through and return the text.
+            if (cancellation.Reason == CancellationReason.Error)
+            {
+                throw new InvalidOperationException($"Cancelled(Error:{cancellation.ErrorCode},Details:{cancellation.ErrorDetails})");
+            }
+        }
+
+        return result.Text;
+    }
+
+    // Builds the SDK configuration from the Azure settings, the optional proxy and the request.
+    private SpeechConfig CreateSpeechConfig(SpeechRequest speechRequest)
+    {
+        SpeechConfig speechConfig = SpeechConfig.FromSubscription(_azureConfiguration.SubscriptionKey, _azureConfiguration.ServiceRegion);
+
+        if (!string.IsNullOrEmpty(_proxyHost))
+        {
+            speechConfig.SetProxy(_proxyHost, _proxyPort);
+        }
+
+        speechConfig.SpeechSynthesisLanguage = speechRequest.Language;
+        speechConfig.SpeechRecognitionLanguage = speechRequest.Language;
+        speechConfig.SpeechSynthesisVoiceName = speechRequest.Voice;
+
+        return speechConfig;
+    }
+
+    // Temp-file cleanup is best effort: a failure to delete must not hide the recognition outcome.
+    private static void TryDelete(string path)
+    {
+        try
+        {
+            File.Delete(path);
+        }
+        catch (IOException)
+        {
+        }
+        catch (UnauthorizedAccessException)
+        {
+        }
+    }
+}
diff --git a/src/Backend2023.Cognitive/SpeechToTextRequest.cs b/src/Backend2023.Cognitive/SpeechToTextRequest.cs
@@ -0,0 +1,4 @@
+namespace Backend2023.Cognitive;
+
+/// <summary>
+///     Request to turn spoken audio into text.
+/// </summary>
+/// <param name="AudioStream">Stream intended to carry the audio that should be transcribed.</param>
+/// <param name="Language">Recognition locale; defaults to Swiss German (<c>de-CH</c>).</param>
+/// <param name="Voice">Voice name forwarded to the base request; defaults to <c>de-CH-LeniNeural</c>.</param>
+public record SpeechToTextRequest(
+    Stream AudioStream,
+    string Language = "de-CH",
+    string Voice = "de-CH-LeniNeural")
+    : SpeechRequest(Language, Voice);
diff --git a/src/Backend2023.Cognitive/TextToSpeedRequest.cs b/src/Backend2023.Cognitive/TextToSpeedRequest.cs
@@ -0,0 +1,4 @@
+namespace Backend2023.Cognitive;
+
+/// <summary>
+///     Request to synthesize speech from text.
+/// </summary>
+/// <param name="Text">Plain text or SSML document to speak.</param>
+/// <param name="Language">Synthesis locale; defaults to Swiss German (<c>de-CH</c>).</param>
+/// <param name="Voice">Synthesis voice; defaults to <c>de-CH-LeniNeural</c>.</param>
+/// <param name="IsSpeechSynthesisMarkupLanguage">
+///     <c>true</c> when <paramref name="Text"/> is SSML, <c>false</c> (the default) when it is plain text.
+/// </param>
+public record TextToSpeedRequest(
+    string Text,
+    string Language = "de-CH",
+    string Voice = "de-CH-LeniNeural",
+    bool IsSpeechSynthesisMarkupLanguage = false)
+    : SpeechRequest(Language, Voice);
diff --git a/.dockerignore → src/Backend2023/.dockerignore b/.dockerignore → src/Backend2023/.dockerignore
diff --git a/Backend2023.csproj → src/Backend2023/Backend2023.csproj b/Backend2023.csproj → src/Backend2023/Backend2023.csproj
@@ -10,6 +10,7 @@
   </PropertyGroup>
 
   <ItemGroup>
+    <!-- No SignalR package reference is needed: the hub uses Microsoft.AspNetCore.SignalR, which ships with the ASP.NET Core shared framework. Microsoft.AspNet.SignalR.Core is the legacy ASP.NET (non-Core) SignalR server. -->
     <PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="7.0.10" />
     <PackageReference Include="Microsoft.VisualStudio.Azure.Containers.Tools.Targets" Version="1.19.4" />
     <PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />

diff --git a/Controllers/WeatherForecastController.cs → .../Controllers/WeatherForecastController.cs b/Controllers/WeatherForecastController.cs → .../Controllers/WeatherForecastController.cs
diff --git a/Dockerfile → src/Backend2023/Dockerfile b/Dockerfile → src/Backend2023/Dockerfile
diff --git a/src/Backend2023/Hubs/AudioHub.cs b/src/Backend2023/Hubs/AudioHub.cs
@@ -0,0 +1,48 @@
+using Microsoft.AspNetCore.SignalR;
+
+namespace Backend2023.Hubs;
+
+/// <summary>
+///     SignalR hub that collects audio uploaded in chunks into one in-memory buffer per connection.
+/// </summary>
+public class AudioHub : Hub
+{
+    private const int WAVHeaderSize = 44;
+
+    // One buffer per connection id. The map is static so it outlives individual hub instances, and
+    // concurrent because different connections can invoke hub methods at the same time.
+    // NOTE(review): access to a single buffer relies on SignalR running the invocations of one
+    // connection one after another (the default) - confirm if parallel invocations get enabled.
+    private static readonly System.Collections.Concurrent.ConcurrentDictionary<string, MemoryStream> AudioData = new();
+
+    /// <summary>
+    ///     Stream uploaded audio chunks with a single WAV header to a memory stream.
+    /// </summary>
+    /// <remarks>
+    ///     The first chunk of a recording is stored unchanged; every later chunk is stored without its
+    ///     leading <see cref="WAVHeaderSize"/> bytes.
+    ///     NOTE(review): the sizes recorded in the first chunk's header are not updated as data is
+    ///     appended - verify that the consumer of the buffer tolerates this.
+    /// </remarks>
+    /// <param name="audioDataChunk">Audio chunk with a WAV Header of <see cref="WAVHeaderSize"/>.</param>
+    /// <returns>Upload Task.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="audioDataChunk"/> is <c>null</c>.</exception>
+    /// <exception cref="HubException">A follow-up chunk is shorter than the WAV header.</exception>
+    public async Task TransmitUserAudio(byte[] audioDataChunk)
+    {
+        ArgumentNullException.ThrowIfNull(audioDataChunk);
+
+        MemoryStream buffer = AudioData.GetOrAdd(Context.ConnectionId, static _ => new MemoryStream());
+
+        // An empty buffer means this chunk opens the recording, so its header is kept.
+        if (buffer.Length == 0)
+        {
+            await buffer.WriteAsync(audioDataChunk);
+            return;
+        }
+
+        if (audioDataChunk.Length < WAVHeaderSize)
+        {
+            throw new HubException($"Audio chunk of {audioDataChunk.Length} bytes is shorter than the {WAVHeaderSize} byte WAV header.");
+        }
+
+        await buffer.WriteAsync(audioDataChunk.AsMemory(WAVHeaderSize));
+    }
+
+    /// <summary>
+    ///     Closes the uploaded audio stream after verifying the completion.
+    /// </summary>
+    /// <remarks>
+    ///     Sends <c>CloseAudioStreamResponse</c> to the caller with the number of buffered bytes (0 when
+    ///     nothing was uploaded) and releases the buffer, so the next chunk starts a new recording.
+    /// </remarks>
+    /// <returns>Completion Task.</returns>
+    public async Task CloseAudioStream()
+    {
+        if (!AudioData.TryRemove(Context.ConnectionId, out MemoryStream? buffer))
+        {
+            await Clients.Caller.SendAsync("CloseAudioStreamResponse", 0);
+            return;
+        }
+
+        using (buffer)
+        {
+            await Clients.Caller.SendAsync("CloseAudioStreamResponse", buffer.Length);
+
+            // TODO: Generate Response (the buffer is still available inside this using block)
+        }
+    }
+
+    /// <summary>
+    ///     Releases the buffer of a connection that went away without closing its audio stream.
+    /// </summary>
+    /// <param name="exception">Error that ended the connection, if any.</param>
+    /// <returns>Task of the base implementation.</returns>
+    public override Task OnDisconnectedAsync(Exception? exception)
+    {
+        if (AudioData.TryRemove(Context.ConnectionId, out MemoryStream? abandoned))
+        {
+            abandoned.Dispose();
+        }
+
+        return base.OnDisconnectedAsync(exception);
+    }
+}
diff --git a/Program.cs → src/Backend2023/Program.cs b/Program.cs → src/Backend2023/Program.cs
@@ -1,3 +1,5 @@
+using Backend2023.Hubs;
+
 var builder = WebApplication.CreateBuilder(args);
 
 // Add services to the container.
@@ -6,6 +8,7 @@
 // Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
 builder.Services.AddEndpointsApiExplorer();
 builder.Services.AddSwaggerGen();
+builder.Services.AddSignalR();
 
 var app = builder.Build();
 
@@ -16,10 +19,14 @@
     app.UseSwaggerUI();
 }
 
-app.UseHttpsRedirection();
-
-app.UseAuthorization();
+// Routing is registered first so that the CORS middleware runs between routing and the endpoints.
+app.UseRouting();
+
+// The SignalR browser client sends credentialed requests; these require an explicit origin
+// together with AllowCredentials, otherwise the browser rejects the negotiate response.
+app.UseCors(x =>
+    x.WithOrigins("http://localhost:5173")
+        .AllowAnyMethod()
+        .AllowAnyHeader()
+        .AllowCredentials());
 
 app.MapControllers();
+app.MapHub<AudioHub>("/audiohub");
 
+
 app.Run();
diff --git a/Properties/launchSettings.json → ...ackend2023/Properties/launchSettings.json b/Properties/launchSettings.json → ...ackend2023/Properties/launchSettings.json
diff --git a/WeatherForecast.cs → src/Backend2023/WeatherForecast.cs b/WeatherForecast.cs → src/Backend2023/WeatherForecast.cs
diff --git a/appsettings.Development.json → src/Backend2023/appsettings.Development.json b/appsettings.Development.json → src/Backend2023/appsettings.Development.json
diff --git a/appsettings.json → src/Backend2023/appsettings.json b/appsettings.json → src/Backend2023/appsettings.json
diff --git a/tests/Backend2023.Cognitive.Tests/Backend2023.Cognitive.Tests.csproj b/tests/Backend2023.Cognitive.Tests/Backend2023.Cognitive.Tests.csproj
@@ -0,0 +1,29 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>net7.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+
+    <IsPackable>false</IsPackable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="FluentAssertions" Version="6.12.0" />
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.2" />
+    <PackageReference Include="xunit" Version="2.4.2" />
+    <PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
+      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
+      <PrivateAssets>all</PrivateAssets>
+    </PackageReference>
+    <PackageReference Include="coverlet.collector" Version="3.1.2">
+      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
+      <PrivateAssets>all</PrivateAssets>
+    </PackageReference>
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\src\Backend2023.Cognitive\Backend2023.Cognitive.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/tests/Backend2023.Cognitive.Tests/TextToSpeechServiceProviderFixture.cs b/tests/Backend2023.Cognitive.Tests/TextToSpeechServiceProviderFixture.cs
@@ -0,0 +1,47 @@
+using FluentAssertions;
+
+namespace Backend2023.Cognitive.Tests
+{
+    /// <summary>
+    ///     Integration tests for <see cref="SpeechServiceProvider"/>. They call the Azure Speech service,
+    ///     so credentials have to be supplied through environment variables.
+    /// </summary>
+    public class TextToSpeechServiceProviderFixture
+    {
+        // Credentials come from the environment so that no secret is stored in the repository.
+        private const string SubscriptionKeyVariable = "AZURE_SPEECH_KEY";
+        private const string ServiceRegionVariable = "AZURE_SPEECH_REGION";
+        private const string DefaultServiceRegion = "westeurope";
+
+        private const string SampleSentence = "Ich bin die Leni, und ich mag es durch den Regen zu tanzen.";
+
+        private readonly SpeechServiceProvider _speechServiceProvider;
+
+        public TextToSpeechServiceProviderFixture()
+        {
+            string subscriptionKey = Environment.GetEnvironmentVariable(SubscriptionKeyVariable)
+                ?? throw new InvalidOperationException(
+                    $"Set the {SubscriptionKeyVariable} environment variable to run the speech integration tests.");
+
+            var azureConfiguration = new AzureConfiguration
+            {
+                ServiceRegion = Environment.GetEnvironmentVariable(ServiceRegionVariable) ?? DefaultServiceRegion,
+                SubscriptionKey = subscriptionKey
+            };
+
+            _speechServiceProvider = new SpeechServiceProvider(azureConfiguration);
+        }
+
+        [Fact]
+        public async Task Should_GenerateSpeech()
+        {
+            // Arrange
+            var request = new TextToSpeedRequest(SampleSentence);
+
+            // Act
+            byte[] result = await _speechServiceProvider.TextToAudioByteArray(request);
+
+            // Assert
+            result.Should().NotBeNullOrEmpty();
+        }
+
+        [Fact]
+        public async Task Should_GenerateText()
+        {
+            // Arrange: synthesize a sentence so the request carries a real audio stream instead of null.
+            byte[] audio = await _speechServiceProvider.TextToAudioByteArray(new TextToSpeedRequest(SampleSentence));
+            using var audioStream = new MemoryStream(audio);
+
+            // Act
+            string result = await _speechServiceProvider.AudioToText(new SpeechToTextRequest(audioStream));
+
+            // Assert
+            result.Should().NotBeNull();
+        }
+    }
+}
diff --git a/tests/Backend2023.Cognitive.Tests/Usings.cs b/tests/Backend2023.Cognitive.Tests/Usings.cs
@@ -0,0 +1 @@
+global using Xunit;