Skip to content

Commit

Permalink
feat: do text-speech-text and signalR.
Browse files Browse the repository at this point in the history
  • Loading branch information
seilerch committed Aug 25, 2023
1 parent 5927f70 commit 47082dd
Show file tree
Hide file tree
Showing 20 changed files with 256 additions and 4 deletions.
19 changes: 18 additions & 1 deletion Backend2023.sln
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.7.34018.315
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Backend2023", "Backend2023.csproj", "{668AA7FE-483A-4F6C-9688-DC87CDC9162E}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Backend2023", "src\Backend2023\Backend2023.csproj", "{668AA7FE-483A-4F6C-9688-DC87CDC9162E}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Backend2023.Cognitive", "src\Backend2023.Cognitive\Backend2023.Cognitive.csproj", "{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{ACFF960F-A8F9-447E-8AB9-5477D6819795}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Backend2023.Cognitive.Tests", "tests\Backend2023.Cognitive.Tests\Backend2023.Cognitive.Tests.csproj", "{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand All @@ -15,10 +21,21 @@ Global
{668AA7FE-483A-4F6C-9688-DC87CDC9162E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{668AA7FE-483A-4F6C-9688-DC87CDC9162E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{668AA7FE-483A-4F6C-9688-DC87CDC9162E}.Release|Any CPU.Build.0 = Release|Any CPU
{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B7C8A821-7D5E-4AC1-A882-81AA6DC119DA}.Release|Any CPU.Build.0 = Release|Any CPU
{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5D56ED88-9E95-47AF-9ED8-2E01A988D01D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{5D56ED88-9E95-47AF-9ED8-2E01A988D01D} = {ACFF960F-A8F9-447E-8AB9-5477D6819795}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {BD6F36C8-DEB2-494D-A29E-F7D0866F76AF}
EndGlobalSection
Expand Down
8 changes: 8 additions & 0 deletions src/Backend2023.Cognitive/AzureConfiguration.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace Backend2023.Cognitive;

/// <summary>
/// Settings needed to reach the Azure speech resource: the subscription key and
/// the service region. Both values are handed to
/// <c>SpeechConfig.FromSubscription</c> by <c>SpeechServiceProvider</c>.
/// </summary>
public class AzureConfiguration
{
    /// <summary>
    /// Region of the speech resource (the tests in this solution use <c>westeurope</c>).
    /// Defaults to <c>null</c> until set through an object initializer.
    /// </summary>
    public string ServiceRegion { get; init; } = null!;

    /// <summary>
    /// Subscription key of the speech resource.
    /// Defaults to <c>null</c> until set through an object initializer.
    /// </summary>
    public string SubscriptionKey { get; init; } = null!;
}
13 changes: 13 additions & 0 deletions src/Backend2023.Cognitive/Backend2023.Cognitive.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net7.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.31.0" />
</ItemGroup>

</Project>
3 changes: 3 additions & 0 deletions src/Backend2023.Cognitive/SpeechRequest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
namespace Backend2023.Cognitive;

/// <summary>
/// Base record shared by all speech requests. It carries the two settings that
/// <c>SpeechServiceProvider</c> copies onto the SDK speech configuration.
/// </summary>
/// <param name="Language">Locale applied as both the synthesis and the recognition language.</param>
/// <param name="Voice">Name applied as the synthesis voice.</param>
public abstract record SpeechRequest(
    string Language,
    string Voice);
70 changes: 70 additions & 0 deletions src/Backend2023.Cognitive/SpeechServiceProvider.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;

namespace Backend2023.Cognitive;

/// <summary>
/// Wraps the Azure Speech SDK to turn text into audio bytes and audio into text,
/// using the credentials from an <see cref="AzureConfiguration"/>.
/// </summary>
public class SpeechServiceProvider
{
    /// <summary>
    /// WAV file read by <see cref="AudioToText"/> when the request carries no audio stream.
    /// Kept only so callers that relied on the previously hard-coded input keep working.
    /// </summary>
    private const string FallbackWavFile = "output.wav";

    private readonly AzureConfiguration _azureConfiguration;

    /// <summary>Creates a provider bound to the given Azure speech credentials.</summary>
    /// <param name="azureConfiguration">Subscription key and service region of the speech resource.</param>
    /// <exception cref="ArgumentNullException"><paramref name="azureConfiguration"/> is null.</exception>
    public SpeechServiceProvider(AzureConfiguration azureConfiguration)
    {
        _azureConfiguration = azureConfiguration ?? throw new ArgumentNullException(nameof(azureConfiguration));
    }

    /// <summary>
    /// Synthesizes the request text (plain text or SSML) and returns the produced audio bytes.
    /// </summary>
    /// <param name="textToSpeedRequest">Text, language, voice and SSML flag to synthesize with.</param>
    /// <returns>The audio data of the synthesis result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="textToSpeedRequest"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The synthesis was canceled by the service.</exception>
    public async Task<byte[]> TextToAudioByteArray(TextToSpeedRequest textToSpeedRequest)
    {
        ArgumentNullException.ThrowIfNull(textToSpeedRequest);

        // The synthesizer owns native resources; keep it alive until the result has been read.
        using SpeechSynthesizer synthesizer = CreateSynthesizer(textToSpeedRequest);
        using SpeechSynthesisResult result = await Synthesize(synthesizer, textToSpeedRequest);

        if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
            throw new InvalidOperationException(
                $"Cancelled(Error:{cancellation.ErrorCode},Details:{cancellation.ErrorDetails})");
        }

        return result.AudioData;
    }

    /// <summary>
    /// Recognizes a single utterance from the WAV audio in <see cref="SpeechToTextRequest.AudioStream"/>.
    /// </summary>
    /// <remarks>
    /// The stream is read from its current position and spooled to a temporary WAV file so
    /// that the SDK parses the WAV header itself; the file is deleted afterwards. When the
    /// request has no stream, the legacy file <c>output.wav</c> in the working directory is used.
    /// </remarks>
    /// <param name="request">Audio stream and recognition language.</param>
    /// <returns>The recognized text; empty when nothing was recognized.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The recognition was canceled because of an error.</exception>
    public async Task<string> AudioToText(SpeechToTextRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        if (request.AudioStream is null)
        {
            return await RecognizeWavFile(request, FallbackWavFile);
        }

        string tempWavFile = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.wav");
        try
        {
            await using (FileStream target = File.Create(tempWavFile))
            {
                await request.AudioStream.CopyToAsync(target);
            }

            return await RecognizeWavFile(request, tempWavFile);
        }
        finally
        {
            // RecognizeWavFile has disposed its SDK objects by now, so the file is no longer in use.
            File.Delete(tempWavFile);
        }
    }

    /// <summary>Runs a single-shot recognition on a WAV file and releases all SDK objects afterwards.</summary>
    private async Task<string> RecognizeWavFile(SpeechToTextRequest request, string wavFilePath)
    {
        using AudioConfig audioConfig = AudioConfig.FromWavFileInput(wavFilePath);
        using SpeechRecognizer recognizer = CreateRecognizer(request, audioConfig);

        SpeechRecognitionResult result = await recognizer.RecognizeOnceAsync();

        if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = CancellationDetails.FromResult(result);

            // Surface service errors the same way TextToAudioByteArray does instead of
            // returning an empty string; other cancellations still yield the (empty) text.
            if (cancellation.Reason == CancellationReason.Error)
            {
                throw new InvalidOperationException(
                    $"Cancelled(Error:{cancellation.ErrorCode},Details:{cancellation.ErrorDetails})");
            }
        }

        return result.Text;
    }

    /// <summary>Builds the SDK configuration from the Azure credentials and the request's language and voice.</summary>
    private SpeechConfig CreateSpeechConfig(SpeechRequest speechRequest)
    {
        SpeechConfig speechConfig = SpeechConfig.FromSubscription(_azureConfiguration.SubscriptionKey, _azureConfiguration.ServiceRegion);

        // NOTE(review): hard-coded local proxy - presumably a development setting; consider making it configurable.
        speechConfig.SetProxy("localhost", 3128);
        speechConfig.SpeechSynthesisLanguage = speechRequest.Language;
        speechConfig.SpeechRecognitionLanguage = speechRequest.Language;
        speechConfig.SpeechSynthesisVoiceName = speechRequest.Voice;

        return speechConfig;
    }

    /// <summary>Creates a synthesizer that keeps the audio in memory only.</summary>
    private SpeechSynthesizer CreateSynthesizer(TextToSpeedRequest request)
        // A null AudioConfig stops the SDK from also playing the audio on the default speaker.
        => new(CreateSpeechConfig(request), (AudioConfig?)null);

    /// <summary>Creates a recognizer reading from the given audio input.</summary>
    private SpeechRecognizer CreateRecognizer(SpeechToTextRequest request, AudioConfig audioConfig)
        => new(CreateSpeechConfig(request), audioConfig);

    /// <summary>Speaks the request text as SSML or plain text, depending on the request flag.</summary>
    private static Task<SpeechSynthesisResult> Synthesize(SpeechSynthesizer synthesizer, TextToSpeedRequest textToSpeedRequest)
        => textToSpeedRequest.IsSpeechSynthesisMarkupLanguage
            ? synthesizer.SpeakSsmlAsync(textToSpeedRequest.Text)
            : synthesizer.SpeakTextAsync(textToSpeedRequest.Text);
}
4 changes: 4 additions & 0 deletions src/Backend2023.Cognitive/SpeechToTextRequest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
namespace Backend2023.Cognitive;

/// <summary>
/// Request to transcribe audio into text.
/// </summary>
/// <param name="AudioStream">Audio input of the recognition request.</param>
/// <param name="Language">Locale forwarded to the base request; defaults to <c>de-CH</c>.</param>
/// <param name="Voice">Voice name forwarded to the base request; defaults to <c>de-CH-LeniNeural</c>.</param>
public record SpeechToTextRequest(
    Stream AudioStream,
    string Language = "de-CH",
    string Voice = "de-CH-LeniNeural")
    : SpeechRequest(Language, Voice);
4 changes: 4 additions & 0 deletions src/Backend2023.Cognitive/TextToSpeedRequest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
namespace Backend2023.Cognitive;

/// <summary>
/// Request to synthesize text into audio.
/// </summary>
/// <param name="Text">Content to speak; plain text, or SSML when <paramref name="IsSpeechSynthesisMarkupLanguage"/> is set.</param>
/// <param name="Language">Locale forwarded to the base request; defaults to <c>de-CH</c>.</param>
/// <param name="Voice">Voice name forwarded to the base request; defaults to <c>de-CH-LeniNeural</c>.</param>
/// <param name="IsSpeechSynthesisMarkupLanguage">
/// <c>true</c> to have <paramref name="Text"/> spoken as SSML, <c>false</c> (default) as plain text.
/// </param>
public record TextToSpeedRequest(
    string Text,
    string Language = "de-CH",
    string Voice = "de-CH-LeniNeural",
    bool IsSpeechSynthesisMarkupLanguage = false)
    : SpeechRequest(Language, Voice);
File renamed without changes.
1 change: 1 addition & 0 deletions Backend2023.csproj → src/Backend2023/Backend2023.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.AspNet.SignalR.Core" Version="2.4.3" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="7.0.10" />
<PackageReference Include="Microsoft.VisualStudio.Azure.Containers.Tools.Targets" Version="1.19.4" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
Expand Down
File renamed without changes.
File renamed without changes.
48 changes: 48 additions & 0 deletions src/Backend2023/Hubs/AudioHub.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
using Microsoft.AspNetCore.SignalR;

namespace Backend2023.Hubs;

/// <summary>
/// SignalR hub that collects audio uploaded in chunks, keeping one in-memory
/// buffer per connection.
/// </summary>
public class AudioHub : Hub
{
    /// <summary>Number of leading bytes treated as the WAV header of each chunk.</summary>
    private const int WavHeaderSize = 44;

    // Audio buffer per connection id. Hub instances are created per invocation and several
    // connections can call in at the same time, so the shared map must be thread-safe.
    private static readonly System.Collections.Concurrent.ConcurrentDictionary<string, MemoryStream> AudioData = new();

    /// <summary>
    /// Stream uploaded audio chunks with a single WAV header to a memory stream.
    /// The first chunk of a connection is stored completely (header included); for every
    /// later chunk the first <see cref="WavHeaderSize"/> bytes are dropped. Later chunks
    /// that contain no payload beyond the header are ignored.
    /// </summary>
    /// <param name="audioDataChunk">Audio chunk with a WAV Header of <see cref="WavHeaderSize"/>.</param>
    /// <returns>Upload Task.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="audioDataChunk"/> is null.</exception>
    public async Task TransmitUserAudio(byte[] audioDataChunk)
    {
        ArgumentNullException.ThrowIfNull(audioDataChunk);

        var connectionId = Context.ConnectionId;
        if (AudioData.TryGetValue(connectionId, out MemoryStream? buffer))
        {
            // Guard against short chunks: slicing past the end would throw.
            if (audioDataChunk.Length > WavHeaderSize)
            {
                await buffer.WriteAsync(audioDataChunk.AsMemory(WavHeaderSize));
            }

            return;
        }

        buffer = AudioData.GetOrAdd(connectionId, static _ => new MemoryStream());
        await buffer.WriteAsync(audioDataChunk);
    }

    /// <summary>
    /// Closes the uploaded audio stream after verifying the completion: reports the number
    /// of bytes buffered for the calling connection (0 when nothing was uploaded) through
    /// the <c>CloseAudioStreamResponse</c> client method.
    /// </summary>
    /// <returns>Completion Task.</returns>
    public async Task CloseAudioStream()
    {
        long uploadedBytes = AudioData.TryGetValue(Context.ConnectionId, out MemoryStream? buffer)
            ? buffer.Length
            : 0;

        await Clients.Caller.SendAsync("CloseAudioStreamResponse", uploadedBytes);

        // TODO: Generate Response
    }

    /// <summary>
    /// Releases the audio buffered for a connection once it disconnects, so the static map
    /// does not grow with every client that ever connected.
    /// </summary>
    /// <param name="exception">Error that caused the disconnect, if any.</param>
    /// <returns>Disconnect Task.</returns>
    public override async Task OnDisconnectedAsync(Exception? exception)
    {
        if (AudioData.TryRemove(Context.ConnectionId, out MemoryStream? buffer))
        {
            await buffer.DisposeAsync();
        }

        await base.OnDisconnectedAsync(exception);
    }
}
13 changes: 10 additions & 3 deletions Program.cs → src/Backend2023/Program.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using Backend2023.Hubs;

var builder = WebApplication.CreateBuilder(args);

// Add services to the container.
Expand All @@ -6,6 +8,7 @@
// Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen();
builder.Services.AddSignalR();

var app = builder.Build();

Expand All @@ -16,10 +19,14 @@
app.UseSwaggerUI();
}

app.UseHttpsRedirection();

app.UseAuthorization();
app.UseCors(x =>
x.AllowAnyMethod()
.WithOrigins("http://localhost:5173")
.AllowAnyHeader());

app.UseRouting();
app.MapControllers();

app.UseEndpoints(endpoints => { endpoints.MapHub<AudioHub>("/audiohub"); });

app.Run();
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net7.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>

<IsPackable>false</IsPackable>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="FluentAssertions" Version="6.12.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.2" />
<PackageReference Include="xunit" Version="2.4.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="coverlet.collector" Version="3.1.2">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\src\Backend2023.Cognitive\Backend2023.Cognitive.csproj" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
using FluentAssertions;

namespace Backend2023.Cognitive.Tests;

/// <summary>
/// Integration tests for <see cref="SpeechServiceProvider"/>. They call the live Azure
/// speech service, so credentials must be supplied through environment variables.
/// </summary>
public class TextToSpeechServiceProviderFixture
{
    /// <summary>Environment variable holding the speech subscription key.</summary>
    private const string SubscriptionKeyVariable = "SPEECH_KEY";

    /// <summary>Environment variable holding the speech service region (optional).</summary>
    private const string ServiceRegionVariable = "SPEECH_REGION";

    /// <summary>Region used when <see cref="ServiceRegionVariable"/> is not set.</summary>
    private const string DefaultServiceRegion = "westeurope";

    private readonly SpeechServiceProvider _speechServiceProvider;

    /// <summary>Builds the provider under test from the environment.</summary>
    /// <exception cref="InvalidOperationException">The subscription key variable is missing.</exception>
    public TextToSpeechServiceProviderFixture()
    {
        // Secrets must not live in source control. Keys that were committed here earlier
        // should be treated as compromised and rotated.
        string? subscriptionKey = Environment.GetEnvironmentVariable(SubscriptionKeyVariable);
        if (string.IsNullOrWhiteSpace(subscriptionKey))
        {
            throw new InvalidOperationException(
                $"Set the {SubscriptionKeyVariable} environment variable to run the speech integration tests.");
        }

        string? serviceRegion = Environment.GetEnvironmentVariable(ServiceRegionVariable);

        var azureConfiguration = new AzureConfiguration
        {
            ServiceRegion = string.IsNullOrWhiteSpace(serviceRegion) ? DefaultServiceRegion : serviceRegion,
            SubscriptionKey = subscriptionKey
        };

        _speechServiceProvider = new SpeechServiceProvider(azureConfiguration);
    }

    [Fact]
    public async Task Should_GenerateSpeech()
    {
        // Arrange
        TextToSpeedRequest request = new TextToSpeedRequest("Ich bin die Leni, und ich mag es durch den Regen zu tanzen.");

        // Act
        byte[] result = await _speechServiceProvider.TextToAudioByteArray(request);

        // Assert - an empty array would mean no audio was produced.
        result.Should().NotBeNullOrEmpty();
    }

    [Fact]
    public async Task Should_GenerateText()
    {
        // Arrange
        // NOTE(review): no audio stream is supplied; the provider is expected to read
        // "output.wav" from the working directory - confirm the file is deployed with the tests.
        SpeechToTextRequest request = new SpeechToTextRequest(null!);

        // Act
        string result = await _speechServiceProvider.AudioToText(request);

        // Assert
        result.Should().NotBeNull();
    }
}
1 change: 1 addition & 0 deletions tests/Backend2023.Cognitive.Tests/Usings.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
global using Xunit;

0 comments on commit 47082dd

Please sign in to comment.