-
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement support for self-hosted and local LLMs (#20)
- Loading branch information
1 parent
6cc1d37
commit 2926366
Showing
21 changed files
with
408 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
namespace AIStudio.Provider.SelfHosted; | ||
|
||
/// <summary>
/// Request payload for a chat completion.
/// </summary>
/// <param name="Model">Identifier of the model that should produce the completion.</param>
/// <param name="Messages">The messages that make up the conversation.</param>
/// <param name="Stream">True when the completion should be streamed.</param>
/// <param name="MaxTokens">Upper limit for the number of generated tokens.</param>
public readonly record struct ChatRequest(string Model, IList<Message> Messages, bool Stream, int MaxTokens);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
namespace AIStudio.Provider.SelfHosted; | ||
|
||
/// <summary>
/// A single message of a chat conversation.
/// </summary>
/// <param name="Content">The message text.</param>
/// <param name="Role">The role the message is attributed to.</param>
public readonly record struct Message(
    string Content,
    string Role
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
namespace AIStudio.Provider.SelfHosted; | ||
|
||
/// <summary>
/// Envelope for a list of models.
/// </summary>
/// <param name="Object">The object type reported in the response.</param>
/// <param name="Data">The models contained in the response.</param>
public readonly record struct ModelsResponse(
    string Object,
    Model[] Data
);
|
||
/// <summary>
/// Description of a single model.
/// </summary>
/// <param name="Id">The identifier of the model.</param>
/// <param name="Object">The object type reported for this entry.</param>
/// <param name="OwnedBy">The owner reported for the model.</param>
public readonly record struct Model(
    string Id,
    string Object,
    string OwnedBy
);
162 changes: 162 additions & 0 deletions
162
app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
using System.Runtime.CompilerServices; | ||
using System.Text; | ||
using System.Text.Json; | ||
|
||
using AIStudio.Chat; | ||
using AIStudio.Provider.OpenAI; | ||
using AIStudio.Settings; | ||
|
||
namespace AIStudio.Provider.SelfHosted; | ||
|
||
/// <summary>
/// Provider for self-hosted and local LLM servers. Chat completions are requested
/// from "chat/completions" and the available model is discovered via "models".
/// </summary>
/// <param name="hostname">
/// The address of the server; "/v1/" is appended and handed to <see cref="BaseProvider"/>
/// (presumably as the base address of the HTTP client; confirm in BaseProvider).
/// </param>
public sealed class ProviderSelfHosted(string hostname) : BaseProvider($"{hostname}/v1/"), IProvider
{
    /// <summary>
    /// JSON options shared by all requests and responses: property names are
    /// written and read in lower snake_case (e.g. MaxTokens becomes max_tokens).
    /// </summary>
    private static readonly JsonSerializerOptions JSON_SERIALIZER_OPTIONS = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
    };

    #region Implementation of IProvider

    /// <inheritdoc />
    public string Id => "Self-hosted";

    /// <inheritdoc />
    public string InstanceName { get; set; } = "Self-hosted";

    /// <summary>
    /// Streams a chat completion for the given chat thread, yielding the text
    /// deltas as they arrive. Nothing is yielded when the server reports no
    /// model or answers with a non-success status code.
    /// </summary>
    /// <param name="jsRuntime">Passed through to <see cref="GetTextModels"/>.</param>
    /// <param name="settings">Passed through to <see cref="GetTextModels"/>.</param>
    /// <param name="chatModel">Not used: the model reported by the server is used instead.</param>
    /// <param name="chatThread">The thread providing the system prompt and the message blocks.</param>
    /// <param name="token">Cancels the request and ends the stream.</param>
    /// <returns>The streamed content parts of the completion.</returns>
    public async IAsyncEnumerable<string> StreamChatCompletion(IJSRuntime jsRuntime, SettingsManager settings, Provider.Model chatModel, ChatThread chatThread, [EnumeratorCancellation] CancellationToken token = default)
    {
        // Ask the server which model it offers. Without a model
        // (e.g., the models request failed) there is nothing to stream:
        var availableModels = (await this.GetTextModels(jsRuntime, settings, token: token)).ToList();
        if(availableModels.Count == 0)
            yield break;

        // Prepare the system prompt:
        var systemPrompt = new Message
        {
            Role = "system",
            Content = chatThread.SystemPrompt,
        };

        // Convert the non-empty text blocks of the thread into chat messages:
        var history = chatThread.Blocks
            .Where(n => n.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace((n.Content as ContentText)?.Text))
            .Select(n => new Message
            {
                Role = n.Role switch
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
                    ChatRole.SYSTEM => "system",
                    _ => "user",
                },
                Content = n.Content switch
                {
                    ContentText text => text.Text,
                    _ => string.Empty,
                }
            });

        // Prepare the HTTP chat request:
        var providerChatRequest = JsonSerializer.Serialize(new ChatRequest
        {
            Model = availableModels[0].Id,

            // Build the messages:
            // - First of all the system prompt
            // - Then the non-empty user and AI messages
            Messages = [systemPrompt, ..history],

            // Right now, we only support streaming completions:
            Stream = true,
            MaxTokens = -1,
        }, JSON_SERIALIZER_OPTIONS);

        // Build the HTTP post request and set the content:
        using var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
        request.Content = new StringContent(providerChatRequest, Encoding.UTF8, "application/json");

        // Send the request with the ResponseHeadersRead option.
        // This allows us to read the stream as soon as the headers are received.
        // This is important because we want to stream the responses.
        using var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token);

        // An error response carries no completion data:
        if(!response.IsSuccessStatusCode)
            yield break;

        // Open the response stream and read it line by line. Stream and
        // reader are disposed when the enumeration ends or is abandoned:
        await using var providerStream = await response.Content.ReadAsStreamAsync(token);
        using var streamReader = new StreamReader(providerStream);

        while(true)
        {
            // Check if the token is cancelled:
            if(token.IsCancellationRequested)
                yield break;

            // Read the next line; null signals the end of the stream. This
            // avoids StreamReader.EndOfStream, which may block synchronously:
            var line = await streamReader.ReadLineAsync(token);
            if(line is null)
                yield break;

            // Skip empty lines:
            if(string.IsNullOrWhiteSpace(line))
                continue;

            // Skip lines that do not start with "data: ". According
            // to the specification, we only want to read the data lines:
            if(!line.StartsWith("data: ", StringComparison.Ordinal))
                continue;

            // Check if the line is the end of the stream:
            if(line.StartsWith("data: [DONE]", StringComparison.Ordinal))
                yield break;

            ResponseStreamLine providerResponse;
            try
            {
                // We know that the line starts with "data: ". Hence, we can
                // skip the first 6 characters to get the JSON data after that.
                var jsonData = line[6..];

                // Deserialize the JSON data:
                providerResponse = JsonSerializer.Deserialize<ResponseStreamLine>(jsonData, JSON_SERIALIZER_OPTIONS);
            }
            catch(JsonException)
            {
                // Skip invalid JSON data:
                continue;
            }

            // Skip empty responses:
            if(providerResponse == default || providerResponse.Choices.Count == 0)
                continue;

            // Yield the response:
            yield return providerResponse.Choices[0].Delta.Content;
        }
    }

    #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
    /// <inheritdoc />
    // This provider yields no images; the stream ends immediately.
    // NOTE(review): the default of promptNegative is FilterOperator.String.Empty, which
    // looks like an accidental pick instead of an empty string constant -- confirm
    // against the IProvider declaration before changing it.
    public async IAsyncEnumerable<ImageURL> StreamImageCompletion(IJSRuntime jsRuntime, SettingsManager settings, Provider.Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, [EnumeratorCancellation] CancellationToken token = default)
    {
        yield break;
    }
    #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously

    /// <summary>
    /// Queries the "models" endpoint and returns the first reported model.
    /// </summary>
    /// <param name="jsRuntime">Not used by this provider.</param>
    /// <param name="settings">Not used by this provider.</param>
    /// <param name="apiKeyProvisional">Not used by this provider.</param>
    /// <param name="token">Cancels the request.</param>
    /// <returns>
    /// A sequence with exactly one model, or an empty sequence when the request
    /// fails or the server reports no models.
    /// </returns>
    public async Task<IEnumerable<Provider.Model>> GetTextModels(IJSRuntime jsRuntime, SettingsManager settings, string? apiKeyProvisional = null, CancellationToken token = default)
    {
        using var request = new HttpRequestMessage(HttpMethod.Get, "models");
        using var response = await this.httpClient.SendAsync(request, token);
        if(!response.IsSuccessStatusCode)
            return [];

        // Use the shared snake_case options, so that this path reads
        // the JSON the same way as the chat completion stream does:
        var modelResponse = await response.Content.ReadFromJsonAsync<ModelsResponse>(JSON_SERIALIZER_OPTIONS, token);

        // A missing or empty data array means that no model is available:
        if(modelResponse.Data is not { Length: > 0 } models)
            return [];

        if(models.Length > 1)
            Console.WriteLine("Warning: multiple models found; using the first one.");

        return [ new Provider.Model(models[0].Id) ];
    }

    /// <inheritdoc />
    // This provider offers no image models; the result is always empty.
    public Task<IEnumerable<Provider.Model>> GetImageModels(IJSRuntime jsRuntime, SettingsManager settings, string? apiKeyProvisional = null, CancellationToken token = default)
    {
        return Task.FromResult(Enumerable.Empty<Provider.Model>());
    }

    #endregion
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.