Skip to content

Commit

Permalink
Summarize messages and images using gpt-4o-mini
Browse files Browse the repository at this point in the history
  • Loading branch information
kiliankoe committed Oct 25, 2024
1 parent 01ffaae commit c773a1b
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 54 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
/*.xcodeproj
xcuserdata/
.env.local
.vscode
203 changes: 171 additions & 32 deletions Sources/SwiftDEBot/Command/Message Commands/Summarize.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,73 @@ import Foundation
import DiscordBM

struct SummarizeCommand: MessageCommand {
let helpText = "`!summarize`: Fasse einen Link im Reply zusammen."
let helpText = "`!summarize`: Fasse einen Link, eine Nachricht oder Bilder zusammen."

/// Handles the `!summarize` message command.
///
/// Messages whose content is not exactly `!summarize` are ignored. Otherwise a typing
/// indicator is shown, a summary is produced by `generateSummary(for:)` and sent to the
/// originating channel in chunks produced by `maxMessageLengthChunks()`.
///
/// - Parameters:
///   - client: Discord client used for the typing indicator and for sending messages.
///   - message: The incoming message that may contain the command.
/// - Throws: Errors from the `setTyping` call, which sits outside the `do` block below.
///   Errors raised while generating or sending the summary are caught and reported or logged here.
func run(client: DiscordClient, message: Gateway.MessageCreate) async throws {
guard message.content == "!summarize" else { return }

// NOTE(review): the two guards below bind `replyContent`/`url`, which nothing in the
// do/catch further down uses, and they repeat the reply check done in
// `generateSummary(for:)`. Presumably these are lines of the previous implementation
// left in by the diff rendering — confirm against the real file.
guard let replyContent = message.referenced_message?.value.content else {
try await client.send(
"Schicke bitte `!summarize` als Reply auf eine Nachricht mit einem Link.",
to: message.channel_id
)
return
}
guard let url = replyContent.firstURL else {
try await client.send(
"In der referenzierten Nachricht sehe ich leider keine URL 🤨",
to: message.channel_id
)
return
try await client.setTyping(in: message.channel_id)

do {
let summary = try await generateSummary(for: message)
// The summary is split into several messages by `maxMessageLengthChunks()`.
for chunk in summary.maxMessageLengthChunks() {
try await client.send(chunk, to: message.channel_id)
}
} catch let error as SummaryError {
// Every SummaryError is reported to the channel as a message.
do {
switch error {
case .contentTooShort:
try await client.send(
"Srsly? Das fasse ich nicht zusammen. Lies das gefälligst selbst.",
to: message.channel_id
)
case .noReply:
try await client.send(
"Schicke bitte `!summarize` als Reply auf eine Nachricht.",
to: message.channel_id
)
case .brokenURL, .missingKagiToken, .missingOpenAIToken:
// The interpolated error is shown to the channel as part of the message.
try await client.send(
"💀 \(error)",
to: message.channel_id
)
case .emptySummary:
try await client.send(
"Das kann ich leider nicht zusammenfassen 🫥: \(error)",
to: message.channel_id
)
}
} catch {
// Sending the error notice itself failed; only log it.
log.error("Failure to send message: \(error)")
}
} catch let error {
// NOTE(review): this branch also receives non-SummaryError failures thrown by
// `generateSummary(for:)`, not only send failures, so the log text can be misleading.
log.error("Failure to send message: \(error)")
}
// NOTE(review): `url` is only bound by the guard flagged above — presumably another
// leftover line from the previous implementation; confirm.
log.info("Summarizing \(url)")
}

try await client.setTyping(in: message.channel_id)
/// Produces a summary for the message that `message` is a reply to.
///
/// If the referenced message's text contains a URL, that URL is summarized; otherwise the
/// referenced message itself is summarized.
///
/// - Parameter message: The `!summarize` command message.
/// - Returns: The generated summary text.
/// - Throws: `SummaryError.noReply` when `message` does not reference another message, plus
///   anything thrown by the underlying `summarize` calls.
func generateSummary(for message: Gateway.MessageCreate) async throws -> String {
    guard let target = message.referenced_message?.value else {
        throw SummaryError.noReply
    }

    // A link in the referenced message takes precedence over its text and attachments.
    guard let link = target.content.firstURL else {
        return try await summarize(messageContent: target)
    }
    return try await summarize(url: link)
}

func summarize(url: String) async throws -> String {
log.info("Summarizing \(url)")
guard let encodedURL = url.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) else {
log.error("Unable to URL encode \(url)")
return
throw SummaryError.brokenURL
}

guard let apiToken = ProcessInfo.processInfo.environment["KAGI_API_TOKEN"] else {
log.error("Necessary env var not found, please set KAGI_API_TOKEN.")
return
throw SummaryError.missingKagiToken
}

let response = try await httpClient.get(
Expand All @@ -41,20 +77,51 @@ struct SummarizeCommand: MessageCommand {
response: KagiResponse.self
)

let summary = response.data.output
if response.data.output.isEmpty {
throw SummaryError.emptySummary
}

guard !summary.isEmpty else {
try await client.send(
"Das kann ich leider nicht zusammenfassen 🫥",
to: message.channel_id
)
return
return response.data.output
}

/// Summarizes a message's text and/or attachments via the OpenAI chat completions endpoint.
///
/// - Parameter messageContent: The message to summarize.
/// - Returns: The content of the first choice in the response.
/// - Throws: `SummaryError.missingOpenAIToken` when the `OPENAI_API_TOKEN` environment variable
///   is not set, `SummaryError.contentTooShort` when the text has fewer than 500 characters and
///   there are no attachments, `SummaryError.emptySummary` when the response has no choices,
///   plus anything thrown by the HTTP call.
func summarize(messageContent: Gateway.MessageCreate) async throws -> String {
log.info("Summarizing message from \(messageContent.author?.mentionHandle ?? "unknown user")")

guard let apiToken = ProcessInfo.processInfo.environment["OPENAI_API_TOKEN"] else {
log.error("Necessary env var not found, please set OPENAI_API_TOKEN.")
throw SummaryError.missingOpenAIToken
}

// Debug output written to stdout rather than through `log`.
print("Message Length: \(messageContent.content.count)")
print("Has Attachments: \(!messageContent.attachments.isEmpty)")
// Short text-only messages are rejected; any attachment bypasses the length requirement.
guard messageContent.content.count >= 500 || !messageContent.attachments.isEmpty else {
throw SummaryError.contentTooShort
}

// NOTE(review): `summary` is not bound yet at this point and `message` is not a parameter
// of this function — presumably a line of the previous implementation left in by the diff
// rendering; confirm against the real file.
try await client.send(summary, to: message.channel_id)
let response = try await httpClient.post(
"https://api.openai.com/v1/chat/completions",
headers: ["Authorization": "Bearer \(apiToken)"],
body: OpenAIRequest(message: messageContent),
response: OpenAIResponse.self
)

// Only the first choice is used; a response without choices counts as an empty summary.
guard let summary = response.choices.first else {
throw SummaryError.emptySummary
}

return summary.message.content
}
}

/// Failures that can occur while producing a summary.
enum SummaryError: Error {
    // The request itself cannot be summarized: the text is under the minimum length with no
    // attachments, or the command was not sent as a reply.
    case contentTooShort, noReply

    // The URL found in the referenced message could not be percent-encoded.
    case brokenURL

    // A required API token environment variable (KAGI_API_TOKEN / OPENAI_API_TOKEN) is not set.
    case missingKagiToken, missingOpenAIToken

    // The summarization backend returned an empty output or no choices.
    case emptySummary
}

private extension String {
var firstURL: String? {
if let found = self.firstMatch(of: #/(https?://\S+)/#) {
Expand All @@ -64,10 +131,82 @@ private extension String {
}
}

private struct KagiResponse: Decodable {
let data: ResponseData
private struct KagiResponse: Decodable {
let data: ResponseData

struct ResponseData: Decodable {
let output: String
}
}

/// The subset of an OpenAI chat completions response that is decoded by this file.
struct OpenAIResponse: Decodable {
    /// Completion candidates as delivered in the response's `choices` array.
    let choices: [Choice]

    /// A single completion candidate.
    struct Choice: Decodable {
        /// The generated message.
        let message: Message
        /// Decoded from the `finish_reason` key of the response.
        let finish_reason: String

        /// A chat message consisting of a role and its text content.
        struct Message: Decodable {
            let role: String
            let content: String
        }
    }
}

/// Request body for the OpenAI chat completions call, built from a Discord message.
private struct OpenAIRequest: Encodable {
let model: String
let messages: [Message]
let temperature: Double

/// Builds a request asking for a summary of `message`.
///
/// Attachments whose content type starts with `image/` become `image_url` elements; all other
/// attachments are skipped. Non-empty message text is appended as a `text` element.
init(message: Gateway.MessageCreate) {
self.model = "gpt-4o-mini"

// Keep only image attachments; attachments without a content type are dropped as well.
var content = message.attachments.compactMap { attachment -> Message.ContentUnion.ContentElement? in
guard let contentType = attachment.content_type, contentType.starts(with: "image/") else { return nil }
return .init(type: "image_url", text: nil, image_url: .init(url: attachment.url))
}

// NOTE(review): the nested `ResponseData` declaration and the two closing braces below do
// not belong to this initializer — presumably lines of the previous `KagiResponse`
// definition left in by the diff rendering; confirm against the real file.
struct ResponseData: Decodable {
let output: String
}
}
// The text element comes after any image elements.
if !message.content.isEmpty {
content.append(.init(type: "text", text: message.content, image_url: nil))
}

// A system prompt with the summarization instructions, followed by the user content.
self.messages = [
.init(
role: "system",
content: .string("Du bist ein Assistent, der Text und Bildinhalte zusammenfasst. Bitte schreibe eine kurze Zusammenfassung für die folgende Nachricht, bestehend entweder aus Text, Bild oder beidem. Falls beides, dann bezieht sich der Text zwar vermutlich auf das Bild, die Zusammenfassung des Bildinhalts hat aber Vorrang.")
),
.init(role: "user", content: .contentElementArray(content))
]
self.temperature = 0.7
}

/// One chat message of the request.
struct Message: Encodable {
let role: String
let content: ContentUnion

/// Message content that is encoded either as a bare string or as an array of elements.
enum ContentUnion: Encodable {
case contentElementArray([ContentElement])
case string(String)

/// A typed content part; `text` and `image_url` are both optional and the initializer
/// above sets exactly one of them per element.
struct ContentElement: Encodable {
// This could likely be more sensibly represented as a union type, but eh... 🤷
let type: String
let text: String?
let image_url: ImageURL?

struct ImageURL: Encodable {
let url: String
}
}

/// Encodes the payload directly through a single-value container, without a wrapper
/// object for the enum case.
func encode(to encoder: Encoder) throws {
var container = encoder.singleValueContainer()
switch self {
case .string(let stringValue):
try container.encode(stringValue)
case .contentElementArray(let contentElements):
try container.encode(contentElements)
}
}
}
}
}
22 changes: 0 additions & 22 deletions Sources/SwiftDEBot/Utils/HTTPClient+get.swift

This file was deleted.

51 changes: 51 additions & 0 deletions Sources/SwiftDEBot/Utils/HTTPClient+parsed.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import AsyncHTTPClient
import Foundation
import NIO

extension HTTPClient {
    /// Performs a GET request and decodes the JSON response body.
    ///
    /// - Parameters:
    ///   - url: URL to request.
    ///   - headers: Optional header fields to send with the request.
    ///   - response: Type the response body is decoded into.
    /// - Returns: The decoded response body.
    /// - Throws: Errors from executing the request, collecting the body or decoding it. For any
    ///   status other than `200 OK` the status description is thrown.
    func get<Response>(
        _ url: String,
        headers: [String: String]? = nil,
        response: Response.Type
    ) async throws -> Response where Response: Decodable {
        let request = makeRequest(url: url, headers: headers)
        return try await executeAndDecode(request, as: response)
    }

    /// Performs a POST request with an optional JSON body and decodes the JSON response body.
    ///
    /// - Parameters:
    ///   - url: URL to request.
    ///   - headers: Optional header fields to send with the request.
    ///   - body: Value that is JSON-encoded and sent as the request body; `nil` sends no body.
    ///   - response: Type the response body is decoded into.
    /// - Returns: The decoded response body.
    /// - Throws: Errors from encoding `body`, executing the request, collecting the body or
    ///   decoding it. For any status other than `200 OK` the status description is thrown.
    func post<Response>(
        _ url: String,
        headers: [String: String]? = nil,
        body: (any Encodable)?,
        response: Response.Type
    ) async throws -> Response where Response: Decodable {
        var request = makeRequest(url: url, headers: headers)
        request.method = .POST
        request.headers.add(name: "Content-Type", value: "application/json")
        if let body {
            // The encoded payload is deliberately not printed: it can contain user message
            // content, and the previous debug print force-unwrapped the UTF-8 conversion.
            let data = try JSONEncoder().encode(body)
            var buffer = ByteBufferAllocator().buffer(capacity: data.count)
            buffer.writeBytes(data)
            request.body = .bytes(buffer)
        }
        return try await executeAndDecode(request, as: response)
    }

    /// Creates a request for `url` carrying the given header fields, if any.
    private func makeRequest(url: String, headers: [String: String]?) -> HTTPClientRequest {
        var request = HTTPClientRequest(url: url)
        if let headers {
            request.headers = .init(headers.map { ($0.key, $0.value) })
        }
        return request
    }

    /// Executes `request` with a 60 second timeout and decodes a `200 OK` JSON body as `type`.
    private func executeAndDecode<Response: Decodable>(
        _ request: HTTPClientRequest,
        as type: Response.Type
    ) async throws -> Response {
        let maxResponseBytes = 50 * 1024 * 1024 // 50 MB

        let result = try await execute(request, timeout: .seconds(60))
        guard result.status == .ok else {
            // Throwing a String relies on `String` conforming to `Error`, which is not declared
            // in this extension — presumably elsewhere in the project.
            throw result.status.description
        }
        let payload = try await result.body.collect(upTo: maxResponseBytes)
        return try JSONDecoder().decode(Response.self, from: payload)
    }
}
19 changes: 19 additions & 0 deletions Sources/SwiftDEBot/Utils/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,23 @@ extension String {
.joined(separator: " ")
.trimmingCharacters(in: .whitespacesAndNewlines)
}

/// Splits the string into chunks of at most `maxChars` characters, breaking at spaces.
///
/// Chunks carry no leading separator and are never empty. A single word longer than
/// `maxChars` is hard-split so that no chunk exceeds the limit. Runs of spaces inside a
/// chunk are preserved.
///
/// - Parameter maxChars: Maximum number of characters per chunk. For a non-positive value the
///   limit cannot be honored and the whole string is returned as a single chunk.
/// - Returns: The chunks in order; empty when the string is empty or consists only of spaces.
func maxMessageLengthChunks(maxChars: Int = 2000) -> [String] {
    guard !isEmpty else { return [] }
    guard maxChars > 0 else { return [self] }

    var chunks: [String] = []
    var currentChunk = ""

    for word in split(separator: " ", omittingEmptySubsequences: false) {
        var piece = word

        // The chunk is full once the separator plus the next word no longer fit.
        if !currentChunk.isEmpty, currentChunk.count + 1 + piece.count > maxChars {
            chunks.append(currentChunk)
            currentChunk = ""
        }

        // A word that exceeds the limit on its own is cut into limit-sized parts. The current
        // chunk is always empty here, because an oversized word forces the flush above.
        while piece.count > maxChars {
            chunks.append(String(piece.prefix(maxChars)))
            piece = piece.dropFirst(maxChars)
        }

        if currentChunk.isEmpty {
            currentChunk = String(piece)
        } else {
            currentChunk.append(" ")
            currentChunk.append(contentsOf: piece)
        }
    }

    if !currentChunk.isEmpty {
        chunks.append(currentChunk)
    }
    return chunks
}
}

0 comments on commit c773a1b

Please sign in to comment.