From 692c7b0f34a4d855ae38723817e3b21572996fa8 Mon Sep 17 00:00:00 2001 From: j-mendez Date: Sat, 26 Oct 2024 07:02:35 -0400 Subject: [PATCH] chore(openai): add json_structure handling js browser events --- Cargo.lock | 12 ++++---- spider/Cargo.toml | 2 +- spider/src/utils/mod.rs | 50 +++++++++++++++++++++---------- spider_chrome/Cargo.toml | 2 +- spider_cli/Cargo.toml | 2 +- spider_transformations/Cargo.toml | 2 +- spider_utils/Cargo.toml | 2 +- spider_worker/Cargo.toml | 2 +- 8 files changed, 47 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 88c6d9239..1baf06df0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3909,7 +3909,7 @@ dependencies = [ [[package]] name = "spider" -version = "2.10.26" +version = "2.10.27" dependencies = [ "ahash", "async-openai", @@ -3970,7 +3970,7 @@ dependencies = [ [[package]] name = "spider_chrome" -version = "2.10.26" +version = "2.10.27" dependencies = [ "adblock", "async-tungstenite", @@ -4005,7 +4005,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "2.10.26" +version = "2.10.27" dependencies = [ "clap", "env_logger", @@ -4029,7 +4029,7 @@ dependencies = [ [[package]] name = "spider_transformations" -version = "2.10.26" +version = "2.10.27" dependencies = [ "aho-corasick", "fast_html2md", @@ -4051,7 +4051,7 @@ dependencies = [ [[package]] name = "spider_utils" -version = "2.10.26" +version = "2.10.27" dependencies = [ "indexmap 1.9.3", "serde", @@ -4063,7 +4063,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "2.10.26" +version = "2.10.27" dependencies = [ "env_logger", "lazy_static", diff --git a/spider/Cargo.toml b/spider/Cargo.toml index 7c8ff92d6..ea4839558 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "2.10.26" +version = "2.10.27" authors = [ "j-mendez " ] diff --git a/spider/src/utils/mod.rs b/spider/src/utils/mod.rs index b83d83f99..ed4c37e52 100644 --- a/spider/src/utils/mod.rs +++ 
b/spider/src/utils/mod.rs @@ -1974,23 +1974,43 @@ pub async fn openai_request_base( let mut tokens_used = crate::features::openai_common::OpenAIUsage::default(); let json_mode = gpt_configs.extra_ai_data; - let response_format = match gpt_configs.json_schema { - Some(ref structure) => async_openai::types::ResponseFormat::JsonSchema { - json_schema: async_openai::types::ResponseFormatJsonSchema { - description: structure.description.clone(), - name: structure.name.clone(), - schema: serde_json::from_str(&structure.schema.clone().unwrap_or_default()) - .unwrap_or_default(), - strict: structure.strict, - }, - }, - _ => { - if json_mode { - async_openai::types::ResponseFormat::JsonObject - } else { - async_openai::types::ResponseFormat::Text + let response_format = { + let mut mode = if json_mode { + async_openai::types::ResponseFormat::JsonObject + } else { + async_openai::types::ResponseFormat::Text + }; + + if let Some(ref structure) = gpt_configs.json_schema { + if let Some(ref schema) = structure.schema { + if let Ok(mut schema) = serde_json::from_str::<serde_json::Value>(&schema) { + if json_mode { + // Insert the "js" property into the schema's properties. Todo: capture if the js property exist and re-word prompt to match new js property with after removal. 
+ if let Some(properties) = schema.get_mut("properties") { + if let Some(properties_map) = properties.as_object_mut() { + properties_map.insert( + "js".to_string(), + serde_json::json!({ + "type": "string" + }), + ); + } + } + } + + mode = async_openai::types::ResponseFormat::JsonSchema { + json_schema: async_openai::types::ResponseFormatJsonSchema { + description: structure.description.clone(), + name: structure.name.clone(), + schema: if schema.is_null() { None } else { Some(schema) }, + strict: structure.strict, + }, + } + } } } + + mode }; match async_openai::types::ChatCompletionRequestAssistantMessageArgs::default() diff --git a/spider_chrome/Cargo.toml b/spider_chrome/Cargo.toml index b80929acb..181d61602 100644 --- a/spider_chrome/Cargo.toml +++ b/spider_chrome/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_chrome" -version = "2.10.26" +version = "2.10.27" rust-version = "1.70" authors = [ "j-mendez " diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index 0416c68d1..2e970af0b 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "2.10.26" +version = "2.10.27" authors = [ "j-mendez " ] diff --git a/spider_transformations/Cargo.toml b/spider_transformations/Cargo.toml index cc02f2fbb..6086bdf21 100644 --- a/spider_transformations/Cargo.toml +++ b/spider_transformations/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_transformations" -version = "2.10.26" +version = "2.10.27" authors = [ "j-mendez " ] diff --git a/spider_utils/Cargo.toml b/spider_utils/Cargo.toml index aca8f627e..2cdb33482 100644 --- a/spider_utils/Cargo.toml +++ b/spider_utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_utils" -version = "2.10.26" +version = "2.10.27" authors = [ "j-mendez " ] diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index e6330ddf2..061f84e6f 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" 
-version = "2.10.26" +version = "2.10.27" authors = [ "j-mendez " ]