support stable diffusion plugin (mlc-ai#49)

slash-under · Oct 17, 2023 · 516f3f4 · 516f3f4
1 parent 3454ea3
commit 516f3f4
Show file tree

Hide file tree

Showing 14 changed files with 269 additions and 47 deletions.
diff --git a/README.md b/README.md
@@ -41,18 +41,18 @@
     - [SerpAPI](https://js.langchain.com/docs/api/tools/classes/SerpAPI)
     - [BingSerpAPI](https://js.langchain.com/docs/api/tools/classes/BingSerpAPI)
     - DuckDuckGo
-
   - 计算
     - [Calculator](https://js.langchain.com/docs/api/tools_calculator/classes/Calculator)
-
   - 网络请求
     - [WebBrowser](https://js.langchain.com/docs/api/tools_webbrowser/classes/WebBrowser)
-
   - 其它
     - [Wiki](https://js.langchain.com/docs/api/tools/classes/WikipediaQueryRun)
     - DALL-E
       - DALL-E 插件需要配置 R2 存储，请参考 [Cloudflare R2 服务配置指南](./docs/cloudflare-r2-cn.md) 配置
-      - ~只支持非 Cloudflare 环境的部署方式，在 Cloudflare 下该插件会失效 https://github.com/Hk-Gosuto/ChatGPT-Next-Web-LangChain/issues/43~
+    - StableDiffusion
+      - 本插件目前为测试版本，后续可能会有较大的变更，请谨慎使用
+      - 使用本插件需要一定的专业知识，Stable Diffusion 本身的相关问题不在本项目的解答范围内，如果您确定要使用本插件请参考 [Stable Diffusion 插件配置指南](./docs/stable-diffusion-plugin-cn.md) 文档进行配置
+      - StableDiffusion 插件需要配置 R2 存储，请参考 [Cloudflare R2 服务配置指南](./docs/cloudflare-r2-cn.md) 配置
 
 
 

diff --git a/app/api/langchain-tools/stable_diffusion_image_generator.ts b/app/api/langchain-tools/stable_diffusion_image_generator.ts
@@ -0,0 +1,53 @@
+import { Tool } from "langchain/tools";
+import S3FileStorage from "../../utils/r2_file_storage";
+
+/**
+ * LangChain tool that turns a text prompt into an image by calling the
+ * `/sdapi/v1/txt2img` endpoint of a stable-diffusion-webui server and then
+ * storing the base64-decoded image bytes through `S3FileStorage`.
+ *
+ * All settings are read from `STABLE_DIFFUSION_*` environment variables each
+ * time the tool is called.
+ */
+export class StableDiffusionWrapper extends Tool {
+  name = "stable_diffusion_image_generator";
+
+  description = `stable diffusion is an ai art generation model similar to dalle-2.
+    input requires english.
+    output will be the image link url.
+    use markdown to display images. like: ![img](/api/file/xxx.png)`;
+
+  /**
+   * Parses a numeric environment value.
+   * Environment variables are always strings, so they are converted here to
+   * keep the JSON payload typed the same way as the numeric defaults.
+   * Unset, blank, or non-numeric values yield `fallback`.
+   */
+  private static numberFromEnv(
+    raw: string | undefined,
+    fallback: number,
+  ): number {
+    if (raw === undefined || raw.trim() === "") return fallback;
+    const parsed = Number(raw);
+    return Number.isFinite(parsed) ? parsed : fallback;
+  }
+
+  /**
+   * Parses a boolean environment value ("true"/"1"/"yes"/"on" and their
+   * negative counterparts, case-insensitive). Anything else yields `fallback`.
+   */
+  private static booleanFromEnv(
+    raw: string | undefined,
+    fallback: boolean,
+  ): boolean {
+    if (raw === undefined) return fallback;
+    const value = raw.trim().toLowerCase();
+    if (["1", "true", "yes", "on"].includes(value)) return true;
+    if (["0", "false", "no", "off"].includes(value)) return false;
+    return fallback;
+  }
+
+  /**
+   * Generates one image for `prompt` and uploads it.
+   *
+   * @param prompt Text prompt forwarded unchanged to the txt2img endpoint.
+   * @returns The path returned by `S3FileStorage.put`, or the string
+   *   "No image was generated" when the response contains no image.
+   * @throws Error when `STABLE_DIFFUSION_API_URL` is not set or the server
+   *   answers with a non-2xx status. Errors from `fetch` and
+   *   `S3FileStorage.put` propagate unchanged.
+   * @ignore
+   */
+  async _call(prompt: string): Promise<string> {
+    const env = process.env;
+
+    // Fail early with a readable message instead of requesting
+    // "undefined/sdapi/v1/txt2img".
+    const configuredUrl = env.STABLE_DIFFUSION_API_URL?.trim();
+    if (!configuredUrl) {
+      throw new Error(
+        `[${this.name}] STABLE_DIFFUSION_API_URL is not configured`,
+      );
+    }
+    // Drop trailing slashes so the joined path never contains "//sdapi".
+    const baseUrl = configuredUrl.replace(/\/+$/, "");
+
+    const data = {
+      prompt: prompt,
+      negative_prompt:
+        env.STABLE_DIFFUSION_NEGATIVE_PROMPT ??
+        "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
+      seed: -1,
+      subseed: -1,
+      subseed_strength: 0,
+      batch_size: 1,
+      n_iter: 1,
+      steps: StableDiffusionWrapper.numberFromEnv(
+        env.STABLE_DIFFUSION_STEPS,
+        20,
+      ),
+      cfg_scale: StableDiffusionWrapper.numberFromEnv(
+        env.STABLE_DIFFUSION_CFG_SCALE,
+        7,
+      ),
+      width: StableDiffusionWrapper.numberFromEnv(
+        env.STABLE_DIFFUSION_WIDTH,
+        720,
+      ),
+      height: StableDiffusionWrapper.numberFromEnv(
+        env.STABLE_DIFFUSION_HEIGHT,
+        720,
+      ),
+      restore_faces: StableDiffusionWrapper.booleanFromEnv(
+        env.STABLE_DIFFUSION_RESTORE_FACES,
+        false,
+      ),
+      eta: 0,
+      sampler_index: env.STABLE_DIFFUSION_SAMPLER_INDEX ?? "Euler a",
+    };
+    console.log(`[${this.name}]`, data);
+
+    const response = await fetch(`${baseUrl}/sdapi/v1/txt2img`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify(data),
+    });
+    if (!response.ok) {
+      throw new Error(
+        `[${this.name}] txt2img request failed: ${response.status} ${response.statusText}`,
+      );
+    }
+
+    // The payload is untrusted: only read `images[0]` after checking its shape,
+    // so a body without `images` does not raise a TypeError.
+    const json: unknown = await response.json();
+    const images = (json as { images?: unknown } | null)?.images;
+    const imageBase64: unknown = Array.isArray(images) ? images[0] : undefined;
+    if (typeof imageBase64 !== "string" || imageBase64.length === 0) {
+      return "No image was generated";
+    }
+
+    const buffer = Buffer.from(imageBase64, "base64");
+    const filePath = await S3FileStorage.put(`${Date.now()}.png`, buffer);
+    console.log(`[${this.name}]`, filePath);
+    return filePath;
+  }
+}
diff --git a/app/api/langchain/tool/agent/route.ts b/app/api/langchain/tool/agent/route.ts
@@ -21,6 +21,7 @@ import { DynamicTool, Tool } from "langchain/tools";
 import { DallEAPIWrapper } from "@/app/api/langchain-tools/dalle_image_generator";
 import { BaiduSearch } from "@/app/api/langchain-tools/baidu_search";
 import { GoogleSearch } from "@/app/api/langchain-tools/google_search";
+import { StableDiffusionWrapper } from "@/app/api/langchain-tools/stable_diffusion_image_generator";
 
 const serverConfig = getServerSideConfig();
 
@@ -228,10 +229,13 @@ async function handle(req: NextRequest) {
     const webBrowserTool = new WebBrowser({ model, embeddings });
     const calculatorTool = new Calculator();
     const dallEAPITool = new DallEAPIWrapper(apiKey, baseUrl);
+    const stableDiffusionTool = new StableDiffusionWrapper();
     if (useTools.includes("web-search")) tools.push(searchTool);
     if (useTools.includes(webBrowserTool.name)) tools.push(webBrowserTool);
     if (useTools.includes(calculatorTool.name)) tools.push(calculatorTool);
     if (useTools.includes(dallEAPITool.name)) tools.push(dallEAPITool);
+    if (useTools.includes(stableDiffusionTool.name))
+      tools.push(stableDiffusionTool);
 
     useTools.forEach((toolName) => {
       if (toolName) {

diff --git a/app/masks/cn.ts b/app/masks/cn.ts
@@ -1,6 +1,35 @@
 import { BuiltinMask } from "./typing";
 
 export const CN_MASKS: BuiltinMask[] = [
+  {
+    avatar: "1f3a8",
+    name: "Stable Diffusion",
+    context: [
+      {
+        id: "SVx3ybvohJAKXDQ1KKQcs",
+        date: "",
+        role: "system",
+        content:
+          "Stable Diffusion is an AI art generation model similar to DALLE-2.\nHere are some prompts for generating art with Stable Diffusion.\n\nPrompt Example:\n\n- A ghostly apparition drifting through a haunted mansion's grand ballroom, illuminated by flickering candlelight. Eerie, ethereal, moody lighting.\n- portait of a homer simpson archer shooting arrow at forest monster, front game card, drark, marvel comics, dark, smooth\n- pirate, deep focus, fantasy, matte, sharp focus\n- red dead redemption 2, cinematic view, epic sky, detailed, low angle, high detail, warm lighting, volumetric, godrays, vivid, beautiful\n- a fantasy style portrait painting of rachel lane / alison brie hybrid in the style of francois boucher oil painting, rpg portrait\n- athena, greek goddess, claudia black, bronze greek armor, owl crown, d & d, fantasy, portrait, headshot, sharp focus\n- closeup portrait shot of a large strong female biomechanic woman in a scenic scifi environment, elegant, smooth, sharp focus, warframe\n- ultra realistic illustration of steve urkle as the hulk, elegant, smooth, sharp focus\n- portrait of beautiful happy young ana de armas, ethereal, realistic anime, clean lines, sharp lines, crisp lines, vibrant color scheme\n- A highly detailed and hyper realistic portrait of a gorgeous young ana de armas, lisa frank, butterflies, floral, sharp focus\n- lots of delicious tropical fruits with drops of moisture on table, floating colorful water, mysterious expression, in a modern and abstract setting, with bold and colorful abstract art, blurred background, bright lighting\n- 1girl, The most beautiful form of chaos, Fauvist design, Flowing colors, Vivid colors, dynamic angle, fantasy world\n- solo, sitting, close-up, girl in the hourglass, Sand is spilling out of the broken hourglass, flowing sand, huge hourglass art, hologram, particles, nebula, magic circle\n- geometric abstract background, 1girl, depth of field, zentangle, mandala, tangle, entangle, beautiful and 
aesthetic, dynamic angle, glowing skin, floating colorful sparkles the most beautiful form of chaos, elegant, a brutalist designed, vivid colours, romanticism\n\nFollow the structure of the example prompts. This means a very short description of the scene, followed by modifiers divided by commas to alter the mood, style, lighting, and more.\nIf the user input is in English, directly use the user input as a parameter to call the stable_diffusion_image_generator plugin. If the user input is not in English, generate an English prompt word based on the example and then call the stable_diffusion_image_generator plugin.",
+      },
+    ],
+    modelConfig: {
+      model: "gpt-3.5-turbo",
+      temperature: 1,
+      top_p: 1,
+      max_tokens: 2000,
+      presence_penalty: 0,
+      frequency_penalty: 0,
+      sendMemory: false,
+      historyMessageCount: 0,
+      compressMessageLengthThreshold: 1000,
+    },
+    lang: "cn",
+    builtin: false,
+    createdAt: 1697205441045,
+    usePlugins: true,
+    hideContext: true,
+  },
   {
     avatar: "1f5bc-fe0f",
     name: "以文搜图",

diff --git a/app/masks/en.ts b/app/masks/en.ts
@@ -1,6 +1,35 @@
 import { BuiltinMask } from "./typing";
 
 export const EN_MASKS: BuiltinMask[] = [
+  {
+    avatar: "1f3a8",
+    name: "Stable Diffusion",
+    context: [
+      {
+        id: "SVx3ybvohJAKXDQ1KKQcs",
+        date: "",
+        role: "system",
+        content:
+          "Stable Diffusion is an AI art generation model similar to DALLE-2.\nHere are some prompts for generating art with Stable Diffusion.\n\nPrompt Example:\n\n- A ghostly apparition drifting through a haunted mansion's grand ballroom, illuminated by flickering candlelight. Eerie, ethereal, moody lighting.\n- portait of a homer simpson archer shooting arrow at forest monster, front game card, drark, marvel comics, dark, smooth\n- pirate, deep focus, fantasy, matte, sharp focus\n- red dead redemption 2, cinematic view, epic sky, detailed, low angle, high detail, warm lighting, volumetric, godrays, vivid, beautiful\n- a fantasy style portrait painting of rachel lane / alison brie hybrid in the style of francois boucher oil painting, rpg portrait\n- athena, greek goddess, claudia black, bronze greek armor, owl crown, d & d, fantasy, portrait, headshot, sharp focus\n- closeup portrait shot of a large strong female biomechanic woman in a scenic scifi environment, elegant, smooth, sharp focus, warframe\n- ultra realistic illustration of steve urkle as the hulk, elegant, smooth, sharp focus\n- portrait of beautiful happy young ana de armas, ethereal, realistic anime, clean lines, sharp lines, crisp lines, vibrant color scheme\n- A highly detailed and hyper realistic portrait of a gorgeous young ana de armas, lisa frank, butterflies, floral, sharp focus\n- lots of delicious tropical fruits with drops of moisture on table, floating colorful water, mysterious expression, in a modern and abstract setting, with bold and colorful abstract art, blurred background, bright lighting\n- 1girl, The most beautiful form of chaos, Fauvist design, Flowing colors, Vivid colors, dynamic angle, fantasy world\n- solo, sitting, close-up, girl in the hourglass, Sand is spilling out of the broken hourglass, flowing sand, huge hourglass art, hologram, particles, nebula, magic circle\n- geometric abstract background, 1girl, depth of field, zentangle, mandala, tangle, entangle, beautiful and 
aesthetic, dynamic angle, glowing skin, floating colorful sparkles the most beautiful form of chaos, elegant, a brutalist designed, vivid colours, romanticism\n\nFollow the structure of the example prompts. This means a very short description of the scene, followed by modifiers divided by commas to alter the mood, style, lighting, and more.\nIf the user input is in English, directly use the user input as a parameter to call the stable_diffusion_image_generator plugin. If the user input is not in English, generate an English prompt word based on the example and then call the stable_diffusion_image_generator plugin.",
+      },
+    ],
+    modelConfig: {
+      model: "gpt-3.5-turbo",
+      temperature: 1,
+      top_p: 1,
+      max_tokens: 2000,
+      presence_penalty: 0,
+      frequency_penalty: 0,
+      sendMemory: false,
+      historyMessageCount: 0,
+      compressMessageLengthThreshold: 1000,
+    },
+    lang: "en",
+    builtin: false,
+    createdAt: 1697205441045,
+    usePlugins: true,
+    hideContext: true,
+  },
   {
     avatar: "1f47e",
     name: "GitHub Copilot",

diff --git a/app/plugins/cn.ts b/app/plugins/cn.ts
@@ -48,4 +48,14 @@ export const CN_PLUGINS: BuiltinPlugin[] = [
     createdAt: 1694703673000,
     enable: false,
   },
+  {
+    name: "Stable Diffusion",
+    toolName: "stable_diffusion_image_generator",
+    lang: "cn",
+    description:
+      "Stable Diffusion 图像生成模型。使用本插件需要配置 Cloudflare R2 对象存储服务以及 stable-diffusion-webui 接口。",
+    builtin: true,
+    createdAt: 1688899480510,
+    enable: false,
+  },
 ];
diff --git a/app/plugins/en.ts b/app/plugins/en.ts
@@ -50,4 +50,14 @@ export const EN_PLUGINS: BuiltinPlugin[] = [
     createdAt: 1694703673000,
     enable: false,
   },
+  {
+    name: "Stable Diffusion",
+    toolName: "stable_diffusion_image_generator",
+    lang: "en",
+    description:
+      "Stable Diffusion text-to-image model. Using this plugin requires configuring Cloudflare R2 object storage service and stable-diffusion-webui API.",
+    builtin: true,
+    createdAt: 1688899480510,
+    enable: false,
+  },
 ];
diff --git a/app/utils/r2_file_storage.ts b/app/utils/r2_file_storage.ts
@@ -51,11 +51,16 @@ export default class S3FileStorage {
 
     console.log(signedUrl);
 
-    await fetch(signedUrl, {
-      method: "PUT",
-      body: data,
-    });
+    try {
+      await fetch(signedUrl, {
+        method: "PUT",
+        body: data,
+      });
 
-    return `/api/file/${fileName}`;
+      return `/api/file/${fileName}`;
+    } catch (e) {
+      console.error("[R2]", e);
+      throw e;
+    }
   }
 }
diff --git a/docs/images/plugin/sd-plugin-example.png b/docs/images/plugin/sd-plugin-example.png
diff --git a/docs/images/plugin/sd-plugin-manager.png b/docs/images/plugin/sd-plugin-manager.png
diff --git a/docs/images/plugin/sd-plugin-mask.png b/docs/images/plugin/sd-plugin-mask.png
diff --git a/docs/images/plugin/sd-web-ui.png b/docs/images/plugin/sd-web-ui.png
diff --git a/docs/stable-diffusion-plugin-cn.md b/docs/stable-diffusion-plugin-cn.md
@@ -0,0 +1,79 @@
+# Stable Diffusion 插件配置指南
+
+## 前置条件
+
+1. 部署 [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) 项目，并保证可以正常使用 （必须）
+
+2. 在 [Civitai](https://civitai.com/) 挑选喜欢的底座模型 （可选）
+
+3. 生成一张图片并记住相关的参数配置 （可选）
+
+   ![sd-web-ui](./images/plugin/sd-web-ui.png)
+
+## 环境变量
+
+- `STABLE_DIFFUSION_API_URL`（必填）
+
+  stable-diffusion-webui 服务的 api 地址，示例：http://127.0.0.1:7860
+
+- `STABLE_DIFFUSION_NEGATIVE_PROMPT`（可选）
+
+  反向提示词(Negative Prompt)，默认值：`longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality`
+
+- `STABLE_DIFFUSION_STEPS`（可选）
+
+  采样迭代步数(Steps)，默认值：`20`
+
+- `STABLE_DIFFUSION_CFG_SCALE`（可选）
+
+  提示词相关性(CFG Scale)，默认值：`7`
+
+- `STABLE_DIFFUSION_WIDTH`（可选）
+
+  生成图像宽度，默认值：`720`
+
+- `STABLE_DIFFUSION_HEIGHT`（可选）
+
+  生成图像高度，默认值：`720`
+
+- `STABLE_DIFFUSION_SAMPLER_INDEX`（可选）
+
+  采样方法(Sampler)，默认值：`Euler a`
+
+- `STABLE_DIFFUSION_RESTORE_FACES`（可选）
+
+  面部修复(Restore Faces)，默认值：`false`
+
+## 如何使用
+
+由于 OpenAI 的函数描述输入字符数量有限，在描述中并不能很好地将 Stable Diffusion 的提示词规则告诉 GPT。
+
+所以这里提供了一个 Stable Diffusion 面具用来间接告诉 GPT 该如何调用 Stable Diffusion 插件，详细的内容请查看该面具的提示内容，当然您也可以自行修改。
+
+这里需要注意，R2 存储在中国网络环境下可能无法正常使用，请确保你的网络可以正常访问 R2 存储服务，否则将无法正常使用本插件。
+
+1. 首先根据上面的“环境变量”章节配置好插件所需的环境变量
+
+2. 在插件中开启 StableDiffusion 插件
+
+   ![image-20231017123449909](./images/plugin/sd-plugin-manager.png)
+
+3. 在面具中找到 StableDiffusion 面具并进行对话
+
+   ![image-20231017123303383](./images/plugin/sd-plugin-mask.png)
+
+4. 使用愉快
+
+## 示例
+
+![image-20231017124240846](./images/plugin/sd-plugin-example.png)
+
+模型：[GhostMix - v2.0-BakedVAE | Stable Diffusion Checkpoint | Civitai](https://civitai.com/models/36520/ghostmix)
+
+环境变量：
+
+```
+STABLE_DIFFUSION_API_URL=http://127.0.0.1:7860
+STABLE_DIFFUSION_WIDTH=512
+STABLE_DIFFUSION_HEIGHT=768
+STABLE_DIFFUSION_NEGATIVE_PROMPT=(worst quality, low quality:2), monochrome, zombie,overexposure, watermark,text,bad anatomy,bad hand,extra hands,extra fingers,too many fingers,fused fingers,bad arm,distorted arm,extra arms,fused arms,extra legs,missing leg,disembodied leg,extra nipples, detached arm, liquid hand,inverted hand,disembodied limb, small breasts, loli, oversized head,extra body,completely nude, extra navel,easynegative,(hair between eyes),sketch, duplicate, ugly, huge eyes, text, logo, worst face, (bad and mutated hands:1.3),  (blurry:2.0), horror, geometry, bad_prompt, (bad hands), (missing fingers), multiple limbs, bad anatomy, (interlocked fingers:1.2), Ugly Fingers, (extra digit and hands and fingers and legs and arms:1.4), ((2girl)), (deformed fingers:1.2), (long fingers:1.2),(bad-artist-anime), bad-artist, bad hand, extra legs ,(ng_deepnegative_v1_75t)
+STABLE_DIFFUSION_STEPS=30
+STABLE_DIFFUSION_CFG_SCALE=6
+```
+