From b085fb023b4b3f694b7e40dfa82a0bd6c4dffe7a Mon Sep 17 00:00:00 2001
From: yym68686 <yym68686@outlook.com>
Date: Fri, 19 Apr 2024 19:06:07 +0800
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Bug:=20Increase=20the=20request?=
 =?UTF-8?q?=20timeout=20to=2020=20seconds.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 bot.py                   |   3 -
 test/test_Web_crawler.py |  41 ++++++-
 test/test_ollama.py      |   7 +-
 test/test_tools_class.py | 243 +++++++++++++++++++++++++++++++++++++++
 utils/chatgpt2api.py     |  40 +++++--
 utils/function_call.py   |  31 ++++-
 utils/plugins.py         |  38 ++++++
 7 files changed, 385 insertions(+), 18 deletions(-)
 create mode 100644 test/test_tools_class.py

diff --git a/bot.py b/bot.py
index 473d7dea..ef8a146e 100644
--- a/bot.py
+++ b/bot.py
@@ -214,8 +214,6 @@ async def getChatGPT(update, context, title, robot, message, chatid, messageid):
         tmpresult = f"`{e}`"
     print(tmpresult)
     if lastresult != tmpresult and messageid:
-        if 'claude2' in title:
-            tmpresult = re.sub(r",", '，', tmpresult)
         await context.bot.edit_message_text(chat_id=chatid, message_id=messageid, text=escape(tmpresult), parse_mode='MarkdownV2', disable_web_page_preview=True, read_timeout=time_out, write_timeout=time_out, pool_timeout=time_out, connect_timeout=time_out)
 
 @decorators.GroupAuthorization
@@ -402,7 +400,6 @@ async def handle_pdf(update, context):
     new_file = await context.bot.get_file(file_id)
     file_url = new_file.file_path
     extracted_text_with_prompt = Document_extract(file_url)
-    # print(extracted_text_with_prompt)
     if config.ClaudeAPI and "claude-2.1" in config.GPT_ENGINE:
         robot = config.claudeBot
         role = "Human"
diff --git a/test/test_Web_crawler.py b/test/test_Web_crawler.py
index 8800f130..2213d955 100644
--- a/test/test_Web_crawler.py
+++ b/test/test_Web_crawler.py
@@ -100,6 +100,44 @@ def Web_crawler(url: str, isSearch=False) -> str:
     print("url content", result + "\n\n")
     return result
 
+def jina_ai_Web_crawler(url: str, isSearch=False) -> str:
+    """Fetch `url` through the r.jina.ai proxy; return its table text followed by its body text, or '' if fetching/parsing fails."""
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+    }
+    result = ''
+    try:
+        requests.packages.urllib3.disable_warnings()
+        url = "https://r.jina.ai/" + url
+        response = requests.get(url, headers=headers, verify=False, timeout=3, stream=True)
+        if response.status_code == 404:
+            print("Page not found:", url)
+            return "抱歉，网页不存在，目前无法访问该网页。@Trash@"
+        content_length = int(response.headers.get('Content-Length', 0))
+        if content_length > 5000000:
+            print("Skipping large file:", url)
+            return result
+        # Parse the raw bytes: response.text.encode(response.encoding) raises TypeError when no charset is known.
+        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf-8')
+        table_contents = ""
+        tables = soup.find_all('table')
+        for table in tables:
+            table_contents += table.get_text()
+            table.decompose()
+        body = "".join((soup.find('body') or soup).get_text().split('\n'))  # no <body>: fall back to the whole document
+        result = table_contents + body
+        if result == '' and not isSearch:
+            result = "抱歉，可能反爬虫策略，目前无法访问该网页。@Trash@"
+        if result.count("\"") > 1000:
+            result = ""
+    except Exception as e:
+        print('\033[31m')
+        print("error url", url)
+        print("error", e)
+        print('\033[0m')
+    print(result + "\n\n")
+    return result
+
 # def Web_crawler(url: str) -> str:
 #     """返回链接网址url正文内容，必须是合法的网址"""
 #     headers = {
@@ -143,7 +181,8 @@ def Web_crawler(url: str, isSearch=False) -> str:
 # for url in ['https://www.reddit.com/r/China_irl/comments/15qojkh/46%E6%9C%88%E5%A4%96%E8%B5%84%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9B%B4%E6%8E%A5%E6%8A%95%E8%B5%84%E5%87%8F87/', 'https://www.apple.com.cn/job-creation/Apple_China_CSR_Report_2020.pdf', 'https://hdr.undp.org/system/files/documents/hdr2013chpdf.pdf']:
 # for url in ['https://www.airuniversity.af.edu/JIPA/Display/Article/3111127/the-uschina-trade-war-vietnam-emerges-as-the-greatest-winner/']:
 # for url in ['https://zhuanlan.zhihu.com/p/646786536', 'https://zh.wikipedia.org/wiki/%E4%BF%84%E7%BE%85%E6%96%AF%E5%85%A5%E4%BE%B5%E7%83%8F%E5%85%8B%E8%98%AD', 'https://stock.finance.sina.com.cn/usstock/quotes/aapl.html']:
-    Web_crawler(url)
+    # Web_crawler(url)
+    jina_ai_Web_crawler(url)
     print('-----------------------------')
 end_time = time.time()
 run_time = end_time - start_time
diff --git a/test/test_ollama.py b/test/test_ollama.py
index a7e935a7..f90f3e9e 100644
--- a/test/test_ollama.py
+++ b/test/test_ollama.py
@@ -35,13 +35,16 @@ def query_ollama(prompt, model):
     console = Console()
     # model = 'llama2'
     # model = 'mistral'
-    model = 'codeqwen:7b-chat'
+    # model = 'llama3:8b'
+    model = 'qwen:14b'
+    # model = 'wizardlm2:7b'
+    # model = 'codeqwen:7b-chat'
     # model = 'phi'
 
     # 查询答案
     prompt = r'''
 
-
+详细讲解一下SAFE块是怎么进行进行时间特征聚合的？
 
 '''
     answer = ""
diff --git a/test/test_tools_class.py b/test/test_tools_class.py
new file mode 100644
index 00000000..9fc98238
--- /dev/null
+++ b/test/test_tools_class.py
@@ -0,0 +1,243 @@
+import json
+
+class ToolsBase:
+    """Wrap a tool-definition dict: every key is assigned with setattr, going through a property setter when one exists."""
+
+    def __init__(self, data):
+        if not isinstance(data, dict):
+            raise ValueError("Input should be a dictionary.")
+        for key, value in data.items():
+            setattr(self, key, value)
+
+    @property
+    def name(self):
+        return self._name
+
+    @name.setter
+    def name(self, value):
+        if not isinstance(value, str):
+            raise ValueError("Name should be a string.")
+        self._name = value
+
+    @property
+    def description(self):
+        return self._description
+
+    @description.setter
+    def description(self, value):
+        # A setter is required: __init__ assigns every key with setattr, which fails on a read-only property.
+        self._description = value
+
+    def to_json(self):
+        return json.dumps({k.lstrip("_"): v for k, v in vars(self).items() if not k.startswith("__")}, ensure_ascii=False)  # drop the private "_" prefix
+
+class GPTFunctionCall(ToolsBase):
+    """OpenAI function-call style tool: ``name``, ``description`` and a ``parameters`` dict."""
+    @property
+    def parameters(self):
+        return self._parameters
+
+    @parameters.setter
+    def parameters(self, value):
+        self._parameters = value
+
+    def to_json(self):
+        """Return a JSON string with the keys ``name``, ``description`` and ``parameters`` (the dict is embedded as-is)."""
+        return json.dumps({'name': self.name, 'description': self.description, 'parameters': self.parameters}, ensure_ascii=False)
+
+class CLAUDEToolsUse(ToolsBase):
+    """Claude tool-use style tool: ``name``, ``description`` and an ``input_schema`` dict."""
+    @property
+    def input_schema(self):
+        return self._input_schema
+
+    @input_schema.setter
+    def input_schema(self, value):
+        self._input_schema = value
+
+    def to_json(self):
+        """Return a JSON string with ``name``, ``description`` and an object-typed ``input_schema`` (missing parts default to empty)."""
+        schema = {'type': 'object', 'properties': self.input_schema.get('properties', {}), 'required': self.input_schema.get('required', [])}
+        return json.dumps({'name': self.name, 'description': self.description, 'input_schema': schema}, ensure_ascii=False)
+# Example: build one tool definition of each flavour from a dict and print its JSON form.
+gpt_function_call = GPTFunctionCall({"name": "get_search_results", "description": "Search Google to enhance knowledge.", "parameters": {"type": "object", "properties": {"prompt": {"type": "string", "description": "The prompt to search."}}, "required": ["prompt"]}})
+print(gpt_function_call.to_json())
+claude_tools_use = CLAUDEToolsUse({"name": "get_stock_price", "description": "Get the current stock pricefor a given ticker symbol.", "input_schema": {"type": "object", "properties": {"ticker": {"type": "string","description": "The stock ticker symbol, e.g. AAPL for Apple Inc."}}, "required": ["ticker"]}})
+print(claude_tools_use.to_json())
+
+class claude3bot:
+    """Small client for the Anthropic Messages endpoint that keeps per-conversation history and attaches tool definitions."""
+    def __init__(
+        self,
+        api_key: str,
+        engine: str = os.environ.get("GPT_ENGINE") or "claude-3-opus-20240229",
+        temperature: float = 0.5,
+        top_p: float = 0.7,
+        chat_url: str = "https://api.anthropic.com/v1/messages",
+        timeout: float = 20,
+        system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally",
+        **kwargs,
+    ):
+        self.api_key: str = api_key
+        self.engine: str = engine
+        self.temperature = temperature
+        self.top_p = top_p
+        self.chat_url = chat_url
+        self.timeout = timeout
+        self.session = requests.Session()
+        self.conversation: dict[str, list[dict]] = {
+            "default": [],
+        }
+        self.system_prompt = system_prompt
+
+    def add_to_conversation(
+        self,
+        message: str,
+        role: str,
+        convo_id: str = "default",
+        pass_history: bool = True,
+    ) -> None:
+        """
+        Add a message to the conversation; it is merged into the previous message when both have the same role
+        """
+
+        if convo_id not in self.conversation or pass_history == False:
+            self.reset(convo_id=convo_id)
+        self.conversation[convo_id].append({"role": role, "content": message})
+        index = len(self.conversation[convo_id]) - 2
+        if index >= 0 and self.conversation[convo_id][index]["role"] == self.conversation[convo_id][index + 1]["role"]:
+            self.conversation[convo_id][index]["content"] += self.conversation[convo_id][index + 1]["content"]
+            self.conversation[convo_id].pop(index + 1)
+
+    def reset(self, convo_id: str = "default", system_prompt: str = None) -> None:
+        """
+        Reset the conversation to an empty history (the system_prompt argument is currently unused)
+        """
+        self.conversation[convo_id] = list()
+
+    def __truncate_conversation(self, convo_id: str = "default") -> None:
+        """
+        Truncate the conversation by dropping its second message until the token count fits
+        """
+        # NOTE(review): self.truncate_limit is never assigned in __init__; set it before calling this helper.
+        while True:
+            if (
+                self.get_token_count(convo_id) > self.truncate_limit
+                and len(self.conversation[convo_id]) > 1
+            ):
+                # Don't remove the first message
+                self.conversation[convo_id].pop(1)
+            else:
+                break
+
+    def get_token_count(self, convo_id: str = "default") -> int:
+        """
+        Get token count of the stored conversation, using the tiktoken encoding registered for self.engine
+        """
+        if self.engine not in ENGINES:
+            raise NotImplementedError(
+                f"Engine {self.engine} is not supported. Select from {ENGINES}",
+            )
+        tiktoken.model.MODEL_TO_ENCODING["claude-2.1"] = "cl100k_base"
+        encoding = tiktoken.encoding_for_model(self.engine)
+
+        num_tokens = 0
+        for message in self.conversation[convo_id]:
+            # every message follows <im_start>{role/name}\n{content}<im_end>\n
+            num_tokens += 5
+            for key, value in message.items():
+                if value:
+                    num_tokens += len(encoding.encode(value))
+                if key == "name":  # if there's a name, the role is omitted
+                    num_tokens += 5  # role is always required and always 1 token
+        num_tokens += 5  # every reply is primed with <im_start>assistant
+        return num_tokens
+
+    def ask_stream(
+        self,
+        prompt: str,
+        role: str = "user",
+        convo_id: str = "default",
+        model: str = None,
+        pass_history: bool = True,
+        model_max_tokens: int = 4096,
+        **kwargs,
+    ):
+        """Append `prompt` to the history, POST it to chat_url, yield every ``delta.text`` fragment and store the joined reply."""
+        pass_history = True  # NOTE(review): overrides the caller's argument, so history is always sent - confirm this is intended
+        if convo_id not in self.conversation or pass_history == False:
+            self.reset(convo_id=convo_id)
+        self.add_to_conversation(prompt, role, convo_id=convo_id)
+
+        url = self.chat_url
+        headers = {
+            "x-api-key": f"{kwargs.get('api_key', self.api_key)}",
+            "anthropic-version": "2023-06-01",
+            "content-type": "application/json",
+            "anthropic-beta": "tools-2024-04-04"
+        }
+
+        json_post = {
+            "model": os.environ.get("MODEL_NAME") or model or self.engine,
+            "messages": self.conversation[convo_id] if pass_history else [{
+                "role": "user",
+                "content": prompt
+            }],
+            "temperature": kwargs.get("temperature", self.temperature),
+            "top_p": kwargs.get("top_p", self.top_p),
+            "max_tokens": model_max_tokens,
+            # "stream": True,
+        }
+
+        # update() copies only the reference to the template's list, so take a private copy before appending to it.
+        json_post.update(claude_tools_list["base"])
+        json_post["tools"] = list(json_post.get("tools", []))
+        for item in config.PLUGINS.keys():
+            try:
+                if config.PLUGINS[item]:
+                    json_post["tools"].append(claude_tools_list[item])
+            except KeyError:  # e.g. an enabled plugin that has no entry in claude_tools_list: skip it
+                pass
+
+        if self.system_prompt:
+            json_post["system"] = self.system_prompt
+        print(json.dumps(json_post, indent=4, ensure_ascii=False))
+
+        try:
+            response = self.session.post(
+                url,
+                headers=headers,
+                json=json_post,
+                timeout=kwargs.get("timeout", self.timeout),
+                stream=True,
+            )
+        except (ConnectionError, requests.exceptions.ConnectionError):  # requests raises its own ConnectionError class
+            print("连接错误，请检查服务器状态或网络连接。")
+            return
+        except Exception as e:
+            print(f"发生了未预料的错误: {e}")
+            return
+
+        if response.status_code != 200:
+            print(response.text)
+            raise BaseException(f"{response.status_code} {response.reason} {response.text}")
+        response_role: str = "assistant"
+        full_response: str = ""
+        for line in response.iter_lines():
+            if not line or line.decode("utf-8")[:6] == "event:" or line.decode("utf-8") == "data: {}":
+                continue
+            print(line.decode("utf-8"))
+            if "tool_use" in line.decode("utf-8"):
+                # Parse the JSON first and index the result; indexing the decoded str with a key raised TypeError.
+                tool_input = json.loads(line.decode("utf-8"))["content"][1]["input"]
+            else:
+                line = line.decode("utf-8")[6:]
+            resp: dict = json.loads(line)
+            delta = resp.get("delta")
+            if not delta:
+                continue
+            if "text" in delta:
+                content = delta["text"]
+                full_response += content
+                yield content
+        self.add_to_conversation(full_response, response_role, convo_id=convo_id)
\ No newline at end of file
diff --git a/utils/chatgpt2api.py b/utils/chatgpt2api.py
index 8f8c9e1e..4da93343 100644
--- a/utils/chatgpt2api.py
+++ b/utils/chatgpt2api.py
@@ -14,7 +14,7 @@
 
 import config
 from utils.plugins import *
-from utils.function_call import function_call_list
+from utils.function_call import function_call_list, claude_tools_list
 
 def get_filtered_keys_from_object(obj: object, *keys: str) -> Set[str]:
     """
@@ -79,7 +79,7 @@ def __init__(
         temperature: float = 0.5,
         top_p: float = 0.7,
         chat_url: str = "https://api.anthropic.com/v1/complete",
-        timeout: float = 5,
+        timeout: float = 20,
         system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally",
         **kwargs,
     ):
@@ -196,6 +196,9 @@ def ask_stream(
         except ConnectionError:
             print("连接错误，请检查服务器状态或网络连接。")
             return
+        except requests.exceptions.ReadTimeout as e:
+            print(f"请求超时，请检查网络连接或增加超时时间。{e}")
+            return
         except Exception as e:
             print(f"发生了未预料的错误: {e}")
             return
@@ -227,7 +230,7 @@ def __init__(
         temperature: float = 0.5,
         top_p: float = 0.7,
         chat_url: str = "https://api.anthropic.com/v1/messages",
-        timeout: float = 5,
+        timeout: float = 20,
         system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally",
         **kwargs,
     ):
@@ -328,8 +331,7 @@ def ask_stream(
             "x-api-key": f"{kwargs.get('api_key', self.api_key)}",
             "anthropic-version": "2023-06-01",
             "content-type": "application/json",
-            "accept": "application/json"
-            # "Accept": "*/*"
+            # "anthropic-beta": "tools-2024-04-04"
         }
 
         json_post = {
@@ -359,6 +361,9 @@ def ask_stream(
         except ConnectionError:
             print("连接错误，请检查服务器状态或网络连接。")
             return
+        except requests.exceptions.ReadTimeout as e:
+            print(f"请求超时，请检查网络连接或增加超时时间。{e}")
+            return
         except Exception as e:
             print(f"发生了未预料的错误: {e}")
             return
@@ -371,6 +376,11 @@ def ask_stream(
         for line in response.iter_lines():
             if not line or line.decode("utf-8")[:6] == "event:" or line.decode("utf-8") == "data: {}":
                 continue
+            # print(line.decode("utf-8"))
+            # if "tool_use" in line.decode("utf-8"):
+            #     tool_input = json.loads(line.decode("utf-8")["content"][1]["input"])
+            # else:
+            #     line = line.decode("utf-8")[6:]
             line = line.decode("utf-8")[6:]
             # print(line)
             resp: dict = json.loads(line)
@@ -389,7 +399,7 @@ class Imagebot:
     def __init__(
         self,
         api_key: str,
-        timeout: float = 5,
+        timeout: float = 20,
     ):
         self.api_key: str = api_key
         self.engine: str = "dall-e-3"
@@ -422,6 +432,9 @@ def dall_e_3(
         except ConnectionError:
             print("连接错误，请检查服务器状态或网络连接。")
             return
+        except requests.exceptions.ReadTimeout as e:
+            print(f"请求超时，请检查网络连接或增加超时时间。{e}")
+            return
         except Exception as e:
             print(f"发生了未预料的错误: {e}")
             return
@@ -781,6 +794,9 @@ def ask_stream(
         except ConnectionError:
             print("连接错误，请检查服务器状态或网络连接。")
             return
+        except requests.exceptions.ReadTimeout as e:
+            print(f"请求超时，请检查网络连接或增加超时时间。{e}")
+            return
         except Exception as e:
             print(f"发生了未预料的错误: {e}")
             return
@@ -858,6 +874,7 @@ def ask_stream(
                 if function_call_name == "get_url_content":
                     url = json.loads(function_full_response)["url"]
                     print("\n\nurl", url)
+                    # function_response = jina_ai_Web_crawler(url)
                     function_response = Web_crawler(url)
                     function_response, text_len = cut_message(function_response, function_call_max_tokens)
                     function_response = (
@@ -879,7 +896,6 @@ def ask_stream(
             if self.conversation[convo_id][-1]["role"] == "function" and self.conversation[convo_id][-1]["name"] == "get_search_results":
                 mess = self.conversation[convo_id].pop(-1)
                 # print("Truncate message:", mess)
-            self.add_to_conversation(full_response, "assistant", convo_id=convo_id)
             yield from self.ask_stream(function_response, response_role, convo_id=convo_id, function_name=function_call_name)
         else:
             if self.conversation[convo_id][-1]["role"] == "function" and self.conversation[convo_id][-1]["name"] == "get_search_results":
@@ -1098,7 +1114,7 @@ def __init__(
         temperature: float = 0.5,
         top_p: float = 1,
         chat_url: str = "https://api.groq.com/openai/v1/chat/completions",
-        timeout: float = 5,
+        timeout: float = 20,
         system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally",
         **kwargs,
     ):
@@ -1224,6 +1240,9 @@ def ask_stream(
         except ConnectionError:
             print("连接错误，请检查服务器状态或网络连接。")
             return
+        except requests.exceptions.ReadTimeout as e:
+            print(f"请求超时，请检查网络连接或增加超时时间。{e}")
+            return
         except Exception as e:
             print(f"发生了未预料的错误: {e}")
             return
@@ -1273,7 +1292,7 @@ def __init__(
         temperature: float = 0.5,
         top_p: float = 0.7,
         chat_url: str = "https://generativelanguage.googleapis.com/v1beta/models/{model}:{stream}?key={api_key}",
-        timeout: float = 5,
+        timeout: float = 20,
         system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally",
         **kwargs,
     ):
@@ -1411,6 +1430,9 @@ def ask_stream(
         except ConnectionError:
             print("连接错误，请检查服务器状态或网络连接。")
             return
+        except requests.exceptions.ReadTimeout as e:
+            print(f"请求超时，请检查网络连接或增加超时时间。{e}")
+            return
         except Exception as e:
             print(f"发生了未预料的错误: {e}")
             return
diff --git a/utils/function_call.py b/utils/function_call.py
index 29a63232..0ac2bee0 100644
--- a/utils/function_call.py
+++ b/utils/function_call.py
@@ -61,10 +61,35 @@
     },
     "DATE": {
         "name": "get_date_time_weekday",
-        "description": "Get the current time, date, and day of the week"
+        "description": "Get the current time, date, and day of the week",
+        "parameters": {
+            "type": "object",
+            "properties": {}
+        }
     },
     "VERSION": {
         "name": "get_version_info",
-        "description": "Get version information"
+        "description": "Get version information",
+        "parameters": {
+            "type": "object",
+            "properties": {}
+        }
     },
-}
\ No newline at end of file
+}
+def gpt2claude_tools_json(json_dict):
+    """Return a deep copy of `json_dict` with its top-level OpenAI tool keys renamed to their Claude names."""
+    import copy
+    converted = copy.deepcopy(json_dict)
+    # (old key, new key) pairs, applied in this order; a falsy new key means
+    # the entry has no Claude counterpart and is simply removed.
+    renames = (("parameters", "input_schema"), ("functions", "tools"), ("function_call", None))
+    for source_key, target_key in renames:
+        if source_key not in converted:
+            continue
+        value = converted.pop(source_key)
+        if target_key:
+            converted[target_key] = value
+    return converted
+
+# Claude-flavoured copy of every entry in function_call_list, stored under the same key.
+claude_tools_list = {str(key): gpt2claude_tools_json(spec) for key, spec in function_call_list.items()}
diff --git a/utils/plugins.py b/utils/plugins.py
index 6bd921ec..05784603 100644
--- a/utils/plugins.py
+++ b/utils/plugins.py
@@ -94,6 +94,43 @@ def Web_crawler(url: str, isSearch=False) -> str:
     # print("url content", result + "\n\n")
     return result
 
+def jina_ai_Web_crawler(url: str, isSearch=False) -> str:
+    """Fetch `url` through the r.jina.ai proxy; return its table text followed by its body text, or '' if fetching/parsing fails."""
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+    }
+    result = ''
+    try:
+        requests.packages.urllib3.disable_warnings()
+        url = "https://r.jina.ai/" + url
+        response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True)
+        if response.status_code == 404:
+            print("Page not found:", url)
+            return "抱歉，网页不存在，目前无法访问该网页。@Trash@"
+        content_length = int(response.headers.get('Content-Length', 0))
+        if content_length > 5000000:
+            print("Skipping large file:", url)
+            return result
+        # Parse the raw bytes: response.text.encode(response.encoding) raises TypeError when no charset is known.
+        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf-8')
+        table_contents = ""
+        tables = soup.find_all('table')
+        for table in tables:
+            table_contents += table.get_text()
+            table.decompose()
+        body = "".join((soup.find('body') or soup).get_text().split('\n'))  # no <body>: fall back to the whole document
+        result = table_contents + body
+        if result == '' and not isSearch:
+            result = "抱歉，可能反爬虫策略，目前无法访问该网页。@Trash@"
+        if result.count("\"") > 1000:
+            result = ""
+    except Exception as e:
+        print('\033[31m')
+        print("error url", url)
+        print("error", e)
+        print('\033[0m')
+    return result
+
 def getddgsearchurl(result, numresults=4):
     try:
         search = DuckDuckGoSearchResults(num_results=numresults)
@@ -252,6 +289,7 @@ def get_url_text_list(prompt):
     yield "🌐 已找到一些有用的链接，正在获取详细内容..."
     threads = []
     for url in url_set_list:
+        # url_search_thread = ThreadWithReturnValue(target=jina_ai_Web_crawler, args=(url,True,))
         url_search_thread = ThreadWithReturnValue(target=Web_crawler, args=(url,True,))
         url_search_thread.start()
         threads.append(url_search_thread)