From b085fb023b4b3f694b7e40dfa82a0bd6c4dffe7a Mon Sep 17 00:00:00 2001 From: yym68686 Date: Fri, 19 Apr 2024 19:06:07 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Bug:=20Increase=20the=20request?= =?UTF-8?q?=20timeout=20to=2020=20seconds.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bot.py | 3 - test/test_Web_crawler.py | 41 ++++++- test/test_ollama.py | 7 +- test/test_tools_class.py | 243 +++++++++++++++++++++++++++++++++++++++ utils/chatgpt2api.py | 40 +++++-- utils/function_call.py | 31 ++++- utils/plugins.py | 38 ++++++ 7 files changed, 385 insertions(+), 18 deletions(-) create mode 100644 test/test_tools_class.py diff --git a/bot.py b/bot.py index 473d7dea..ef8a146e 100644 --- a/bot.py +++ b/bot.py @@ -214,8 +214,6 @@ async def getChatGPT(update, context, title, robot, message, chatid, messageid): tmpresult = f"`{e}`" print(tmpresult) if lastresult != tmpresult and messageid: - if 'claude2' in title: - tmpresult = re.sub(r",", ',', tmpresult) await context.bot.edit_message_text(chat_id=chatid, message_id=messageid, text=escape(tmpresult), parse_mode='MarkdownV2', disable_web_page_preview=True, read_timeout=time_out, write_timeout=time_out, pool_timeout=time_out, connect_timeout=time_out) @decorators.GroupAuthorization @@ -402,7 +400,6 @@ async def handle_pdf(update, context): new_file = await context.bot.get_file(file_id) file_url = new_file.file_path extracted_text_with_prompt = Document_extract(file_url) - # print(extracted_text_with_prompt) if config.ClaudeAPI and "claude-2.1" in config.GPT_ENGINE: robot = config.claudeBot role = "Human" diff --git a/test/test_Web_crawler.py b/test/test_Web_crawler.py index 8800f130..2213d955 100644 --- a/test/test_Web_crawler.py +++ b/test/test_Web_crawler.py @@ -100,6 +100,44 @@ def Web_crawler(url: str, isSearch=False) -> str: print("url content", result + "\n\n") return result +def jina_ai_Web_crawler(url: str, isSearch=False) -> str: + """返回链接网址url正文内容,必须是合法的网址""" + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" + } + result = '' + try: + requests.packages.urllib3.disable_warnings() + url = "https://r.jina.ai/" + url + response = requests.get(url, headers=headers, verify=False, timeout=3, stream=True) + if response.status_code == 404: + print("Page not found:", url) + return "抱歉,网页不存在,目前无法访问该网页。@Trash@" + content_length = int(response.headers.get('Content-Length', 0)) + if content_length > 5000000: + print("Skipping large file:", url) + return result + soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8') + + table_contents = "" + tables = soup.find_all('table') + for table in tables: + table_contents += table.get_text() + table.decompose() + body = "".join(soup.find('body').get_text().split('\n')) + result = table_contents + body + if result == '' and not isSearch: + result = "抱歉,可能反爬虫策略,目前无法访问该网页。@Trash@" + if result.count("\"") > 1000: + result = "" + except Exception as e: + print('\033[31m') + print("error url", url) + print("error", e) + print('\033[0m') + print(result + "\n\n") + return result + # def Web_crawler(url: str) -> str: # """返回链接网址url正文内容,必须是合法的网址""" # headers = { @@ -143,7 +181,8 @@ def Web_crawler(url: str, isSearch=False) -> str: # for url in ['https://www.reddit.com/r/China_irl/comments/15qojkh/46%E6%9C%88%E5%A4%96%E8%B5%84%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9B%B4%E6%8E%A5%E6%8A%95%E8%B5%84%E5%87%8F87/', 'https://www.apple.com.cn/job-creation/Apple_China_CSR_Report_2020.pdf', 'https://hdr.undp.org/system/files/documents/hdr2013chpdf.pdf']: # for url in ['https://www.airuniversity.af.edu/JIPA/Display/Article/3111127/the-uschina-trade-war-vietnam-emerges-as-the-greatest-winner/']: # for url in ['https://zhuanlan.zhihu.com/p/646786536', 'https://zh.wikipedia.org/wiki/%E4%BF%84%E7%BE%85%E6%96%AF%E5%85%A5%E4%BE%B5%E7%83%8F%E5%85%8B%E8%98%AD', 'https://stock.finance.sina.com.cn/usstock/quotes/aapl.html']: - Web_crawler(url) + # Web_crawler(url) + jina_ai_Web_crawler(url) print('-----------------------------') end_time = time.time() run_time = end_time - start_time diff --git a/test/test_ollama.py b/test/test_ollama.py index a7e935a7..f90f3e9e 100644 --- a/test/test_ollama.py +++ b/test/test_ollama.py @@ -35,13 +35,16 @@ def query_ollama(prompt, model): console = Console() # model = 'llama2' # model = 'mistral' - model = 'codeqwen:7b-chat' + # model = 'llama3:8b' + model = 'qwen:14b' + # model = 'wizardlm2:7b' + # model = 'codeqwen:7b-chat' # model = 'phi' # 查询答案 prompt = r''' - +详细讲解一下SAFE块是怎么进行进行时间特征聚合的? ''' answer = "" diff --git a/test/test_tools_class.py b/test/test_tools_class.py new file mode 100644 index 00000000..9fc98238 --- /dev/null +++ b/test/test_tools_class.py @@ -0,0 +1,243 @@ +import json + +class ToolsBase: + def __init__(self, data): + if not isinstance(data, dict): + raise ValueError("Input should be a dictionary.") + for key, value in data.items(): + setattr(self, key, value) + + @property + def name(self): + return self._name + + @property + def description(self): + return self._description + + def to_json(self): + return json.dumps({k: getattr(self, k) for k in vars(self) if not k.startswith("__")}, ensure_ascii=False) + +class GPTFunctionCall(ToolsBase): + + @property.setter + def name(self, value): + if not isinstance(value, str): + raise ValueError("Name should be a string.") + self._name = value + + @property + def description(self): + return self._description + + @property + def parameters(self): + return self._parameters + + def to_json(self): + params = {k: getattr(self.parameters, k) for k in vars(self.parameters) if not k.startswith("__")} + return json.dumps({'name': self.name, 'description': self.description, 'parameters': params}, ensure_ascii=False) + +class CLAUDEToolsUse(ToolsBase): + + @property.setter + def name(self, value): + if not isinstance(value, str): + raise ValueError("Name should be a string.") + self._name = value + + @property + def description(self): + return self._description + + @property + def input_schema(self): + return self._input_schema + + def to_json(self): + props = {k: getattr(self.input_schema, k) for k in vars(self.input_schema['properties']) if not k.startswith("__")} + required = self.input_schema['required'] + return json.dumps({'name': self.name, 'description': self.description, 'input_schema': {'type': +'object', 'properties': props, 'required': required}}, ensure_ascii=False) +# 示例 +gpt_function_call = GPTFunctionCall({"name": "get_search_results", "description": "Search Google to enhance knowledge.", "parameters": {"type": "object", "properties": {"prompt": {"type": "string", "description": "The prompt to search."}}, "required": ["prompt"]}}) +print(gpt_function_call.to_json()) +claude_tools_use = CLAUDEToolsUse({"name": "get_stock_price", "description": "Get the current stock pricefor a given ticker symbol.", "input_schema": {"type": "object", "properties": {"ticker": {"type": "string","description": "The stock ticker symbol, e.g. AAPL for Apple Inc."}}, "required": ["ticker"]}}) +print(claude_tools_use.to_json()) + +class claude3bot: + def __init__( + self, + api_key: str, + engine: str = os.environ.get("GPT_ENGINE") or "claude-3-opus-20240229", + temperature: float = 0.5, + top_p: float = 0.7, + chat_url: str = "https://api.anthropic.com/v1/messages", + timeout: float = 20, + system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally", + **kwargs, + ): + self.api_key: str = api_key + self.engine: str = engine + self.temperature = temperature + self.top_p = top_p + self.chat_url = chat_url + self.timeout = timeout + self.session = requests.Session() + self.conversation: dict[str, list[dict]] = { + "default": [], + } + self.system_prompt = system_prompt + + def add_to_conversation( + self, + message: str, + role: str, + convo_id: str = "default", + pass_history: bool = True, + ) -> None: + """ + Add a message to the conversation + """ + + if convo_id not in self.conversation or pass_history == False: + self.reset(convo_id=convo_id) + # print("message", message) + self.conversation[convo_id].append({"role": role, "content": message}) + index = len(self.conversation[convo_id]) - 2 + if index >= 0 and self.conversation[convo_id][index]["role"] == self.conversation[convo_id][index + 1]["role"]: + self.conversation[convo_id][index]["content"] += self.conversation[convo_id][index + 1]["content"] + self.conversation[convo_id].pop(index + 1) + + def reset(self, convo_id: str = "default", system_prompt: str = None) -> None: + """ + Reset the conversation + """ + self.conversation[convo_id] = list() + + def __truncate_conversation(self, convo_id: str = "default") -> None: + """ + Truncate the conversation + """ + while True: + if ( + self.get_token_count(convo_id) > self.truncate_limit + and len(self.conversation[convo_id]) > 1 + ): + # Don't remove the first message + self.conversation[convo_id].pop(1) + else: + break + + def get_token_count(self, convo_id: str = "default") -> int: + """ + Get token count + """ + if self.engine not in ENGINES: + raise NotImplementedError( + f"Engine {self.engine} is not supported. Select from {ENGINES}", + ) + tiktoken.model.MODEL_TO_ENCODING["claude-2.1"] = "cl100k_base" + encoding = tiktoken.encoding_for_model(self.engine) + + num_tokens = 0 + for message in self.conversation[convo_id]: + # every message follows {role/name}\n{content}\n + num_tokens += 5 + for key, value in message.items(): + if value: + num_tokens += len(encoding.encode(value)) + if key == "name": # if there's a name, the role is omitted + num_tokens += 5 # role is always required and always 1 token + num_tokens += 5 # every reply is primed with assistant + return num_tokens + + def ask_stream( + self, + prompt: str, + role: str = "user", + convo_id: str = "default", + model: str = None, + pass_history: bool = True, + model_max_tokens: int = 4096, + **kwargs, + ): + pass_history = True + if convo_id not in self.conversation or pass_history == False: + self.reset(convo_id=convo_id) + self.add_to_conversation(prompt, role, convo_id=convo_id) + # self.__truncate_conversation(convo_id=convo_id) + # print(self.conversation[convo_id]) + + url = self.chat_url + headers = { + "x-api-key": f"{kwargs.get('api_key', self.api_key)}", + "anthropic-version": "2023-06-01", + "content-type": "application/json", + "anthropic-beta": "tools-2024-04-04" + } + + json_post = { + "model": os.environ.get("MODEL_NAME") or model or self.engine, + "messages": self.conversation[convo_id] if pass_history else [{ + "role": "user", + "content": prompt + }], + "temperature": kwargs.get("temperature", self.temperature), + "top_p": kwargs.get("top_p", self.top_p), + "max_tokens": model_max_tokens, + # "stream": True, + } + + # json_post.update(copy.deepcopy(json_post)) + json_post.update(claude_tools_list["base"]) + for item in config.PLUGINS.keys(): + try: + if config.PLUGINS[item]: + json_post["tools"].append(claude_tools_list[item]) + except: + pass + + if self.system_prompt: + json_post["system"] = self.system_prompt + print(json.dumps(json_post, indent=4, ensure_ascii=False)) + + try: + response = self.session.post( + url, + headers=headers, + json=json_post, + timeout=kwargs.get("timeout", self.timeout), + stream=True, + ) + except ConnectionError: + print("连接错误,请检查服务器状态或网络连接。") + return + except Exception as e: + print(f"发生了未预料的错误: {e}") + return + + if response.status_code != 200: + print(response.text) + raise BaseException(f"{response.status_code} {response.reason} {response.text}") + response_role: str = "assistant" + full_response: str = "" + for line in response.iter_lines(): + if not line or line.decode("utf-8")[:6] == "event:" or line.decode("utf-8") == "data: {}": + continue + print(line.decode("utf-8")) + if "tool_use" in line.decode("utf-8"): + tool_input = json.loads(line.decode("utf-8")["content"][1]["input"]) + else: + line = line.decode("utf-8")[6:] + resp: dict = json.loads(line) + delta = resp.get("delta") + if not delta: + continue + if "text" in delta: + content = delta["text"] + full_response += content + yield content + self.add_to_conversation(full_response, response_role, convo_id=convo_id) + # print(repr(self.conversation.Conversation(convo_id))) + # print("total tokens:", self.get_token_count(convo_id)) \ No newline at end of file diff --git a/utils/chatgpt2api.py b/utils/chatgpt2api.py index 8f8c9e1e..4da93343 100644 --- a/utils/chatgpt2api.py +++ b/utils/chatgpt2api.py @@ -14,7 +14,7 @@ import config from utils.plugins import * -from utils.function_call import function_call_list +from utils.function_call import function_call_list, claude_tools_list def get_filtered_keys_from_object(obj: object, *keys: str) -> Set[str]: """ @@ -79,7 +79,7 @@ def __init__( temperature: float = 0.5, top_p: float = 0.7, chat_url: str = "https://api.anthropic.com/v1/complete", - timeout: float = 5, + timeout: float = 20, system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally", **kwargs, ): @@ -196,6 +196,9 @@ def ask_stream( except ConnectionError: print("连接错误,请检查服务器状态或网络连接。") return + except requests.exceptions.ReadTimeout: + print("请求超时,请检查网络连接或增加超时时间。{e}") + return except Exception as e: print(f"发生了未预料的错误: {e}") return @@ -227,7 +230,7 @@ def __init__( temperature: float = 0.5, top_p: float = 0.7, chat_url: str = "https://api.anthropic.com/v1/messages", - timeout: float = 5, + timeout: float = 20, system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally", **kwargs, ): @@ -328,8 +331,7 @@ def ask_stream( "x-api-key": f"{kwargs.get('api_key', self.api_key)}", "anthropic-version": "2023-06-01", "content-type": "application/json", - "accept": "application/json" - # "Accept": "*/*" + # "anthropic-beta": "tools-2024-04-04" } json_post = { @@ -359,6 +361,9 @@ def ask_stream( except ConnectionError: print("连接错误,请检查服务器状态或网络连接。") return + except requests.exceptions.ReadTimeout: + print("请求超时,请检查网络连接或增加超时时间。{e}") + return except Exception as e: print(f"发生了未预料的错误: {e}") return @@ -371,6 +376,11 @@ def ask_stream( for line in response.iter_lines(): if not line or line.decode("utf-8")[:6] == "event:" or line.decode("utf-8") == "data: {}": continue + # print(line.decode("utf-8")) + # if "tool_use" in line.decode("utf-8"): + # tool_input = json.loads(line.decode("utf-8")["content"][1]["input"]) + # else: + # line = line.decode("utf-8")[6:] line = line.decode("utf-8")[6:] # print(line) resp: dict = json.loads(line) @@ -389,7 +399,7 @@ class Imagebot: def __init__( self, api_key: str, - timeout: float = 5, + timeout: float = 20, ): self.api_key: str = api_key self.engine: str = "dall-e-3" @@ -422,6 +432,9 @@ def dall_e_3( except ConnectionError: print("连接错误,请检查服务器状态或网络连接。") return + except requests.exceptions.ReadTimeout: + print("请求超时,请检查网络连接或增加超时时间。{e}") + return except Exception as e: print(f"发生了未预料的错误: {e}") return @@ -781,6 +794,9 @@ def ask_stream( except ConnectionError: print("连接错误,请检查服务器状态或网络连接。") return + except requests.exceptions.ReadTimeout: + print("请求超时,请检查网络连接或增加超时时间。{e}") + return except Exception as e: print(f"发生了未预料的错误: {e}") return @@ -858,6 +874,7 @@ def ask_stream( if function_call_name == "get_url_content": url = json.loads(function_full_response)["url"] print("\n\nurl", url) + # function_response = jina_ai_Web_crawler(url) function_response = Web_crawler(url) function_response, text_len = cut_message(function_response, function_call_max_tokens) function_response = ( @@ -879,7 +896,6 @@ def ask_stream( if self.conversation[convo_id][-1]["role"] == "function" and self.conversation[convo_id][-1]["name"] == "get_search_results": mess = self.conversation[convo_id].pop(-1) # print("Truncate message:", mess) - self.add_to_conversation(full_response, "assistant", convo_id=convo_id) yield from self.ask_stream(function_response, response_role, convo_id=convo_id, function_name=function_call_name) else: if self.conversation[convo_id][-1]["role"] == "function" and self.conversation[convo_id][-1]["name"] == "get_search_results": @@ -1098,7 +1114,7 @@ def __init__( temperature: float = 0.5, top_p: float = 1, chat_url: str = "https://api.groq.com/openai/v1/chat/completions", - timeout: float = 5, + timeout: float = 20, system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally", **kwargs, ): @@ -1224,6 +1240,9 @@ def ask_stream( except ConnectionError: print("连接错误,请检查服务器状态或网络连接。") return + except requests.exceptions.ReadTimeout: + print("请求超时,请检查网络连接或增加超时时间。{e}") + return except Exception as e: print(f"发生了未预料的错误: {e}") return @@ -1273,7 +1292,7 @@ def __init__( temperature: float = 0.5, top_p: float = 0.7, chat_url: str = "https://generativelanguage.googleapis.com/v1beta/models/{model}:{stream}?key={api_key}", - timeout: float = 5, + timeout: float = 20, system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally", **kwargs, ): @@ -1411,6 +1430,9 @@ def ask_stream( except ConnectionError: print("连接错误,请检查服务器状态或网络连接。") return + except requests.exceptions.ReadTimeout: + print("请求超时,请检查网络连接或增加超时时间。{e}") + return except Exception as e: print(f"发生了未预料的错误: {e}") return diff --git a/utils/function_call.py b/utils/function_call.py index 29a63232..0ac2bee0 100644 --- a/utils/function_call.py +++ b/utils/function_call.py @@ -61,10 +61,35 @@ }, "DATE": { "name": "get_date_time_weekday", - "description": "Get the current time, date, and day of the week" + "description": "Get the current time, date, and day of the week", + "parameters": { + "type": "object", + "properties": {} + } }, "VERSION": { "name": "get_version_info", - "description": "Get version information" + "description": "Get version information", + "parameters": { + "type": "object", + "properties": {} + } }, -} \ No newline at end of file +} +def gpt2claude_tools_json(json_dict): + import copy + json_dict = copy.deepcopy(json_dict) + keys_to_change = { + "parameters": "input_schema", + "functions": "tools", + "function_call": None # 如果没有新的键名,则设置为None或留空 + } + for old_key, new_key in keys_to_change.items(): + if old_key in json_dict: + if new_key: + json_dict[new_key] = json_dict.pop(old_key) + else: + json_dict.pop(old_key) + return json_dict + +claude_tools_list = {f"{key}": gpt2claude_tools_json(function_call_list[key]) for key in function_call_list.keys()} diff --git a/utils/plugins.py b/utils/plugins.py index 6bd921ec..05784603 100644 --- a/utils/plugins.py +++ b/utils/plugins.py @@ -94,6 +94,43 @@ def Web_crawler(url: str, isSearch=False) -> str: # print("url content", result + "\n\n") return result +def jina_ai_Web_crawler(url: str, isSearch=False) -> str: + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" + } + result = '' + try: + requests.packages.urllib3.disable_warnings() + url = "https://r.jina.ai/" + url + response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True) + if response.status_code == 404: + print("Page not found:", url) + return "抱歉,网页不存在,目前无法访问该网页。@Trash@" + content_length = int(response.headers.get('Content-Length', 0)) + if content_length > 5000000: + print("Skipping large file:", url) + return result + soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8') + + table_contents = "" + tables = soup.find_all('table') + for table in tables: + table_contents += table.get_text() + table.decompose() + body = "".join(soup.find('body').get_text().split('\n')) + result = table_contents + body + if result == '' and not isSearch: + result = "抱歉,可能反爬虫策略,目前无法访问该网页。@Trash@" + if result.count("\"") > 1000: + result = "" + except Exception as e: + print('\033[31m') + print("error url", url) + print("error", e) + print('\033[0m') + # print(result + "\n\n") + return result + def getddgsearchurl(result, numresults=4): try: search = DuckDuckGoSearchResults(num_results=numresults) @@ -252,6 +289,7 @@ def get_url_text_list(prompt): yield "🌐 已找到一些有用的链接,正在获取详细内容..." threads = [] for url in url_set_list: + # url_search_thread = ThreadWithReturnValue(target=jina_ai_Web_crawler, args=(url,True,)) url_search_thread = ThreadWithReturnValue(target=Web_crawler, args=(url,True,)) url_search_thread.start() threads.append(url_search_thread)