🐛 Bug: Increase the request timeout to 20 seconds.

yym68686 · Apr 19, 2024 · b085fb0 · b085fb0
1 parent 0f13132
commit b085fb0
Show file tree

Hide file tree

Showing 7 changed files with 385 additions and 18 deletions.
diff --git a/bot.py b/bot.py
@@ -214,8 +214,6 @@ async def getChatGPT(update, context, title, robot, message, chatid, messageid):
         tmpresult = f"`{e}`"
     print(tmpresult)
     if lastresult != tmpresult and messageid:
-        if 'claude2' in title:
-            tmpresult = re.sub(r",", '，', tmpresult)
         await context.bot.edit_message_text(chat_id=chatid, message_id=messageid, text=escape(tmpresult), parse_mode='MarkdownV2', disable_web_page_preview=True, read_timeout=time_out, write_timeout=time_out, pool_timeout=time_out, connect_timeout=time_out)
 
 @decorators.GroupAuthorization
@@ -402,7 +400,6 @@ async def handle_pdf(update, context):
     new_file = await context.bot.get_file(file_id)
     file_url = new_file.file_path
     extracted_text_with_prompt = Document_extract(file_url)
-    # print(extracted_text_with_prompt)
     if config.ClaudeAPI and "claude-2.1" in config.GPT_ENGINE:
         robot = config.claudeBot
         role = "Human"

diff --git a/test/test_Web_crawler.py b/test/test_Web_crawler.py
@@ -100,6 +100,44 @@ def Web_crawler(url: str, isSearch=False) -> str:
     print("url content", result + "\n\n")
     return result
 
+def jina_ai_Web_crawler(url: str, isSearch=False) -> str:
+    """返回链接网址url正文内容，必须是合法的网址"""
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+    }
+    result = ''
+    try:
+        requests.packages.urllib3.disable_warnings()
+        url = "https://r.jina.ai/" + url
+        response = requests.get(url, headers=headers, verify=False, timeout=3, stream=True)
+        if response.status_code == 404:
+            print("Page not found:", url)
+            return "抱歉，网页不存在，目前无法访问该网页。@Trash@"
+        content_length = int(response.headers.get('Content-Length', 0))
+        if content_length > 5000000:
+            print("Skipping large file:", url)
+            return result
+        soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8')
+
+        table_contents = ""
+        tables = soup.find_all('table')
+        for table in tables:
+            table_contents += table.get_text()
+            table.decompose()
+        body = "".join(soup.find('body').get_text().split('\n'))
+        result = table_contents + body
+        if result == '' and not isSearch:
+            result = "抱歉，可能反爬虫策略，目前无法访问该网页。@Trash@"
+        if result.count("\"") > 1000:
+            result = ""
+    except Exception as e:
+        print('\033[31m')
+        print("error url", url)
+        print("error", e)
+        print('\033[0m')
+    print(result + "\n\n")
+    return result
+
 # def Web_crawler(url: str) -> str:
 #     """返回链接网址url正文内容，必须是合法的网址"""
 #     headers = {
@@ -143,7 +181,8 @@ def Web_crawler(url: str, isSearch=False) -> str:
 # for url in ['https://www.reddit.com/r/China_irl/comments/15qojkh/46%E6%9C%88%E5%A4%96%E8%B5%84%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9B%B4%E6%8E%A5%E6%8A%95%E8%B5%84%E5%87%8F87/', 'https://www.apple.com.cn/job-creation/Apple_China_CSR_Report_2020.pdf', 'https://hdr.undp.org/system/files/documents/hdr2013chpdf.pdf']:
 # for url in ['https://www.airuniversity.af.edu/JIPA/Display/Article/3111127/the-uschina-trade-war-vietnam-emerges-as-the-greatest-winner/']:
 # for url in ['https://zhuanlan.zhihu.com/p/646786536', 'https://zh.wikipedia.org/wiki/%E4%BF%84%E7%BE%85%E6%96%AF%E5%85%A5%E4%BE%B5%E7%83%8F%E5%85%8B%E8%98%AD', 'https://stock.finance.sina.com.cn/usstock/quotes/aapl.html']:
-    Web_crawler(url)
+    # Web_crawler(url)
+    jina_ai_Web_crawler(url)
     print('-----------------------------')
 end_time = time.time()
 run_time = end_time - start_time

diff --git a/test/test_ollama.py b/test/test_ollama.py
@@ -35,13 +35,16 @@ def query_ollama(prompt, model):
     console = Console()
     # model = 'llama2'
     # model = 'mistral'
-    model = 'codeqwen:7b-chat'
+    # model = 'llama3:8b'
+    model = 'qwen:14b'
+    # model = 'wizardlm2:7b'
+    # model = 'codeqwen:7b-chat'
     # model = 'phi'
 
     # 查询答案
     prompt = r'''
 
-
+详细讲解一下SAFE块是怎么进行进行时间特征聚合的？
 
 '''
     answer = ""

diff --git a/test/test_tools_class.py b/test/test_tools_class.py
@@ -0,0 +1,243 @@
+import json
+
+class ToolsBase:
+    def __init__(self, data):
+        if not isinstance(data, dict):
+            raise ValueError("Input should be a dictionary.")
+        for key, value in data.items():
+            setattr(self, key, value)
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def description(self):
+        return self._description
+
+    def to_json(self):
+        return json.dumps({k: getattr(self, k) for k in vars(self) if not k.startswith("__")}, ensure_ascii=False)
+
+class GPTFunctionCall(ToolsBase):
+
+    @property.setter
+    def name(self, value):
+        if not isinstance(value, str):
+            raise ValueError("Name should be a string.")
+        self._name = value
+
+    @property
+    def description(self):
+        return self._description
+
+    @property
+    def parameters(self):
+        return self._parameters
+
+    def to_json(self):
+        params = {k: getattr(self.parameters, k) for k in vars(self.parameters) if not k.startswith("__")}
+        return json.dumps({'name': self.name, 'description': self.description, 'parameters': params}, ensure_ascii=False)
+
+class CLAUDEToolsUse(ToolsBase):
+
+    @property.setter
+    def name(self, value):
+        if not isinstance(value, str):
+            raise ValueError("Name should be a string.")
+        self._name = value
+
+    @property
+    def description(self):
+        return self._description
+
+    @property
+    def input_schema(self):
+        return self._input_schema
+
+    def to_json(self):
+        props = {k: getattr(self.input_schema, k) for k in vars(self.input_schema['properties']) if not k.startswith("__")}
+        required = self.input_schema['required']
+        return json.dumps({'name': self.name, 'description': self.description, 'input_schema': {'type':
+'object', 'properties': props, 'required': required}}, ensure_ascii=False)
+# 示例
+gpt_function_call = GPTFunctionCall({"name": "get_search_results", "description": "Search Google to enhance knowledge.", "parameters": {"type": "object", "properties": {"prompt": {"type": "string", "description": "The prompt to search."}}, "required": ["prompt"]}})
+print(gpt_function_call.to_json())
+claude_tools_use = CLAUDEToolsUse({"name": "get_stock_price", "description": "Get the current stock pricefor a given ticker symbol.", "input_schema": {"type": "object", "properties": {"ticker": {"type": "string","description": "The stock ticker symbol, e.g. AAPL for Apple Inc."}}, "required": ["ticker"]}})
+print(claude_tools_use.to_json())
+
+class claude3bot:
+    def __init__(
+        self,
+        api_key: str,
+        engine: str = os.environ.get("GPT_ENGINE") or "claude-3-opus-20240229",
+        temperature: float = 0.5,
+        top_p: float = 0.7,
+        chat_url: str = "https://api.anthropic.com/v1/messages",
+        timeout: float = 20,
+        system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally",
+        **kwargs,
+    ):
+        self.api_key: str = api_key
+        self.engine: str = engine
+        self.temperature = temperature
+        self.top_p = top_p
+        self.chat_url = chat_url
+        self.timeout = timeout
+        self.session = requests.Session()
+        self.conversation: dict[str, list[dict]] = {
+            "default": [],
+        }
+        self.system_prompt = system_prompt
+
+    def add_to_conversation(
+        self,
+        message: str,
+        role: str,
+        convo_id: str = "default",
+        pass_history: bool = True,
+    ) -> None:
+        """
+        Add a message to the conversation
+        """
+
+        if convo_id not in self.conversation or pass_history == False:
+            self.reset(convo_id=convo_id)
+        # print("message", message)
+        self.conversation[convo_id].append({"role": role, "content": message})
+        index = len(self.conversation[convo_id]) - 2
+        if index >= 0 and self.conversation[convo_id][index]["role"] == self.conversation[convo_id][index + 1]["role"]:
+            self.conversation[convo_id][index]["content"] += self.conversation[convo_id][index + 1]["content"]
+            self.conversation[convo_id].pop(index + 1)
+
+    def reset(self, convo_id: str = "default", system_prompt: str = None) -> None:
+        """
+        Reset the conversation
+        """
+        self.conversation[convo_id] = list()
+
+    def __truncate_conversation(self, convo_id: str = "default") -> None:
+        """
+        Truncate the conversation
+        """
+        while True:
+            if (
+                self.get_token_count(convo_id) > self.truncate_limit
+                and len(self.conversation[convo_id]) > 1
+            ):
+                # Don't remove the first message
+                self.conversation[convo_id].pop(1)
+            else:
+                break
+
+    def get_token_count(self, convo_id: str = "default") -> int:
+        """
+        Get token count
+        """
+        if self.engine not in ENGINES:
+            raise NotImplementedError(
+                f"Engine {self.engine} is not supported. Select from {ENGINES}",
+            )
+        tiktoken.model.MODEL_TO_ENCODING["claude-2.1"] = "cl100k_base"
+        encoding = tiktoken.encoding_for_model(self.engine)
+
+        num_tokens = 0
+        for message in self.conversation[convo_id]:
+            # every message follows <im_start>{role/name}\n{content}<im_end>\n
+            num_tokens += 5
+            for key, value in message.items():
+                if value:
+                    num_tokens += len(encoding.encode(value))
+                if key == "name":  # if there's a name, the role is omitted
+                    num_tokens += 5  # role is always required and always 1 token
+        num_tokens += 5  # every reply is primed with <im_start>assistant
+        return num_tokens
+
+    def ask_stream(
+        self,
+        prompt: str,
+        role: str = "user",
+        convo_id: str = "default",
+        model: str = None,
+        pass_history: bool = True,
+        model_max_tokens: int = 4096,
+        **kwargs,
+    ):
+        pass_history = True
+        if convo_id not in self.conversation or pass_history == False:
+            self.reset(convo_id=convo_id)
+        self.add_to_conversation(prompt, role, convo_id=convo_id)
+        # self.__truncate_conversation(convo_id=convo_id)
+        # print(self.conversation[convo_id])
+
+        url = self.chat_url
+        headers = {
+            "x-api-key": f"{kwargs.get('api_key', self.api_key)}",
+            "anthropic-version": "2023-06-01",
+            "content-type": "application/json",
+            "anthropic-beta": "tools-2024-04-04"
+        }
+
+        json_post = {
+            "model": os.environ.get("MODEL_NAME") or model or self.engine,
+            "messages": self.conversation[convo_id] if pass_history else [{
+                "role": "user",
+                "content": prompt
+            }],
+            "temperature": kwargs.get("temperature", self.temperature),
+            "top_p": kwargs.get("top_p", self.top_p),
+            "max_tokens": model_max_tokens,
+            # "stream": True,
+        }
+
+        # json_post.update(copy.deepcopy(json_post))
+        json_post.update(claude_tools_list["base"])
+        for item in config.PLUGINS.keys():
+            try:
+                if config.PLUGINS[item]:
+                    json_post["tools"].append(claude_tools_list[item])
+            except:
+                pass
+
+        if self.system_prompt:
+            json_post["system"] = self.system_prompt
+        print(json.dumps(json_post, indent=4, ensure_ascii=False))
+
+        try:
+            response = self.session.post(
+                url,
+                headers=headers,
+                json=json_post,
+                timeout=kwargs.get("timeout", self.timeout),
+                stream=True,
+            )
+        except ConnectionError:
+            print("连接错误，请检查服务器状态或网络连接。")
+            return
+        except Exception as e:
+            print(f"发生了未预料的错误: {e}")
+            return
+
+        if response.status_code != 200:
+            print(response.text)
+            raise BaseException(f"{response.status_code} {response.reason} {response.text}")
+        response_role: str = "assistant"
+        full_response: str = ""
+        for line in response.iter_lines():
+            if not line or line.decode("utf-8")[:6] == "event:" or line.decode("utf-8") == "data: {}":
+                continue
+            print(line.decode("utf-8"))
+            if "tool_use" in line.decode("utf-8"):
+                tool_input = json.loads(line.decode("utf-8")["content"][1]["input"])
+            else:
+                line = line.decode("utf-8")[6:]
+            resp: dict = json.loads(line)
+            delta = resp.get("delta")
+            if not delta:
+                continue
+            if "text" in delta:
+                content = delta["text"]
+                full_response += content
+                yield content
+        self.add_to_conversation(full_response, response_role, convo_id=convo_id)
+        # print(repr(self.conversation.Conversation(convo_id)))
+        # print("total tokens:", self.get_token_count(convo_id))