Commit 3c90dcd
1. Refactored the plugin function file, cleaning up unused functions.
2. Updated g4f to version 0.1.9.6; fixed a bug with g4f availability.
3. Updated the README file.
yym68686 committed Dec 25, 2023
1 parent ea6c5e6 commit 3c90dcd
Showing 10 changed files with 345 additions and 289 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -55,7 +55,7 @@ To develop plugins, please follow the steps outlined below:
- Initially, you need to add the environment variable for the plugin in the `config.PLUGINS` dictionary located in the `config.py` file. The value can be customized to be either enabled or disabled by default. It is advisable to use uppercase letters for the entire environment variable.
- Subsequently, append the function's name and description in the `utils/function_call.py` file.
- Then, enhance the `ask_stream` function in the `utils/chatgpt2api.py` file with the function's processing logic. You can refer to the existing examples within the `ask_stream` method for guidance on how to write it.
- - Following that, write the function, as mentioned in the `utils/function_call.py` file, in the `utils/agent.py` file.
+ - Following that, write the function, as mentioned in the `utils/function_call.py` file, in the `utils/plugins.py` file.
- Next, in the `bot.py` file, augment the `update_first_buttons_message` function with buttons, enabling users to freely toggle plugins using the `info` command.
- Lastly, don't forget to add the plugin's description in the plugins section of the README.
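
For orientation, here is a minimal sketch of the first two steps above, using a hypothetical plugin named `EXAMPLE`. The entry shapes are assumptions inferred from the surrounding diff, not code from this commit:

```python
# config.py — hypothetical "EXAMPLE" toggle, following the env-var idiom
# this commit uses for USE_G4F (any value except "False" enables it).
import os

PLUGINS = {
    "EXAMPLE": os.environ.get("EXAMPLE", "False") != "False",
}

# utils/function_call.py — assumed shape of a function description entry,
# mirroring the OpenAI function-calling schema; all names are illustrative.
function_call_list = {
    "EXAMPLE": {
        "name": "run_example",
        "description": "One-line description shown to the model.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "The user query."},
            },
            "required": ["query"],
        },
    },
}
```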

8 changes: 4 additions & 4 deletions bot.py
@@ -8,7 +8,7 @@
from utils.chatgpt2api import Chatbot as GPT
from utils.chatgpt2api import claudebot
from telegram.constants import ChatAction
-from utils.agent import docQA, get_doc_from_local, Document_extract, pdfQA, get_encode_image
+from utils.plugins import Document_extract, get_encode_image
from telegram import BotCommand, InlineKeyboardButton, InlineKeyboardMarkup, InlineQueryResultArticle, InputTextMessageContent
from telegram.ext import CommandHandler, MessageHandler, ApplicationBuilder, filters, CallbackQueryHandler, Application, AIORateLimiter, InlineQueryHandler
from config import WEB_HOOK, PORT, BOT_TOKEN
@@ -74,6 +74,9 @@ async def command_bot(update, context, language=None, prompt=translator_prompt,
    if message:
        if "claude" in config.GPT_ENGINE and config.ClaudeAPI:
            robot = config.claudeBot
+       if not config.API or config.PLUGINS["USE_G4F"]:
+           import utils.gpt4free as gpt4free
+           robot = gpt4free
        if image_url:
            robot = config.GPT4visionbot
            title = "`🤖️ gpt-4-vision-preview`\n\n"
@@ -124,9 +127,6 @@ async def getChatGPT(update, context, title, robot, message, chatid, messageid):
    )
    messageid = message.message_id
    get_answer = robot.ask_stream
-   if not config.API or (config.PLUGINS["USE_G4F"] and not config.PLUGINS["SEARCH_USE_GPT"]):
-       import utils.gpt4free as gpt4free
-       get_answer = gpt4free.get_response

    try:
        for data in get_answer(text, convo_id=str(chatid), pass_history=config.PASS_HISTORY):
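Taken together, the two bot.py hunks move the g4f fallback decision out of `getChatGPT` and into `command_bot`, so a single `robot` object is chosen up front. A sketch of the resulting selection order; the default bot name (`ChatGPTbot`) is an assumption, since the default is not shown in this diff:

```python
# Sketch of provider selection after this commit (default name assumed).
def select_robot(config, image_url=None):
    robot = config.ChatGPTbot                      # assumed default bot
    if "claude" in config.GPT_ENGINE and config.ClaudeAPI:
        robot = config.claudeBot                   # Claude engine configured
    if not config.API or config.PLUGINS["USE_G4F"]:
        import utils.gpt4free as gpt4free          # no API key, or g4f forced on
        robot = gpt4free                           # module exposes ask_stream
    if image_url:
        robot = config.GPT4visionbot               # vision requests win last
    return robot
```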
2 changes: 1 addition & 1 deletion config.py
@@ -57,7 +57,7 @@

PLUGINS = {
    "SEARCH_USE_GPT": (os.environ.get('SEARCH_USE_GPT', "True") == "False") == False,
-   "USE_G4F": False,
+   "USE_G4F": (os.environ.get('USE_G4F', "False") == "False") == False,
    "DATE": True,
    "URL": True,
    "VERSION": True,
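The `(os.environ.get('USE_G4F', "False") == "False") == False` idiom enables the flag for any value except the literal string "False". An equivalent, more direct spelling (editorial illustration only, not the commit's code):

```python
import os

# Equivalent to (os.environ.get('USE_G4F', "False") == "False") == False.
# Any value other than the exact string "False" turns the plugin on;
# e.g. USE_G4F=true, USE_G4F=1, even USE_G4F=false (lowercase) all enable it.
USE_G4F = os.environ.get("USE_G4F", "False") != "False"
```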
2 changes: 1 addition & 1 deletion requirements.txt
@@ -16,7 +16,7 @@ duckduckgo-search==4.1.0
langchain==0.0.271
oauth2client==3.0.0
pdfminer.six
-g4f==0.1.8.8
+g4f==0.1.9.6

# plugin
pytz
1 change: 1 addition & 0 deletions test/test_gpt4free.py
@@ -15,6 +15,7 @@ def get_response(message, model="gpt-3.5-turbo"):
if __name__ == "__main__":
    console = Console()
    message = r"""
+Who is Ri Sol-ju?
    """
    answer = ""
    for result in get_response(message, "gpt-4"):
235 changes: 235 additions & 0 deletions test/test_langchain_search_old.py
@@ -0,0 +1,235 @@
import os
import re

import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import config

from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain, LLMChain
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import UnstructuredPDFLoader

# NOTE: summary_each_url and get_search_results below also reference
# get_doc_from_url, get_url_text_list, cut_message and ThreadWithReturnValue,
# which this old test file never defines or imports.

def getmd5(string):
    import hashlib
    md5_hash = hashlib.md5()
    md5_hash.update(string.encode('utf-8'))
    md5_hex = md5_hash.hexdigest()
    return md5_hex

from utils.sitemap import SitemapLoader
async def get_doc_from_sitemap(url):
    # https://www.langchain.asia/modules/indexes/document_loaders/examples/sitemap#%E8%BF%87%E6%BB%A4%E7%AB%99%E7%82%B9%E5%9C%B0%E5%9B%BE-url-
    sitemap_loader = SitemapLoader(web_path=url)
    docs = await sitemap_loader.load()
    return docs

async def get_doc_from_local(docpath, doctype="md"):
    from langchain.document_loaders import DirectoryLoader
    # Load every file of the given type from the directory
    loader = DirectoryLoader(docpath, glob='**/*.' + doctype)
    # Convert the files into Document objects; each file becomes one Document
    documents = loader.load()
    return documents

system_template="""Use the following pieces of context to answer the users question.
If you don't know the answer, just say "Hmm..., I'm not sure.", don't try to make up an answer.
ALWAYS return a "Sources" part in your answer.
The "Sources" part should be a reference to the source of the document from which you got your answer.
Example of your response should be:
```
The answer is foo
Sources:
1. abc
2. xyz
```
Begin!
----------------
{summaries}
"""
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}")
]
prompt = ChatPromptTemplate.from_messages(messages)

def get_chain(store, llm):
    chain_type_kwargs = {"prompt": prompt}
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
        chain_type_kwargs=chain_type_kwargs,
        reduce_k_below_max_tokens=True
    )
    return chain

async def docQA(docpath, query_message, persist_db_path="db", model="gpt-3.5-turbo"):
    chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.bot_api_url.v1_url, model_name=model, openai_api_key=config.API)
    embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=config.API)

    sitemap = "sitemap.xml"
    match = re.match(r'^(https?|ftp)://[^\s/$.?#].[^\s]*$', docpath)
    if match:
        doc_method = get_doc_from_sitemap
        docpath = os.path.join(docpath, sitemap)
    else:
        doc_method = get_doc_from_local

    persist_db_path = getmd5(docpath)
    if not os.path.exists(persist_db_path):
        documents = await doc_method(docpath)
        # Initialize the text splitter
        text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=50)
        # Split the documents and persist the embeddings
        split_docs = text_splitter.split_documents(documents)
        vector_store = Chroma.from_documents(split_docs, embeddings, persist_directory=persist_db_path)
        vector_store.persist()
    else:
        # Load the persisted vector store
        vector_store = Chroma(persist_directory=persist_db_path, embedding_function=embeddings)

    # Create the QA chain
    qa = get_chain(vector_store, chatllm)
    # qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(), return_source_documents=True)
    # Run the query
    result = qa({"question": query_message})
    return result


def persist_embedding_pdf(docurl, persist_db_path):
    embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=os.environ.get('API', None))
    filename = get_doc_from_url(docurl)
    docpath = os.getcwd() + "/" + filename
    loader = UnstructuredPDFLoader(docpath)
    documents = loader.load()
    # Initialize the text splitter
    text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=25)
    # Split the loaded documents
    split_docs = text_splitter.split_documents(documents)
    vector_store = Chroma.from_documents(split_docs, embeddings, persist_directory=persist_db_path)
    vector_store.persist()
    os.remove(docpath)
    return vector_store

async def pdfQA(docurl, docpath, query_message, model="gpt-3.5-turbo"):
    chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.bot_api_url.v1_url, model_name=model, openai_api_key=os.environ.get('API', None))
    embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=os.environ.get('API', None))
    persist_db_path = getmd5(docpath)
    if not os.path.exists(persist_db_path):
        vector_store = persist_embedding_pdf(docurl, persist_db_path)
    else:
        vector_store = Chroma(persist_directory=persist_db_path, embedding_function=embeddings)
    qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(), return_source_documents=True)
    result = qa({"query": query_message})
    return result['result']


def pdf_search(docurl, query_message, model="gpt-3.5-turbo"):
    chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.bot_api_url.v1_url, model_name=model, openai_api_key=os.environ.get('API', None))
    embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=os.environ.get('API', None))
    filename = get_doc_from_url(docurl)
    docpath = os.getcwd() + "/" + filename
    loader = UnstructuredPDFLoader(docpath)
    try:
        documents = loader.load()
    except Exception:
        print("pdf load error! docpath:", docpath)
        return ""
    os.remove(docpath)
    # Initialize the text splitter
    text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=25)
    # Split the loaded documents
    split_docs = text_splitter.split_documents(documents)
    vector_store = Chroma.from_documents(split_docs, embeddings)
    # Create the QA chain
    qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(), return_source_documents=True)
    # Run the query
    result = qa({"query": query_message})
    return result['result']

def summary_each_url(threads, chainllm, prompt):
    summary_prompt = PromptTemplate(
        input_variables=["web_summary", "question", "language"],
        template=(
            "You need to respond to the following question: {question}."
            "Your task is to answer the above question in {language} based on the search results provided. Provide a detailed and in-depth response."
            "If there is no relevant content in the search results, just answer None, do not make any explanations."
            "Search results: {web_summary}."
        ),
    )
    summary_threads = []

    for t in threads:
        tmp = t.join()
        print(tmp)
        chain = LLMChain(llm=chainllm, prompt=summary_prompt)
        chain_thread = ThreadWithReturnValue(target=chain.run, args=({"web_summary": tmp, "question": prompt, "language": config.LANGUAGE},))
        chain_thread.start()
        summary_threads.append(chain_thread)

    url_result = ""
    for t in summary_threads:
        tmp = t.join()
        print("summary", tmp)
        if tmp != "None":
            url_result += "\n\n" + tmp
    return url_result

def get_search_results(prompt: str, context_max_tokens: int):
    url_text_list = get_url_text_list(prompt)
    useful_source_text = "\n\n".join(url_text_list)
    # useful_source_text = summary_each_url(threads, chainllm, prompt)

    useful_source_text, search_tokens_len = cut_message(useful_source_text, context_max_tokens)
    print("search tokens len", search_tokens_len, "\n\n")

    return useful_source_text

from typing import Any
from langchain.schema.output import LLMResult
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

class ChainStreamHandler(StreamingStdOutCallbackHandler):
    def __init__(self):
        self.tokens = []
        # Set to True once generation has finished
        self.finish = False
        self.answer = ""

    def on_llm_new_token(self, token: str, **kwargs):
        # print(token)
        self.tokens.append(token)
        # yield ''.join(self.tokens)
        # print(''.join(self.tokens))

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        self.finish = True

    def on_llm_error(self, error: Exception, **kwargs: Any) -> None:
        print(str(error))
        self.tokens.append(str(error))

    def generate_tokens(self):
        # Drain queued tokens until generation finishes and the queue is empty.
        # Note: the else branch busy-waits between callback deliveries.
        while not self.finish or self.tokens:
            if self.tokens:
                data = self.tokens.pop(0)
                self.answer += data
                yield data
            else:
                pass
        return self.answer
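
For reference, a minimal driver for the `docQA` helper above — assuming a valid `config.API` key and a local folder of markdown docs; the path and question are made up:

```python
import asyncio

async def main():
    # docQA returns the RetrievalQAWithSourcesChain result dict,
    # which carries "answer" and "sources" keys.
    result = await docQA("./docs", "How do I enable a plugin?")
    print(result["answer"])
    print(result["sources"])

if __name__ == "__main__":
    asyncio.run(main())
```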
7 changes: 5 additions & 2 deletions test/test_url.py
@@ -11,8 +11,11 @@ def extract_date(url):
        match = "1000/01/01"
    else:
        match = "1000/01/01"
-   return datetime.datetime.strptime(match, '%Y/%m/%d')
+   try:
+       return datetime.datetime.strptime(match, '%Y/%m/%d')
+   except:
+       match = "1000/01/01"
+       return datetime.datetime.strptime(match, '%Y/%m/%d')

# Extract the dates and build a list of (date, URL) tuples
date_url_pairs = [(extract_date(url), url) for url in urls]
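Given the `date_url_pairs` context line, the test presumably sorts URLs newest-first; with the `1000/01/01` sentinel, undated URLs sink to the end. A one-line sketch of that step (assumed, not shown in the hunk):

```python
# Newest first; the 1000/01/01 sentinel pushes undated URLs to the back.
sorted_urls = [url for _, url in sorted(date_url_pairs, reverse=True)]
```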
2 changes: 1 addition & 1 deletion utils/chatgpt2api.py
@@ -13,7 +13,7 @@
from typing import Set

import config
-from utils.agent import *
+from utils.plugins import *
from utils.function_call import function_call_list

def get_filtered_keys_from_object(obj: object, *keys: str) -> Set[str]:
30 changes: 26 additions & 4 deletions utils/gpt4free.py
@@ -1,10 +1,32 @@
import re
import g4f
+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import config

-def get_response(message, **kwargs):
+GPT_ENGINE_map = {
+    "gpt-3.5-turbo": "gpt-3.5-turbo",
+    "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k",
+    "gpt-3.5-turbo-0301": "gpt-3.5-turbo",
+    "gpt-3.5-turbo-0613": "gpt-3.5-turbo-0613",
+    "gpt-3.5-turbo-1106": "gpt-3.5-turbo",
+    "gpt-3.5-turbo-16k-0613": "gpt-3.5-turbo-0613",
+    "gpt-4": "gpt-4",
+    "gpt-4-0314": "gpt-4",
+    "gpt-4-32k": "gpt-4-32k",
+    "gpt-4-32k-0314": "gpt-4",
+    "gpt-4-0613": "gpt-4-0613",
+    "gpt-4-32k-0613": "gpt-4-32k-0613",
+    "gpt-4-1106-preview": "gpt-4-turbo",
+    "gpt-4-vision-preview": "gpt-4",
+    "claude-2-web": "gpt-4",
+    "claude-2": "gpt-4",
+}
+
+def ask_stream(message, **kwargs):
    response = g4f.ChatCompletion.create(
-       model=config.GPT_ENGINE,
+       model=GPT_ENGINE_map[config.GPT_ENGINE],
        messages=[{"role": "user", "content": message}],
        stream=True,
    )
@@ -22,8 +44,8 @@ def bing(response):
if __name__ == "__main__":

    message = rf"""
Why did Lu Xun and Zhou Shuren fight?
"""
    answer = ""
-   for result in get_response(message, "gpt-4"):
+   for result in ask_stream(message, model="gpt-4"):
        print(result, end="")
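
One caveat worth noting: `GPT_ENGINE_map[config.GPT_ENGINE]` raises `KeyError` for any engine name missing from the map. A defensive variant — an editorial suggestion, not part of this commit — would fall back to a default:

```python
# Fall back to gpt-3.5-turbo for engine names the map doesn't cover,
# instead of raising KeyError. Editorial sketch only.
model = GPT_ENGINE_map.get(config.GPT_ENGINE, "gpt-3.5-turbo")
```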
