
Commit b7c5be5

updated LLM

mohdzain98 committed Jul 2, 2024
1 parent dc97431 commit b7c5be5
Showing 16 changed files with 251 additions and 128 deletions.
78 changes: 55 additions & 23 deletions Microservices/LLM/Initialize.py
@@ -1,5 +1,6 @@
from pdftxt import handlePDF, handleTXT, Embed
from spreadsheet import handleSS
+from static.Tokens import calEtokens
import pandas as pd
from sql import sequel
import os
@@ -21,18 +22,25 @@ def __init__(self):
        self.eToken = 0
        self.db=""
        self.initialized = True
+        self.ready=False

    def initdb(self, file_type, name):
        if(str(file_type) == 'pdf'):
            pdf = handlePDF(f'tmp/{name}.pdf')
            load = pdf.extract_text_from_pdf()
-            db,eToken = Embed.getEmbeddings(load)
-            self.eToken = eToken
-            self.db=db
+            wordCount = calEtokens(load)
+            if(wordCount < 5000):
+                db,eToken = Embed.getEmbeddings(load)
+                self.eToken = eToken
+                self.db=db
+                self.ready=True
+            else:
+                self.eToken = 0
+                self.db=0
            Free.doFree(f'tmp/{name}.pdf')

    def initret(self):
-        return self.eToken,self.db
+        return self.ready,self.eToken,self.db


class Initxt:
@@ -48,18 +56,25 @@ def __init__(self):
        self.eToken = 0
        self.db=""
        self.initialized = True
+        self.ready=False

    def initdb(self, file_type, name):
        if(str(file_type) == 'txt'):
            txt = handleTXT(f'tmp/{name}.txt')
            load = txt.extract_text_from_txt()
-            db,eToken = Embed.getEmbeddings(load)
-            self.eToken = eToken
-            self.db=db
+            wordCount = calEtokens(load)
+            if(wordCount < 5000):
+                db,eToken = Embed.getEmbeddings(load)
+                self.eToken = eToken
+                self.db=db
+                self.ready=True
+            else:
+                self.eToken = 0
+                self.db=0
            Free.doFree(f'tmp/{name}.txt')

    def initret(self):
-        return self.eToken,self.db
+        return self.ready,self.eToken,self.db


class Initcsv:
@@ -75,18 +90,24 @@ def __init__(self):
        self.eToken = 0
        self.db=""
        self.initialized = True
+        self.ready=False

    def initdb(self, file_type,name):
        if(str(file_type) == 'csv'):
            file = handleSS(f'tmp/{name}.csv')
            csvFile = file.loadData()
-            db,eToken = file.EmbedSS.getEmbeddings(csvFile)
-            self.eToken = eToken
-            self.db=db
+            if(calEtokens(csvFile)<5000):
+                db,eToken = file.EmbedSS.getEmbeddings(csvFile)
+                self.eToken = eToken
+                self.db=db
+                self.ready=True
+            else:
+                self.eToken=0
+                self.db=""
            Free.doFree(f'tmp/{name}.csv')

    def initret(self):
-        return self.eToken,self.db
+        return self.ready,self.eToken,self.db



@@ -103,19 +124,25 @@ def __init__(self):
        self.eToken = 0
        self.db=""
        self.initialized = True
+        self.ready=False

    def initdb(self, file_type, name):
        if(str(file_type) == 'xlsx'):
            file = handleSS(f'tmp/{name}.xlsx')
            fle = pd.read_excel(f'tmp/{name}.xlsx')
            xlFile = file.handleExcel(fle)
-            db,eToken = file.EmbedSS.getEmbeddings(xlFile)
-            self.eToken = eToken
-            self.db=db
+            if(calEtokens(xlFile)<5000):
+                db,eToken = file.EmbedSS.getEmbeddings(xlFile)
+                self.eToken = eToken
+                self.db=db
+                self.ready=True
+            else:
+                self.eToken=0
+                self.db=""
            Free.doFree(f'tmp/{name}.xlsx')

    def initret(self):
-        return self.eToken, self.db
+        return self.ready,self.eToken, self.db

class Initsql:
    _instance = None
@@ -131,20 +158,25 @@ def __init__(self):
        self.name=""
        self.initialized = True
        self.file=False
+        self.ready=False

    def initdb(self, file_type,name):
        if(str(file_type) == 'sql'):
            getsql= sequel(f'tmp/{name}.sql')
            sqliteCon = getsql.convert_mysql_to_sqlite()
            cToken = len(sqliteCon.split())
-            sqliteFile = getsql.splite_script_to_db(f'{name}.db',sqliteCon)
-            if sqliteFile:
-                self.file = 1
+            if(cToken < 5000):
+                sqliteFile = getsql.splite_script_to_db(f'{name}.db',sqliteCon)
+                if sqliteFile:
+                    self.file = 1
+                else:
+                    self.file = 0
+                self.cToken = cToken
+                self.name = name
+                self.ready=True
+            else:
+                self.file = 0
+                self.cToken = cToken
+                self.name = name
+                self.ready=False
            Free.doFree(f'tmp/{name}.sql')

    def initret(self):
-        return self.cToken, self.file, self.name
+        return self.ready,self.cToken, self.file, self.name
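Note: calEtokens is imported from the new static/Tokens.py, which this view does not expand. A minimal sketch of what it plausibly does, assuming it estimates token count from a whitespace word count using the same ~1.334 tokens-per-word ratio the old app.py applied to gToken (the estimation rule is an assumption; only the function name and the <5000 threshold come from the diff):

import math

def calEtokens(text) -> int:
    # Hypothetical estimator: coerce the extracted payload to a string and
    # approximate the LLM token count from its whitespace word count.
    words = len(str(text).split())
    return math.floor(words * 1.334)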
6 changes: 0 additions & 6 deletions Microservices/LLM/Tokens.py

This file was deleted.

112 changes: 62 additions & 50 deletions Microservices/LLM/app.py
@@ -14,26 +14,26 @@
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import ConversationChain
from langchain.memory import ConversationSummaryBufferMemory
+from langchain_community.callbacks.manager import get_openai_callback
from langchain_community.vectorstores import FAISS
-from Tokens import calTokens
from static.Filename import filename
+from static.Chain import chain
import os
import traceback
import redis
import math


app = Flask(__name__)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'
status = "active"
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
-llm = ChatOpenAI()
+llm = ChatOpenAI(max_tokens=500)

redis_host = os.getenv('REDIS_HOST')
redis_port = os.getenv('REDIS_PORT')

-redis_client = redis.StrictRedis(host=redis_host, port=redis_port, db=0)
+redis_client = redis.Redis(host=redis_host, port=redis_port, db=0)

try:
    path = os.path.dirname(os.path.abspath(__file__))
@@ -60,7 +60,7 @@ def uploadFile(name):

@app.route('/')
def hello_world():
-    return jsonify({"status":status,"Value":'LLM Server Running Successfully',"Version":1.3})
+    return jsonify({"status":status,"Value":'LLM Server Running Successfully',"Version":1.4})

@app.route('/uploadfile/<file_type>/<sid>',methods=['POST','GET'])
@cross_origin()
@@ -75,9 +75,12 @@ def uploaded(file_type,sid):
        if(upload):
            file=Init()
            file.initdb(file_type,name)
-            eToken,db = file.initret()
-            redis_client.hset(sid, 'db', db)
-            redis_client.hset(sid, 'eToken', eToken)
+            ready,eToken,db = file.initret()
+            if(ready):
+                redis_client.hset(sid, 'db', db)
+                redis_client.hset(sid, 'eToken', eToken)
+            else:
+                return jsonify({"success":False,"msg":"File size larger than 5000 tokens"})
            return success
        else:
            return fail
@@ -87,9 +90,12 @@ def uploaded(file_type,sid):
        if(upload):
            file = Initxt()
            file.initdb(file_type,name)
-            eToken,db = file.initret()
-            redis_client.hset(sid, 'db', db)
-            redis_client.hset(sid, 'eToken', eToken)
+            ready,eToken,db = file.initret()
+            if(ready):
+                redis_client.hset(sid, 'db', db)
+                redis_client.hset(sid, 'eToken', eToken)
+            else:
+                return jsonify({"success":False,"msg":"File size larger than 5000 tokens"})
            return success
        else:
            return fail
@@ -98,9 +104,12 @@ def uploaded(file_type,sid):
        if(upload):
            file = Initcsv()
            file.initdb(file_type,name)
-            eToken,db = file.initret()
-            redis_client.hset(sid, 'db', db)
-            redis_client.hset(sid, 'eToken', eToken)
+            ready,eToken,db = file.initret()
+            if(ready):
+                redis_client.hset(sid, 'db', db)
+                redis_client.hset(sid, 'eToken', eToken)
+            else:
+                return jsonify({"success":False,"msg":"File size larger than 5000 tokens"})
            return success
        else:
            return fail
@@ -109,9 +118,12 @@ def uploaded(file_type,sid):
        if(upload):
            file = Initxlsx()
            file.initdb(file_type,name)
-            eToken,db = file.initret()
-            redis_client.hset(sid, 'db', db)
-            redis_client.hset(sid, 'eToken', eToken)
+            ready,eToken,db = file.initret()
+            if(ready):
+                redis_client.hset(sid, 'db', db)
+                redis_client.hset(sid, 'eToken', eToken)
+            else:
+                return jsonify({"success":False,"msg":"File size larger than 5000 tokens"})
            return success
        else:
            return fail
@@ -120,10 +132,13 @@ def uploaded(file_type,sid):
        if(upload):
            file = Initsql()
            file.initdb(file_type,name)
-            cToken,go,name = file.initret()
-            redis_client.hset(sid, 'go', go)
-            redis_client.hset(sid, 'cToken', cToken)
-            redis_client.hset(sid, 'name',name)
+            ready,cToken,go,name = file.initret()
+            if(ready):
+                redis_client.hset(sid, 'go', go)
+                redis_client.hset(sid, 'cToken', cToken)
+                redis_client.hset(sid, 'name',name)
+            else:
+                return jsonify({"success":False,"msg":"File size larger than 5000 tokens"})
            return success
        else:
            return fail
@@ -134,7 +149,7 @@ def uploaded(file_type,sid):
def chat():
    req = request.json
    ques = req.get('query')
-    memory = ConversationSummaryBufferMemory(llm=llm,max_token_limit=100)
+    memory = ConversationSummaryBufferMemory(llm=llm,max_token_limit=500)
    conversation = ConversationChain(llm=llm,memory=memory)
    result = conversation.predict(input=ques)
    cToken = len(ques.split())
@@ -148,13 +163,12 @@ def chatpdf(sid):
    ques = req.get('query')
    eToken = int(redis_client.hget(sid, 'eToken'))
    db = FAISS.deserialize_from_bytes(embeddings=OpenAIEmbeddings(), serialized=redis_client.hget(sid, 'db'),allow_dangerous_deserialization=True)
-    doc = db.similarity_search(ques)
-    cToken = calTokens(doc)
-    chain = load_qa_chain(llm,chain_type='stuff')
-    result = chain.run(input_documents=doc,question=ques)
-    gToken = math.floor(len(result.split())*1.334)
-    total = cToken + eToken + gToken
-    res = jsonify({"result":result,"cToken":cToken+eToken,"gToken":gToken,"total":total})
+    retriever = db.as_retriever(search_type='mmr',search_kwargs={'k':3})
+    chains = chain(retriever, llm)
+    with get_openai_callback() as cb:
+        result = chains.invoke(ques)
+    total = cb.prompt_tokens + eToken + cb.completion_tokens
+    res = jsonify({"result":result,"cToken":cb.prompt_tokens+eToken,"gToken":cb.completion_tokens,"total":total})
    return (res)

@app.route('/chat/txt/<sid>', methods=['POST', 'GET'])
@@ -163,13 +177,12 @@ def chattxt(sid):
    ques = req.get('query')
    eToken = int(redis_client.hget(sid, 'eToken'))
    db = FAISS.deserialize_from_bytes(embeddings=OpenAIEmbeddings(), serialized=redis_client.hget(sid, 'db'),allow_dangerous_deserialization=True)
-    doc = db.similarity_search(ques)
-    cToken = calTokens(doc)
-    chain = load_qa_chain(llm,chain_type='stuff')
-    result = chain.run(input_documents=doc,question=ques)
-    gToken = math.floor(len(result.split())*1.334)
-    total = cToken + eToken + gToken
-    res = jsonify({"result":result,"cToken":cToken+eToken,"gToken":gToken,"total":total})
+    retriever = db.as_retriever(search_type='mmr',search_kwargs={'k':3})
+    chains = chain(retriever, llm)
+    with get_openai_callback() as cb:
+        result = chains.invoke(ques)
+    total = cb.prompt_tokens + eToken + cb.completion_tokens
+    res = jsonify({"result":result,"cToken":cb.prompt_tokens+eToken,"gToken":cb.completion_tokens,"total":total})
    return (res)

@app.route('/chat/csv/<sid>', methods=['POST', 'GET'])
@@ -178,13 +191,13 @@ def chatcsv(sid):
    ques = req.get('query')
    eToken = int(redis_client.hget(sid, 'eToken'))
    db = FAISS.deserialize_from_bytes(embeddings=OpenAIEmbeddings(), serialized=redis_client.hget(sid, 'db'),allow_dangerous_deserialization=True)
-    doc = db.similarity_search(ques)
-    cToken = calTokens(doc)
-    chain = load_qa_chain(llm,chain_type='stuff')
-    result = chain.run(input_documents=doc,question=ques)
-    gToken = math.floor(len(result.split())*1.334)
-    total = cToken + eToken + gToken
-    res = jsonify({"result":result,"cToken":cToken+eToken,"gToken":gToken,"total":total})
+    retriever = db.as_retriever(search_type='mmr',search_kwargs={'k':3})
+    chains = chain(retriever, llm)
+    with get_openai_callback() as cb:
+        result = chains.invoke(ques)
+    total = cb.prompt_tokens + eToken + cb.completion_tokens
+    print(eToken, cb.prompt_tokens,cb.completion_tokens)
+    res = jsonify({"result":result,"cToken":cb.prompt_tokens+eToken,"gToken":cb.completion_tokens,"total":total})
    return (res)

@app.route('/chat/xlsx/<sid>', methods=['POST', 'GET'])
Expand All @@ -193,13 +206,12 @@ def chatxlsx(sid):
ques = req.get('query')
eToken = int(redis_client.hget(sid, 'eToken'))
db = FAISS.deserialize_from_bytes(embeddings=OpenAIEmbeddings(), serialized=redis_client.hget(sid, 'db'),allow_dangerous_deserialization=True)
doc = db.similarity_search(ques)
cToken = calTokens(doc)
chain = load_qa_chain(llm,chain_type='stuff')
result = chain.run(input_documents=doc,question=ques)
gToken = math.floor(len(result.split())*1.334)
total = cToken + eToken + gToken
res = jsonify({"result":result,"cToken":cToken+eToken,"gToken":gToken,"total":total})
retriever = db.as_retriever(search_type='mmr',search_kwargs={'k':3})
chains = chain(retriever, llm)
with get_openai_callback() as cb:
result = chains.invoke(ques)
total = cb.prompt_tokens + eToken + cb.completion_tokens
res = jsonify({"result":result,"cToken":cb.prompt_tokens+eToken,"gToken":cb.completion_tokens,"total":total})
return (res)

@app.route('/chat/sql/<sid>', methods=['POST', 'GET'])
(The /chat/sql route body and the remaining changed files are collapsed in this view.)
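Note on the rewritten chat routes: each one now builds a retriever over the deserialized FAISS index and hands it to chain(retriever, llm) from static/Chain.py, a file this view does not expand. A plausible sketch, assuming a standard LCEL retrieval pipeline (the prompt wording is invented; only the chain(retriever, llm) signature, the RunnablePassthrough import in app.py, and the string result implied by chains.invoke(ques) come from the diff):

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

def chain(retriever, llm):
    # Retrieved documents fill {context}; the raw question passes through.
    prompt = ChatPromptTemplate.from_template(
        "Answer the question using only the context below.\n\n"
        "Context:\n{context}\n\nQuestion: {question}"
    )
    # retriever -> prompt -> llm -> plain string, so chains.invoke(ques)
    # returns text that can go straight into jsonify().
    return (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )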

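The token accounting also changed: instead of estimating output tokens with math.floor(len(result.split())*1.334), the routes wrap the LLM call in get_openai_callback, which reports exact prompt and completion token usage for every OpenAI call made inside the with block. A standalone usage example (the langchain_openai import path is assumed; it is not visible in this diff):

from langchain_community.callbacks.manager import get_openai_callback
from langchain_openai import ChatOpenAI  # import path assumed

llm = ChatOpenAI(max_tokens=500)
with get_openai_callback() as cb:
    llm.invoke("Hello")
# cb tallies usage across every OpenAI call in the block.
print(cb.prompt_tokens, cb.completion_tokens, cb.total_tokens)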
