Skip to content
This repository has been archived by the owner on Apr 29, 2024. It is now read-only.

Commit

Permalink
move with session
Browse files Browse the repository at this point in the history
  • Loading branch information
sasha370 committed Apr 15, 2024
1 parent 014aff7 commit fe032b5
Show file tree
Hide file tree
Showing 21 changed files with 335 additions and 348 deletions.
9 changes: 4 additions & 5 deletions confluence/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,15 @@ def tui_choose_space():
return spaces[choice]['key'], spaces[choice]['name']


def import_space(space_key, space_name, session):
PageManager().store_pages_data(space_key, retrieve_space(space_key), session)
def import_space(space_key, space_name):
PageManager().store_pages_data(space_key, retrieve_space(space_key))

vector.pages.generate_missing_embeddings_to_database(session)
vector.pages.generate_missing_embeddings_to_database()

upsert_space_info(
session,
space_key=space_key,
space_name=space_name,
last_import_date=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
)

vector.pages.import_from_database(session, space_key)
vector.pages.import_from_database(space_key)
21 changes: 11 additions & 10 deletions database/bookmarked_conversation_manager.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from models.bookmarked_conversation import BookmarkedConversation
from datetime import datetime, timezone
from database.database import get_db_session


def add_bookmarked_conversation(session, title, body, thread_id):
new_conversation = BookmarkedConversation(title=title, body=body, thread_id=thread_id)
session.add(new_conversation)
session.commit()
def add_bookmarked_conversation(title, body, thread_id):
with get_db_session() as session:
new_conversation = BookmarkedConversation(title=title, body=body, thread_id=thread_id)
session.add(new_conversation)


def update_posted_on_confluence(session, thread_id):
conversation = session.query(BookmarkedConversation).filter_by(thread_id=thread_id).first()
if conversation:
conversation.posted_on_confluence = datetime.now(timezone.utc)
session.commit()
print(f"Updated conversation with thread ID {thread_id} with timestamp")
def update_posted_on_confluence(thread_id):
with get_db_session() as session:
conversation = session.query(BookmarkedConversation).filter_by(thread_id=thread_id).first()
if conversation:
conversation.posted_on_confluence = datetime.now(timezone.utc)
print(f"Updated conversation with thread ID {thread_id} with timestamp")
92 changes: 49 additions & 43 deletions database/interaction_manager.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,76 @@
# ./database/interaction_manager.py
from datetime import datetime, timezone
from models.qa_interaction import QAInteraction
from database.database import get_db_session
import json


class QAInteractionManager:
def __init__(self, session):
self.session = session
def __init__(self):
pass

def add_question_and_answer(self, question, answer, thread_id, assistant_thread_id, channel_id, question_ts,
answer_ts, slack_user_id):

serialized_answer = json.dumps(answer.__dict__) if not isinstance(answer, str) else answer
interaction = QAInteraction(
question_text=question,
thread_id=thread_id,
assistant_thread_id=assistant_thread_id,
answer_text=serialized_answer,
channel_id=channel_id,
question_timestamp=question_ts,
answer_timestamp=answer_ts,
comments=json.dumps([]),
slack_user_id=slack_user_id
)
self.session.add(interaction)
self.session.commit()
with get_db_session() as session:
serialized_answer = json.dumps(answer.__dict__) if not isinstance(answer, str) else answer
interaction = QAInteraction(
question_text=question,
thread_id=thread_id,
assistant_thread_id=assistant_thread_id,
answer_text=serialized_answer,
channel_id=channel_id,
question_timestamp=question_ts,
answer_timestamp=answer_ts,
comments=json.dumps([]),
slack_user_id=slack_user_id
)
session.add(interaction)

def add_comment_to_interaction(self, thread_id, comment):
interaction = self.get_interaction_by_thread_id(thread_id)
if interaction:
if interaction.comments is None:
interaction.comments = json.dumps([])
comments = json.loads(interaction.comments)
comments.append(comment)
interaction.comments = json.dumps(comments)
self.session.commit()
with get_db_session() as session: # TODO Check if this is necessary
if interaction:
if interaction.comments is None:
interaction.comments = json.dumps([])
comments = json.loads(interaction.comments)
comments.append(comment)
interaction.comments = json.dumps(comments)

def get_interaction_by_thread_id(self, thread_id):
return self.session.query(QAInteraction).filter_by(thread_id=thread_id).first()
with get_db_session() as session:
return session.query(QAInteraction).filter_by(thread_id=thread_id).first()

def get_interaction_by_interaction_id(self, interaction_id):
return self.session.query(QAInteraction).filter_by(id=interaction_id).first()
with get_db_session() as session:
return session.query(QAInteraction).filter_by(id=interaction_id).first()

def get_interactions_by_interaction_ids(self, interaction_ids):
return self.session.query(QAInteraction).filter(QAInteraction.id.in_(interaction_ids)).all()
with get_db_session() as session:
return session.query(QAInteraction).filter(QAInteraction.id.in_(interaction_ids)).all()

def get_qa_interactions(self):
return self.session.query(QAInteraction).all()
with get_db_session() as session:
return session.query(QAInteraction).all()

def add_embed_to_interaction(self, interaction_id, embed):
interaction = self.session.query(QAInteraction).filter_by(id=interaction_id).first()
if interaction:
interaction.embed = json.dumps(embed)
interaction.last_embedded = datetime.now(timezone.utc)
self.session.commit()
with get_db_session() as session:
interaction = session.query(QAInteraction).filter_by(id=interaction_id).first()
if interaction:
interaction.embed = json.dumps(embed)
interaction.last_embedded = datetime.now(timezone.utc)

def get_interactions_without_embeds(self):
return self.session.query(QAInteraction).filter(
(QAInteraction.embed.is_(None)) |
(QAInteraction.embed == json.dumps([])) |
(QAInteraction.embed == '')
).all()
with get_db_session() as session:
return session.query(QAInteraction).filter(
(QAInteraction.embed.is_(None)) |
(QAInteraction.embed == json.dumps([])) |
(QAInteraction.embed == '')
).all()

def get_interactions_with_embeds(self):
return self.session.query(QAInteraction).filter(
(QAInteraction.embed.is_not(None)) |
(QAInteraction.embed == json.dumps([])) |
(QAInteraction.embed == '')
).all()
with get_db_session() as session:
return session.query(QAInteraction).filter(
(QAInteraction.embed.is_not(None)) |
(QAInteraction.embed == json.dumps([])) |
(QAInteraction.embed == '')
).all()
103 changes: 54 additions & 49 deletions database/page_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import List, Optional
from models.page_data import PageData
from datetime import datetime, timezone
from database.database import get_db_session
import json


Expand All @@ -21,60 +22,62 @@ def parse_datetime(self, date_string):
"""
return datetime.fromisoformat(date_string.replace('Z', '+00:00'))

def store_pages_data(self, space_key, pages, session):
def store_pages_data(self, space_key, pages):
"""
Store Confluence page data into the database.
Args:
space_key (str): The key of the Confluence space.
pages (list): A list of page data
"""
for page in pages:
page_id = page['pageId']
old_page = session.query(PageData).filter_by(page_id=page_id).first()
if old_page:
old_page.title = page['title']
old_page.lastUpdated = self.parse_datetime(page['lastUpdated'])
old_page.content = page['content']
old_page.comments = page['comments']
print(f"Page with ID {page_id} updated.")
else:
new_page = PageData(page_id=page_id,
space_key=space_key,
title=page['title'],
author=page['author'],
createdDate=self.parse_datetime(page['createdDate']),
lastUpdated=self.parse_datetime(page['lastUpdated']),
content=page['content'],
comments=page['comments']
)
session.add(new_page)
print(f"Page with ID {page_id} created.")
session.commit()

def get_page_ids_missing_embeds(self, session):
with get_db_session() as session:
for page in pages:
page_id = page['pageId']
old_page = session.query(PageData).filter_by(page_id=page_id).first()
if old_page:
old_page.title = page['title']
old_page.lastUpdated = self.parse_datetime(page['lastUpdated'])
old_page.content = page['content']
old_page.comments = page['comments']
print(f"Page with ID {page_id} updated.")
else:
new_page = PageData(page_id=page_id,
space_key=space_key,
title=page['title'],
author=page['author'],
createdDate=self.parse_datetime(page['createdDate']),
lastUpdated=self.parse_datetime(page['lastUpdated']),
content=page['content'],
comments=page['comments']
)
session.add(new_page)
print(f"Page with ID {page_id} created.")

def get_page_ids_missing_embeds(self):
"""
Retrieve the page IDs of pages that are missing embeddings.
:return: A list of page IDs.
"""
records = session.query(PageData).filter(
(PageData.lastUpdated > PageData.last_embedded) |
(PageData.last_embedded.is_(None))
).all()
page_ids = [record.page_id for record in records]
return page_ids
with get_db_session() as session:
records = session.query(PageData).filter(
(PageData.lastUpdated > PageData.last_embedded) |
(PageData.last_embedded.is_(None))
).all()
page_ids = [record.page_id for record in records]
return page_ids

def get_all_page_data_from_db(self, session, space_key=None):
def get_all_page_data_from_db(self, space_key=None):
"""
Retrieve all page data and embeddings from the database. If a space_key is provided,
filter the records to only include pages from that specific space.
:param space_key: Optional; the specific space key to filter pages by.
:return: Tuple of page_ids (list of page IDs), all_documents (list of document strings), and embeddings (list of embeddings as strings)
"""
if space_key:
records = session.query(PageData).filter(PageData.space_key == space_key).all()
else:
records = session.query(PageData).all()
with get_db_session() as session:
if space_key:
records = session.query(PageData).filter(PageData.space_key == space_key).all()
else:
records = session.query(PageData).all()

formatted = self.format_page_data(records)
return formatted
Expand All @@ -90,39 +93,41 @@ def format_page_data(self, records):
]
return page_ids, all_documents, embeddings

def add_or_update_embed_vector(self, page_id, embed_vector, session):
def add_or_update_embed_vector(self, page_id, embed_vector):
"""
Add or update the embed vector data for a specific page in the database, and update the last_embedded timestamp.
Args:
page_id (str): The ID of the page to update.
embed_vector: The embed vector data to be added or updated, expected to be a list of floats.
"""
page = session.query(PageData).filter_by(page_id=page_id).first()
with get_db_session() as session:
page = session.query(PageData).filter_by(page_id=page_id).first()

if page:
page.embed = json.dumps(embed_vector)
page.last_embedded = datetime.now(timezone.utc)
print(f"Embed vector and last_embedded timestamp for page ID {page_id} have been updated.")
session.commit()
else:
print(f"No page found with ID {page_id}. Consider handling this case as needed.")
if page:
page.embed = json.dumps(embed_vector)
page.last_embedded = datetime.now(timezone.utc)
print(f"Embed vector and last_embedded timestamp for page ID {page_id} have been updated.")
else:
print(f"No page found with ID {page_id}. Consider handling this case as needed.")

def find_page(self, page_id, session) -> Optional[PageData]:
def find_page(self, page_id) -> Optional[PageData]:
"""
Find a page in the database by its ID.
:param page_id: The ID of the page to find.
:return: The page data if found, or None if not found.
"""
return session.query(PageData).filter_by(page_id=page_id).first()
with get_db_session() as session:
return session.query(PageData).filter_by(page_id=page_id).first()

def find_pages(self, page_ids, session) -> List[PageData]:
def find_pages(self, page_ids) -> List[PageData]:
"""
Find multiple pages in the database by their IDs.
:param page_ids: A list of page IDs to find.
:return: A list of page data if found, or an empty list if not found.
"""
return session.query(PageData).filter(PageData.page_id.in_(page_ids)).all()
with get_db_session() as session:
return session.query(PageData).filter(PageData.page_id.in_(page_ids)).all()

def format_page_for_llm(self, page: PageData) -> str:
"""
Expand Down
46 changes: 24 additions & 22 deletions database/quiz_question_manager.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from datetime import datetime, timezone
from interactions.quiz_question_dto import QuizQuestionDTO
from models.quiz_question import QuizQuestion
from database.database import get_db_session


class QuizQuestionManager:
def __init__(self, session):
self.session = session
def __init__(self):
pass

def add_quiz_question(self, question_text):
new_question = QuizQuestion(question_text=question_text, posted_on_slack=datetime.now(timezone.utc))
self.session.add(new_question)
self.session.commit()
with get_db_session() as session:
new_question = QuizQuestion(question_text=question_text, posted_on_slack=datetime.now(timezone.utc))
session.add(new_question)

print(f"New question ID: {new_question.id}")
dto = QuizQuestionDTO(
Expand All @@ -26,27 +27,28 @@ def add_quiz_question(self, question_text):
return dto

def update_with_summary(self, question_id, summary):
question = self.session.query(QuizQuestion).filter_by(id=question_id).first()
if question:
question.summary = summary
self.session.commit()
print(f"Summary updated for question with ID {question_id}")
with get_db_session() as session:
question = session.query(QuizQuestion).filter_by(id=question_id).first()
if question:
question.summary = summary
print(f"Summary updated for question with ID {question_id}")

def update_with_summary_by_thread_id(self, thread_id, summary):
question = self.session.query(QuizQuestion).filter_by(thread_id=thread_id).first()
print(f"question: {question}")
if question:
question.summary = summary
self.session.commit()
print(f"Updated question with thread ID {thread_id} with summary: {summary}")
with get_db_session() as session:
question = session.query(QuizQuestion).filter_by(thread_id=thread_id).first()
print(f"question: {question}")
if question:
question.summary = summary
print(f"Updated question with thread ID {thread_id} with summary: {summary}")

def update_with_thread_id(self, question_id, thread_id):
question = self.session.query(QuizQuestion).filter_by(id=question_id).first()
if question:
question.thread_id = thread_id
question.posted_on_slack = datetime.now(timezone.utc)
self.session.commit()
with get_db_session() as session:
question = session.query(QuizQuestion).filter_by(id=question_id).first()
if question:
question.thread_id = thread_id
print(f"Thread ID updated for question with ID {question_id}")

def get_unposted_questions_timestamps(self):
questions = self.session.query(QuizQuestion).filter_by(posted_on_confluence=None).all()
with get_db_session() as session:
questions = session.query(QuizQuestion).filter_by(posted_on_confluence=None).all()
return [question.thread_id for question in questions]
Loading

0 comments on commit fe032b5

Please sign in to comment.