GPT4o.Updates

import os
from contextlib import ExitStack

from openai import AssistantEventHandler, OpenAI
from typing_extensions import override

class EventHandler(AssistantEventHandler):
    """Stream handler that prints assistant output with file citations.

    Text is not echoed token by token; the full message is printed once it
    is complete so that citation markers can be rewritten first.
    """

    @override
    def on_text_created(self, text) -> None:
        """Print the assistant prompt prefix when a text message begins."""
        print("\nassistant > ", end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Print the type of the tool call the assistant has started."""
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_message_done(self, message) -> None:
        """Print the finished message followed by its list of cited files.

        Every annotation's inline text is replaced with a numeric marker
        ``[index]``; annotations carrying a ``file_citation`` additionally
        produce a ``[index] filename`` line printed after the message.
        Messages with no content, or whose first content part has no
        ``text`` attribute, are skipped.
        """
        if not message.content:
            return
        message_content = getattr(message.content[0], "text", None)
        if message_content is None:
            return

        citations = []
        # Cache filenames by file id so a file cited many times in one
        # message is only looked up once through the API.
        filenames = {}
        for index, annotation in enumerate(message_content.annotations):
            message_content.value = message_content.value.replace(
                annotation.text, f"[{index}]"
            )
            if file_citation := getattr(annotation, "file_citation", None):
                file_id = file_citation.file_id
                if file_id not in filenames:
                    filenames[file_id] = client.files.retrieve(file_id).filename
                citations.append(f"[{index}] {filenames[file_id]}")

        print(message_content.value)
        print("\n".join(citations))


# The OpenAI client reads the API key from the OPENAI_API_KEY environment
# variable. Export it in your shell before running this script instead of
# hard-coding it here: assigning a placeholder to os.environ would clobber a
# real key already present in the environment, and keys must never be
# committed to source control.
client = OpenAI()

# Module-level assistant shared with main(), which later attaches the
# vector store to it.
assistant = client.beta.assistants.create(
    name="Scientific Literature Analyst Assistant",
    instructions=(
        "You are an expert scientific literature analyst. Use your knowledge "
        "base to answer questions about and analyze scientific literature."
    ),
    model="gpt-4o",
    tools=[{"type": "file_search"}],
)

def main(file_paths=None):
    """Upload documents to a new vector store and stream the assistant's answer.

    Creates a vector store, uploads the given files into it, attaches the
    store to the module-level ``assistant``, then opens a thread asking
    whether the papers are related and streams the run through
    ``EventHandler``.

    Args:
        file_paths: Optional iterable of paths of the files to upload.
            Defaults to the sample article PDFs this script was written for.

    Raises:
        OSError: If one of the files cannot be opened.
    """
    global assistant

    if file_paths is None:
        file_paths = [
            "/Users/ian/VS-Projects/TextSummary-FalconAI(HF)/Sample-DataFrame/BlackHole.pdf",
            "/Users/ian/VS-Projects/TextSummary-FalconAI(HF)/Sample-DataFrame/Quasar1.pdf",
            "/Users/ian/VS-Projects/TextSummary-FalconAI(HF)/Sample-DataFrame/AI-SLR-Paper.pdf",
            "/Users/ian/VS-Projects/TextSummary-FalconAI(HF)/Sample-DataFrame/AI-Lit-Review.pdf",
            "/Users/ian/VS-Projects/TextSummary-FalconAI(HF)/Sample-DataFrame/Quasar2.pdf",
            "/Users/ian/VS-Projects/TextSummary-FalconAI(HF)/Sample-DataFrame/SpectralLines.pdf",
        ]

    # Create a vector store
    vector_store = client.beta.vector_stores.create(name="Sample Article DataBase")

    # Use the upload and poll SDK helper to upload the files, add them to the
    # vector store, and poll the status of the file batch for completion.
    # ExitStack closes every file handle once the upload finishes, including
    # when opening a later file or the upload itself raises.
    with ExitStack() as stack:
        file_streams = [
            stack.enter_context(open(path, "rb")) for path in file_paths
        ]
        file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
            vector_store_id=vector_store.id, files=file_streams
        )

    # Print the status and the file counts of the batch to see the result of
    # this operation.
    print(file_batch.status)
    print(file_batch.file_counts)

    # Update the assistant to use the vector store
    assistant = client.beta.assistants.update(
        assistant_id=assistant.id,
        tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    )

    # Create a thread holding the user's question and attach the vector store
    # to the thread's file_search tool resources.
    thread = client.beta.threads.create(
        messages=[
            {
                "role": "user",
                "content": "Are the papers in the Vector store related?",
            }
        ],
        tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    )

    # Use the `stream` SDK helper with the `EventHandler` class to create the
    # Run and stream the response.
    with client.beta.threads.runs.stream(
        thread_id=thread.id,
        assistant_id=assistant.id,
        instructions="Please address the user as Jane Doe. The user has a premium account.",
        event_handler=EventHandler(),
    ) as stream:
        stream.until_done()

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


PROJECT OUTPUT:

completed
FileCounts(cancelled=0, completed=6, failed=0, in_progress=0, total=6)

assistant > file_search


assistant > Yes, the papers in the vector store are related. They all focus on the use and development of AI techniques and tools for automating or assisting various phases of systematic literature reviews (SLR). Here's a summary of some of the common themes covered:

1. **Classification and Categorization**:
   - Multiple tools classify papers as relevant or irrelevant, often using machine learning techniques such as Support Vector Machine (SVM) and logistic regression[0][1].
   - Other tasks handled include identifying specific types of studies like randomized controlled trials (RCTs) or economic evaluations[0][3].

2. **Feature Extraction and Text Representation**:
   - Techniques for processing and representing text from papers include Bag of Words (BoW), term frequency-inverse document frequency (TF-IDF), and various word embedding methods such as word2vec, GloVe, and BioBERT[4][3].
   - These features are crucial for training classifiers to distinguish between relevant and irrelevant papers[0][7].

3. **Search and Retrieval**:
   - Certain tools act as search engines allowing users to enter natural language queries to retrieve and summarize relevant papers[8].
   - Enhancements such as Retriever-Augmented Generation (RAG) frameworks help integrate knowledge from scientific documents into these tools[9].

4. **Interactive and Iterative Paper Selection**:
   - Approaches like active learning and reinforcement learning support interactive and iterative paper selection processes, showing papers with high probability or uncertainty to reviewers for further labeling[10][11].

5. **Visualization and Clustering**:
   - Tools employ visualization techniques to map relationships among papers based on content similarity and aid reviewers in quality assessment[11].
   - Clustering techniques are used to group similar papers, which helps in building topic maps and aiding paper selection[13].

Overall, these themes indicate a cohesive area of research focused on leveraging AI to streamline the various stages of conducting systematic literature reviews. The tools mentioned are developed to assist researchers in efficiently finding, classifying, and analyzing relevant scientific literature.
[0] AI-Lit-Review.pdf
[1] AI-SLR-Paper.pdf
[2] AI-Lit-Review.pdf
[3] AI-Lit-Review.pdf
[4] AI-SLR-Paper.pdf
[5] AI-Lit-Review.pdf
[6] AI-Lit-Review.pdf
[7] AI-Lit-Review.pdf
[8] AI-Lit-Review.pdf
[9] AI-Lit-Review.pdf
[10] AI-SLR-Paper.pdf
[11] AI-SLR-Paper.pdf
[12] AI-SLR-Paper.pdf
[13] AI-Lit-Review.pdf