-
Notifications
You must be signed in to change notification settings - Fork 2
/
import.py
26 lines (20 loc) · 841 Bytes
/
import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Python code to preprocess and embed documents
import os

import openai
import psycopg2
# Load the OpenAI API key from the environment so the secret is never
# stored in (or committed with) this script.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise SystemExit("Set the OPENAI_API_KEY environment variable before running this script.")
openai.api_key = api_key

# Pick the embedding model
model_id = "text-embedding-ada-002"

# Connect to PostgreSQL database
conn = psycopg2.connect(
    database="postgres",
    user="gulcin.jelinek",
    host="localhost",
    port="5432",
)
try:
    # The cursor is used as a context manager so it is closed when the block exits.
    with conn.cursor() as cur:
        # Fetch documents from the database. fetchall() materializes every row
        # up front, so the same cursor can be reused for the inserts below.
        cur.execute("SELECT id, content FROM documents")
        documents = cur.fetchall()

        # Process and store embeddings in the database
        for doc_id, doc_content in documents:
            response = openai.Embedding.create(input=doc_content, model=model_id)
            embedding = response["data"][0]["embedding"]
            cur.execute(
                "INSERT INTO document_embeddings (id, embedding) VALUES (%s, %s);",
                (doc_id, embedding),
            )
            # Commit after each document so embeddings that were already
            # computed stay stored if a later API call or insert fails.
            conn.commit()
finally:
    # Always close the database connection, including when an API call or a
    # query raises part-way through the run.
    conn.close()