Skip to content

Commit

Permalink
Create docs with custom embeddings
Browse files: browse the repository at this point in the history
  • Loading branch information
HamadaSalhab committed Nov 8, 2024
1 parent 2d2f624 commit be3becf
Show file tree
Hide file tree
Showing 10 changed files with 40 additions and 14 deletions.
13 changes: 9 additions & 4 deletions agents-api/agents_api/activities/embed_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,15 @@ async def embed_batch(snippets):
]
)

embeddings = reduce(
operator.add,
await asyncio.gather(*[embed_batch(snippets) for snippets in batched_snippets]),
)
if payload.embeddings:
embeddings = [payload.embeddings]
else:
embeddings = reduce(
operator.add,
await asyncio.gather(
*[embed_batch(snippets) for snippets in batched_snippets]
),
)

embed_snippets_query(
developer_id=payload.developer_id,
Expand Down
1 change: 1 addition & 0 deletions agents-api/agents_api/activities/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ class EmbedDocsPayload(BaseModel):
embed_instruction: str | None
title: str | None = None
include_title: bool = False # Needs to be a separate parameter for the activity
embeddings: list[float] | list[list[float]] | None = None
9 changes: 5 additions & 4 deletions agents-api/agents_api/autogen/Docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ class CreateDocRequest(BaseModel):
"""
Contents of the document
"""
embeddings: list[float] | list[list[float]] | None = None
"""
Embeddings for the document
"""
embed_instruction: str | None = None
"""
Instruction for the embedding model.
Expand All @@ -66,10 +70,7 @@ class Doc(BaseModel):
"""
Contents of the document
"""
embeddings: Annotated[
list[float] | list[list[float]] | None,
Field(json_schema_extra={"readOnly": True}),
] = None
embeddings: list[float] | list[list[float]] | None = None
"""
Embeddings for the document
"""
Expand Down
1 change: 1 addition & 0 deletions agents-api/agents_api/autogen/Tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
BaseModel,
ConfigDict,
Field,
RootModel,
StrictBool,
)

Expand Down
1 change: 1 addition & 0 deletions agents-api/agents_api/models/docs/create_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def create_doc(

doc_data = data.model_dump()
doc_data.pop("embed_instruction", None)
doc_data.pop("embeddings", None)
content = doc_data.pop("content")

doc_data["owner_type"] = owner_type
Expand Down
4 changes: 4 additions & 0 deletions agents-api/agents_api/routers/docs/create_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ async def run_embed_docs_task(
doc_id: UUID,
title: str,
content: list[str],
embeddings: list[float] | list[list[float]] | None = None,
embed_instruction: str | None = None,
job_id: UUID,
background_tasks: BackgroundTasks,
Expand All @@ -36,6 +37,7 @@ async def run_embed_docs_task(
content=content,
title=title,
embed_instruction=embed_instruction,
embeddings=embeddings,
)

handle = await client.start_workflow(
Expand Down Expand Up @@ -88,6 +90,7 @@ async def create_user_doc(
doc_id=doc.id,
title=doc.title,
content=doc.content,
embeddings=data.embeddings,
embed_instruction=data.embed_instruction,
job_id=embed_job_id,
background_tasks=background_tasks,
Expand Down Expand Up @@ -119,6 +122,7 @@ async def create_agent_doc(
doc_id=doc.id,
title=doc.title,
content=doc.content,
embeddings=data.embeddings,
embed_instruction=data.embed_instruction,
job_id=embed_job_id,
background_tasks=background_tasks,
Expand Down
9 changes: 5 additions & 4 deletions integrations-service/integrations/autogen/Docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ class CreateDocRequest(BaseModel):
"""
Contents of the document
"""
embeddings: list[float] | list[list[float]] | None = None
"""
Embeddings for the document
"""
embed_instruction: str | None = None
"""
Instruction for the embedding model.
Expand All @@ -66,10 +70,7 @@ class Doc(BaseModel):
"""
Contents of the document
"""
embeddings: Annotated[
list[float] | list[list[float]] | None,
Field(json_schema_extra={"readOnly": True}),
] = None
embeddings: list[float] | list[list[float]] | None = None
"""
Embeddings for the document
"""
Expand Down
1 change: 1 addition & 0 deletions integrations-service/integrations/autogen/Tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
BaseModel,
ConfigDict,
Field,
RootModel,
StrictBool,
)

Expand Down
1 change: 0 additions & 1 deletion typespec/docs/models.tsp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ model Doc {
content: string | string[];

/** Embeddings for the document */
@visibility("read")
embeddings?: float32[] | float32[][];
}

Expand Down
14 changes: 13 additions & 1 deletion typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2486,6 +2486,19 @@ components:
items:
type: string
description: Contents of the document
embeddings:
anyOf:
- type: array
items:
type: number
format: float
- type: array
items:
type: array
items:
type: number
format: float
description: Embeddings for the document
embed_instruction:
type: string
nullable: true
Expand Down Expand Up @@ -2536,7 +2549,6 @@ components:
type: number
format: float
description: Embeddings for the document
readOnly: true
Docs.DocOwner:
type: object
required:
Expand Down

0 comments on commit be3becf

Please sign in to comment.