Skip to content

Commit

Permalink
feat(integrations): Add llama_parse integration for document parsing. (
Browse files Browse the repository at this point in the history
…#823)

<!-- ELLIPSIS_HIDDEN -->


> [!IMPORTANT]
> Add Llama document parsing integration with new classes, configuration
updates, and tests.
> 
>   - **Integration**:
>     - Add `LlamaParseIntegrationDef` and `LlamaParseIntegrationDefUpdate`
classes in `Tools.py` for Llama document parsing.
>     - Add `LlamaParseFetchArguments`, `LlamaParseFetchArgumentsUpdate`,
`LlamaParseSetup`, and `LlamaParseSetupUpdate` classes for handling
setup and arguments.
>     - Implement `parse()` function in `llama_parse.py` for parsing
documents using LlamaParse.
>   - **Configuration**:
>     - Add `llama-index` and `llama-parse` dependencies to
`pyproject.toml`.
>   - **Testing**:
>     - Add `MockLlamaParseClient` in `mocks/llama_parse.py` for testing.
>     - Add `test_llama_parse_provider()` in `test_providers.py` to test
LlamaParse provider configuration.
>     - Update `conftest.py` to include LlamaParse in mocked services.
> 
> <sup>This description was created by </sup>[<img alt="Ellipsis"
src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral)<sup>
for 239e6c7. It will automatically
update as commits are pushed.</sup>


<!-- ELLIPSIS_HIDDEN -->

---------

Co-authored-by: Vedantsahai18 <[email protected]>
  • Loading branch information
Vedantsahai18 and Vedantsahai18 authored Nov 10, 2024
1 parent 1ded44f commit b6308f1
Show file tree
Hide file tree
Showing 21 changed files with 1,544 additions and 14 deletions.
2 changes: 1 addition & 1 deletion agents-api/agents_api/activities/execute_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..models.tools import get_tool_args_from_metadata


@auto_blob_store
@auto_blob_store(deep=True)
@beartype
async def execute_integration(
context: StepContext,
Expand Down
5 changes: 5 additions & 0 deletions agents-api/agents_api/activities/task_steps/base_evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import ast
from typing import Any

import simpleeval
from beartype import beartype
from box import Box
from openai import BaseModel

# Increase the max string length to 300000
simpleeval.MAX_STRING_LENGTH = 300000

from simpleeval import NameNotDefined, SimpleEval
from temporalio import activity
from thefuzz import fuzz
Expand Down
163 changes: 163 additions & 0 deletions agents-api/agents_api/autogen/Tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
BaseModel,
ConfigDict,
Field,
RootModel,
StrictBool,
)

Expand Down Expand Up @@ -50,6 +51,10 @@ class ApiCallDef(BaseModel):
"""
The data to send as form data
"""
files: dict[str, Any] | None = None
"""
The data to send as files data
"""
json_: Annotated[dict[str, Any] | None, Field(alias="json")] = None
"""
JSON body to send with the request
Expand Down Expand Up @@ -117,6 +122,10 @@ class ApiCallDefUpdate(BaseModel):
"""
The data to send as form data
"""
files: dict[str, Any] | None = None
"""
The data to send as files data
"""
json_: Annotated[dict[str, Any] | None, Field(alias="json")] = None
"""
JSON body to send with the request
Expand Down Expand Up @@ -189,6 +198,7 @@ class BaseIntegrationDef(BaseModel):
"browserbase",
"email",
"remote_browser",
"llama_parse",
]
"""
The provider of the integration
Expand Down Expand Up @@ -225,6 +235,7 @@ class BaseIntegrationDefUpdate(BaseModel):
"browserbase",
"email",
"remote_browser",
"llama_parse",
]
| None
) = None
Expand Down Expand Up @@ -720,6 +731,7 @@ class CreateToolRequest(BaseModel):
| BrowserbaseGetSessionLiveUrlsIntegrationDef
| BrowserbaseGetSessionConnectUrlIntegrationDef
| RemoteBrowserIntegrationDef
| LlamaParseIntegrationDef
| None
) = None
"""
Expand Down Expand Up @@ -947,6 +959,154 @@ class FunctionDef(BaseModel):
"""


class LlamaParseFetchArguments(BaseModel):
    """
    Arguments for LlamaParse integration
    """

    model_config = ConfigDict(populate_by_name=True)

    result_format: Literal["text", "markdown"] = "text"
    """Output format of the parse result: "text" (default) or "markdown"."""

    num_workers: int = Field(default=2, ge=1, le=10)
    """How many workers parse in parallel; validated to 1..10, default 2."""

    verbose: StrictBool = True
    """Whether detailed logging is enabled; on by default."""

    language: str = "en"
    """Language of the document text; defaults to English ("en")."""

    filename: str | None = None
    """Name of the file; when omitted a random name is generated."""

    file: str
    """The file content as a base64 string (required)."""


class LlamaParseFetchArgumentsUpdate(BaseModel):
    """
    Arguments for LlamaParse integration
    """

    model_config = ConfigDict(populate_by_name=True)

    result_format: Literal["text", "markdown"] = "text"
    """Output format of the parse result: "text" (default) or "markdown"."""

    num_workers: int = Field(default=2, ge=1, le=10)
    """How many workers parse in parallel; validated to 1..10, default 2."""

    verbose: StrictBool = True
    """Whether detailed logging is enabled; on by default."""

    language: str = "en"
    """Language of the document text; defaults to English ("en")."""

    filename: str | None = None
    """Name of the file; when omitted a random name is generated."""

    file: str | None = None
    """The file content as a base64 string; optional in this update variant."""


class LlamaParseIntegrationDef(BaseIntegrationDef):
    """
    LlamaParse integration definition
    """

    model_config = ConfigDict(
        populate_by_name=True,
    )
    # The Literal below pins the discriminating value; the attribute docstring
    # previously named "LlamaParseSetup", which is a class name and not a
    # value this field accepts.
    provider: Literal["llama_parse"] = "llama_parse"
    """
    The provider must be "llama_parse"
    """
    method: str | None = None
    """
    The specific method of the integration to call
    """
    setup: LlamaParseSetup | None = None
    """
    The setup parameters for LlamaParse
    """
    arguments: LlamaParseFetchArguments | None = None
    """
    The arguments for LlamaParse
    """


class LlamaParseIntegrationDefUpdate(BaseIntegrationDefUpdate):
    """
    LlamaParse integration definition
    """

    model_config = ConfigDict(
        populate_by_name=True,
    )
    # The Literal below pins the discriminating value; the attribute docstring
    # previously named "LlamaParseSetup", which is a class name and not a
    # value this field accepts.
    provider: Literal["llama_parse"] = "llama_parse"
    """
    The provider must be "llama_parse"
    """
    method: str | None = None
    """
    The specific method of the integration to call
    """
    setup: LlamaParseSetupUpdate | None = None
    """
    The setup parameters for LlamaParse (all fields optional in this update variant)
    """
    arguments: LlamaParseFetchArgumentsUpdate | None = None
    """
    The arguments for LlamaParse (the file is optional in this update variant)
    """


class LlamaParseSetup(BaseModel):
    """
    Setup parameters for LlamaParse integration
    """

    model_config = ConfigDict(populate_by_name=True)

    llamaparse_api_key: str
    """API key for LlamaParse (required)."""


class LlamaParseSetupUpdate(BaseModel):
    """
    Setup parameters for LlamaParse integration
    """

    model_config = ConfigDict(populate_by_name=True)

    llamaparse_api_key: str | None = None
    """API key for LlamaParse; optional in this update variant."""


class NamedToolChoice(BaseModel):
model_config = ConfigDict(
populate_by_name=True,
Expand Down Expand Up @@ -1005,6 +1165,7 @@ class PatchToolRequest(BaseModel):
| BrowserbaseGetSessionLiveUrlsIntegrationDefUpdate
| BrowserbaseGetSessionConnectUrlIntegrationDefUpdate
| RemoteBrowserIntegrationDefUpdate
| LlamaParseIntegrationDefUpdate
| None
) = None
"""
Expand Down Expand Up @@ -1428,6 +1589,7 @@ class Tool(BaseModel):
| BrowserbaseGetSessionLiveUrlsIntegrationDef
| BrowserbaseGetSessionConnectUrlIntegrationDef
| RemoteBrowserIntegrationDef
| LlamaParseIntegrationDef
| None
) = None
"""
Expand Down Expand Up @@ -1517,6 +1679,7 @@ class UpdateToolRequest(BaseModel):
| BrowserbaseGetSessionLiveUrlsIntegrationDef
| BrowserbaseGetSessionConnectUrlIntegrationDef
| RemoteBrowserIntegrationDef
| LlamaParseIntegrationDef
| None
) = None
"""
Expand Down
3 changes: 2 additions & 1 deletion agents-api/agents_api/common/protocol/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ def unload_attribute(self, name: str) -> None:
remote_obj = self.__save_item(data)
super().__setattr__(name, remote_obj)

def unload_all(self) -> None:
def unload_all(self) -> "BaseRemoteModel":
for name in list(self._remote_cache.keys()):
self.unload_attribute(name)
return self


class RemoteList(list):
Expand Down
50 changes: 50 additions & 0 deletions agents-api/agents_api/common/storage_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from functools import wraps
from typing import Any, Callable

from pydantic import BaseModel
from temporalio import workflow

from ..activities.sync_items_remote import load_inputs_remote
Expand Down Expand Up @@ -76,6 +77,31 @@ def load_args(
for key, value in arg.items()
}
)
elif isinstance(arg, BaseRemoteModel):
new_args.append(arg.unload_all())

elif isinstance(arg, BaseModel):
for field in arg.model_fields.keys():
if isinstance(getattr(arg, field), RemoteObject):
setattr(
arg,
field,
load_from_blob_store_if_remote(getattr(arg, field)),
)
elif isinstance(getattr(arg, field), RemoteList):
setattr(
arg,
field,
[
load_from_blob_store_if_remote(item)
for item in getattr(arg, field)
],
)
elif isinstance(getattr(arg, field), BaseRemoteModel):
setattr(arg, field, getattr(arg, field).unload_all())

new_args.append(arg)

else:
new_args.append(arg)

Expand All @@ -93,6 +119,30 @@ def load_args(
for key, value in v.items()
}

elif isinstance(v, BaseRemoteModel):
new_kwargs[k] = v.unload_all()

elif isinstance(v, BaseModel):
for field in v.model_fields.keys():
if isinstance(getattr(v, field), RemoteObject):
setattr(
v,
field,
load_from_blob_store_if_remote(getattr(v, field)),
)
elif isinstance(getattr(v, field), RemoteList):
setattr(
v,
field,
[
load_from_blob_store_if_remote(item)
for item in getattr(v, field)
],
)
elif isinstance(getattr(v, field), BaseRemoteModel):
setattr(v, field, getattr(v, field).unload_all())
new_kwargs[k] = v

else:
new_kwargs[k] = v

Expand Down
21 changes: 17 additions & 4 deletions cookbooks/01-Website_Crawler_using_Spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,34 @@
)

# Defining a Task
task_def = yaml.safe_load("""
name: Agent Crawler
task_def = yaml.safe_load(f"""
name: Crawling Task
# Define the tools that the agent will use in this workflow
tools:
- name: spider_crawler
type: integration
integration:
provider: spider
setup:
spider_api_key: "{{SPIDER_API_KEY}}"
spider_api_key: "{spider_api_key}"
# Define the steps of the workflow
main:
# Define a tool call step that calls the spider_crawler tool with the url input
- tool: spider_crawler
arguments:
url: '"https://spider.cloud"'
url: "_['url']" # You can also use 'inputs[0]['url']'
- prompt: |
You are {{{{agent.about}}}}
I have given you this url: {{{{inputs[0]['url']}}}}
And you have crawled that website. Here are the results you found:
{{{{_['documents']}}}}
I want you to create a short summary (no longer than 100 words) of the results you found while crawling that website.
unwrap: True
""")

# Creating/Updating a task
Expand Down
Loading

0 comments on commit b6308f1

Please sign in to comment.