-
Notifications
You must be signed in to change notification settings - Fork 80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add upset examples multipart endpoint #1209
base: main
Are you sure you want to change the base?
Changes from 4 commits
e9e2131
ff30541
152ec59
27b1546
53a0f14
025aa6d
4208b6e
fd16baa
28a4677
816302d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
"""Client for interacting with the LangSmith API. | ||
Check notice on line 1 in python/langsmith/client.py GitHub Actions / benchmarkBenchmark results
Check notice on line 1 in python/langsmith/client.py GitHub Actions / benchmarkComparison against main
|
||
|
||
Use the client to customize API keys / workspace connections, SSL certs, | ||
etc. for tracing. | ||
|
@@ -82,6 +82,7 @@ | |
_SIZE_LIMIT_BYTES, | ||
) | ||
from langsmith._internal._multipart import ( | ||
MultipartPart, | ||
MultipartPartsAndContext, | ||
join_multipart_parts_and_context, | ||
) | ||
|
@@ -3369,6 +3370,133 @@ | |
created_at=created_at, | ||
) | ||
|
||
def upsert_examples_multipart(
    self,
    *,
    upserts: Optional[List[ls_schemas.ExampleCreateWithAttachments]] = None,
) -> None:
    """Upsert examples, including their attachments, in one multipart request.

    Every example is turned into several form parts keyed by its id:

    * ``<id>``: JSON with ``dataset_id`` and ``created_at``, plus
      ``metadata`` and ``split`` when they are not ``None``.
    * ``<id>.inputs`` and ``<id>.outputs``: the JSON-serialized inputs
      and outputs.
    * ``<id>.attachment.<name>``: one part per attachment, with the payload
      length appended to the mime type as ``; length=<n>``.

    Examples without an id are assigned a fresh ``uuid4``.

    Args:
        upserts: The examples to upsert. ``None`` (the default) is treated
            as an empty list.

    Raises:
        TypeError: If ``upserts`` is not a list, or any item is not an
            ``ExampleCreateWithAttachments``.

    Any error raised by ``ls_utils.raise_for_status_with_text`` for the
    response is propagated unchanged.
    """
    # ``None`` stands in for the former ``[]`` default so that no mutable
    # object is shared between calls.
    if upserts is None:
        upserts = []

    # NOTE(review): a reviewer considered these type checks unnecessary
    # because the client does not validate types like this elsewhere. They
    # are kept so callers that rely on the TypeError keep working.
    if not isinstance(upserts, list):
        raise TypeError(f"upserts must be a list, got {type(upserts)}")
    for item in upserts:
        if not isinstance(item, ls_schemas.ExampleCreateWithAttachments):
            raise TypeError(
                f"Each item must be ExampleCreateWithAttachments, got {type(item)}"
            )

    def _json_part(name, payload) -> MultipartPart:
        """Build an ``application/json`` part carrying its Content-Length."""
        return (
            name,
            (
                None,
                payload,
                "application/json",
                {"Content-Length": str(len(payload))},
            ),
        )

    parts: list[MultipartPart] = []

    for example in upserts:
        # Part names are strings, so the id is stringified either way.
        if example.id is not None:
            example_id = str(example.id)
        else:
            example_id = str(uuid.uuid4())

        remaining_values = {
            "dataset_id": example.dataset_id,
            "created_at": example.created_at,
        }
        if example.metadata is not None:
            remaining_values["metadata"] = example.metadata
        if example.split is not None:
            remaining_values["split"] = example.split

        parts.append(_json_part(f"{example_id}", _dumps_json(remaining_values)))
        parts.append(
            _json_part(f"{example_id}.inputs", _dumps_json(example.inputs))
        )
        parts.append(
            _json_part(f"{example_id}.outputs", _dumps_json(example.outputs))
        )

        if example.attachments:
            for name, attachment in example.attachments.items():
                # An attachment is either a (mime_type, data) tuple or an
                # object exposing ``mime_type`` and ``data`` attributes.
                if isinstance(attachment, tuple):
                    mime_type, payload = attachment
                else:
                    mime_type, payload = attachment.mime_type, attachment.data
                parts.append(
                    (
                        f"{example_id}.attachment.{name}",
                        (
                            None,
                            payload,
                            f"{mime_type}; length={len(payload)}",
                            {},
                        ),
                    )
                )

    encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY)
    # Bodies up to ~20 MB are rendered to a single in-memory string; larger
    # ones hand the encoder object itself to the request (presumably so the
    # body can be streamed -- confirm against request_with_retries).
    if encoder.len <= 20_000_000:
        body = encoder.to_string()
    else:
        body = encoder

    response = self.request_with_retries(
        "POST",
        # TODO: the author flagged this endpoint path as unconfirmed;
        # verify it against the server before release.
        "/v1/platform/examples/multipart",
        request_kwargs={
            "data": body,
            "headers": {
                **self._headers,
                "Content-Type": encoder.content_type,
            },
        },
    )
    ls_utils.raise_for_status_with_text(response)
|
||
def create_examples( | ||
self, | ||
*, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,8 +20,9 @@ | |
from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor | ||
|
||
from langsmith.client import ID_TYPE, Client | ||
from langsmith.schemas import DataType | ||
from langsmith.schemas import DataType, ExampleCreateWithAttachments | ||
from langsmith.utils import ( | ||
LangSmithNotFoundError, | ||
LangSmithConnectionError, | ||
LangSmithError, | ||
get_env_var, | ||
|
@@ -368,6 +369,72 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: | |
with pytest.raises(LangSmithConnectionError): | ||
client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm") | ||
|
||
# NEED TO FIX ONCE CHANGES PUSH TO PROD
@pytest.mark.parametrize("uri", ["https://dev.api.smith.langchain.com"])
def test_upsert_examples_multipart(uri: str) -> None:
    """Test upserting examples with attachments via multipart endpoint.

    Covers three cases: a successful upsert of two examples (one with an
    explicit id, one without), an upsert that targets a non-existent dataset,
    and an upsert whose items are not ``ExampleCreateWithAttachments``.
    """
    dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
    # NOTE: the API key below is a placeholder and must be replaced with a
    # real key for the test to run against the dev endpoint.
    langchain_client = Client(api_url=uri, api_key="NEED TO HARDCODE FOR TESTING")
    if langchain_client.has_dataset(dataset_name=dataset_name):
        langchain_client.delete_dataset(dataset_name=dataset_name)

    dataset = langchain_client.create_dataset(
        dataset_name,
        description="Test dataset for multipart example upload",
        data_type=DataType.kv,
    )

    # The dataset is removed in ``finally`` so a failing assertion does not
    # leave it behind on the server.
    try:
        # Test example with all fields
        example_id = uuid4()
        example_1 = ExampleCreateWithAttachments(
            id=example_id,
            dataset_id=dataset.id,
            inputs={"text": "hello world"},
            outputs={"response": "greeting"},
            attachments={
                "test_file": ("text/plain", b"test content"),
            },
        )
        # Test example without id
        example_2 = ExampleCreateWithAttachments(
            dataset_id=dataset.id,
            inputs={"text": "foo bar"},
            outputs={"response": "baz"},
            attachments={
                "my_file": ("text/plain", b"more test content"),
            },
        )

        langchain_client.upsert_examples_multipart(upserts=[example_1, example_2])

        created_example = langchain_client.read_example(example_id)
        assert created_example.inputs["text"] == "hello world"
        assert created_example.outputs["response"] == "greeting"

        all_examples_in_dataset = list(
            langchain_client.list_examples(dataset_id=dataset.id)
        )
        assert len(all_examples_in_dataset) == 2

        # Test that adding invalid example fails - even if valid examples
        # are added alongside
        example_3 = ExampleCreateWithAttachments(
            dataset_id=uuid4(),  # not a real dataset
            inputs={"text": "foo bar"},
            outputs={"response": "baz"},
            attachments={
                "my_file": ("text/plain", b"more test content"),
            },
        )

        # NOTE(review): a not-found error is expected here, believed to be
        # because the server cannot find the dataset -- confirm.
        with pytest.raises(LangSmithNotFoundError):
            langchain_client.upsert_examples_multipart(upserts=[example_3])

        # The failed upsert must not have added anything to the real dataset.
        all_examples_in_dataset = list(
            langchain_client.list_examples(dataset_id=dataset.id)
        )
        assert len(all_examples_in_dataset) == 2

        # Throw type errors when not passing ExampleCreateWithAttachments
        with pytest.raises(TypeError):
            langchain_client.upsert_examples_multipart(upserts=[{"foo": "bar"}])
    finally:
        langchain_client.delete_dataset(dataset_name=dataset_name)
|
||
def test_create_dataset(langchain_client: Client) -> None: | ||
dataset_name = "__test_create_dataset" + uuid4().hex[:4] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should not be initialized to an empty list, because of the way Python handles mutable default arguments; see https://nikos7am.com/posts/mutable-default-arguments/
to fix, just remove the default