-
Notifications
You must be signed in to change notification settings - Fork 80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
python[patch]: accept simple evaluators #1200
base: main
Are you sure you want to change the base?
Changes from 5 commits
7e901ad
3877b36
9b26f6b
4a24f9f
b3b841f
8604117
03506c5
a4d2b03
448bbf3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,7 @@ | |
cast, | ||
) | ||
|
||
from typing_extensions import TypedDict | ||
from typing_extensions import TypedDict, get_type_hints | ||
|
||
try: | ||
from pydantic.v1 import ( # type: ignore[import] | ||
|
@@ -194,6 +194,10 @@ def __init__( | |
func (Callable): A function that takes a `Run` and an optional `Example` as | ||
arguments, and returns a dict or `ComparisonEvaluationResult`. | ||
""" | ||
func = _normalize_evaluator_func(func) | ||
if afunc: | ||
afunc = _normalize_evaluator_func(afunc) # type: ignore[assignment] | ||
|
||
wraps(func)(self) | ||
from langsmith import run_helpers # type: ignore | ||
|
||
|
@@ -632,3 +636,70 @@ def comparison_evaluator( | |
) -> DynamicComparisonRunEvaluator: | ||
"""Create a comaprison evaluator from a function.""" | ||
return DynamicComparisonRunEvaluator(func) | ||
|
||
|
||
def _normalize_evaluator_func(
    func: Callable,
) -> Union[
    Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT],
    Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]],
]:
    """Adapt an evaluator callable to the ``(run, example)`` calling convention.

    Two evaluator styles are recognised:

    * Run/Example evaluators, which are returned unchanged. A callable is
      treated as this style when none of its *parameters* carry a type hint,
      when its hints cannot be resolved at all, or when any parameter is
      annotated as ``Run``, ``Example`` or ``Optional[Example]``.
    * "Simple" evaluators, whose parameters are annotated with other types.
      These are wrapped so that they are called positionally with
      ``(example.inputs, run.outputs, example.outputs)``, truncated to the
      number of positional parameters the function declares (all three are
      passed when the function accepts ``*args``).

    Args:
        func: The user supplied evaluator; may be a sync or async function.

    Returns:
        ``func`` itself, or a sync/async wrapper (matching ``func``) that takes
        ``(run, example)`` and forwards the unpacked values to ``func``.

    Raises:
        ValueError: If a simple evaluator does not accept 2 or 3 positional
            arguments (or at most 3 plus ``*args``).
    """
    # Review notes carried over from the pull-request discussion:
    #   * TODO(review): add a couple of unit tests for this helper.
    #   * TODO(review): consider debug logs reporting which evaluator style
    #     was detected.
    #   * NOTE(review): untyped callables are not arity-checked here; a third
    #     parameter may be a defaulted extra on a Run/Example evaluator, so the
    #     positional count alone cannot tell the two styles apart.

    # Callables without ``__annotations__`` (e.g. ``functools.partial``) raise
    # TypeError and unresolved forward references raise NameError; in both
    # cases fall back to the legacy behaviour instead of rejecting the callable.
    try:
        type_hints = get_type_hints(func)
    except (TypeError, NameError):
        type_hints = {}

    # Only parameter annotations decide the style: a bare ``-> dict`` return
    # annotation must not turn a legacy ``(run, example)`` evaluator into a
    # "simple" one.
    arg_hints = [hint for name, hint in type_hints.items() if name != "return"]

    # For backwards compatibility, if args are untyped we assume they
    # correspond to Run and Example.
    if not arg_hints:
        return func
    if {Run, Example, Optional[Example]}.intersection(arg_hints):
        return func

    params = list(inspect.signature(func).parameters.values())
    num_positional = sum(
        1 for p in params if p.kind in (p.POSITIONAL_OR_KEYWORD, p.POSITIONAL_ONLY)
    )
    has_positional_var = any(p.kind == p.VAR_POSITIONAL for p in params)
    if not (
        num_positional in (2, 3) or (num_positional <= 3 and has_positional_var)
    ):
        msg = (
            "Invalid evaluator function. Expected to take either 2 or 3 positional "
            "arguments. Please see "
            "https://docs.smith.langchain.com/evaluation/how_to_guides/evaluation/evaluate_llm_application#use-custom-evaluators"  # noqa: E501
        )
        raise ValueError(msg)

    def _simple_args(run: Run, example: Optional[Example]) -> tuple:
        """Build the positional arguments forwarded to the simple evaluator."""
        # ``example`` is optional in the evaluator protocol; substitute empty
        # dicts rather than failing with AttributeError on ``None``.
        has_example = example is not None
        args = (
            example.inputs if has_example else {},
            run.outputs or {},
            (example.outputs or {}) if has_example else {},
        )
        return args if has_positional_var else args[:num_positional]

    if inspect.iscoroutinefunction(func):

        async def awrapper(
            run: Run, example: Optional[Example] = None
        ) -> _RUNNABLE_OUTPUT:
            return await func(*_simple_args(run, example))

        # Keep the user's function name on the wrapper.
        awrapper.__name__ = getattr(func, "__name__", awrapper.__name__)
        return awrapper  # type: ignore[return-value]

    def wrapper(run: Run, example: Optional[Example] = None) -> _RUNNABLE_OUTPUT:
        return func(*_simple_args(run, example))

    # Keep the user's function name on the wrapper.
    wrapper.__name__ = getattr(func, "__name__", wrapper.__name__)
    return wrapper  # type: ignore[return-value]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we update the docstrings for evaluate() and aevaluate() to include examples, or to link to a docs page that shows the valid arguments?