-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Metrics Support in tritonfrontend
#7703
base: main
Are you sure you want to change the base?
Changes from 44 commits
59d9aa8
0b66a27
173216e
c9fa783
384da14
40da2be
f08de59
f3fc425
a657cae
be36c42
80ee6e5
723df84
2e2108c
d7971b7
23b4beb
6ebcdc9
1289f34
ac8e23d
48acc3e
c6efa9e
2932600
73e1782
1f09417
5e0df4e
43bd2ab
88a710d
e0abc3b
85d7676
8f0b4e1
7607884
a88be2d
c7503b3
569c68d
bd5c0b5
378fd2d
82897e1
246e380
80e88b5
acc9e9e
4402e3d
2712558
8e7bba1
ba14f56
f17dc17
717ee47
d53db9b
d2939cb
46121ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,4 +41,10 @@ output [ | |
data_type: TYPE_STRING | ||
dims: [ 1 ] | ||
} | ||
] | ||
] | ||
instance_group [ | ||
{ | ||
count: 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. — Did you ever investigate the gpu label thing? — There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. — Investigated a bit, but did not find the root cause. Will create a ticket in my backlog with hopefully a more consistent reproducer. |
||
kind : KIND_CPU | ||
} | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,21 +25,20 @@ | |
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
import os | ||
import queue | ||
import re | ||
from functools import partial | ||
from typing import Union | ||
from typing import Tuple, Union | ||
|
||
import numpy as np | ||
import requests | ||
import tritonserver | ||
from tritonclient.utils import InferenceServerException | ||
from tritonfrontend import KServeGrpc, KServeHttp | ||
|
||
# TODO: Re-Format documentation to fit: | ||
# https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings | ||
from tritonfrontend import KServeGrpc, KServeHttp, Metrics | ||
|
||
|
||
def setup_server(model_repository="test_model_repository") -> tritonserver.Server: | ||
""" | ||
Using tritonserver, starts a server with the models: identity and delayed_identity | ||
""" | ||
module_directory = os.path.split(os.path.abspath(__file__))[0] | ||
model_path = os.path.abspath(os.path.join(module_directory, model_repository)) | ||
|
||
|
@@ -61,9 +60,12 @@ def teardown_server(server: tritonserver.Server) -> None: | |
|
||
def setup_service(
    server: tritonserver.Server,
    frontend: Union[KServeHttp, KServeGrpc, Metrics],
    options=None,
) -> Union[KServeHttp, KServeGrpc, Metrics]:
    """
    Instantiate one of the tritonfrontend frontends against *server*,
    start it, and hand the running service back to the caller.
    """
    running_service = frontend(server=server, options=options)
    running_service.start()
    return running_service
|
@@ -73,16 +75,31 @@ def teardown_service(service: Union[KServeHttp, KServeGrpc]) -> None: | |
service.stop() | ||
|
||
|
||
def setup_client(
    frontend_client: Union["tritonclient.http", "tritonclient.grpc"], url: str
):
    """
    Build an InferenceServerClient from the given tritonclient module,
    pointed at *url*, for talking to the server over that protocol.
    """
    client_cls = frontend_client.InferenceServerClient
    return client_cls(url=url)
|
||
|
||
def teardown_client(
    client: Union[
        "tritonclient.http.InferenceServerClient",
        "tritonclient.grpc.InferenceServerClient",
    ]
) -> None:
    """Close the client, releasing its underlying connection resources."""
    client.close()
|
||
|
||
# Sends an inference to test_model_repository/identity model and verifies input == output. | ||
def send_and_test_inference_identity(frontend_client, url: str) -> bool: | ||
def send_and_test_inference_identity( | ||
frontend_client: Union["tritonclient.http", "tritonclient.grpc"], url: str | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See other comment on type hints, apply throughout |
||
) -> bool: | ||
""" | ||
Sends an inference request to the model at test_model_repository/identity | ||
and verifies input == output | ||
""" | ||
model_name = "identity" | ||
client = setup_client(frontend_client, url) | ||
input_data = np.array(["testing"], dtype=object) | ||
|
@@ -102,9 +119,13 @@ def send_and_test_inference_identity(frontend_client, url: str) -> bool: | |
return input_data[0] == output_data[0].decode() | ||
|
||
|
||
# Sends multiple streaming requests to "delayed_identity" model with negligible delays, | ||
# and verifies the inputs matches outputs and the ordering is preserved. | ||
def send_and_test_stream_inference(frontend_client, url: str) -> bool: | ||
def send_and_test_stream_inference( | ||
frontend_client: Union["tritonclient.http", "tritonclient.grpc"], url: str | ||
) -> bool: | ||
""" | ||
Sends multiple streaming requests to "delayed_identity" model with negligible delays | ||
and verifies the inputs matches outputs and the ordering is preserved. | ||
""" | ||
num_requests = 100 | ||
requests = [] | ||
for i in range(num_requests): | ||
|
@@ -135,14 +156,18 @@ def callback(responses, result, error): | |
|
||
|
||
def send_and_test_generate_inference() -> bool: | ||
""" | ||
Sends an inference request to and identity model through the | ||
HTTP generate endpoint and verifies input == output | ||
""" | ||
model_name = "identity" | ||
url = f"http://localhost:8000/v2/models/{model_name}/generate" | ||
input_text = "testing" | ||
data = { | ||
"INPUT0": input_text, | ||
} | ||
|
||
response = requests.post(url, json=data, stream=True) | ||
response = requests.post(url, json=data) | ||
if response.status_code == 200: | ||
result = response.json() | ||
output_text = result.get("OUTPUT0", "") | ||
|
@@ -151,3 +176,32 @@ def send_and_test_generate_inference() -> bool: | |
return True | ||
|
||
return False | ||
|
||
|
||
def get_metrics(metrics_url: str, model_name: str = "identity") -> Tuple[int, int]:
    """
    Query the metrics endpoint and report two things:
    1. Status Code = whether the interaction with the Metrics endpoint succeeded
    2. Inference Count = whether the metrics data being returned is accurate
    """
    response = requests.get(metrics_url)
    # Only attempt to parse the payload when the endpoint answered successfully;
    # otherwise the inference count is reported as None.
    if response.status_code != 200:
        return response.status_code, None
    return response.status_code, _extract_inference_count(response.text, model_name)
|
||
|
||
def _extract_inference_count(metrics_data: str, model_name: str): | ||
""" | ||
Helper function for _get_metrics that parses metrics_data (prometheus-friendly | ||
format) with regex to extract the inference count of model_name. | ||
""" | ||
pattern = ( | ||
rf'nv_inference_count\{{.*?model="{re.escape(model_name)}".*?\}}\s+([0-9.]+)' | ||
) | ||
match = re.search(pattern, metrics_data) | ||
if match: | ||
return int(float(match.group(1))) | ||
|
||
return None |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,3 +37,10 @@ | |
# TRITON_ENABLE_GRPC=OFF | ||
# TritonFrontendGrpc Package was not present | ||
pass | ||
|
||
try:
    from ._metrics import Metrics
except ImportError:
    # TRITON_ENABLE_METRICS=OFF
    # TritonFrontendMetrics package was not present
    pass
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't love that the `Metrics` object is a web server, so it makes me wonder if we should rename these down the line, e.g. `KServeHttpService`, `MetricsService`, etc. But I don't have a strong opinion on an alternative right now, so I think it's fine — just mentioning for later. We will probably be restructuring some packaging and naming in the near-to-mid future.