From 2867f07e66cf2067fca5d6c4c59b88f14fb22a62 Mon Sep 17 00:00:00 2001 From: Tanmay Verma Date: Fri, 5 Jan 2024 14:32:01 -0800 Subject: [PATCH] Fix docstring for sphinx autodoc (#443) * Fix docstring for sphinx autodoc * Format fix --- src/python/library/tritonclient/_plugin.py | 4 +- src/python/library/tritonclient/_request.py | 2 +- .../library/tritonclient/grpc/__init__.py | 12 ++++ .../library/tritonclient/grpc/_client.py | 33 ++++++---- .../tritonclient/grpc/_infer_result.py | 2 +- .../tritonclient/grpc/_infer_stream.py | 6 +- .../tritonclient/grpc/_requested_output.py | 4 +- .../library/tritonclient/grpc/_utils.py | 6 +- .../library/tritonclient/grpc/aio/__init__.py | 61 ++++++++++--------- .../tritonclient/grpc/auth/__init__.py | 2 + .../library/tritonclient/http/__init__.py | 12 ++++ .../library/tritonclient/http/_client.py | 35 +++++++---- .../tritonclient/http/_infer_result.py | 4 +- .../tritonclient/http/_requested_output.py | 4 +- .../library/tritonclient/http/_utils.py | 4 +- .../library/tritonclient/http/aio/__init__.py | 55 +++++++++-------- .../tritonclient/http/auth/__init__.py | 2 + .../library/tritonclient/utils/__init__.py | 2 +- 18 files changed, 151 insertions(+), 99 deletions(-) diff --git a/src/python/library/tritonclient/_plugin.py b/src/python/library/tritonclient/_plugin.py index 9353b6730..037964874 100755 --- a/src/python/library/tritonclient/_plugin.py +++ b/src/python/library/tritonclient/_plugin.py @@ -30,7 +30,8 @@ class InferenceServerClientPlugin(ABC): """Every Triton Client Plugin should extend this class. - Each plugin needs to implement the `__call__` method. + Each plugin needs to implement the :py:meth:`__call__` method. + """ @abstractmethod @@ -42,5 +43,6 @@ def __call__(self, request): ---------- request : Request The request object. + """ pass diff --git a/src/python/library/tritonclient/_request.py b/src/python/library/tritonclient/_request.py index 09d574497..6aea22832 100755 --- a/src/python/library/tritonclient/_request.py +++ b/src/python/library/tritonclient/_request.py @@ -29,7 +29,7 @@ class Request: """A request object. - Attributes + Parameters ---------- headers : dict A dictionary containing the request headers. diff --git a/src/python/library/tritonclient/grpc/__init__.py b/src/python/library/tritonclient/grpc/__init__.py index 852d5f0d6..5374f629e 100755 --- a/src/python/library/tritonclient/grpc/__init__.py +++ b/src/python/library/tritonclient/grpc/__init__.py @@ -32,6 +32,7 @@ from tritonclient.utils import * from .._plugin import InferenceServerClientPlugin + from .._request import Request from ._client import MAX_GRPC_MESSAGE_SIZE, InferenceServerClient, KeepAliveOptions from ._infer_input import InferInput from ._infer_result import InferResult @@ -59,3 +60,14 @@ "use versions <1.43.0 or >=1.51.1 to avoid leaks " "(see https://github.com/grpc/grpc/issues/28513)." ) + +__all__ = [ + "InferenceServerClientPlugin", + "Request", + "InferenceServerClient", + "InferInput", + "InferRequestedOutput", + "InferResult", + "KeepAliveOptions", + "InferenceServerException", +] diff --git a/src/python/library/tritonclient/grpc/_client.py b/src/python/library/tritonclient/grpc/_client.py index 90904acf1..1c68115c4 100755 --- a/src/python/library/tritonclient/grpc/_client.py +++ b/src/python/library/tritonclient/grpc/_client.py @@ -982,6 +982,7 @@ def update_log_settings( ): """Update the global log settings. Returns the log settings after the update. 
+ Parameters ---------- settings: dict @@ -1005,11 +1006,13 @@ def update_log_settings( InferenceServerExeption with message "Deadline Exceeded" when the specified time elapses. The default value is None which means client will wait for the response from the server. + Returns ------- dict or protobuf message The JSON dict or LogSettingsResponse message holding the updated log settings. + Raises ------ InferenceServerException @@ -1047,6 +1050,7 @@ def update_log_settings( def get_log_settings(self, headers=None, as_json=False, client_timeout=None): """Get the global log settings. + Parameters ---------- headers: dict @@ -1067,15 +1071,18 @@ def get_log_settings(self, headers=None, as_json=False, client_timeout=None): InferenceServerExeption with message "Deadline Exceeded" when the specified time elapses. The default value is None which means client will wait for the response from the server. + Returns ------- dict or protobuf message The JSON dict or LogSettingsResponse message holding the log settings. + Raises ------ InferenceServerException If unable to get the log settings or has timed out. + """ metadata = self._get_metadata(headers) try: @@ -1460,14 +1467,14 @@ def infer( model_name: str The name of the model to run inference. inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. model_version : str The version of the model to run inference. The default value is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. request_id : str @@ -1590,12 +1597,12 @@ def async_infer( model_name: str The name of the model to run inference. inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. callback : function Python function that is invoked once the request is completed. The function must reserve the last two arguments (result, error) - to hold InferResult and InferenceServerException + to hold :py:class:`InferResult` and :py:class:`InferenceServerException` objects respectively which will be provided to the function when executing the callback. The ownership of these objects will be given to the user. The 'error' would be None for a successful inference. @@ -1604,7 +1611,7 @@ def async_infer( is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. request_id : str @@ -1668,13 +1675,13 @@ def async_infer( Computations represented by a Future may be yet to be begun, ongoing, or have already completed. + Note + ---- This object can be used to cancel the inference request like below: - ---------- - future = async_infer(...) - ret = future.cancel() - ---------- + >>> future = async_infer(...) 
+ >>> ret = future.cancel() Raises ------ @@ -1745,8 +1752,8 @@ def start_stream( callback : function Python function that is invoked upon receiving response from the underlying stream. The function must reserve the last two - arguments (result, error) to hold InferResult and - InferenceServerException objects respectively + arguments (result, error) to hold :py:class:`InferResult` and + :py:class:`InferenceServerException` objects respectively which will be provided to the function when executing the callback. The ownership of these objects will be given to the user. The 'error' would be None for a successful inference. @@ -1830,14 +1837,14 @@ def async_stream_infer( model_name: str The name of the model to run inference. inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. model_version: str The version of the model to run inference. The default value is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. request_id : str diff --git a/src/python/library/tritonclient/grpc/_infer_result.py b/src/python/library/tritonclient/grpc/_infer_result.py index 2369d3bcf..ff3275b02 100755 --- a/src/python/library/tritonclient/grpc/_infer_result.py +++ b/src/python/library/tritonclient/grpc/_infer_result.py @@ -32,7 +32,7 @@ class InferResult: - """An object of InferResult class holds the response of + """An object of :py:class:`InferResult` class holds the response of an inference request and provide methods to retrieve inference results. diff --git a/src/python/library/tritonclient/grpc/_infer_stream.py b/src/python/library/tritonclient/grpc/_infer_stream.py index 74e668d5e..92ec05890 100755 --- a/src/python/library/tritonclient/grpc/_infer_stream.py +++ b/src/python/library/tritonclient/grpc/_infer_stream.py @@ -45,11 +45,13 @@ class _InferStream: callback : function Python function that is invoked upon receiving response from the underlying stream. The function must reserve the last two - arguments (result, error) to hold InferResult and - InferenceServerException objects respectively which will be + arguments (result, error) to hold :py:class:`InferResult` and + :py:class:`InferenceServerException` objects respectively which will be provided to the function when executing the callback. The ownership of these objects will be given to the user. The 'error' would be None for a successful inference. + verbose : bool + Enables verbose mode if set True. """ def __init__(self, callback, verbose): diff --git a/src/python/library/tritonclient/grpc/_requested_output.py b/src/python/library/tritonclient/grpc/_requested_output.py index e2d446c58..442093c15 100755 --- a/src/python/library/tritonclient/grpc/_requested_output.py +++ b/src/python/library/tritonclient/grpc/_requested_output.py @@ -31,7 +31,7 @@ class InferRequestedOutput: - """An object of InferRequestedOutput class is used to describe a + """An object of :py:class:`InferRequestedOutput` class is used to describe a requested output tensor for an inference request. 
Parameters @@ -89,7 +89,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0): def unset_shared_memory(self): """Clears the shared memory option set by the last call to - InferRequestedOutput.set_shared_memory(). After call to this + :py:meth:`InferRequestedOutput.set_shared_memory()`. After call to this function requested output will no longer be returned in a shared memory region. """ diff --git a/src/python/library/tritonclient/grpc/_utils.py b/src/python/library/tritonclient/grpc/_utils.py index 1ea8450d2..8dd48f785 100755 --- a/src/python/library/tritonclient/grpc/_utils.py +++ b/src/python/library/tritonclient/grpc/_utils.py @@ -32,7 +32,7 @@ def get_error_grpc(rpc_error): - """Convert a gRPC error to an InferenceServerException. + """Convert a gRPC error to an :py:class:`InferenceServerException`. Parameters ---------- @@ -51,7 +51,7 @@ def get_error_grpc(rpc_error): def get_cancelled_error(msg=None): - """Get InferenceServerException object for a cancelled RPC. + """Get :py:class:`InferenceServerException` object for a cancelled RPC. Returns ------- @@ -63,7 +63,7 @@ def get_cancelled_error(msg=None): def raise_error_grpc(rpc_error): - """Raise an InferenceServerException from a gRPC error. + """Raise an :py:class:`InferenceServerException` from a gRPC error. Parameters ---------- diff --git a/src/python/library/tritonclient/grpc/aio/__init__.py b/src/python/library/tritonclient/grpc/aio/__init__.py index 4a2067530..3b4c949df 100755 --- a/src/python/library/tritonclient/grpc/aio/__init__.py +++ b/src/python/library/tritonclient/grpc/aio/__init__.py @@ -43,7 +43,7 @@ class InferenceServerClient(InferenceServerClientBase): """This feature is currently in beta and may be subject to change. - An analogy of the tritonclient.grpc.InferenceServerClient to enable + An analogy of the :py:class:`tritonclient.grpc.InferenceServerClient` to enable calling via asyncio syntax. The object is intended to be used by a single thread and simultaneously calling methods with different threads is not supported and can cause undefined behavior. 
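[Editor's note] The _client.py hunks earlier in this patch describe the InferInput / InferRequestedOutput objects and the (result, error) callback contract used by async_infer, and the asyncio client whose diff begins above mirrors that synchronous API. For orientation, a minimal sketch of the synchronous gRPC client is shown here; the server address, model name, and tensor names are placeholders and are not part of this patch:

    import threading

    import numpy as np
    import tritonclient.grpc as grpcclient

    # Placeholder address and names -- substitute your own deployment.
    client = grpcclient.InferenceServerClient(url="localhost:8001")

    # One input tensor and one requested output, as described in the docstrings above.
    inputs = [grpcclient.InferInput("INPUT0", [1, 16], "FP32")]
    inputs[0].set_data_from_numpy(np.random.rand(1, 16).astype(np.float32))
    outputs = [grpcclient.InferRequestedOutput("OUTPUT0")]

    # Blocking inference returns an InferResult.
    result = client.infer("my_model", inputs, outputs=outputs)
    print(result.as_numpy("OUTPUT0"))

    # async_infer: the callback must reserve its last two arguments
    # (result, error); 'error' is None for a successful inference.
    done = threading.Event()

    def callback(result, error):
        if error is not None:
            print("inference failed:", error)
        else:
            print(result.as_numpy("OUTPUT0"))
        done.set()

    future = client.async_infer("my_model", inputs, callback=callback, outputs=outputs)
    # Per the docstring above, the returned future can cancel the request:
    #     ret = future.cancel()
    done.wait(timeout=60)
    client.close()
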
@@ -142,7 +142,7 @@ def _get_metadata(self, headers): return request_metadata async def is_server_live(self, headers=None, client_timeout=None): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.is_server_live`""" metadata = self._get_metadata(headers) try: request = service_pb2.ServerLiveRequest() @@ -158,7 +158,7 @@ async def is_server_live(self, headers=None, client_timeout=None): raise_error_grpc(rpc_error) async def is_server_ready(self, headers=None, client_timeout=None): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.is_server_ready`""" metadata = self._get_metadata(headers) try: request = service_pb2.ServerReadyRequest() @@ -176,7 +176,7 @@ async def is_server_ready(self, headers=None, client_timeout=None): async def is_model_ready( self, model_name, model_version="", headers=None, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.is_model_ready`""" metadata = self._get_metadata(headers) try: if type(model_version) != str: @@ -198,7 +198,7 @@ async def is_model_ready( async def get_server_metadata( self, headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_server_metadata`""" metadata = self._get_metadata(headers) try: request = service_pb2.ServerMetadataRequest() @@ -221,7 +221,7 @@ async def get_model_metadata( as_json=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_model_metadata`""" metadata = self._get_metadata(headers) try: if type(model_version) != str: @@ -248,7 +248,7 @@ async def get_model_config( as_json=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_model_config`""" metadata = self._get_metadata(headers) try: if type(model_version) != str: @@ -270,7 +270,7 @@ async def get_model_config( async def get_model_repository_index( self, headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_model_repository_index`""" metadata = self._get_metadata(headers) try: request = service_pb2.RepositoryIndexRequest() @@ -297,7 +297,7 @@ async def load_model( files=None, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.load_model`""" metadata = self._get_metadata(headers) try: request = service_pb2.RepositoryModelLoadRequest(model_name=model_name) @@ -328,7 +328,7 @@ async def unload_model( unload_dependents=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.unload_model`""" metadata = self._get_metadata(headers) try: request = service_pb2.RepositoryModelUnloadRequest(model_name=model_name) @@ -351,7 +351,7 @@ async def get_inference_statistics( as_json=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to ::py:meth:`tritonclient.grpc.InferenceServerClient.get_inference_statistics`""" metadata = self._get_metadata(headers) try: if type(model_version) != str: 
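[Editor's note] The coroutines above defer to the synchronous client's documentation for their parameters. A minimal sketch of driving the beta asyncio client follows; the model name and address are placeholders, and it assumes the asyncio client exposes close() like its synchronous counterpart:

    import asyncio

    import tritonclient.grpc.aio as grpcclient_aio

    async def main():
        # Placeholder address and model name; the asyncio client mirrors the
        # synchronous tritonclient.grpc.InferenceServerClient method names.
        client = grpcclient_aio.InferenceServerClient(url="localhost:8001")
        try:
            if await client.is_server_live() and await client.is_server_ready():
                if await client.is_model_ready("my_model"):
                    metadata = await client.get_model_metadata("my_model")
                    print(metadata)
        finally:
            # Assumed to exist, matching the synchronous client's API.
            await client.close()

    asyncio.run(main())
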
@@ -382,7 +382,7 @@ async def update_trace_settings( as_json=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.update_trace_settings`""" metadata = self._get_metadata(headers) try: request = service_pb2.TraceSettingRequest() @@ -412,7 +412,7 @@ async def update_trace_settings( async def get_trace_settings( self, model_name=None, headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_trace_settings`""" metadata = self._get_metadata(headers) try: request = service_pb2.TraceSettingRequest() @@ -432,7 +432,7 @@ async def get_trace_settings( async def update_log_settings( self, settings, headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.update_log_settings`""" metadata = self._get_metadata(headers) try: request = service_pb2.LogSettingsRequest() @@ -459,7 +459,7 @@ async def update_log_settings( raise_error_grpc(rpc_error) async def get_log_settings(self, headers=None, as_json=False, client_timeout=None): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_log_settings`""" metadata = self._get_metadata(headers) try: request = service_pb2.LogSettingsRequest() @@ -477,7 +477,7 @@ async def get_log_settings(self, headers=None, as_json=False, client_timeout=Non async def get_system_shared_memory_status( self, region_name="", headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_system_shared_memory_status`""" metadata = self._get_metadata(headers) try: request = service_pb2.SystemSharedMemoryStatusRequest(name=region_name) @@ -499,7 +499,7 @@ async def get_system_shared_memory_status( async def register_system_shared_memory( self, name, key, byte_size, offset=0, headers=None, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.register_system_shared_memory`""" metadata = self._get_metadata(headers) try: request = service_pb2.SystemSharedMemoryRegisterRequest( @@ -522,7 +522,7 @@ async def register_system_shared_memory( async def unregister_system_shared_memory( self, name="", headers=None, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.unregister_system_shared_memory`""" metadata = self._get_metadata(headers) try: request = service_pb2.SystemSharedMemoryUnregisterRequest(name=name) @@ -548,7 +548,7 @@ async def unregister_system_shared_memory( async def get_cuda_shared_memory_status( self, region_name="", headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_cuda_shared_memory_status`""" metadata = self._get_metadata(headers) try: @@ -577,7 +577,7 @@ async def register_cuda_shared_memory( headers=None, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.register_cuda_shared_memory`""" metadata = self._get_metadata(headers) try: request = 
service_pb2.CudaSharedMemoryRegisterRequest( @@ -603,7 +603,7 @@ async def register_cuda_shared_memory( async def unregister_cuda_shared_memory( self, name="", headers=None, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.unregister_cuda_shared_memory`""" metadata = self._get_metadata(headers) try: request = service_pb2.CudaSharedMemoryUnregisterRequest(name=name) @@ -641,7 +641,7 @@ async def infer( compression_algorithm=None, parameters=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.infer`""" metadata = self._get_metadata(headers) @@ -692,8 +692,8 @@ def stream_infer( ---------- inputs_iterator : asynchronous iterator Async iterator that yields a dict(s) consists of the input - parameters to the async_stream_infer function defined in - tritonclient.grpc.InferenceServerClient. + parameters to the :py:meth:`tritonclient.grpc.InferenceServerClient.async_stream_infer` function defined in + :py:class:`tritonclient.grpc.InferenceServerClient`. stream_timeout : float Optional stream timeout. The stream will be closed once the specified timeout expires. @@ -708,17 +708,18 @@ def stream_infer( Returns ------- asynchronous iterator - Yield tuple holding (InferResult, InferenceServerException) objects. + Yield tuple holding (:py:class:`tritonclient.grpc.InferResult`, :py:class:`tritonclient.grpc.InferenceServerException`) objects. + Note + ---- This object can be used to cancel the inference request like below: - ---------- - it = stream_infer(...) - ret = it.cancel() - ---------- + + >>> it = stream_infer(...) + >>> ret = it.cancel() Raises ------ - InferenceServerException + :py:class:`tritonclient.grpc.InferenceServerException` If inputs_iterator does not yield the correct input. """ diff --git a/src/python/library/tritonclient/grpc/auth/__init__.py b/src/python/library/tritonclient/grpc/auth/__init__.py index ba0e25719..af220636c 100755 --- a/src/python/library/tritonclient/grpc/auth/__init__.py +++ b/src/python/library/tritonclient/grpc/auth/__init__.py @@ -27,3 +27,5 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from ..._auth import BasicAuth + +__all__ = ["BasicAuth"] diff --git a/src/python/library/tritonclient/http/__init__.py b/src/python/library/tritonclient/http/__init__.py index 84fafabf7..b221d0604 100755 --- a/src/python/library/tritonclient/http/__init__.py +++ b/src/python/library/tritonclient/http/__init__.py @@ -30,6 +30,7 @@ from tritonclient.utils import * from .._plugin import InferenceServerClientPlugin + from .._request import Request from ._client import InferAsyncRequest, InferenceServerClient from ._infer_input import InferInput from ._infer_result import InferResult @@ -39,3 +40,14 @@ raise RuntimeError( "The installation does not include http support. 
Specify 'http' or 'all' while installing the tritonclient package to include the support" ) from error + +__all__ = [ + "InferenceServerClientPlugin", + "Request", + "InferenceServerClient", + "InferInput", + "InferRequestedOutput", + "InferResult", + "InferAsyncRequest", + "InferenceServerException", +] diff --git a/src/python/library/tritonclient/http/_client.py b/src/python/library/tritonclient/http/_client.py index a769d7f3b..5608b56b7 100755 --- a/src/python/library/tritonclient/http/_client.py +++ b/src/python/library/tritonclient/http/_client.py @@ -64,6 +64,7 @@ def __init__(self, greenlet, verbose=False): def get_result(self, block=True, timeout=None): """Get the results of the associated asynchronous inference. + Parameters ---------- block : bool @@ -86,6 +87,7 @@ def get_result(self, block=True, timeout=None): InferenceServerException If server fails to perform inference or failed to respond within specified timeout. + """ try: @@ -152,9 +154,9 @@ class InferenceServerClient(InferenceServerClientBase): specified False. Raises - ------ - Exception - If unable to create a client. + ------ + Exception + If unable to create a client. """ @@ -209,7 +211,7 @@ def close(self): def _get(self, request_uri, headers, query_params): """Issues the GET request to the server - Parameters + Parameters ---------- request_uri: str The request URI to be used in GET request. @@ -223,6 +225,7 @@ def _get(self, request_uri, headers, query_params): ------- geventhttpclient.response.HTTPSocketPoolResponse The response from server. + """ request = Request(headers) self._call_plugin(request) @@ -860,6 +863,7 @@ def get_trace_settings(self, model_name=None, headers=None, query_params=None): def update_log_settings(self, settings, headers=None, query_params=None): """Update the global log settings of the Triton server. + Parameters ---------- settings: dict @@ -871,14 +875,17 @@ def update_log_settings(self, settings, headers=None, query_params=None): query_params: dict Optional url query parameters to use in network transaction + Returns ------- dict The JSON dict holding the updated log settings. + Raises ------ InferenceServerException If unable to update the log settings. + """ request_uri = "v2/logging" response = self._post( @@ -897,6 +904,7 @@ def update_log_settings(self, settings, headers=None, query_params=None): def get_log_settings(self, headers=None, query_params=None): """Get the global log settings for the Triton server + Parameters ---------- headers: dict @@ -905,14 +913,17 @@ def get_log_settings(self, headers=None, query_params=None): query_params: dict Optional url query parameters to use in network transaction + Returns ------- dict The JSON dict holding the log settings. + Raises ------ InferenceServerException If unable to get the log settings. + """ request_uri = "v2/logging" @@ -1219,10 +1230,10 @@ def generate_request_body( Parameters ---------- inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. 
request_id: str @@ -1290,7 +1301,7 @@ def generate_request_body( def parse_response_body( response_body, verbose=False, header_length=None, content_encoding=None ): - """Generate a InferResult object from the given 'response_body' + """Generate a :py:class:`InferResult` object from the given 'response_body' Parameters ---------- @@ -1340,14 +1351,14 @@ def infer( model_name: str The name of the model to run inference. inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. model_version: str The version of the model to run inference. The default value is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. request_id: str @@ -1501,14 +1512,14 @@ def async_infer( model_name: str The name of the model to run inference. inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. model_version: str The version of the model to run inference. The default value is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. request_id: str @@ -1564,7 +1575,7 @@ def async_infer( Returns ------- - InferAsyncRequest object + InferAsyncRequest The handle to the asynchronous inference request. Raises diff --git a/src/python/library/tritonclient/http/_infer_result.py b/src/python/library/tritonclient/http/_infer_result.py index bf9f82476..756f0c4fe 100755 --- a/src/python/library/tritonclient/http/_infer_result.py +++ b/src/python/library/tritonclient/http/_infer_result.py @@ -39,7 +39,7 @@ class InferResult: - """An object of InferResult class holds the response of + """An object of :py:class:`InferResult` class holds the response of an inference request and provide methods to retrieve inference results. @@ -109,7 +109,7 @@ def read(self, length=-1): def from_response_body( cls, response_body, verbose=False, header_length=None, content_encoding=None ): - """A class method to construct InferResult object + """A class method to construct :py:class:`InferResult` object from a given 'response_body'. Parameters diff --git a/src/python/library/tritonclient/http/_requested_output.py b/src/python/library/tritonclient/http/_requested_output.py index 5432de491..cbc30f649 100755 --- a/src/python/library/tritonclient/http/_requested_output.py +++ b/src/python/library/tritonclient/http/_requested_output.py @@ -29,7 +29,7 @@ class InferRequestedOutput: - """An object of InferRequestedOutput class is used to describe a + """An object of :py:class:`InferRequestedOutput` class is used to describe a requested output tensor for an inference request. 
Parameters @@ -93,7 +93,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0): def unset_shared_memory(self): """Clears the shared memory option set by the last call to - InferRequestedOutput.set_shared_memory(). After call to this + :py:meth:`InferRequestedOutput.set_shared_memory()`. After call to this function requested output will no longer be returned in a shared memory region. """ diff --git a/src/python/library/tritonclient/http/_utils.py b/src/python/library/tritonclient/http/_utils.py index 2098a48bc..0770cc655 100755 --- a/src/python/library/tritonclient/http/_utils.py +++ b/src/python/library/tritonclient/http/_utils.py @@ -34,7 +34,7 @@ def _get_error(response): """ - Returns the InferenceServerException object if response + Returns the :py:class:`InferenceServerException` object if response indicates the error. If no error then return None """ if response.status_code != 200: @@ -61,7 +61,7 @@ def _get_error(response): def _raise_if_error(response): """ - Raise InferenceServerException if received non-Success + Raise :py:class:`InferenceServerException` if received non-Success response from the server """ error = _get_error(response) diff --git a/src/python/library/tritonclient/http/aio/__init__.py b/src/python/library/tritonclient/http/aio/__init__.py index 5101a310d..68cb99dce 100755 --- a/src/python/library/tritonclient/http/aio/__init__.py +++ b/src/python/library/tritonclient/http/aio/__init__.py @@ -50,7 +50,7 @@ async def _get_error(response): """ - Returns the InferenceServerException object if response + Returns the :py:class:`InferenceServerException` object if response indicates the error. If no error then return None """ if response.status != 200: @@ -78,7 +78,7 @@ async def _get_error(response): async def _raise_if_error(response): """ - Raise InferenceServerException if received non-Success + Raise :py:class:`InferenceServerException` if received non-Success response from the server """ error = await _get_error(response) @@ -89,7 +89,7 @@ async def _raise_if_error(response): class InferenceServerClient(InferenceServerClientBase): """This feature is currently in beta and may be subject to change. - An analogy of the tritonclient.http.InferenceServerClient to enable + An analogy of the :py:class:`tritonclient.http.InferenceServerClient` to enable calling via asyncio syntax. The object is intended to be used by a single thread and simultaneously calling methods with different threads is not supported and can cause undefined behavior. @@ -135,7 +135,7 @@ async def close(self): async def _get(self, request_uri, headers, query_params): """Issues the GET request to the server - Parameters + Parameters ---------- request_uri: str The request URI to be used in GET request. @@ -149,6 +149,7 @@ async def _get(self, request_uri, headers, query_params): ------- aiohttp.ClientResponse The response from server. 
+ """ request = Request(headers) self._call_plugin(request) @@ -264,7 +265,7 @@ def _fix_header(self, headers): return fix_header async def is_server_live(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.is_server_live`""" request_uri = "v2/health/live" response = await self._get( request_uri=request_uri, headers=headers, query_params=query_params @@ -273,7 +274,7 @@ async def is_server_live(self, headers=None, query_params=None): return response.status == 200 async def is_server_ready(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.is_server_ready`""" request_uri = "v2/health/ready" response = await self._get( request_uri=request_uri, headers=headers, query_params=query_params @@ -284,7 +285,7 @@ async def is_server_ready(self, headers=None, query_params=None): async def is_model_ready( self, model_name, model_version="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.is_model_ready`""" if type(model_version) != str: raise_error("model version must be a string") if model_version != "": @@ -301,7 +302,7 @@ async def is_model_ready( return response.status == 200 async def get_server_metadata(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_server_metadata`""" request_uri = "v2" response = await self._get( request_uri=request_uri, headers=headers, query_params=query_params @@ -317,7 +318,7 @@ async def get_server_metadata(self, headers=None, query_params=None): async def get_model_metadata( self, model_name, model_version="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_model_metadata`""" if type(model_version) != str: raise_error("model version must be a string") if model_version != "": @@ -341,7 +342,7 @@ async def get_model_metadata( async def get_model_config( self, model_name, model_version="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_model_config`""" if model_version != "": request_uri = "v2/models/{}/versions/{}/config".format( quote(model_name), model_version @@ -361,7 +362,7 @@ async def get_model_config( return json.loads(content) async def get_model_repository_index(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_model_repository_index`""" request_uri = "v2/repository/index" response = await self._post( request_uri=request_uri, @@ -380,7 +381,7 @@ async def get_model_repository_index(self, headers=None, query_params=None): async def load_model( self, model_name, headers=None, query_params=None, config=None, files=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.load_model`""" request_uri = "v2/repository/models/{}/load".format(quote(model_name)) load_request = {} if config is not None: @@ -405,7 +406,7 @@ async def load_model( async def unload_model( self, model_name, headers=None, query_params=None, 
unload_dependents=False ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.unload_model`""" request_uri = "v2/repository/models/{}/unload".format(quote(model_name)) unload_request = {"parameters": {"unload_dependents": unload_dependents}} response = await self._post( @@ -421,7 +422,7 @@ async def unload_model( async def get_inference_statistics( self, model_name="", model_version="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_inference_statistics`""" if model_name != "": if type(model_version) != str: raise_error("model version must be a string") @@ -448,7 +449,7 @@ async def get_inference_statistics( async def update_trace_settings( self, model_name=None, settings={}, headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.update_trace_settings`""" if (model_name is not None) and (model_name != ""): request_uri = "v2/models/{}/trace/setting".format(quote(model_name)) else: @@ -471,7 +472,7 @@ async def update_trace_settings( async def get_trace_settings( self, model_name=None, headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_trace_settings`""" if (model_name is not None) and (model_name != ""): request_uri = "v2/models/{}/trace/setting".format(quote(model_name)) else: @@ -489,7 +490,7 @@ async def get_trace_settings( return json.loads(content) async def update_log_settings(self, settings, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.update_log_settings`""" request_uri = "v2/logging" response = await self._post( @@ -507,7 +508,7 @@ async def update_log_settings(self, settings, headers=None, query_params=None): return json.loads(content) async def get_log_settings(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_log_settings`""" request_uri = "v2/logging" response = await self._get( @@ -524,7 +525,7 @@ async def get_log_settings(self, headers=None, query_params=None): async def get_system_shared_memory_status( self, region_name="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_system_shared_memory_status`""" if region_name != "": request_uri = "v2/systemsharedmemory/region/{}/status".format( quote(region_name) @@ -546,7 +547,7 @@ async def get_system_shared_memory_status( async def register_system_shared_memory( self, name, key, byte_size, offset=0, headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.register_system_shared_memory`""" request_uri = "v2/systemsharedmemory/region/{}/register".format(quote(name)) register_request = {"key": key, "offset": offset, "byte_size": byte_size} @@ -565,7 +566,7 @@ async def register_system_shared_memory( async def unregister_system_shared_memory( self, name="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to 
:py:meth:`tritonclient.http.InferenceServerClient.unregister_system_shared_memory`""" if name != "": request_uri = "v2/systemsharedmemory/region/{}/unregister".format( quote(name) @@ -589,7 +590,7 @@ async def unregister_system_shared_memory( async def get_cuda_shared_memory_status( self, region_name="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_cuda_shared_memory_status`""" if region_name != "": request_uri = "v2/cudasharedmemory/region/{}/status".format( quote(region_name) @@ -611,7 +612,7 @@ async def get_cuda_shared_memory_status( async def register_cuda_shared_memory( self, name, raw_handle, device_id, byte_size, headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.register_cuda_shared_memory`""" request_uri = "v2/cudasharedmemory/region/{}/register".format(quote(name)) register_request = { @@ -634,7 +635,7 @@ async def register_cuda_shared_memory( async def unregister_cuda_shared_memory( self, name="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.unregister_cuda_shared_memory`""" if name != "": request_uri = "v2/cudasharedmemory/region/{}/unregister".format(quote(name)) else: @@ -665,7 +666,7 @@ def generate_request_body( timeout=None, parameters=None, ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.generate_request_body`""" return _get_inference_request( inputs=inputs, request_id=request_id, @@ -682,7 +683,7 @@ def generate_request_body( def parse_response_body( response_body, verbose=False, header_length=None, content_encoding=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.parse_response_body`""" return InferResult.from_response_body( response_body, verbose, header_length, content_encoding ) @@ -705,7 +706,7 @@ async def infer( response_compression_algorithm=None, parameters=None, ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.infer`""" request_body, json_size = _get_inference_request( inputs=inputs, request_id=request_id, diff --git a/src/python/library/tritonclient/http/auth/__init__.py b/src/python/library/tritonclient/http/auth/__init__.py index ba0e25719..af220636c 100755 --- a/src/python/library/tritonclient/http/auth/__init__.py +++ b/src/python/library/tritonclient/http/auth/__init__.py @@ -27,3 +27,5 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from ..._auth import BasicAuth + +__all__ = ["BasicAuth"] diff --git a/src/python/library/tritonclient/utils/__init__.py b/src/python/library/tritonclient/utils/__init__.py index e761217ed..7f3079c66 100755 --- a/src/python/library/tritonclient/utils/__init__.py +++ b/src/python/library/tritonclient/utils/__init__.py @@ -193,7 +193,7 @@ def triton_to_np_dtype(dtype): def serialize_byte_tensor(input_tensor): """ Serializes a bytes tensor into a flat numpy array of length prepended - bytes. The numpy array should use dtype of np.object_. For np.bytes_, + bytes. The numpy array should use dtype of np.object. For np.bytes, numpy will remove trailing zeros at the end of byte sequence and because of this it should be avoided.
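[Editor's note] The final hunk's docstring concerns bytes tensors, which in client code are built with numpy dtype np.object_ rather than np.bytes_ (numpy strips trailing zeros from np.bytes_ values). A minimal sketch of sending a BYTES input with the HTTP client; the model name, tensor names, and address are placeholders, not part of this patch:

    import numpy as np
    import tritonclient.http as httpclient
    from tritonclient.utils import serialize_byte_tensor

    # Bytes tensors use dtype np.object_ in code.
    text = np.array([b"hello", b"triton"], dtype=np.object_)

    # serialize_byte_tensor produces the flat, length-prepended representation
    # described in the docstring above (not needed when using
    # set_data_from_numpy, which serializes BYTES inputs internally).
    serialized = serialize_byte_tensor(text)

    client = httpclient.InferenceServerClient(url="localhost:8000")
    inputs = [httpclient.InferInput("TEXT", [2], "BYTES")]
    inputs[0].set_data_from_numpy(text)
    outputs = [httpclient.InferRequestedOutput("OUTPUT0", binary_data=True)]

    result = client.infer("my_bytes_model", inputs, outputs=outputs)
    print(result.as_numpy("OUTPUT0"))
    client.close()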