[PyOV] Memory flow control with share_inputs and share_outputs (openvinotoolkit#18275)

* Added ReturnPolicy and updated common array helpers

* Clean up

* Remove ReturnPolicy initial

* Add share_inputs and share_outputs

* Tests and minor fixes

* Fix docstrings

* Fix whitespace

* Fix typing
Jan Iwaszkiewicz committed Aug 16, 2023
1 parent 4e790d7 commit 4173c0d
Showing 6 changed files with 331 additions and 174 deletions.
135 changes: 115 additions & 20 deletions src/bindings/python/src/openvino/runtime/ie_api.py
@@ -4,6 +4,7 @@

from typing import Any, Iterable, Union, Dict, Optional
from pathlib import Path
import warnings

import numpy as np

@@ -22,10 +23,30 @@
)


def _deprecated_memory_arg(shared_memory: bool, share_inputs: bool) -> bool:
if shared_memory is not None:
warnings.warn(
"`shared_memory` is deprecated and will be removed in 2024.0. "
"Value of `shared_memory` is going to override `share_inputs` value. "
"Please use only `share_inputs` explicitly.",
FutureWarning,
stacklevel=3,
)
return shared_memory
return share_inputs


class InferRequest(_InferRequestWrapper):
"""InferRequest class represents infer request which can be run in asynchronous or synchronous manners."""

def infer(self, inputs: Any = None, shared_memory: bool = False) -> OVDict:
def infer(
self,
inputs: Any = None,
share_inputs: bool = False,
share_outputs: bool = False,
*,
shared_memory: Any = None,
) -> OVDict:
"""Infers specified input(s) in synchronous mode.
Blocks all methods of InferRequest while request is running.
@@ -48,7 +69,7 @@ def infer(self, inputs: Any = None, shared_memory: bool = False) -> OVDict:
:param inputs: Data to be set on input tensors.
:type inputs: Any, optional
:param shared_memory: Enables `shared_memory` mode.
:param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs.
If set to `False`, the data dispatcher will safely copy data
to existing Tensors (including up- or down-casting according to data type,
@@ -63,25 +84,49 @@ def infer(self, inputs: Any = None, shared_memory: bool = False) -> OVDict:
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Keeps Tensor inputs "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in the extra memory overhead.
Note: Using `share_inputs` may result in extra memory overhead.
Default value: False
:type share_inputs: bool, optional
:param share_outputs: Enables `share_outputs` mode. Controls memory usage on inference's outputs.
If set to `False` outputs will be safely copied into numpy arrays.
If set to `True` the data will be returned in the form of views of output Tensors.
This mode still returns the data in the format of numpy arrays, but the lifetime of the data
is connected to OpenVINO objects.
Note: Use with extra care, shared data can be modified or lost during runtime!
Default value: False
:type share_outputs: bool, optional
:param shared_memory: Deprecated. Works like `share_inputs` mode.
If not specified, function uses `share_inputs` value.
Note: Will be removed in 2024.0 release!
Note: This is a keyword-only argument.
Default value: None
:type shared_memory: bool, optional
:return: Dictionary of results from output tensors with port/int/str keys.
:rtype: OVDict
"""
return OVDict(super().infer(_data_dispatch(
self,
inputs,
is_shared=shared_memory,
)))
is_shared=_deprecated_memory_arg(shared_memory, share_inputs),
), share_outputs=share_outputs))
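
A minimal usage sketch of the two new flags on InferRequest.infer; the model path, device name, and zero-filled input below are illustrative assumptions, not part of this change:

    import numpy as np
    import openvino.runtime as ov

    core = ov.Core()
    compiled = core.compile_model("model.xml", "CPU")  # hypothetical model file
    request = compiled.create_infer_request()
    data = np.zeros(tuple(compiled.input(0).shape), dtype=np.float32)

    # Pass the input by reference and get outputs back as views of the output Tensors.
    results = request.infer({0: data}, share_inputs=True, share_outputs=True)

    # Shared outputs stay tied to OpenVINO objects, so copy them if they must
    # outlive the request or survive the next inference call.
    first_output = results[0].copy()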

def start_async(
self,
inputs: Any = None,
userdata: Any = None,
shared_memory: bool = False,
share_inputs: bool = False,
*,
shared_memory: Any = None,
) -> None:
"""Starts inference of specified input(s) in asynchronous mode.
@@ -108,7 +153,7 @@ def start_async(
:type inputs: Any, optional
:param userdata: Any data that will be passed inside the callback.
:type userdata: Any
:param shared_memory: Enables `shared_memory` mode.
:param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs.
If set to `False`, the data dispatcher will safely copy data
to existing Tensors (including up- or down-casting according to data type,
@@ -123,17 +168,27 @@ def start_async(
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Keeps Tensor inputs "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Note: Using `share_inputs` may result in extra memory overhead.
Default value: False
:type share_inputs: bool, optional
:param shared_memory: Deprecated. Works like `share_inputs` mode.
If not specified, function uses `share_inputs` value.
Note: Will be removed in 2024.0 release!
Note: This is a keyword-only argument.
Default value: None
:type shared_memory: bool, optional
"""
super().start_async(
_data_dispatch(
self,
inputs,
is_shared=shared_memory,
is_shared=_deprecated_memory_arg(shared_memory, share_inputs),
),
userdata,
)
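
The asynchronous path takes the same `share_inputs` flag, and the deprecated keyword is routed through `_deprecated_memory_arg`, so passing it still works but emits a FutureWarning and overrides `share_inputs`. Continuing the sketch above (purely illustrative calls):

    # Preferred spelling:
    request.start_async(data, userdata=None, share_inputs=True)
    request.wait()

    # Legacy spelling: accepted until the 2024.0 release, but warns and
    # takes precedence over share_inputs.
    request.start_async(data, shared_memory=True)
    request.wait()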
@@ -203,9 +258,14 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray]
# overloaded functions of InferRequest class
return self.create_infer_request().infer(inputs)

def __call__(self,
inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None,
shared_memory: bool = True) -> OVDict:
def __call__(
self,
inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None,
share_inputs: bool = True,
share_outputs: bool = False,
*,
shared_memory: Any = None,
) -> OVDict:
"""Callable infer wrapper for CompiledModel.
Infers specified input(s) in synchronous mode.
@@ -236,7 +296,7 @@ def __call__(self,
:param inputs: Data to be set on input tensors.
:type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional
:param shared_memory: Enables `shared_memory` mode.
:param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs.
If set to `False`, the data dispatcher will safely copy data
to existing Tensors (including up- or down-casting according to data type,
@@ -251,12 +311,33 @@ def __call__(self,
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Keeps Tensor inputs "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Note: Using `share_inputs` may result in extra memory overhead.
Default value: True
:type shared_memory: bool, optional
:type share_inputs: bool, optional
:param share_outputs: Enables `share_outputs` mode. Controls memory usage on inference's outputs.
If set to `False` outputs will be safely copied into numpy arrays.
If set to `True` the data will be returned in the form of views of output Tensors.
This mode still returns the data in the format of numpy arrays, but the lifetime of the data
is connected to OpenVINO objects.
Note: Use with extra care, shared data can be modified or lost during runtime!
Default value: False
:type share_outputs: bool, optional
:param shared_memory: Deprecated. Works like `share_inputs` mode.
If not specified, function uses `share_inputs` value.
Note: Will be removed in 2024.0 release!
Note: This is a keyword-only argument.
Default value: None
:type shared_memory: bool, optional
:return: Dictionary of results from output tensors with port/int/str as keys.
:rtype: OVDict
"""
@@ -265,7 +346,8 @@ def __call__(self,

return self._infer_request.infer(
inputs,
shared_memory=shared_memory,
share_inputs=_deprecated_memory_arg(shared_memory, share_inputs),
share_outputs=share_outputs,
)
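
Note the different defaults on the callable wrapper: `share_inputs` defaults to True and `share_outputs` to False, so a plain call already avoids input copies. A short sketch under the same assumptions as above:

    # Equivalent to compiled(data, share_inputs=True, share_outputs=False):
    results = compiled(data)

    # Opt in to zero-copy outputs as well:
    views = compiled(data, share_outputs=True)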


@@ -299,7 +381,9 @@ def start_async(
self,
inputs: Any = None,
userdata: Any = None,
shared_memory: bool = False,
share_inputs: bool = False,
*,
shared_memory: Any = None,
) -> None:
"""Run asynchronous inference using the next available InferRequest from the pool.
@@ -322,7 +406,7 @@
:type inputs: Any, optional
:param userdata: Any data that will be passed to a callback.
:type userdata: Any, optional
:param shared_memory: Enables `shared_memory` mode.
:param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs.
If set to `False`, the data dispatcher will safely copy data
to existing Tensors (including up- or down-casting according to data type,
@@ -337,16 +421,27 @@
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Keeps Tensor inputs "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Note: Using `share_inputs` may result in extra memory overhead.
Default value: False
:type share_inputs: bool, optional
:param shared_memory: Deprecated. Works like `share_inputs` mode.
If not specified, function uses `share_inputs` value.
Note: Will be removed in 2024.0 release!
Note: This is a keyword-only argument.
Default value: None
:type shared_memory: bool, optional
"""
super().start_async(
_data_dispatch(
self[self.get_idle_request_id()],
inputs,
is_shared=shared_memory,
is_shared=_deprecated_memory_arg(shared_memory, share_inputs),
),
userdata,
)
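
AsyncInferQueue.start_async gains the same `share_inputs` flag (with the same deprecated `shared_memory` fallback); a minimal sketch continuing the example above, where the queue size, callback, and iteration count are illustrative:

    queue = ov.AsyncInferQueue(compiled, 4)

    def on_done(infer_request: ov.InferRequest, userdata: int) -> None:
        # Reading results inside the callback is safe; copy them if they are stored for later.
        print(userdata, infer_request.get_output_tensor(0).data.shape)

    queue.set_callback(on_done)
    for i in range(8):
        queue.start_async(data, userdata=i, share_inputs=True)
    queue.wait_all()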
75 changes: 16 additions & 59 deletions src/bindings/python/src/pyopenvino/core/common.cpp
@@ -131,65 +131,22 @@ py::array as_contiguous(py::array& array, ov::element::Type type) {
}
}

py::array array_from_tensor(ov::Tensor&& t) {
switch (t.get_element_type()) {
case ov::element::Type_t::f32: {
return py::array_t<float>(t.get_shape(), t.data<float>());
break;
}
case ov::element::Type_t::f64: {
return py::array_t<double>(t.get_shape(), t.data<double>());
break;
}
case ov::element::Type_t::bf16: {
return py::array(py::dtype("float16"), t.get_shape(), t.data<ov::bfloat16>());
break;
}
case ov::element::Type_t::f16: {
return py::array(py::dtype("float16"), t.get_shape(), t.data<ov::float16>());
break;
}
case ov::element::Type_t::i8: {
return py::array_t<int8_t>(t.get_shape(), t.data<int8_t>());
break;
}
case ov::element::Type_t::i16: {
return py::array_t<int16_t>(t.get_shape(), t.data<int16_t>());
break;
}
case ov::element::Type_t::i32: {
return py::array_t<int32_t>(t.get_shape(), t.data<int32_t>());
break;
}
case ov::element::Type_t::i64: {
return py::array_t<int64_t>(t.get_shape(), t.data<int64_t>());
break;
}
case ov::element::Type_t::u8: {
return py::array_t<uint8_t>(t.get_shape(), t.data<uint8_t>());
break;
}
case ov::element::Type_t::u16: {
return py::array_t<uint16_t>(t.get_shape(), t.data<uint16_t>());
break;
}
case ov::element::Type_t::u32: {
return py::array_t<uint32_t>(t.get_shape(), t.data<uint32_t>());
break;
}
case ov::element::Type_t::u64: {
return py::array_t<uint64_t>(t.get_shape(), t.data<uint64_t>());
break;
}
case ov::element::Type_t::boolean: {
return py::array_t<bool>(t.get_shape(), t.data<bool>());
break;
}
default: {
OPENVINO_THROW("Numpy array cannot be created from given OV Tensor!");
break;
py::array array_from_tensor(ov::Tensor&& t, bool is_shared) {
auto ov_type = t.get_element_type();
auto dtype = Common::ov_type_to_dtype().at(ov_type);

// Return the array as a view:
if (is_shared) {
if (ov_type.bitwidth() < Common::values::min_bitwidth) {
return py::array(dtype, t.get_byte_size(), t.data(), py::cast(t));
}
return py::array(dtype, t.get_shape(), t.get_strides(), t.data(), py::cast(t));
}
// Return the array as a copy:
if (ov_type.bitwidth() < Common::values::min_bitwidth) {
return py::array(dtype, t.get_byte_size(), t.data());
}
return py::array(dtype, t.get_shape(), t.get_strides(), t.data());
}

}; // namespace array_helpers
@@ -342,10 +299,10 @@ uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual) {
}
}

py::dict outputs_to_dict(InferRequestWrapper& request) {
py::dict outputs_to_dict(InferRequestWrapper& request, bool share_outputs) {
py::dict res;
for (const auto& out : request.m_outputs) {
res[py::cast(out)] = array_helpers::array_from_tensor(request.m_request.get_tensor(out));
res[py::cast(out)] = array_helpers::array_from_tensor(request.m_request.get_tensor(out), share_outputs);
}
return res;
}
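
On the binding side, outputs_to_dict now forwards `share_outputs` to `array_from_tensor`, which either copies tensor data into a fresh numpy array or wraps the tensor memory and keeps the Tensor alive through `py::cast(t)` as the array's base object. A rough, illustrative way to observe that difference from Python; the expected flag values assume pybind11's usual copy-without-base / view-with-base behavior and are not part of the change:

    copied = request.infer(data, share_outputs=False)[0]
    shared = request.infer(data, share_outputs=True)[0]

    print(copied.flags["OWNDATA"])  # expected True:  independent copy of the output
    print(shared.flags["OWNDATA"])  # expected False: view backed by an OpenVINO Tensor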
4 changes: 2 additions & 2 deletions src/bindings/python/src/pyopenvino/core/common.hpp
@@ -58,7 +58,7 @@ std::vector<size_t> get_strides(const py::array& array);

py::array as_contiguous(py::array& array, ov::element::Type type);

py::array array_from_tensor(ov::Tensor&& t);
py::array array_from_tensor(ov::Tensor&& t, bool is_shared);

}; // namespace array_helpers

@@ -92,7 +92,7 @@ void set_request_tensors(ov::InferRequest& request, const py::dict& inputs);

uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual);

py::dict outputs_to_dict(InferRequestWrapper& request);
py::dict outputs_to_dict(InferRequestWrapper& request, bool share_outputs);

ov::pass::Serialize::Version convert_to_version(const std::string& version);
