[PyOV] Memory flow control with share_inputs and share_outputs (openvinotoolkit#18275)

* Added ReturnPolicy and updated common array helpers

* Clean up

* Remove ReturnPolicy initial

* Add share_inputs and share_outputs

* Tests and minor fixes

* Fix docstrings

* Fix whitespace

* Fix typing
Jan Iwaszkiewicz committed Aug 16, 2023
1 parent 4e790d7 commit 4173c0d
Showing 6 changed files with 331 additions and 174 deletions.
135 changes: 115 additions & 20 deletions src/bindings/python/src/openvino/runtime/ie_api.py
@@ -4,6 +4,7 @@

from typing import Any, Iterable, Union, Dict, Optional
from pathlib import Path
import warnings

import numpy as np

@@ -22,10 +23,30 @@
)


def _deprecated_memory_arg(shared_memory: bool, share_inputs: bool) -> bool:
if shared_memory is not None:
warnings.warn(
"`shared_memory` is deprecated and will be removed in 2024.0. "
"Value of `shared_memory` is going to override `share_inputs` value. "
"Please use only `share_inputs` explicitly.",
FutureWarning,
stacklevel=3,
)
return shared_memory
return share_inputs


class InferRequest(_InferRequestWrapper):
"""InferRequest class represents infer request which can be run in asynchronous or synchronous manners."""

def infer(self, inputs: Any = None, shared_memory: bool = False) -> OVDict:
def infer(
self,
inputs: Any = None,
share_inputs: bool = False,
share_outputs: bool = False,
*,
shared_memory: Any = None,
) -> OVDict:
"""Infers specified input(s) in synchronous mode.
Blocks all methods of InferRequest while request is running.
@@ -48,7 +69,7 @@ def infer(self, inputs: Any = None, shared_memory: bool = False) -> OVDict:
:param inputs: Data to be set on input tensors.
:type inputs: Any, optional
:param shared_memory: Enables `shared_memory` mode.
:param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs.
If set to `False`, the data dispatcher will safely copy data
to existing Tensors (including up- or down-casting according to data type,
@@ -63,25 +84,49 @@ def infer(self, inputs: Any = None, shared_memory: bool = False) -> OVDict:
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Keeps Tensor inputs "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in the extra memory overhead.
Note: Using `share_inputs` may result in extra memory overhead.
Default value: False
:type share_inputs: bool, optional
:param share_outputs: Enables `share_outputs` mode. Controls memory usage on inference's outputs.
If set to `False` outputs will be safely copied into numpy arrays.
If set to `True` the data will be returned in the form of views of output Tensors.
This mode still returns the data in the format of numpy arrays, but the lifetime of the data
is connected to OpenVINO objects.
Note: Use with extra care, shared data can be modified or lost during runtime!
Default value: False
:type share_outputs: bool, optional
:param shared_memory: Deprecated. Works like `share_inputs` mode.
If not specified, function uses `share_inputs` value.
Note: Will be removed in 2024.0 release!
Note: This is a keyword-only argument.
Default value: None
:type shared_memory: bool, optional
:return: Dictionary of results from output tensors with port/int/str keys.
:rtype: OVDict
"""
return OVDict(super().infer(_data_dispatch(
self,
inputs,
is_shared=shared_memory,
)))
is_shared=_deprecated_memory_arg(shared_memory, share_inputs),
), share_outputs=share_outputs))
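
A minimal usage sketch of the two new flags on InferRequest.infer; the model path, device name, and zero-filled input below are illustrative assumptions, not part of this change:

    import numpy as np
    import openvino.runtime as ov

    core = ov.Core()
    compiled = core.compile_model("model.xml", "CPU")  # hypothetical model file
    request = compiled.create_infer_request()
    data = np.zeros(tuple(compiled.input(0).shape), dtype=np.float32)

    # Pass the input by reference and get outputs back as views of the output Tensors.
    results = request.infer({0: data}, share_inputs=True, share_outputs=True)

    # Shared outputs stay tied to OpenVINO objects, so copy them if they must
    # outlive the request or survive the next inference call.
    first_output = results[0].copy()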

def start_async(
self,
inputs: Any = None,
userdata: Any = None,
shared_memory: bool = False,
share_inputs: bool = False,
*,
shared_memory: Any = None,
) -> None:
"""Starts inference of specified input(s) in asynchronous mode.
@@ -108,7 +153,7 @@ def start_async(
:type inputs: Any, optional
:param userdata: Any data that will be passed inside the callback.
:type userdata: Any
:param shared_memory: Enables `shared_memory` mode.
:param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs.
If set to `False`, the data dispatcher will safely copy data
to existing Tensors (including up- or down-casting according to data type,
@@ -123,17 +168,27 @@ def start_async(
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Keeps Tensor inputs "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Note: Using `share_inputs` may result in extra memory overhead.
Default value: False
:type share_inputs: bool, optional
:param shared_memory: Deprecated. Works like `share_inputs` mode.
If not specified, function uses `share_inputs` value.
Note: Will be removed in 2024.0 release!
Note: This is a keyword-only argument.
Default value: None
:type shared_memory: bool, optional
"""
super().start_async(
_data_dispatch(
self,
inputs,
is_shared=shared_memory,
is_shared=_deprecated_memory_arg(shared_memory, share_inputs),
),
userdata,
)
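
The asynchronous path takes the same `share_inputs` flag, and the deprecated keyword is routed through `_deprecated_memory_arg`, so passing it still works but emits a FutureWarning and overrides `share_inputs`. Continuing the sketch above (purely illustrative calls):

    # Preferred spelling:
    request.start_async(data, userdata=None, share_inputs=True)
    request.wait()

    # Legacy spelling: accepted until the 2024.0 release, but warns and
    # takes precedence over share_inputs.
    request.start_async(data, shared_memory=True)
    request.wait()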
@@ -203,9 +258,14 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray]
# overloaded functions of InferRequest class
return self.create_infer_request().infer(inputs)

def __call__(self,
inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None,
shared_memory: bool = True) -> OVDict:
def __call__(
self,
inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None,
share_inputs: bool = True,
share_outputs: bool = False,
*,
shared_memory: Any = None,
) -> OVDict:
"""Callable infer wrapper for CompiledModel.
Infers specified input(s) in synchronous mode.
@@ -236,7 +296,7 @@ def __call__(self,
:param inputs: Data to be set on input tensors.
:type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional
:param shared_memory: Enables `shared_memory` mode.
:param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs.
If set to `False`, the data dispatcher will safely copy data
to existing Tensors (including up- or down-casting according to data type,
@@ -251,12 +311,33 @@ def __call__(self,
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Keeps Tensor inputs "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Note: Using `share_inputs` may result in extra memory overhead.
Default value: True
:type shared_memory: bool, optional
:type share_inputs: bool, optional
:param share_outputs: Enables `share_outputs` mode. Controls memory usage on inference's outputs.
If set to `False` outputs will be safely copied into numpy arrays.
If set to `True` the data will be returned in the form of views of output Tensors.
This mode still returns the data in the format of numpy arrays, but the lifetime of the data
is connected to OpenVINO objects.
Note: Use with extra care, shared data can be modified or lost during runtime!
Default value: False
:type share_outputs: bool, optional
:param shared_memory: Deprecated. Works like `share_inputs` mode.
If not specified, function uses `share_inputs` value.
Note: Will be removed in 2024.0 release!
Note: This is a keyword-only argument.
Default value: None
:type shared_memory: bool, optional
:return: Dictionary of results from output tensors with port/int/str as keys.
:rtype: OVDict
"""
@@ -265,7 +346,8 @@ def __call__(self,

return self._infer_request.infer(
inputs,
shared_memory=shared_memory,
share_inputs=_deprecated_memory_arg(shared_memory, share_inputs),
share_outputs=share_outputs,
)
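
Note the different defaults on the callable wrapper: `share_inputs` defaults to True and `share_outputs` to False, so a plain call already avoids input copies. A short sketch under the same assumptions as above:

    # Equivalent to compiled(data, share_inputs=True, share_outputs=False):
    results = compiled(data)

    # Opt in to zero-copy outputs as well:
    views = compiled(data, share_outputs=True)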


@@ -299,7 +381,9 @@ def start_async(
self,
inputs: Any = None,
userdata: Any = None,
shared_memory: bool = False,
share_inputs: bool = False,
*,
shared_memory: Any = None,
) -> None:
"""Run asynchronous inference using the next available InferRequest from the pool.
@@ -322,7 +406,7 @@
:type inputs: Any, optional
:param userdata: Any data that will be passed to a callback.
:type userdata: Any, optional
:param shared_memory: Enables `shared_memory` mode.
:param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs.
If set to `False`, the data dispatcher will safely copy data
to existing Tensors (including up- or down-casting according to data type,
@@ -337,16 +421,27 @@
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Keeps Tensor inputs "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Note: Using `share_inputs` may result in extra memory overhead.
Default value: False
:type share_inputs: bool, optional
:param shared_memory: Deprecated. Works like `share_inputs` mode.
If not specified, function uses `share_inputs` value.
Note: Will be removed in 2024.0 release!
Note: This is a keyword-only argument.
Default value: None
:type shared_memory: bool, optional
"""
super().start_async(
_data_dispatch(
self[self.get_idle_request_id()],
inputs,
is_shared=shared_memory,
is_shared=_deprecated_memory_arg(shared_memory, share_inputs),
),
userdata,
)
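
AsyncInferQueue.start_async gains the same `share_inputs` flag (with the same deprecated `shared_memory` fallback); a minimal sketch continuing the example above, where the queue size, callback, and iteration count are illustrative:

    queue = ov.AsyncInferQueue(compiled, 4)

    def on_done(infer_request: ov.InferRequest, userdata: int) -> None:
        # Reading results inside the callback is safe; copy them if they are stored for later.
        print(userdata, infer_request.get_output_tensor(0).data.shape)

    queue.set_callback(on_done)
    for i in range(8):
        queue.start_async(data, userdata=i, share_inputs=True)
    queue.wait_all()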
75 changes: 16 additions & 59 deletions src/bindings/python/src/pyopenvino/core/common.cpp
@@ -131,65 +131,22 @@ py::array as_contiguous(py::array& array, ov::element::Type type) {
}
}

py::array array_from_tensor(ov::Tensor&& t) {
switch (t.get_element_type()) {
case ov::element::Type_t::f32: {
return py::array_t<float>(t.get_shape(), t.data<float>());
break;
}
case ov::element::Type_t::f64: {
return py::array_t<double>(t.get_shape(), t.data<double>());
break;
}
case ov::element::Type_t::bf16: {
return py::array(py::dtype("float16"), t.get_shape(), t.data<ov::bfloat16>());
break;
}
case ov::element::Type_t::f16: {
return py::array(py::dtype("float16"), t.get_shape(), t.data<ov::float16>());
break;
}
case ov::element::Type_t::i8: {
return py::array_t<int8_t>(t.get_shape(), t.data<int8_t>());
break;
}
case ov::element::Type_t::i16: {
return py::array_t<int16_t>(t.get_shape(), t.data<int16_t>());
break;
}
case ov::element::Type_t::i32: {
return py::array_t<int32_t>(t.get_shape(), t.data<int32_t>());
break;
}
case ov::element::Type_t::i64: {
return py::array_t<int64_t>(t.get_shape(), t.data<int64_t>());
break;
}
case ov::element::Type_t::u8: {
return py::array_t<uint8_t>(t.get_shape(), t.data<uint8_t>());
break;
}
case ov::element::Type_t::u16: {
return py::array_t<uint16_t>(t.get_shape(), t.data<uint16_t>());
break;
}
case ov::element::Type_t::u32: {
return py::array_t<uint32_t>(t.get_shape(), t.data<uint32_t>());
break;
}
case ov::element::Type_t::u64: {
return py::array_t<uint64_t>(t.get_shape(), t.data<uint64_t>());
break;
}
case ov::element::Type_t::boolean: {
return py::array_t<bool>(t.get_shape(), t.data<bool>());
break;
}
default: {
OPENVINO_THROW("Numpy array cannot be created from given OV Tensor!");
break;
py::array array_from_tensor(ov::Tensor&& t, bool is_shared) {
auto ov_type = t.get_element_type();
auto dtype = Common::ov_type_to_dtype().at(ov_type);

// Return the array as a view:
if (is_shared) {
if (ov_type.bitwidth() < Common::values::min_bitwidth) {
return py::array(dtype, t.get_byte_size(), t.data(), py::cast(t));
}
return py::array(dtype, t.get_shape(), t.get_strides(), t.data(), py::cast(t));
}
// Return the array as a copy:
if (ov_type.bitwidth() < Common::values::min_bitwidth) {
return py::array(dtype, t.get_byte_size(), t.data());
}
return py::array(dtype, t.get_shape(), t.get_strides(), t.data());
}

}; // namespace array_helpers
@@ -342,10 +299,10 @@ uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual) {
}
}

py::dict outputs_to_dict(InferRequestWrapper& request) {
py::dict outputs_to_dict(InferRequestWrapper& request, bool share_outputs) {
py::dict res;
for (const auto& out : request.m_outputs) {
res[py::cast(out)] = array_helpers::array_from_tensor(request.m_request.get_tensor(out));
res[py::cast(out)] = array_helpers::array_from_tensor(request.m_request.get_tensor(out), share_outputs);
}
return res;
}
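
On the binding side, outputs_to_dict now forwards `share_outputs` to `array_from_tensor`, which either copies tensor data into a fresh numpy array or wraps the tensor memory and keeps the Tensor alive through `py::cast(t)` as the array's base object. A rough, illustrative way to observe that difference from Python; the expected flag values assume pybind11's usual copy-without-base / view-with-base behavior and are not part of the change:

    copied = request.infer(data, share_outputs=False)[0]
    shared = request.infer(data, share_outputs=True)[0]

    print(copied.flags["OWNDATA"])  # expected True:  independent copy of the output
    print(shared.flags["OWNDATA"])  # expected False: view backed by an OpenVINO Tensor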
4 changes: 2 additions & 2 deletions src/bindings/python/src/pyopenvino/core/common.hpp
@@ -58,7 +58,7 @@ std::vector<size_t> get_strides(const py::array& array);

py::array as_contiguous(py::array& array, ov::element::Type type);

py::array array_from_tensor(ov::Tensor&& t);
py::array array_from_tensor(ov::Tensor&& t, bool is_shared);

}; // namespace array_helpers

@@ -92,7 +92,7 @@ void set_request_tensors(ov::InferRequest& request, const py::dict& inputs);

uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual);

py::dict outputs_to_dict(InferRequestWrapper& request);
py::dict outputs_to_dict(InferRequestWrapper& request, bool share_outputs);

ov::pass::Serialize::Version convert_to_version(const std::string& version);
