From 500ac2ab432a659c51f98941f8cec36368c0f9ee Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Mon, 10 Jul 2023 16:41:03 +0400 Subject: [PATCH 01/21] Remove vector of so objects from Tensor, Any, VariableState and (#18430) RemoteContext --- src/core/include/openvino/core/any.hpp | 4 ++-- src/core/include/openvino/runtime/tensor.hpp | 6 +++--- src/core/src/any.cpp | 2 +- src/core/src/runtime/ov_tensor.cpp | 7 +++---- src/inference/include/openvino/runtime/remote_context.hpp | 6 +++--- src/inference/include/openvino/runtime/variable_state.hpp | 4 ++-- src/inference/src/cpp/ie_remote_context.cpp | 3 +-- src/inference/src/cpp/ie_variable_state.cpp | 3 +-- src/inference/src/dev/converter_utils.cpp | 1 - src/inference/src/infer_request.cpp | 4 ++-- 10 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp index d312d347ccb7e2..f27e223bf7f630 100644 --- a/src/core/include/openvino/core/any.hpp +++ b/src/core/include/openvino/core/any.hpp @@ -377,7 +377,7 @@ class RemoteTensor; * @brief This class represents an object to work with different types */ class OPENVINO_API Any { - std::vector> _so; + std::shared_ptr _so; template using decay_t = typename std::decay::type; @@ -663,7 +663,7 @@ class OPENVINO_API Any { friend class ::ov::RemoteTensor; friend class ::ov::Plugin; - Any(const Any& other, const std::vector>& so); + Any(const Any& other, const std::shared_ptr& so); void impl_check() const; diff --git a/src/core/include/openvino/runtime/tensor.hpp b/src/core/include/openvino/runtime/tensor.hpp index 4dee04d70f0dd9..3b8ba26928247b 100644 --- a/src/core/include/openvino/runtime/tensor.hpp +++ b/src/core/include/openvino/runtime/tensor.hpp @@ -52,8 +52,8 @@ class VariableValue; */ class OPENVINO_API Tensor { protected: - std::shared_ptr _impl; //!< Shared pointer to internal tensor representation - std::vector> _so; //!< Reference to dynamically loaded library + std::shared_ptr _impl; 
//!< Shared pointer to internal tensor representation + std::shared_ptr _so; //!< Reference to dynamically loaded library /** * @brief Constructs Tensor from the initialized std::shared_ptr @@ -61,7 +61,7 @@ class OPENVINO_API Tensor { * @param so Plugin to use. This is required to ensure that Tensor can work properly even if plugin object is * destroyed. */ - Tensor(const std::shared_ptr& impl, const std::vector>& so); + Tensor(const std::shared_ptr& impl, const std::shared_ptr& so); friend class ov::Core; friend class ov::CoreImpl; diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 38772c74d5966b..7303ebddc8fcd2 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -86,7 +86,7 @@ Any& Any::operator=(const Any& other) { return *this; }; -Any::Any(const Any& other, const std::vector>& so) : _so{so}, _impl{other._impl} {} +Any::Any(const Any& other, const std::shared_ptr& so) : _so{so}, _impl{other._impl} {} Any::Any(const char* str) : Any(std::string{str}) {} diff --git a/src/core/src/runtime/ov_tensor.cpp b/src/core/src/runtime/ov_tensor.cpp index 8b64c0322b77a1..e686338ca2f490 100644 --- a/src/core/src/runtime/ov_tensor.cpp +++ b/src/core/src/runtime/ov_tensor.cpp @@ -34,12 +34,11 @@ Tensor::~Tensor() { Tensor::Tensor(const Tensor& tensor, const std::shared_ptr& so) : _impl{tensor._impl}, _so{tensor._so} { OPENVINO_ASSERT(_impl != nullptr, "Tensor was not initialized."); - _so.emplace_back(so); + if (!_so) + _so = so; } -Tensor::Tensor(const std::shared_ptr& impl, const std::vector>& so) - : _impl{impl}, - _so{so} { +Tensor::Tensor(const std::shared_ptr& impl, const std::shared_ptr& so) : _impl{impl}, _so{so} { OPENVINO_ASSERT(_impl != nullptr, "Tensor was not initialized."); } diff --git a/src/inference/include/openvino/runtime/remote_context.hpp b/src/inference/include/openvino/runtime/remote_context.hpp index f722a7086101b6..7dc5cc5e7ce1b8 100644 --- a/src/inference/include/openvino/runtime/remote_context.hpp +++ 
b/src/inference/include/openvino/runtime/remote_context.hpp @@ -50,8 +50,8 @@ class RemoteContext; */ class OPENVINO_RUNTIME_API RemoteContext { protected: - std::shared_ptr _impl; //!< Pointer to the remote context implementation. - std::vector> _so; //!< Reference to the shared object that loaded implementation. + std::shared_ptr _impl; //!< Pointer to the remote context implementation. + std::shared_ptr _so; //!< Reference to the shared object that loaded implementation. /** * @brief Constructs RemoteContext from the initialized std::shared_ptr. @@ -59,7 +59,7 @@ class OPENVINO_RUNTIME_API RemoteContext { * @param so Plugin to use. This is required to ensure that RemoteContext can work properly even if a plugin * object is destroyed. */ - RemoteContext(const std::shared_ptr& impl, const std::vector>& so); + RemoteContext(const std::shared_ptr& impl, const std::shared_ptr& so); friend class InferenceEngine::Core; friend class InferenceEngine::IPluginWrapper; friend class InferenceEngine::ICompiledModelWrapper; diff --git a/src/inference/include/openvino/runtime/variable_state.hpp b/src/inference/include/openvino/runtime/variable_state.hpp index 29c1baa6f39d2f..711544b6efa0da 100644 --- a/src/inference/include/openvino/runtime/variable_state.hpp +++ b/src/inference/include/openvino/runtime/variable_state.hpp @@ -31,7 +31,7 @@ class IInferRequestInternalWrapper; */ class OPENVINO_RUNTIME_API VariableState { std::shared_ptr _impl; - std::vector> _so; + std::shared_ptr _so; /** * @brief Constructs VariableState from the initialized std::shared_ptr. @@ -39,7 +39,7 @@ class OPENVINO_RUNTIME_API VariableState { * @param so Optional: plugin to use. This is required to ensure that VariableState can work properly even if a * plugin object is destroyed. 
*/ - VariableState(const std::shared_ptr& impl, const std::vector>& so); + VariableState(const std::shared_ptr& impl, const std::shared_ptr& so); friend class ov::InferRequest; friend class ov::IInferRequestInternalWrapper; diff --git a/src/inference/src/cpp/ie_remote_context.cpp b/src/inference/src/cpp/ie_remote_context.cpp index 02ed3406b5680f..2c97a66bb144fb 100644 --- a/src/inference/src/cpp/ie_remote_context.cpp +++ b/src/inference/src/cpp/ie_remote_context.cpp @@ -58,8 +58,7 @@ RemoteContext::~RemoteContext() { _impl = {}; } -RemoteContext::RemoteContext(const std::shared_ptr& impl, - const std::vector>& so) +RemoteContext::RemoteContext(const std::shared_ptr& impl, const std::shared_ptr& so) : _impl{impl}, _so{so} { OPENVINO_ASSERT(_impl != nullptr, "RemoteContext was not initialized."); diff --git a/src/inference/src/cpp/ie_variable_state.cpp b/src/inference/src/cpp/ie_variable_state.cpp index 6d40e2954f6906..4801fb04352163 100644 --- a/src/inference/src/cpp/ie_variable_state.cpp +++ b/src/inference/src/cpp/ie_variable_state.cpp @@ -66,8 +66,7 @@ VariableState::~VariableState() { _impl = {}; } -VariableState::VariableState(const std::shared_ptr& impl, - const std::vector>& so) +VariableState::VariableState(const std::shared_ptr& impl, const std::shared_ptr& so) : _impl{impl}, _so{so} { OPENVINO_ASSERT(_impl != nullptr, "VariableState was not initialized."); diff --git a/src/inference/src/dev/converter_utils.cpp b/src/inference/src/dev/converter_utils.cpp index dcbe4b12b415f2..469a98cd8ee747 100644 --- a/src/inference/src/dev/converter_utils.cpp +++ b/src/inference/src/dev/converter_utils.cpp @@ -745,7 +745,6 @@ class IAsyncInferRequestWrapper : public ov::IAsyncInferRequest { std::vector> query_state() const override { std::vector> variable_states; - std::vector> soVec; for (auto&& state : m_request->QueryState()) { variable_states.emplace_back(std::make_shared(state)); } diff --git a/src/inference/src/infer_request.cpp b/src/inference/src/infer_request.cpp 
index e4668cdd4dc47c..e025f2b70ee74e 100644 --- a/src/inference/src/infer_request.cpp +++ b/src/inference/src/infer_request.cpp @@ -160,7 +160,6 @@ void InferRequest::set_output_tensor(const Tensor& tensor) { } Tensor InferRequest::get_tensor(const ov::Output& port) { - std::vector> soVec; OV_INFER_REQ_CALL_STATEMENT({ OPENVINO_ASSERT(_impl->get_tensors(port).empty(), "get_tensor shall not be used together with batched " @@ -168,7 +167,8 @@ Tensor InferRequest::get_tensor(const ov::Output& port) { port, "'"); auto tensor = _impl->get_tensor(port); - tensor._so.emplace_back(_so); + if (!tensor._so) + tensor._so = _so; return tensor; }); From d6c0289008978ef5efd34c1709a4506646a73321 Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Mon, 10 Jul 2023 14:48:24 +0200 Subject: [PATCH 02/21] Move common test utils (#18339) * Move common test utils * Apply comments * Comments 2 --- .ci/azure/linux.yml | 2 +- .ci/azure/windows.yml | 2 +- src/bindings/c/tests/CMakeLists.txt | 4 +-- .../src/pyopenvino/test_utils/CMakeLists.txt | 2 +- .../tests/CMakeLists.txt | 2 +- src/common/snippets/tests/CMakeLists.txt | 4 +-- src/core/tests/CMakeLists.txt | 2 +- src/frontends/ir/tests/CMakeLists.txt | 2 +- .../tests/frontend/shared/CMakeLists.txt | 2 +- src/inference/tests/functional/CMakeLists.txt | 1 + .../tests/functional/caching_test.cpp | 1 - .../tests/unit/nodes/reorder_node_test.cpp | 2 +- .../tests/deprecated/helpers/CMakeLists.txt | 4 +-- .../intel_gna/tests/unit/CMakeLists.txt | 2 +- src/tests/CMakeLists.txt | 1 + src/tests/engines_util/CMakeLists.txt | 2 +- .../include/matchers/base_matcher.hpp | 2 +- .../subgraphs_dumper/include/ops_cache.hpp | 2 +- .../tests/matchers/convolutions_matcher.cpp | 2 +- .../tests/matchers/generic_single_op.cpp | 2 +- src/tests/ie_test_utils/CMakeLists.txt | 1 - .../xml_net_builder/xml_filler.hpp | 27 ------------------- .../functional_test_utils/CMakeLists.txt | 2 +- .../unit_test_utils/CMakeLists.txt | 5 +++- 
.../impl/mock_inference_plugin_internal.hpp | 24 +++-------------- .../interface/mock_iinference_plugin.hpp | 4 +-- .../snippets_ngraph_functions/CMakeLists.txt | 4 +-- src/tests/test_utils/CMakeLists.txt | 7 +++++ .../common_test_utils/CMakeLists.txt | 13 +++++++-- .../common_test_utils/common_utils.hpp | 0 .../include}/common_test_utils/data_utils.hpp | 0 .../include}/common_test_utils/file_utils.hpp | 8 +++--- .../common_test_utils/graph_comparator.hpp | 0 .../common_test_utils/ngraph_test_utils.hpp | 4 +-- .../common_test_utils/ov_tensor_utils.hpp | 0 .../include}/common_test_utils/precomp.hpp | 0 .../common_test_utils/test_assertions.hpp | 0 .../common_test_utils/test_common.hpp | 2 +- .../common_test_utils/test_constants.hpp | 0 .../common_test_utils/unicode_utils.hpp | 4 +-- .../include}/common_test_utils/w_dirent.h | 0 .../common_test_utils/src}/common_utils.cpp | 0 .../common_test_utils/src}/data_utils.cpp | 0 .../common_test_utils/src}/file_utils.cpp | 0 .../src}/graph_comparator.cpp | 0 .../src}/ngraph_test_utils.cpp | 2 +- .../src}/ov_tensor_utils.cpp | 0 .../common_test_utils/src}/test_common.cpp | 6 ++--- .../common_test_utils/src}/test_constants.cpp | 0 .../common_test_utils/src}/unicode_utils.cpp | 2 +- .../common_test_utils/tests/CMakeLists.txt | 5 ++-- .../tests/graph_comparator_tests.cpp | 0 52 files changed, 68 insertions(+), 95 deletions(-) delete mode 100644 src/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_filler.hpp create mode 100644 src/tests/test_utils/CMakeLists.txt rename src/tests/{ie_test_utils => test_utils}/common_test_utils/CMakeLists.txt (83%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/common_utils.hpp (100%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/data_utils.hpp (100%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/file_utils.hpp (97%) rename src/tests/{ie_test_utils => 
test_utils/common_test_utils/include}/common_test_utils/graph_comparator.hpp (100%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/ngraph_test_utils.hpp (96%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/ov_tensor_utils.hpp (100%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/precomp.hpp (100%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/test_assertions.hpp (100%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/test_common.hpp (90%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/test_constants.hpp (100%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/unicode_utils.hpp (98%) rename src/tests/{ie_test_utils => test_utils/common_test_utils/include}/common_test_utils/w_dirent.h (100%) rename src/tests/{ie_test_utils/common_test_utils => test_utils/common_test_utils/src}/common_utils.cpp (100%) rename src/tests/{ie_test_utils/common_test_utils => test_utils/common_test_utils/src}/data_utils.cpp (100%) rename src/tests/{ie_test_utils/common_test_utils => test_utils/common_test_utils/src}/file_utils.cpp (100%) rename src/tests/{ie_test_utils/common_test_utils => test_utils/common_test_utils/src}/graph_comparator.cpp (100%) rename src/tests/{ie_test_utils/common_test_utils => test_utils/common_test_utils/src}/ngraph_test_utils.cpp (98%) rename src/tests/{ie_test_utils/common_test_utils => test_utils/common_test_utils/src}/ov_tensor_utils.cpp (100%) rename src/tests/{ie_test_utils/common_test_utils => test_utils/common_test_utils/src}/test_common.cpp (94%) rename src/tests/{ie_test_utils/common_test_utils => test_utils/common_test_utils/src}/test_constants.cpp (100%) rename src/tests/{ie_test_utils/common_test_utils => 
test_utils/common_test_utils/src}/unicode_utils.cpp (92%) rename src/tests/{ie_test_utils => test_utils}/common_test_utils/tests/CMakeLists.txt (74%) rename src/tests/{ie_test_utils => test_utils}/common_test_utils/tests/graph_comparator_tests.cpp (100%) diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml index b7143c1628ffee..e5f2dd178737cc 100644 --- a/.ci/azure/linux.yml +++ b/.ci/azure/linux.yml @@ -398,7 +398,7 @@ jobs: - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_legacy_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-LegacyTransformations.xml displayName: 'Legacy Transformations Tests' - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/commonUtilsTests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-CommonUtilTests.xml + - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_util_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-CommonUtilTests.xml displayName: 'Common Utils Tests' - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceEngineUnitTests.xml diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml index 5396fcc915ad0e..a4f1c8f075020d 100644 --- a/.ci/azure/windows.yml +++ b/.ci/azure/windows.yml @@ -297,7 +297,7 @@ jobs: - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_legacy_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)\LegacyTransformations.xml displayName: 'Legacy Transformations Tests' - - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\commonUtilsTests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)\CommonUtilTests.xml + - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_util_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)\CommonUtilTests.xml displayName: 'Common Utils Tests' - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\InferenceEngineUnitTests 
--gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-InferenceEngineUnitTests.xml diff --git a/src/bindings/c/tests/CMakeLists.txt b/src/bindings/c/tests/CMakeLists.txt index 6ce03a7a840045..2f18cd010cfe4a 100644 --- a/src/bindings/c/tests/CMakeLists.txt +++ b/src/bindings/c/tests/CMakeLists.txt @@ -7,7 +7,7 @@ set(TARGET_NAME "InferenceEngineCAPITests") add_executable(${TARGET_NAME} ie_c_api_test.cpp test_model_repo.hpp test_model_repo.cpp) -target_link_libraries(${TARGET_NAME} PRIVATE openvino_c commonTestUtils gtest_main) +target_link_libraries(${TARGET_NAME} PRIVATE openvino_c common_test_utils gtest_main) if(ENABLE_AUTO OR ENABLE_MULTI) add_dependencies(${TARGET_NAME} openvino_auto_plugin) @@ -34,7 +34,7 @@ endif() add_executable(${TARGET_NAME} ${SOURCES} ${HEADERS}) target_link_libraries(${TARGET_NAME} PRIVATE openvino_c openvino::util - commonTestUtils gtest_main) + common_test_utils gtest_main) target_include_directories(${TARGET_NAME} PUBLIC $) diff --git a/src/bindings/python/src/pyopenvino/test_utils/CMakeLists.txt b/src/bindings/python/src/pyopenvino/test_utils/CMakeLists.txt index 2f23a1a9434857..accf683d23afee 100644 --- a/src/bindings/python/src/pyopenvino/test_utils/CMakeLists.txt +++ b/src/bindings/python/src/pyopenvino/test_utils/CMakeLists.txt @@ -25,7 +25,7 @@ endif() if(OpenVINODeveloperPackage_FOUND) list(APPEND link_libraries openvino::commonTestUtils) else() - list(APPEND link_libraries commonTestUtils) + list(APPEND link_libraries common_test_utils) endif() target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/src/common/low_precision_transformations/tests/CMakeLists.txt b/src/common/low_precision_transformations/tests/CMakeLists.txt index d6caedae2566c2..6a3d094d4360f4 100644 --- a/src/common/low_precision_transformations/tests/CMakeLists.txt +++ b/src/common/low_precision_transformations/tests/CMakeLists.txt @@ -12,7 +12,7 @@ ov_add_test_target( gtest gtest_main openvino::runtime::dev - commonTestUtils + 
common_test_utils lptNgraphFunctions gmock INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/src/common/snippets/tests/CMakeLists.txt b/src/common/snippets/tests/CMakeLists.txt index 4bf75dbf74d7b6..f34bf4a3eb9829 100644 --- a/src/common/snippets/tests/CMakeLists.txt +++ b/src/common/snippets/tests/CMakeLists.txt @@ -12,7 +12,7 @@ addIeTargetTest( $ LINK_LIBRARIES openvino::runtime::dev - commonTestUtils + common_test_utils snippetsNgraphFunctions ADD_CPPLINT LABELS @@ -30,4 +30,4 @@ ie_faster_build(${TARGET_NAME} add_library(snippets_test_utils STATIC ${CMAKE_CURRENT_SOURCE_DIR}/include/lowering_utils.hpp ${CMAKE_CURRENT_SOURCE_DIR}/src/lowering_utils.cpp) target_include_directories(snippets_test_utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) -target_link_libraries(snippets_test_utils PRIVATE commonTestUtils snippetsNgraphFunctions) +target_link_libraries(snippets_test_utils PRIVATE common_test_utils snippetsNgraphFunctions) diff --git a/src/core/tests/CMakeLists.txt b/src/core/tests/CMakeLists.txt index 829f1822a21488..2ddb021e9daa72 100644 --- a/src/core/tests/CMakeLists.txt +++ b/src/core/tests/CMakeLists.txt @@ -39,7 +39,7 @@ ov_add_test_target( test_model_zoo LINK_LIBRARIES engines_test_util - commonTestUtils + common_test_utils ngraph_reference ngraph::builder openvino::util diff --git a/src/frontends/ir/tests/CMakeLists.txt b/src/frontends/ir/tests/CMakeLists.txt index ac3e724f555ba5..3962d4c96c0546 100644 --- a/src/frontends/ir/tests/CMakeLists.txt +++ b/src/frontends/ir/tests/CMakeLists.txt @@ -13,7 +13,7 @@ ov_add_test_target( gtest gtest_main openvino::runtime::dev - commonTestUtils + common_test_utils INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/../include" ADD_CLANG_FORMAT diff --git a/src/frontends/tests/frontend/shared/CMakeLists.txt b/src/frontends/tests/frontend/shared/CMakeLists.txt index bd0246006fc300..51d20862827f95 100644 --- a/src/frontends/tests/frontend/shared/CMakeLists.txt +++ b/src/frontends/tests/frontend/shared/CMakeLists.txt @@ -18,7 
+18,7 @@ target_include_directories(${TARGET_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../ target_link_libraries(${TARGET_NAME} PUBLIC engines_test_util offline_transformations - commonTestUtils ngraph_test_util funcTestUtils openvino::util + common_test_utils ngraph_test_util funcTestUtils openvino::util openvino::runtime PRIVATE cnpy) diff --git a/src/inference/tests/functional/CMakeLists.txt b/src/inference/tests/functional/CMakeLists.txt index 26618a66eec3d3..049e6e24d9b96c 100644 --- a/src/inference/tests/functional/CMakeLists.txt +++ b/src/inference/tests/functional/CMakeLists.txt @@ -44,6 +44,7 @@ ov_add_test_target( LINK_LIBRARIES gmock funcTestUtils + common_test_utils INCLUDES $/src ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/src/inference/tests/functional/caching_test.cpp b/src/inference/tests/functional/caching_test.cpp index 531c115a32fb91..bc21ad326ddcc0 100644 --- a/src/inference/tests/functional/caching_test.cpp +++ b/src/inference/tests/functional/caching_test.cpp @@ -28,7 +28,6 @@ #include "openvino/op/logical_not.hpp" #include "openvino/util/file_util.hpp" #include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" -#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp" #include "unit_test_utils/mocks/mock_iexecutable_network.hpp" #include "unit_test_utils/mocks/mock_iinfer_request.hpp" diff --git a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp index acb937acc173f4..63ad2abf574f0e 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp @@ -15,7 +15,7 @@ #include #include -#include "../../../ie_test_utils/common_test_utils/common_utils.hpp" +#include "common_test_utils/common_utils.hpp" #include "cache/multi_cache.h" #include "nodes/input.h" diff --git a/src/plugins/intel_gna/tests/deprecated/helpers/CMakeLists.txt 
b/src/plugins/intel_gna/tests/deprecated/helpers/CMakeLists.txt index 1cc11ac2a02cb9..7b55f773b71f7b 100644 --- a/src/plugins/intel_gna/tests/deprecated/helpers/CMakeLists.txt +++ b/src/plugins/intel_gna/tests/deprecated/helpers/CMakeLists.txt @@ -30,12 +30,12 @@ endfunction() add_helpers(${TARGET_NAME}) -target_link_libraries(${TARGET_NAME} PUBLIC commonTestUtils +target_link_libraries(${TARGET_NAME} PUBLIC common_test_utils PRIVATE inference_engine_legacy) add_helpers(${TARGET_NAME}_s USE_STATIC_IE) -target_link_libraries(${TARGET_NAME}_s PUBLIC commonTestUtils_s +target_link_libraries(${TARGET_NAME}_s PUBLIC common_test_utils_s PRIVATE inference_engine_legacy_s) if (ENABLE_DATA) diff --git a/src/plugins/intel_gna/tests/unit/CMakeLists.txt b/src/plugins/intel_gna/tests/unit/CMakeLists.txt index 1d95ce2928a3c0..912d28d728b41d 100644 --- a/src/plugins/intel_gna/tests/unit/CMakeLists.txt +++ b/src/plugins/intel_gna/tests/unit/CMakeLists.txt @@ -25,7 +25,7 @@ addIeTargetTest( PRIVATE ngraphFunctions gmock - commonTestUtils_s + common_test_utils_s openvino_intel_gna_plugin_test_static inference_engine_legacy_s ADD_CLANG_FORMAT diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index c88c3b33acf157..81dc9eb2703f9f 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -28,6 +28,7 @@ ov_deprecated_no_errors() add_subdirectory(ngraph_helpers) add_subdirectory(ie_test_utils) +add_subdirectory(test_utils) add_subdirectory(util) add_subdirectory(engines_util) diff --git a/src/tests/engines_util/CMakeLists.txt b/src/tests/engines_util/CMakeLists.txt index 14643bcc68a181..8f3a28abf55bd0 100644 --- a/src/tests/engines_util/CMakeLists.txt +++ b/src/tests/engines_util/CMakeLists.txt @@ -15,7 +15,7 @@ target_include_directories(engines_test_util "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/.." 
PRIVATE - $) + $) add_clang_format_target(engines_test_util_clang FOR_SOURCES ${ENGINES_UTIL_SRC}) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/base_matcher.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/base_matcher.hpp index cebf0f30379362..f29a8984dca2dc 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/base_matcher.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/base_matcher.hpp @@ -9,7 +9,7 @@ #include "ngraph/node.hpp" #include "pugixml.hpp" -#include "functional_test_utils/include/functional_test_utils/summary/op_info.hpp" +#include "functional_test_utils/summary/op_info.hpp" namespace SubgraphsDumper { diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/ops_cache.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/ops_cache.hpp index 41a91ea5ceeccf..097866e52b9429 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/ops_cache.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/ops_cache.hpp @@ -10,7 +10,7 @@ #include #include #include "matchers/matchers_manager.hpp" -#include "functional_test_utils/include/functional_test_utils/summary/op_info.hpp" +#include "functional_test_utils/summary/op_info.hpp" #include "utils/model_wrap_struct.hpp" diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/convolutions_matcher.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/convolutions_matcher.cpp index 3e571a11b4b7fa..9643b38cdfdcef 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/convolutions_matcher.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/convolutions_matcher.cpp @@ -5,7 +5,7 @@ #include "gtest/gtest.h" #include "matchers/convolutions.hpp" #include "ngraph/ops.hpp" -#include 
"functional_test_utils/include/functional_test_utils/summary/op_info.hpp" +#include "functional_test_utils/summary/op_info.hpp" class ConvolutionMatcherTest : public ::testing::Test { protected: diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/generic_single_op.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/generic_single_op.cpp index 0fb9edda85e6cb..c70bcde5b1f38a 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/generic_single_op.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/generic_single_op.cpp @@ -5,7 +5,7 @@ #include "gtest/gtest.h" #include "matchers/single_op.hpp" #include "ngraph/ops.hpp" -#include "functional_test_utils/include/functional_test_utils/summary/op_info.hpp" +#include "functional_test_utils/summary/op_info.hpp" class SingleOpMatcherTest : public ::testing::Test { protected: diff --git a/src/tests/ie_test_utils/CMakeLists.txt b/src/tests/ie_test_utils/CMakeLists.txt index feba8b83fbf241..4ae964a804dcf1 100644 --- a/src/tests/ie_test_utils/CMakeLists.txt +++ b/src/tests/ie_test_utils/CMakeLists.txt @@ -3,7 +3,6 @@ # if(ENABLE_TESTS) - add_subdirectory(common_test_utils) add_subdirectory(unit_test_utils) endif() diff --git a/src/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_filler.hpp b/src/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_filler.hpp deleted file mode 100644 index b5df4fcb75a218..00000000000000 --- a/src/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_filler.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "common_test_utils/file_utils.hpp" - -#if defined(__GNUC__) && (__GNUC__ <= 4) && (__GNUC_MINOR__ < 9) && !defined(__clang__) -# define IE_GCC_4_8 -#endif - -#ifndef IE_GCC_4_8 -# include -# define REPLACE_WITH_STR(SRC, PATTERN, STR) SRC = 
std::regex_replace(SRC, std::regex(PATTERN), STR) -# define FIND_STR(SRC, PATTERN) std::regex_search(SRC, std::regex(PATTERN)) -#elif defined USE_BOOST_RE -# include -# define REPLACE_WITH_STR(SRC, PATTERN, STR) SRC = boost::regex_replace(SRC, boost::regex(PATTERN), STR) -# define FIND_STR(SRC, PATTERN) boost::regex_search(SRC, boost::regex(PATTERN)) -#else -# error "Cannot implement regex" -# define REPLACE_WITH_STR(SRC, PATTERN, STR) -# define FIND_STR(SRC, PATTERN) -#endif - -#define REPLACE_WITH_NUM(SRC, PATTERN, NUM) REPLACE_WITH_STR(SRC, PATTERN, CommonTestUtils::to_string_c_locale(NUM)) diff --git a/src/tests/ie_test_utils/functional_test_utils/CMakeLists.txt b/src/tests/ie_test_utils/functional_test_utils/CMakeLists.txt index 397ab8ba5912f4..dfda7ea257c063 100644 --- a/src/tests/ie_test_utils/functional_test_utils/CMakeLists.txt +++ b/src/tests/ie_test_utils/functional_test_utils/CMakeLists.txt @@ -18,7 +18,7 @@ addIeTarget( ${CMAKE_CURRENT_SOURCE_DIR}/src LINK_LIBRARIES PUBLIC - commonTestUtils + common_test_utils openvino::runtime openvino::runtime::dev PRIVATE diff --git a/src/tests/ie_test_utils/unit_test_utils/CMakeLists.txt b/src/tests/ie_test_utils/unit_test_utils/CMakeLists.txt index c99a67859c0897..c0ed18717b62ee 100644 --- a/src/tests/ie_test_utils/unit_test_utils/CMakeLists.txt +++ b/src/tests/ie_test_utils/unit_test_utils/CMakeLists.txt @@ -19,7 +19,10 @@ addIeTarget( tests LINK_LIBRARIES PUBLIC - commonTestUtils_s + common_test_utils_s inference_engine_s gmock + INCLUDES + PUBLIC + "${CMAKE_CURRENT_SOURCE_DIR}/.." 
) diff --git a/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp b/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp index ddab965d77b168..50385fec1834c5 100644 --- a/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp +++ b/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp @@ -13,23 +13,13 @@ #include #include #include - -class MockInferencePluginInternal2 : public InferenceEngine::IInferencePlugin { -public: - MOCK_METHOD2(LoadExeNetworkImpl, std::shared_ptr( - const InferenceEngine::CNNNetwork &, const std::map &)); - MOCK_METHOD2(LoadNetwork, std::shared_ptr( - const InferenceEngine::CNNNetwork &, - const std::map &)); - MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr ext_ptr)); - MOCK_METHOD1(SetConfig, void(const std::map &)); -}; +#include "openvino/runtime/iplugin.hpp" class MockInferencePluginInternal : public InferenceEngine::IInferencePlugin { public: MOCK_METHOD2(LoadExeNetworkImpl, std::shared_ptr( const InferenceEngine::CNNNetwork &, const std::map &)); - MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr ext_ptr)); + MOCK_METHOD1(AddExtension, void(const std::shared_ptr&)); MOCK_METHOD1(SetConfig, void(const std::map &)); std::shared_ptr @@ -38,12 +28,4 @@ class MockInferencePluginInternal : public InferenceEngine::IInferencePlugin { } std::string importedString; -}; - -class MockInferencePluginInternal3 : public InferenceEngine::IInferencePlugin { -public: - MOCK_METHOD2(LoadExeNetworkImpl, std::shared_ptr( - const InferenceEngine::CNNNetwork &, const std::map &)); - MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr ext_ptr)); - MOCK_METHOD1(SetConfig, void(const std::map &)); -}; +}; \ No newline at end of file diff --git 
a/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp b/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp index 32a76216c82f8e..55f95b435477ca 100644 --- a/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp +++ b/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp @@ -13,7 +13,7 @@ class MockIInferencePlugin : public InferenceEngine::IInferencePlugin { public: - MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr)); + MOCK_METHOD1(AddExtension, void(const std::shared_ptr&)); MOCK_METHOD2(LoadNetwork, std::shared_ptr( const InferenceEngine::CNNNetwork&, const std::map&)); MOCK_METHOD2(LoadNetwork, ov::SoPtr( @@ -36,7 +36,7 @@ class MockIInferencePlugin : public InferenceEngine::IInferencePlugin { MOCK_METHOD1(GetDefaultContext, std::shared_ptr(const InferenceEngine::ParamMap&)); MOCK_METHOD3(LoadNetwork, std::shared_ptr( const InferenceEngine::CNNNetwork&, const std::map&, - std::shared_ptr)); + const std::shared_ptr&)); MOCK_METHOD2(ImportNetwork, std::shared_ptr( std::istream&, const std::map&)); MOCK_METHOD3(ImportNetwork, std::shared_ptr( diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/CMakeLists.txt b/src/tests/ngraph_helpers/snippets_ngraph_functions/CMakeLists.txt index 26c5904487ad55..bbf4607296abc6 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/CMakeLists.txt +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/CMakeLists.txt @@ -6,7 +6,7 @@ set(TARGET_NAME snippetsNgraphFunctions) set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") set(SNIPPETS_INCLUDES "$/include") -set(COMMON_TEST_UTILS_INCLUDES "$") +set(COMMON_TEST_UTILS_INCLUDES "$") addIeTarget( NAME ${TARGET_NAME} TYPE STATIC @@ -23,7 +23,7 @@ addIeTarget( LINK_LIBRARIES PUBLIC openvino::runtime::dev - commonTestUtils + common_test_utils 
inference_engine_snippets lptNgraphFunctions ADD_CPPLINT diff --git a/src/tests/test_utils/CMakeLists.txt b/src/tests/test_utils/CMakeLists.txt new file mode 100644 index 00000000000000..eedb45ccb62ee3 --- /dev/null +++ b/src/tests/test_utils/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +if(ENABLE_TESTS) + add_subdirectory(common_test_utils) +endif() diff --git a/src/tests/ie_test_utils/common_test_utils/CMakeLists.txt b/src/tests/test_utils/common_test_utils/CMakeLists.txt similarity index 83% rename from src/tests/ie_test_utils/common_test_utils/CMakeLists.txt rename to src/tests/test_utils/common_test_utils/CMakeLists.txt index 8686aaa72ec9cb..89b2ddaac46728 100644 --- a/src/tests/ie_test_utils/common_test_utils/CMakeLists.txt +++ b/src/tests/test_utils/common_test_utils/CMakeLists.txt @@ -9,7 +9,6 @@ function(add_common_utils ADD_TARGET_NAME) TYPE STATIC ROOT ${CMAKE_CURRENT_SOURCE_DIR} EXCLUDED_SOURCE_PATHS - ${CMAKE_CURRENT_SOURCE_DIR}/gtest ${CMAKE_CURRENT_SOURCE_DIR}/tests ADD_CPPLINT DEVELOPER_PACKAGE @@ -24,6 +23,10 @@ function(add_common_utils ADD_TARGET_NAME) openvino::runtime::dev PRIVATE openvino::util + INCLUDES + PUBLIC + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) # USE_STATIC_IE is passed @@ -33,7 +36,7 @@ function(add_common_utils ADD_TARGET_NAME) ie_faster_build(${ADD_TARGET_NAME} UNITY - PCH PRIVATE "precomp.hpp" + PCH PRIVATE "include/common_test_utils/precomp.hpp" ) # detecting regex support @@ -62,9 +65,15 @@ function(add_common_utils ADD_TARGET_NAME) target_compile_definitions(${ADD_TARGET_NAME} PUBLIC ${ARGN}) endfunction() +# Keep old name so that library can be used from VPU repo set(TARGET_NAME commonTestUtils) +set(NEW_TARGET_NAME common_test_utils) add_common_utils(${TARGET_NAME}) add_common_utils(${TARGET_NAME}_s USE_STATIC_IE) +# Add new names to use them from openvino repo +add_library(${NEW_TARGET_NAME} ALIAS ${TARGET_NAME}) +add_library(${NEW_TARGET_NAME}_s 
ALIAS ${TARGET_NAME}_s) + add_subdirectory(tests) diff --git a/src/tests/ie_test_utils/common_test_utils/common_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/common_utils.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp diff --git a/src/tests/ie_test_utils/common_test_utils/data_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/data_utils.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp diff --git a/src/tests/ie_test_utils/common_test_utils/file_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp similarity index 97% rename from src/tests/ie_test_utils/common_test_utils/file_utils.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp index 8149a31bf3189b..92f55e77c325f7 100644 --- a/src/tests/ie_test_utils/common_test_utils/file_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp @@ -10,9 +10,9 @@ #include #include -#include "test_constants.hpp" -#include "w_dirent.h" -#include "common_utils.hpp" +#include "common_test_utils/test_constants.hpp" +#include "common_test_utils/w_dirent.h" +#include "common_test_utils/common_utils.hpp" #ifdef _WIN32 #include @@ -247,7 +247,7 @@ inline std::vector splitStringByDelimiter(std::string paths, const return splitPath; } -inline std::string getModelFromTestModelZoo(const std::string& relModelPath); +std::string getModelFromTestModelZoo(const std::string& relModelPath); inline std::vector readListFiles(const std::vector& filePaths) { std::vector res; diff --git a/src/tests/ie_test_utils/common_test_utils/graph_comparator.hpp 
b/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/graph_comparator.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp diff --git a/src/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/ngraph_test_utils.hpp similarity index 96% rename from src/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/ngraph_test_utils.hpp index 951e0059c30d54..3ef0dc1d729bec 100644 --- a/src/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/ngraph_test_utils.hpp @@ -8,12 +8,12 @@ #include #include -#include "graph_comparator.hpp" +#include "common_test_utils/graph_comparator.hpp" #include "openvino/core/dimension.hpp" #include "openvino/core/model.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/pass.hpp" -#include "test_common.hpp" +#include "common_test_utils/test_common.hpp" #include "transformations/init_node_info.hpp" #define DYN ov::Dimension::dynamic() diff --git a/src/tests/ie_test_utils/common_test_utils/ov_tensor_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/ov_tensor_utils.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp diff --git a/src/tests/ie_test_utils/common_test_utils/precomp.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/precomp.hpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/precomp.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/precomp.hpp diff --git 
a/src/tests/ie_test_utils/common_test_utils/test_assertions.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_assertions.hpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/test_assertions.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/test_assertions.hpp diff --git a/src/tests/ie_test_utils/common_test_utils/test_common.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_common.hpp similarity index 90% rename from src/tests/ie_test_utils/common_test_utils/test_common.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/test_common.hpp index 872bb339766ddd..5663be01db5553 100644 --- a/src/tests/ie_test_utils/common_test_utils/test_common.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_common.hpp @@ -7,7 +7,7 @@ #include #include -#include "test_assertions.hpp" +#include "common_test_utils/test_assertions.hpp" namespace CommonTestUtils { diff --git a/src/tests/ie_test_utils/common_test_utils/test_constants.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/test_constants.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp diff --git a/src/tests/ie_test_utils/common_test_utils/unicode_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/unicode_utils.hpp similarity index 98% rename from src/tests/ie_test_utils/common_test_utils/unicode_utils.hpp rename to src/tests/test_utils/common_test_utils/include/common_test_utils/unicode_utils.hpp index 3ba071ca060beb..1167884955de9c 100644 --- a/src/tests/ie_test_utils/common_test_utils/unicode_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/unicode_utils.hpp @@ -10,8 +10,8 @@ #include #include -#include "common_utils.hpp" -#include "w_dirent.h" 
+#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/w_dirent.h" #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT diff --git a/src/tests/ie_test_utils/common_test_utils/w_dirent.h b/src/tests/test_utils/common_test_utils/include/common_test_utils/w_dirent.h similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/w_dirent.h rename to src/tests/test_utils/common_test_utils/include/common_test_utils/w_dirent.h diff --git a/src/tests/ie_test_utils/common_test_utils/common_utils.cpp b/src/tests/test_utils/common_test_utils/src/common_utils.cpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/common_utils.cpp rename to src/tests/test_utils/common_test_utils/src/common_utils.cpp diff --git a/src/tests/ie_test_utils/common_test_utils/data_utils.cpp b/src/tests/test_utils/common_test_utils/src/data_utils.cpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/data_utils.cpp rename to src/tests/test_utils/common_test_utils/src/data_utils.cpp diff --git a/src/tests/ie_test_utils/common_test_utils/file_utils.cpp b/src/tests/test_utils/common_test_utils/src/file_utils.cpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/file_utils.cpp rename to src/tests/test_utils/common_test_utils/src/file_utils.cpp diff --git a/src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp rename to src/tests/test_utils/common_test_utils/src/graph_comparator.cpp diff --git a/src/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp b/src/tests/test_utils/common_test_utils/src/ngraph_test_utils.cpp similarity index 98% rename from src/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp rename to src/tests/test_utils/common_test_utils/src/ngraph_test_utils.cpp index b2827d4ad59317..54c721a2b6e763 100644 
--- a/src/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/ngraph_test_utils.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph_test_utils.hpp" +#include "common_test_utils/ngraph_test_utils.hpp" namespace ov { namespace pass { diff --git a/src/tests/ie_test_utils/common_test_utils/ov_tensor_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/ov_tensor_utils.cpp rename to src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp diff --git a/src/tests/ie_test_utils/common_test_utils/test_common.cpp b/src/tests/test_utils/common_test_utils/src/test_common.cpp similarity index 94% rename from src/tests/ie_test_utils/common_test_utils/test_common.cpp rename to src/tests/test_utils/common_test_utils/src/test_common.cpp index 6afc55520b9ca1..ce5ee43aebb3a6 100644 --- a/src/tests/ie_test_utils/common_test_utils/test_common.cpp +++ b/src/tests/test_utils/common_test_utils/src/test_common.cpp @@ -2,9 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "test_common.hpp" -#include "common_utils.hpp" -#include "test_constants.hpp" +#include "common_test_utils/test_common.hpp" +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/test_constants.hpp" #include diff --git a/src/tests/ie_test_utils/common_test_utils/test_constants.cpp b/src/tests/test_utils/common_test_utils/src/test_constants.cpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/test_constants.cpp rename to src/tests/test_utils/common_test_utils/src/test_constants.cpp diff --git a/src/tests/ie_test_utils/common_test_utils/unicode_utils.cpp b/src/tests/test_utils/common_test_utils/src/unicode_utils.cpp similarity index 92% rename from src/tests/ie_test_utils/common_test_utils/unicode_utils.cpp rename to src/tests/test_utils/common_test_utils/src/unicode_utils.cpp index 
1749d000c57a76..dd8951c61b9a38 100644 --- a/src/tests/ie_test_utils/common_test_utils/unicode_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/unicode_utils.cpp @@ -3,7 +3,7 @@ // #include -#include "unicode_utils.hpp" +#include "common_test_utils/unicode_utils.hpp" #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT diff --git a/src/tests/ie_test_utils/common_test_utils/tests/CMakeLists.txt b/src/tests/test_utils/common_test_utils/tests/CMakeLists.txt similarity index 74% rename from src/tests/ie_test_utils/common_test_utils/tests/CMakeLists.txt rename to src/tests/test_utils/common_test_utils/tests/CMakeLists.txt index 57d8a6bfb81bcf..8ef78985db9271 100644 --- a/src/tests/ie_test_utils/common_test_utils/tests/CMakeLists.txt +++ b/src/tests/test_utils/common_test_utils/tests/CMakeLists.txt @@ -2,14 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 # -set(TARGET_NAME commonUtilsTests) +set(TARGET_NAME ov_util_tests) addIeTargetTest( NAME ${TARGET_NAME} ROOT ${CMAKE_CURRENT_SOURCE_DIR} DEPENDENCIES - commonTestUtils LINK_LIBRARIES - commonTestUtils + common_test_utils ADD_CPPLINT ) diff --git a/src/tests/ie_test_utils/common_test_utils/tests/graph_comparator_tests.cpp b/src/tests/test_utils/common_test_utils/tests/graph_comparator_tests.cpp similarity index 100% rename from src/tests/ie_test_utils/common_test_utils/tests/graph_comparator_tests.cpp rename to src/tests/test_utils/common_test_utils/tests/graph_comparator_tests.cpp From 58de48a4910a8f8a4b6d55bee2935cfadf3bbd11 Mon Sep 17 00:00:00 2001 From: Artyom Anokhov Date: Mon, 10 Jul 2023 15:15:59 +0200 Subject: [PATCH 03/21] Temporary block nvidia plugin to enable CPack target first (#18456) * Temporary block nvidia plugin to enable CPack target first * rpm.cmake: Added the same block for nvidia --- cmake/packaging/debian.cmake | 2 ++ cmake/packaging/rpm.cmake | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cmake/packaging/debian.cmake b/cmake/packaging/debian.cmake index baf82cdeb10cbb..49a1ad9fe08d36 100644 
--- a/cmake/packaging/debian.cmake +++ b/cmake/packaging/debian.cmake @@ -53,6 +53,8 @@ macro(ov_cpack_settings) (NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE}_python.*" OR ENABLE_PYTHON_PACKAGING) AND # see ticket # 82605 NOT item STREQUAL "gna" AND + # temporary block nvidia + NOT item STREQUAL "nvidia" AND # don't install Intel OpenMP NOT item STREQUAL "omp" AND # even for case of system TBB we have installation rules for wheels packages diff --git a/cmake/packaging/rpm.cmake b/cmake/packaging/rpm.cmake index 83dfb397c5d4af..24ce1b2cb0696b 100644 --- a/cmake/packaging/rpm.cmake +++ b/cmake/packaging/rpm.cmake @@ -39,6 +39,8 @@ macro(ov_cpack_settings) (NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE}_python.*" OR ENABLE_PYTHON_PACKAGING) AND # see ticket # 82605 NOT item STREQUAL "gna" AND + # temporary block nvidia + NOT item STREQUAL "nvidia" AND # don't install Intel OpenMP NOT item STREQUAL "omp" AND # even for case of system TBB we have installation rules for wheels packages From 975ba2a92b19ac79108611c65bcca4311fdb2a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?= Date: Mon, 10 Jul 2023 15:58:37 +0200 Subject: [PATCH 04/21] Optimization of ScatterElementsUpdate ref impl (#18313) Co-authored-by: Katarzyna Mitrus Co-authored-by: Michal Lukaszewski --- .../reference/scatter_elements_update.hpp | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp b/src/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp index f5507fb553a050..aaa4a75d2e940f 100644 --- a/src/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp +++ b/src/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp @@ -149,10 +149,7 @@ typename std::enable_if::value || std::is_class::va template typename std::enable_if::value, T>::type arithmetic_mean(const T accumulator, const 
int32_t N) { - const auto old_mode = std::fegetround(); - std::fesetround(FE_DOWNWARD); const T value = static_cast(std::nearbyint(static_cast(accumulator) / N)); - std::fesetround(old_mode); return value; } @@ -165,6 +162,25 @@ size_t normalize_index(const T idx, const size_t dim_value) { } } +template +struct RoundingDirectionGuard { + RoundingDirectionGuard() { + if (std::is_integral::value) { + m_original_mode = std::fegetround(); + std::fesetround(FE_DOWNWARD); + } + } + + ~RoundingDirectionGuard() { + if (std::is_integral::value) { + std::fesetround(m_original_mode); + } + } + +private: + decltype(std::fegetround()) m_original_mode; +}; + template void scatter_elem_update_with_reduction(const DataType* input_data, const IndicesType* indices, @@ -221,6 +237,9 @@ void scatter_elem_update_with_reduction(const DataType* input_data, } if (reduction_type == Reduction::MEAN) { + // this object will change the rounding mode only for integer types which is required to match torch + // upon destruction the previously used rounding mode will be restored + RoundingDirectionGuard rounding_guard; for (const auto& counter : mean_reduction_counters) { // include the initial value in the arithmetic mean divisor (if needed) const auto N = counter.second + static_cast(use_init_val); From 75696f4545ff7998f409700959e4f75e108a434d Mon Sep 17 00:00:00 2001 From: Pavel Durandin Date: Mon, 10 Jul 2023 19:32:38 +0400 Subject: [PATCH 05/21] [GPU] Fix DG2 units tests with null users (#18455) * [GPU] Fix DG2 units tests with null users * [GPU] Fix DG2 units tests with null users --- src/plugins/intel_gpu/src/graph/layout_optimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 4cb42270a54fab..6b90e365cd1387 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1196,7 +1196,7 @@ 
format layout_optimizer::get_expected_format(quantize_node const& node) { if (use_onednn_impls) { auto& user = node.get_users().front(); - if (user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) { + if (user != nullptr && user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) { expected = user->get_preferred_input_fmt(user->get_dependency_index(node)); } else { expected = format::any; From d829c8086d98a0a5ec624bd17aad8e738449ee02 Mon Sep 17 00:00:00 2001 From: Tatiana Savina Date: Mon, 10 Jul 2023 18:53:41 +0200 Subject: [PATCH 06/21] basiq quantization flow changes (#18439) --- docs/optimization_guide/nncf/ptq/basic_quantization_flow.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/optimization_guide/nncf/ptq/basic_quantization_flow.md b/docs/optimization_guide/nncf/ptq/basic_quantization_flow.md index 4399b6cdb8849b..5fc93dcc34405e 100644 --- a/docs/optimization_guide/nncf/ptq/basic_quantization_flow.md +++ b/docs/optimization_guide/nncf/ptq/basic_quantization_flow.md @@ -69,7 +69,8 @@ If there is no framework dataset object, you can create your own entity that imp Quantize a Model ##################### -Once the dataset is ready and the model object is instantiated, you can apply 8-bit quantization to it: +Once the dataset is ready and the model object is instantiated, you can apply 8-bit quantization to it. +See the `example section <#examples-of-how-to-apply-nncf-post-training-quantization>`__ at the end of this document for examples for each framework. .. tab-set:: @@ -102,7 +103,8 @@ Once the dataset is ready and the model object is instantiated, you can apply 8- :fragment: [quantization] -After that the model can be converted into the OpenVINO Intermediate Representation (IR) if needed, compiled and run with OpenVINO: +After that the model can be converted into the OpenVINO Intermediate Representation (IR) if needed, compiled and run with OpenVINO. 
+If you have not already installed OpenVINO developer tools, install it with ``pip install openvino-dev``. .. tab-set:: From 3278bc1566fedfeb4c56d78d61add1d0d935d7c4 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Mon, 10 Jul 2023 22:38:00 +0400 Subject: [PATCH 07/21] Fixed build without proxy (#18442) --- src/inference/src/dev/core_impl.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index e92d744f5f9bbd..085728709d7008 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -49,7 +49,9 @@ ov::ICore::~ICore() = default; namespace { +#ifdef PROXY_PLUGIN_ENABLED static constexpr const char* internal_plugin_suffix = "_ov_internal"; +#endif template void allowNotImplemented(F&& f) { @@ -387,13 +389,11 @@ void ov::CoreImpl::register_plugin_in_registry_unsafe(const std::string& device_ }; #endif - auto&& config = desc.defaultConfig; std::string dev_name = device_name; +#ifdef PROXY_PLUGIN_ENABLED + auto&& config = desc.defaultConfig; // Register proxy plugin if (config.find(ov::proxy::configuration::alias.name()) != config.end()) { -#ifndef PROXY_PLUGIN_ENABLED - OPENVINO_THROW("Cannot register plugin under the proxy. Proxy plugin is disabled."); -#else // Create proxy plugin for alias auto alias = config.at(ov::proxy::configuration::alias.name()).as(); if (alias == device_name) @@ -415,18 +415,13 @@ void ov::CoreImpl::register_plugin_in_registry_unsafe(const std::string& device_ " plugin with the same name already registered!"); fill_config(plugin.defaultConfig, config, dev_name); } -#endif } else if (config.find(ov::proxy::configuration::fallback.name()) != config.end()) { -#ifndef PROXY_PLUGIN_ENABLED - OPENVINO_THROW("Cannot register plugin under the proxy. 
Proxy plugin is disabled."); -#else // Fallback without alias means that we need to replace original plugin to proxy dev_name += internal_plugin_suffix; PluginDescriptor desc = PluginDescriptor(ov::proxy::create_plugin); fill_config(desc.defaultConfig, config, dev_name); pluginRegistry[device_name] = desc; add_mutex(device_name); -#endif } const static std::vector proxy_conf_properties = {ov::proxy::configuration::alias, @@ -440,6 +435,7 @@ void ov::CoreImpl::register_plugin_in_registry_unsafe(const std::string& device_ desc.defaultConfig.erase(it); } } +#endif pluginRegistry[dev_name] = desc; add_mutex(dev_name); From ad7caf5d7667882bfa08b67c5859408a55279633 Mon Sep 17 00:00:00 2001 From: Evgeny Kotov Date: Mon, 10 Jul 2023 21:18:46 +0200 Subject: [PATCH 08/21] Pruning add Pad12 support (#18363) * fix transformation * add unit test --------- Co-authored-by: Ivan Tikhonov --- .../src/pruning/propagate_masks.cpp | 3 +- .../offline_transformations/pruning_test.cpp | 95 +++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/src/common/offline_transformations/src/pruning/propagate_masks.cpp b/src/common/offline_transformations/src/pruning/propagate_masks.cpp index 20bf72849f0102..2ce6b688134bea 100644 --- a/src/common/offline_transformations/src/pruning/propagate_masks.cpp +++ b/src/common/offline_transformations/src/pruning/propagate_masks.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include "mask_attribute.hpp" #include "openvino/util/log.hpp" @@ -875,7 +876,7 @@ class ngraph::pass::mask_propagation::PassThrough : public MatcherPass { opset10::MaxPool, opset10::ROIPooling, opset10::PSROIPooling, - opset10::Pad, + ov::op::util::PadBase, opset10::MVN, op::v0::Gelu, opset10::Gelu>(); diff --git a/src/common/transformations/tests/offline_transformations/pruning_test.cpp b/src/common/transformations/tests/offline_transformations/pruning_test.cpp index dc7468c1f16f1c..a173d91deac83f 100644 --- 
a/src/common/transformations/tests/offline_transformations/pruning_test.cpp +++ b/src/common/transformations/tests/offline_transformations/pruning_test.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -21,12 +22,14 @@ #include #include "common_test_utils/ngraph_test_utils.hpp" +#include "openvino/opsets/opset12.hpp" #define VISUALIZE_TESTS_TREE false #define VISUALIZE_TREE_ROOT "/tmp/" using namespace testing; using namespace ngraph; +using namespace ov::opset12; void compare_masks(const Mask& mask, const Mask& ref_mask) { ASSERT_EQ(mask.size(), ref_mask.size()); @@ -600,6 +603,98 @@ TEST_F(TransformationTestsF, PropagateMaskPassThrough) { comparator.enable(FunctionsComparator::CmpValues::ACCURACY); } +TEST_F(TransformationTestsF, NegativePad12PropagateMaskPassThrough) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape{8, 3, 3, 3}; + Shape weight_shape2{3, 8, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + input->set_friendly_name("input"); + auto weights_const_1 = create_constant_with_zeros(weights_shape, {{1, 2, 3}, {}, {}, {}}); + weights_const_1.get_node_shared_ptr()->set_friendly_name("weights_1"); + + auto conv_1 = std::make_shared(input, + weights_const_1, + Strides(2, 1), + CoordinateDiff(2, 0), + CoordinateDiff(2, 0), + Strides(2, 1)); + conv_1->set_friendly_name("conv_1"); + + // Adding a couple of PassThrough operations + auto relu = std::make_shared(conv_1); + relu->set_friendly_name("relu"); + + auto clamp = std::make_shared(relu, 0, 6); + clamp->set_friendly_name("clamp"); + + auto pads_begin = Constant::create(element::i32, Shape{4}, {0, 0, 1, -1}); + auto pads_end = Constant::create(element::i32, Shape{4}, {0, 0, 2, -2}); + auto pad = std::make_shared(clamp, pads_begin, pads_end, op::PadMode::CONSTANT); + auto max_pool = std::make_shared(pad, Strides{1, 1}, Strides{1, 1}, Shape{0, 0}, Shape{1, 1}, Shape{4, 4}); + max_pool->set_friendly_name("max_pool"); + + auto weights2 = 
Constant::create(element::f32, weight_shape2, {0}); + auto conv2 = std::make_shared(max_pool, + weights2, + Strides(2, 1), + CoordinateDiff(2, 0), + CoordinateDiff(2, 0), + Strides(2, 1)); + function = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + { + auto input = std::make_shared(element::f32, input_shape); + auto weights_const_1 = + create_constant_with_zeros({weights_shape[0] - 3, weights_shape[1], weights_shape[2], weights_shape[3]}, + {{}, {}, {}, {}}); + weights_const_1.get_node_shared_ptr()->set_friendly_name("weights_1"); + + auto conv_1 = std::make_shared(input, + weights_const_1, + Strides(2, 1), + CoordinateDiff(2, 0), + CoordinateDiff(2, 0), + Strides(2, 1)); + // Adding a couple of PassThrough operations + auto relu = std::make_shared(conv_1); + + auto clamp = std::make_shared(relu, 0, 6); + + auto pads_begin = Constant::create(element::i32, Shape{4}, {0, 0, 1, -1}); + auto pads_end = Constant::create(element::i32, Shape{4}, {0, 0, 2, -2}); + auto pad = std::make_shared(clamp, pads_begin, pads_end, op::PadMode::CONSTANT); + auto max_pool = + std::make_shared(pad, Strides{1, 1}, Strides{1, 1}, Shape{0, 0}, Shape{1, 1}, Shape{4, 4}); + + auto weights2 = Constant::create(element::f32, + {weight_shape2[0], weight_shape2[1] - 3, weight_shape2[2], weight_shape2[3]}, + {0}); + auto conv2 = std::make_shared(max_pool, + weights2, + Strides(2, 1), + CoordinateDiff(2, 0), + CoordinateDiff(2, 0), + Strides(2, 1)); + function_ref = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + } + if (VISUALIZE_TESTS_TREE) + ngraph::pass::VisualizeTree(std::string(VISUALIZE_TREE_ROOT) + "PropagateMaskPassThrough.svg") + .run_on_model(function); + { + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(function); + } + compare_masks(*getMask(weights_const_1.get_node_shared_ptr()->output(0)), Mask({{1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(conv_1->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + 
compare_masks(*getMask(relu->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + compare_masks(*getMask(clamp->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + compare_masks(*getMask(max_pool->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + + manager.register_pass(); + comparator.enable(FunctionsComparator::CmpValues::ACCURACY); +} + TEST_F(TransformationTestsF, PropagateMasksHardDependencies) { Shape input_shape{1, 3, 3, 3}; From ff4ed54bba9d496503645e7dd0e7379336c6b0ae Mon Sep 17 00:00:00 2001 From: Paul Youngsoo Ahn Date: Tue, 11 Jul 2023 14:54:56 +0900 Subject: [PATCH 09/21] [GPU] Fix kernel selecting issue on dynamic model with static input shape (#18448) * [GPU] disable blocked format for dynamic shape model(#18448) * [GPU] Return default format for output layout rank when user node is reshape in get_preferred_format - Rollback code to disable blocked formmat for dynamic shape * [GPU] Add unit test checking has_reshape_user * [GPU] remove redundant comments --- .../intel_gpu/src/graph/layout_optimizer.cpp | 17 ++++- .../tests/unit/passes/reorder_inputs_test.cpp | 74 +++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 6b90e365cd1387..397a1eec5fd8d4 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1633,11 +1633,24 @@ format layout_optimizer::get_preferred_format(program_node& node) { bool allow_new_shape_infer = node.get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); if (allow_new_shape_infer) { + // Let reorder_input pass to check input format instead of output_format in forward investigation, vice versa + auto out_lay_rank = node.get_output_layout(false).get_rank(); + auto has_reshape_user = [&](const program_node& node) -> bool { + for (auto& user_node : node.get_users()) { + if (user_node->is_type()) + return true; + } 
+ return false; + }; + + // Return default format for output layout rank when user node is reshape + // to add reorder in front of reshape in reorder_input stage instead of handle_reshpae stage. + if (has_reshape_user(node)) + return format::get_default_format(out_lay_rank); + if (node.is_type()) return format::get_default_format(node.get_input_layout(0).get_rank()); - // Let reorder_input pass to check input format instead of output_format in forward investigation, vice versa - auto out_lay_rank = node.get_output_layout(false).get_rank(); auto dep_size = node.get_dependencies().size(); for (size_t i = 0; i < dep_size; i++) { auto in_lay_rank = node.get_input_layout(i).get_rank(); diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp index 7d9e88de18445e..15b91748d4eb18 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp @@ -350,3 +350,77 @@ TEST(reorder_inputs, no_add_reorder_infront_of_reshape) { // ASSERT_EQ(out_mem_ptr[7], 44.f); // } //} + +#ifdef ENABLE_ONEDNN_FOR_GPU +TEST(reorder_inputs, has_reshape_user) { + auto& engine = get_test_engine(); + + if (!engine.get_device_info().supports_immad) + return; + + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 1, 4, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 1, 2, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1, 1 } }); + + set_values(input, { + 1.0f, 0.0f, 1.0f, 0.0f, + 1.0f, 1.0f, 3.0f, 1.0f, + 1.0f, 1.0f, 0.0f, 2.0f, + 0.0f, 2.0f, 1.0f, 1.0f, + 1.0f, 0.0f, 0.0f, 1.0f, + 2.0f, 0.0f, 1.0f, 2.0f, + 3.0f, 1.0f, 1.0f, 1.0f, + 0.0f, 0.0f, 3.0f, 1.0f, + 2.0f, 0.0f, 1.0f, 1.0f, + 3.0f, 3.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 1.0f, 0.0f, + 3.0f, 2.0f, 1.0f, 2.0f, + 1.0f, 0.0f, 2.0f, 0.0f, + 1.0f, 0.0f, 3.0f, 3.0f, + 3.0f, 1.0f, 
0.0f, 0.0f, + 1.0f, 1.0f, 0.0f, 2.0f, + }); + + set_values(weights, { + 0.0f, 1.0f, + 0.0f, 0.0f, + 2.0f, 1.0f, + 0.0f, 0.0f, + }); + + set_values(biases, { 1.0f }); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(data("weights", weights)), + topology.add(data("biases", biases)), + topology.add(convolution("conv", input_info("input"), "weights", "biases", 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, false)); + topology.add(reshape("reshape1", input_info("conv"), false, { 1, 1, 3, 3, 3 }, { 1, 1, 3, 3, 3 })); + topology.add(permute("permute", input_info("reshape1"), { 0, 1, 2, 3, 4 })); + topology.add(reshape("reshape2", input_info("permute"), false, { 1, 3, 3, 3 }, { 1, 3, 3, 3 })); + topology.add(reorder("output", input_info("reshape2"), format::bfyx, data_types::f32)); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); + + network.set_input_data("input", input); + + primitive_id out_id = "output"; + auto output = network.execute(); + auto out_l = network.get_output_layout(out_id); + auto out_mem = output.at(out_id).get_memory(); + cldnn::mem_lock output_ptr(out_mem, get_test_stream()); + + std::vector ref_output = { + 3, 2, 2, 6, 5, 6, 9, 4, 6, + 5, 2, 5, 10, 9, 5, 7, 5, 4, + 3, 4, 6, 6, 5, 10, 9, 4, 1 + }; + + for (size_t x = 0; x < out_l.count(); ++x) { + ASSERT_EQ(static_cast(ref_output[x]), output_ptr[x]); + } +} +#endif From b1e250757e40d87f6c37230240d4dde3ba190a18 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Tue, 11 Jul 2023 09:12:25 +0300 Subject: [PATCH 10/21] [IE TESTS][CONFORMANCE] Change data_generation initialization to avoid incorrect value (#18193) * [IE TESTS][CONFORMANCE] Fix data generation * fix cpu tests * CPP Lint * Update ranges.hpp * change start_from and range according typo * Update 
deformable_convolution.cpp * clenup * Update activation.cpp * Update deformable_convolution.cpp * Update grid_sample.cpp * Fix tests -> change start_from type --- .../shared_test_classes/base/utils/ranges.hpp | 34 +++++++++++++++++-- .../src/base/utils/generate_inputs.cpp | 4 +++ .../include/common_test_utils/data_utils.hpp | 10 ++++-- .../common_test_utils/ov_tensor_utils.hpp | 2 +- .../common_test_utils/src/ov_tensor_utils.cpp | 2 +- 5 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp index 8245b72fd080c6..32d585c449b89c 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp @@ -27,14 +27,42 @@ namespace ov { namespace test { namespace utils { +// todo: remove w/a to generate correct constant data (replace parameter to const) in conformance with defined range +struct ConstRanges { + static double max, min; + static bool is_defined; + + static void set(double _min, double _max) { + min = _min; + max = _max; + is_defined = true; + } + + static void reset() { + min = std::numeric_limits::max(); + max = std::numeric_limits::min(); + is_defined = false; + } +}; + struct InputGenerateData { - int32_t start_from; + double_t start_from; uint32_t range; int32_t resolution; int seed; - InputGenerateData(int32_t _start_from = 0, uint32_t _range = 10, int32_t _resolution = 1, int _seed = 1) - : start_from(_start_from), range(_range), resolution(_resolution), seed(_seed) {} + InputGenerateData(double_t _start_from = 0, uint32_t _range = 10, int32_t _resolution = 1, int _seed = 1) + : start_from(_start_from), range(_range), resolution(_resolution), seed(_seed) { + if (ConstRanges::is_defined) { + auto min_orig = start_from; + auto max_orig = 
start_from + range * resolution; + auto min_ref = ConstRanges::min; + auto max_ref = ConstRanges::max; + if (min_orig < min_ref || min_orig == 0) + start_from = min_ref; + range = (max_orig > max_ref || max_orig == 10 ? max_ref : max_orig - start_from) - start_from; + } + } }; static std::map>> inputRanges = { diff --git a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp index ad05a21f01dd6d..099c92aa61e779 100644 --- a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp +++ b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp @@ -18,6 +18,10 @@ namespace ov { namespace test { namespace utils { +double ConstRanges::max = std::numeric_limits::min(); +double ConstRanges::min = std::numeric_limits::max(); +bool ConstRanges::is_defined = false; + namespace { /** diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp index 78a29d0740e1bd..d8005b00bef182 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp @@ -193,8 +193,15 @@ fill_data_roi(ov::runtime::Tensor& tensor, const uint32_t range, const int heigh template void inline -fill_data_random(T *pointer, std::size_t size, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1, +fill_data_random(T *pointer, std::size_t size, const uint32_t range = 10, double_t start_from = 0, const int32_t k = 1, const int seed = 1) { + if (range == 0) { + for (std::size_t i = 0; i < size; i++) { + pointer[i] = static_cast(start_from); + } + return; + } + testing::internal::Random random(seed); const uint32_t k_range = k * range; // range with respect to k random.Generate(k_range); @@ -202,7 +209,6 @@ fill_data_random(T 
*pointer, std::size_t size, const uint32_t range = 10, int32_ if (start_from < 0 && !std::is_signed::value) { start_from = 0; } - for (std::size_t i = 0; i < size; i++) { pointer[i] = static_cast(start_from + static_cast(random.Generate(k_range)) / k); } diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp index 189cc4a3af1a14..0d70612bbd585a 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp @@ -13,7 +13,7 @@ ov::Tensor create_and_fill_tensor( const ov::element::Type element_type, const ov::Shape &shape, const uint32_t range = 10, - const int32_t start_from = 0, + const double_t start_from = 0, const int32_t resolution = 1, const int seed = 1); diff --git a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp index 37da4fc88ead90..5bd3e2aff87b61 100644 --- a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp @@ -16,7 +16,7 @@ ov::Tensor create_and_fill_tensor( const ov::element::Type element_type, const ov::Shape& shape, const uint32_t range, - const int32_t start_from, + const double_t start_from, const int32_t resolution, const int seed) { auto tensor = ov::Tensor{element_type, shape}; From 2ea277fc7260738f5b9e2d0aae17ead5e951354e Mon Sep 17 00:00:00 2001 From: Maciej Smyk Date: Tue, 11 Jul 2023 09:44:08 +0200 Subject: [PATCH 11/21] Update installing-openvino-docker-linux.md (#18458) --- docs/install_guides/installing-openvino-docker-linux.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/install_guides/installing-openvino-docker-linux.md b/docs/install_guides/installing-openvino-docker-linux.md index 2a3db6546ff7a2..875b1f69f5a6ab 
100644 --- a/docs/install_guides/installing-openvino-docker-linux.md +++ b/docs/install_guides/installing-openvino-docker-linux.md @@ -36,6 +36,7 @@ More information about Docker CI for Intel® Distribution of OpenVINOâ„¢ toolset * `Docker CI framework for Intel® Distribution of OpenVINOâ„¢ toolkit `__ * `Get Started with DockerHub CI for Intel® Distribution of OpenVINOâ„¢ toolkit `__ +* `Using OpenVINOâ„¢ Toolkit containers with GPU accelerators `__ * `Dockerfiles with Intel® Distribution of OpenVINOâ„¢ toolkit `__ @endsphinxdirective From c7388994088371d6d48dbc3fc1de66284ee4ff59 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Tue, 11 Jul 2023 08:54:33 +0100 Subject: [PATCH 12/21] [CI] Introduce Linux pipeline in GitHub Actions (#18355) * skip validation, always include cmake * rm unconditional inclusion of zlib * always include zlib * correct path for builtin_extensions * find builtin extensions recursively * include test_utils always * add logs for build_samples * skip tests with dir accessing * remove platform specification for samples build * do not pkgconfig on win, use cmake generic on linux for samples * rm make * fix num_threads * use bare numbers * skip failing * skip test_lrn_basic * find zlib * print error of downloading * add linux pipeline * do not save cache from PRs; add skipif only in GHA * rm caching * evaluate against a string * do not include test_utils to the install dir --- .github/workflows/linux.yml | 714 ++++++++++++++++++ .../py_frontend_tests/test_torch_frontend.py | 2 +- .../tensorflow_tests/test_tf_LRN.py | 2 + .../tensorflow_tests/test_tf_SplitV.py | 2 + .../common/samples_common_test_class.py | 3 +- .../mo/unit_tests/mo/frontend_ngraph_test.py | 4 +- .../mo/utils/simple_proto_parser_test.py | 2 +- .../unit_tests/ovc/utils/cli_parser_test.py | 4 +- 8 files changed, 726 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/linux.yml diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml new file mode 
100644 index 00000000000000..586aed18743005 --- /dev/null +++ b/.github/workflows/linux.yml @@ -0,0 +1,714 @@ +name: Tests on Linux (Ubuntu 22.04, Python 3.11) +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + +jobs: + Build: + defaults: + run: + shell: bash + runs-on: ubuntu-latest-8-cores + env: + CMAKE_BUILD_TYPE: 'Release' + CMAKE_GENERATOR: 'Ninja' + CMAKE_CXX_COMPILER_LAUNCHER: ccache + CMAKE_C_COMPILER_LAUNCHER: ccache + OPENVINO_REPO: ${{ github.workspace }}/openvino + OPENVINO_CONTRIB_REPO: ${{ github.workspace }}/openvino_contrib + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + SAMPLES_INSTALL_DIR: ${{ github.workspace }}/install/samples + LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests + BUILD_DIR: ${{ github.workspace }}/build + DATA_PATH: ${{ github.workspace }}/testdata + MODELS_PATH: ${{ github.workspace }}/testdata + OV_TEMP: ${{ github.workspace }}/openvino_temp + PYTHON_STATIC_ARGS: -m "not dynamic_library and not template_plugin" + steps: + - name: Clone OpenVINO + uses: actions/checkout@v3 + with: + path: 'openvino' + submodules: 'recursive' + + - name: Clone OpenVINO Contrib + uses: actions/checkout@v3 + with: + repository: 'openvinotoolkit/openvino_contrib' + path: 'openvino_contrib' + submodules: 'recursive' + + - name: Clone testdata for C API tests + uses: actions/checkout@v3 + with: + repository: 'openvinotoolkit/testdata' + path: 'testdata' + submodules: 'recursive' + lfs: 'true' + + # + # Dependencies + # + + - name: Install build dependencies + run: | + sudo -E ${{ env.OPENVINO_REPO }}/install_build_dependencies.sh + sudo -E apt update + sudo -E apt --assume-yes install openjdk-11-jdk libbz2-dev clang unzip libpugixml-dev libtbb-dev intel-opencl-icd ocl-icd-opencl-dev opencl-headers + + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip + unzip ninja-linux.zip + sudo cp -v ninja /usr/local/bin/ + 
+ - uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install python dependencies + run: | + # For Python API + python3 -m pip install --upgrade pip + python3 -m pip install Scons + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/wheel/requirements-dev.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/requirements.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/requirements_test.txt + + # For running Python API tests + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/src/compatibility/openvino/requirements-dev.txt + + # For running ONNX frontend unit tests + python3 -m pip install --force-reinstall -r ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests/requirements.txt + + # For running TensorFlow frontend unit tests + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/frontends/tensorflow/tests/requirements.txt + + # For running Paddle frontend unit tests + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/frontends/paddle/tests/requirements.txt + + - name: Install MO dependencies + run: | + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_mxnet.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_caffe.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_kaldi.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_onnx.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_tf2.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_dev.txt + + # + # Build + # + + - name: Setup ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + max-size: "2000M" + # Should save cache only if run in the master branch of the base repo + # github.ref_name is 'ref/PR_#' in case of the PR, and 'branch_name' when executed on push + save: ${{ github.ref_name == 'master' && 'true' || 'false' }} + verbose: 2 + 
key: ${{ github.job }}-linux + restore-keys: | + ${{ github.job }}-linux + + - name: Get tools versions + run: | + ninja --version || exit 1 + ccache --version || exit 1 + python3 --version || exit 1 + cmake --version || exit 1 + + - name: Get number of CPU cores + uses: SimenB/github-actions-cpu-cores@v1 + id: cpu-cores + + - name: CMake configure + run: | + cmake \ + -GNinja \ + -DENABLE_CPPLINT=OFF \ + -DENABLE_NCC_STYLE=OFF \ + -DENABLE_TESTS=ON \ + -DENABLE_PYTHON=ON \ + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_SHARED_LIBS=ON \ + -DENABLE_ONEDNN_FOR_GPU=OFF \ + -DENABLE_OV_ONNX_FRONTEND=ON \ + -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ + -DENABLE_STRICT_DEPENDENCIES=OFF \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_LINKER_LAUNCHER=ccache \ + -DCMAKE_C_LINKER_LAUNCHER=ccache \ + -DENABLE_SYSTEM_SNAPPY=ON \ + -DENABLE_SYSTEM_TBB=ON \ + -DBUILD_nvidia_plugin=OFF \ + -DENABLE_DEBUG_CAPS=ON \ + -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" \ + -DOPENVINO_EXTRA_MODULES=${{ env.OPENVINO_CONTRIB_REPO }}/modules \ + -S ${{ env.OPENVINO_REPO }} \ + -B ${{ env.BUILD_DIR }} + + - name: Clean ccache stats + run: ccache --zero-stats --show-config + + - name: Build + run: cmake --build ${{ env.BUILD_DIR }} --parallel ${{ steps.cpu-cores.outputs.count }} --config Release + + - name: Show ccache stats + run: ccache --show-stats + + - name: Cmake Layer Tests + run: cmake -GNinja -S ${{ env.OPENVINO_REPO }}/tests/layer_tests -B ${{ env.BUILD_DIR }}/layer_tests + + - name: Build Layer Tests + run: cmake --build ${{ env.BUILD_DIR }}/layer_tests --parallel --config Release + + - name: Install wheel packages + run: cmake -DCOMPONENT=python_wheels -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake + + - name: Install Layer Tests + run: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ 
env.BUILD_DIR }}/layer_tests/cmake_install.cmake + + - name: Install python wheels + run: python3 -m pip install openvino-dev --find-links=${{ env.INSTALL_DIR }}/tools + + - name: Install tests + run: cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake + + - name: Install OpenVINO + run: cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake + + - name: CMake Samples Tests + run: cmake -GNinja -S ${{ env.OPENVINO_REPO }}/tests/samples_tests -B ${{ env.BUILD_DIR }}/samples_tests + + - name: Build Samples Tests + run: cmake --build ${{ env.BUILD_DIR }}/samples_tests --config Release + + - name: Install Samples Tests + run: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/samples_tests/cmake_install.cmake + + - name: Pack Artifacts + run: | + pushd ${{ env.INSTALL_DIR }} + tar -czvf ${{ env.BUILD_DIR }}/openvino_package.tar.gz --exclude=tests * + popd + + pushd ${{ env.INSTALL_DIR }} + tar -czvf ${{ env.BUILD_DIR }}/openvino_tests.tar.gz tests/ + popd + + - name: Build cpp samples + run: ${{ env.SAMPLES_INSTALL_DIR }}/cpp/build_samples.sh -i ${{ env.INSTALL_DIR }} -b ${{ env.BUILD_DIR }}/cpp_samples + + - name: Build c samples + run: ${{ env.SAMPLES_INSTALL_DIR }}/c/build_samples.sh -i ${{ env.INSTALL_DIR }} -b ${{ env.BUILD_DIR }}/c_samples + + # + # Tests + # + + - name: Samples tests + run: | + python3 -m pip install --ignore-installed PyYAML -r ${{ env.INSTALL_TEST_DIR }}/smoke_tests/requirements.txt + export LD_LIBRARY_PATH=${{ env.IE_APP_PATH }}:$LD_LIBRARY_PATH + + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/smoke_tests \ + --env_conf ${{ env.INSTALL_TEST_DIR }}/smoke_tests/env_config.yml \ + --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-SamplesSmokeTests.xml + env: + IE_APP_PATH: ${{ env.INSTALL_DIR }}/samples_bin + IE_APP_PYTHON_PATH: ${{ env.INSTALL_DIR 
}}/samples/python + SHARE: ${{ env.INSTALL_TEST_DIR }}/smoke_tests/samples_smoke_tests_data + WORKSPACE: ${{ env.INSTALL_DIR }} + + # Present in the "Build" job due to the fact that these tests require build directory + - name: ONNX frontend tests + if: ${{ always() }} + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU*:*FrontEndLoadFromTest.testLoadFromTwoStreams*:*FrontEndLoadFromTest.testLoadFromTwoFiles* \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ONNXFrontend.xml + + # + # Upload build artifacts + # + + - name: Upload openvino package + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: openvino_package + path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz + if-no-files-found: 'error' + + - name: Upload openvino tests package + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: openvino_tests + path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz + if-no-files-found: 'error' + + CXX_Unit_Tests: + needs: Build + defaults: + run: + shell: bash + runs-on: ubuntu-22.04 + env: + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + + steps: + - name: Create Directories + run: | + mkdir -p ${{ env.INSTALL_DIR }} ${{ env.INSTALL_TEST_DIR }} + + # + # Dependencies + # + + - name: Install dependencies + run: | + sudo -E apt update + sudo -E apt --assume-yes install openjdk-11-jdk libbz2-dev clang unzip libpugixml-dev libtbb-dev intel-opencl-icd ocl-icd-opencl-dev opencl-headers + + - name: Download OpenVINO package + uses: actions/download-artifact@v3 + with: + name: openvino_package + path: ${{ env.INSTALL_DIR }} + + - name: Download OpenVINO tests package + uses: actions/download-artifact@v3 + with: + name: openvino_tests + path: ${{ env.INSTALL_TEST_DIR }} + + - name: Extract OpenVINO packages + run: | + pushd ${{ env.INSTALL_DIR }} + tar -xzf openvino_package.tar.gz -C 
${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz || exit 1 + popd + pushd ${{ env.INSTALL_TEST_DIR }} + tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz || exit 1 + popd + + # + # Tests + # + + - name: OpenVINO Core Unit Tests + if: ${{ always() }} + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-NGraphUT.xml + + - name: OpenVINO Inference Functional Tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_inference_functional_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-InferenceFunc.xml + + - name: OpenVINO Inference Unit Tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_inference_unit_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-InferenceUnit.xml + + - name: Low Precision Transformations Tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_lp_transformations_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-LpTransformations.xml + + - name: OpenVINO Conditional compilation tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_conditional_compilation_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ConditionalCompilation.xml + + - name: IR frontend tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_ir_frontend_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-IRFrontend.xml + + # Disabled in Azure: https://github.com/openvinotoolkit/openvino/blob/master/.ci/azure/linux.yml#L403 + # - name: PaddlePaddle frontend tests + # run: | + # source ${{ env.INSTALL_DIR }}/setupvars.sh + # ${{ env.INSTALL_TEST_DIR 
}}/paddle_tests --gtest_print_time=1 --gtest_filter=*smoke* \ + # --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-PaddleTests.xml + + # Present in the "Build" job as these tests require build directory + # - name: ONNX frontend tests + # run: | + # source ${{ env.INSTALL_DIR }}/setupvars.sh + # ${{ env.INSTALL_TEST_DIR }}/ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU*:*FrontEndLoadFromTest.testLoadFromTwoStreams*:*FrontEndLoadFromTest.testLoadFromTwoFiles* \ + # --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ONNXFrontend.xml + + - name: TensorFlow Common tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_common_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TensorFlowCommonFrontend.xml + + - name: TensorFlow frontend tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_frontend_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TensorFlowFrontend.xml + + - name: TensorFlow Lite frontend tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_lite_frontend_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TensorFlowLiteFrontend.xml + + - name: Transformations Tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-Transformations.xml + + - name: Common test utils tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/commonUtilsTests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-commonUtilsTests.xml + + - name: CPU plugin unit tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ 
env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml + + # Disabled in Azure: https://github.com/openvinotoolkit/openvino/blob/master/.ci/azure/linux.yml#L409 + # - name: GNA plugin unit tests + # run: | + # source ${{ env.INSTALL_DIR }}/setupvars.sh + # ${{ env.INSTALL_TEST_DIR }}/ov_gna_unit_tests --gtest_print_time=1 \ + # --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-GNAUnitTests.xml + + - name: AUTO UT + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_auto_unit_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_unit_tests.xml + + - name: Template plugin tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_template_func_tests --gtest_print_time=1 \ + --gtest_filter=*smoke* \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateFuncTests.xml + + - name: Inference Engine C API tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/InferenceEngineCAPITests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-InferenceEngineCAPITests.xml + + - name: OpenVINO C API tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_capi_test --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OpenVINOCAPITests.xml + + - name: AutoBatch FuncTests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_auto_batch_func_tests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_batch_func_tests.xml + + - name: Upload Test Results + uses: actions/upload-artifact@v3 + if: ${{ always() }} + with: + name: test-results-cpp + path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + if-no-files-found: 'error' + + Python_Unit_Tests: + needs: Build + defaults: + run: + shell: bash + runs-on: ubuntu-22.04 + env: + OPENVINO_REPO: ${{ github.workspace }}/openvino + OPENVINO_CONTRIB_REPO: ${{ github.workspace 
}}/openvino_contrib + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + SAMPLES_INSTALL_DIR: ${{ github.workspace }}/install/samples + LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests + BUILD_DIR: ${{ github.workspace }}/build + DATA_PATH: ${{ github.workspace }}/testdata + MODELS_PATH: ${{ github.workspace }}/testdata + OV_TEMP: ${{ github.workspace }}/openvino_temp + PYTHON_STATIC_ARGS: -m "not dynamic_library and not template_plugin" + + steps: + - name: Create Directories + run: | + mkdir -p ${{ env.INSTALL_DIR }} ${{ env.INSTALL_TEST_DIR }} + + - name: Clone OpenVINO + uses: actions/checkout@v3 + with: + path: 'openvino' + submodules: 'recursive' + + # + # Dependencies + # + + - name: Install dependencies + run: | + sudo -E apt update + sudo -E apt --assume-yes install openjdk-11-jdk libbz2-dev clang unzip libpugixml-dev libtbb-dev intel-opencl-icd ocl-icd-opencl-dev opencl-headers + + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install python dependencies + run: | + # For Python API + python3 -m pip install --upgrade pip + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/wheel/requirements-dev.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/requirements.txt + + # For running Python API tests + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/src/compatibility/openvino/requirements-dev.txt + + # For running ONNX frontend unit tests + python3 -m pip install --force-reinstall -r ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests/requirements.txt + + # For running TensorFlow frontend unit tests + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/frontends/tensorflow/tests/requirements.txt + + # For running Paddle frontend unit tests + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/frontends/paddle/tests/requirements.txt + + - name: Install MO dependencies + run: | + 
python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_mxnet.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_caffe.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_kaldi.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_onnx.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_tf2.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_dev.txt + + - name: Download OpenVINO package + uses: actions/download-artifact@v3 + with: + name: openvino_package + path: ${{ env.INSTALL_DIR }} + + - name: Download OpenVINO tests package + uses: actions/download-artifact@v3 + with: + name: openvino_tests + path: ${{ env.INSTALL_TEST_DIR }} + + - name: Extract OpenVINO packages + run: | + pushd ${{ env.INSTALL_DIR }} + tar -xzf openvino_package.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz || exit 1 + popd + + pushd ${{ env.INSTALL_TEST_DIR }} + tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz || exit 1 + popd + + - name: Install Python wheels + run: | + python3 -m pip install openvino-dev --find-links=${{ env.INSTALL_DIR }}/tools + + - name: nGraph and IE Python Bindings Tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/pyngraph ${{ env.PYTHON_STATIC_ARGS }} \ + --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml \ + --ignore=${{ env.INSTALL_TEST_DIR }}/pyngraph/tests/test_onnx/test_zoo_models.py \ + --ignore=${{ env.INSTALL_TEST_DIR }}/pyngraph/tests/test_onnx/test_backend.py + + - name: Python API 2.0 Tests + run: | + # For python imports to import pybind_mock_frontend + export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}:$PYTHONPATH + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/pyopenvino ${{ env.PYTHON_STATIC_ARGS }} \ + --junitxml=${{ env.INSTALL_TEST_DIR 
}}/TEST-Pyngraph.xml \ + --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py \ + --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_onnx/test_zoo_models.py \ + --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_onnx/test_backend.py + + - name: Model Optimizer UT + run: | + + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:${{ env.INSTALL_TEST_DIR }}:${{ env.INSTALL_DIR }}/python/python3.11:$PYTHONPATH + + # TODO: figure out why they need to be reinstalled + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_mxnet.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_caffe.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_kaldi.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_onnx.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_tf2.txt + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_dev.txt + + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/mo/unit_tests \ + --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-ModelOptimizer.xml + + - name: PyTorch Layer Tests + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml + env: + TEST_DEVICE: CPU + + - name: TensorFlow 1 Layer Tests - TF FE + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest ${{ 
env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_fe.xml + env: + TEST_DEVICE: CPU + + - name: TensorFlow 2 Layer Tests - TF FE + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_fe.xml + env: + TEST_DEVICE: CPU + + - name: TensorFlow 1 Layer Tests - Legacy FE + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_Roll.xml + + - name: TensorFlow 2 Layer Tests - Legacy FE + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/test_tf2_keras_activation.py \ + --ir_version=11 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_Activation.xml -k "sigmoid" + env: + TEST_DEVICE: CPU + + - name: TensorFlow Lite Layer Tests - TFL FE + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_lite_tests/ --junitxml=${{ 
env.INSTALL_TEST_DIR }}/TEST-tfl_fe.xml + env: + TEST_DEVICE: CPU + + - name: MO Python API Tests + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/mo_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_mo_convert.xml + env: + TEST_DEVICE: CPU + + - name: Python Frontend tests + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/py_frontend_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_py_fontend.xml + + - name: Upload Test Results + uses: actions/upload-artifact@v3 + if: ${{ always() }} + with: + name: test-results-python + path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + if-no-files-found: 'error' + + CPU_Functional_Tests: + needs: Build + defaults: + run: + shell: bash + runs-on: ubuntu-22.04 + env: + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + + steps: + - name: Create Directories + run: mkdir -p ${{ env.INSTALL_DIR }} ${{ env.INSTALL_TEST_DIR }} + + - name: Install dependencies + run: | + sudo -E apt update + sudo -E apt --assume-yes install openjdk-11-jdk libbz2-dev clang unzip libpugixml-dev libtbb-dev intel-opencl-icd ocl-icd-opencl-dev opencl-headers + + - name: Download OpenVINO package + uses: actions/download-artifact@v3 + with: + name: openvino_package + path: ${{ env.INSTALL_DIR }} + + - name: Download OpenVINO tests package + uses: actions/download-artifact@v3 + with: + name: openvino_tests + path: ${{ env.INSTALL_TEST_DIR }} + + - name: Extract OpenVINO packages + run: | + pushd ${{ 
env.INSTALL_DIR }} + tar -xzf openvino_package.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz || exit 1 + popd + pushd ${{ env.INSTALL_TEST_DIR }} + tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz || exit 1 + popd + + - name: Intel CPU plugin func tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_cpu_func_tests --gtest_print_time=1 --gtest_filter=*smoke* --gtest_output=xml:"${{ env.INSTALL_TEST_DIR }}/TEST-CPUFuncTests.xml" + + - name: Upload Test Results + uses: actions/upload-artifact@v3 + if: ${{ always() }} + with: + name: test-results-functional-cpu + path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + if-no-files-found: 'error' diff --git a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py index d348f933843f6a..4c1be35f0f28b8 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py @@ -177,7 +177,7 @@ def get_builtin_extensions_path(): base_paths.append(repo_dir) for base_path in base_paths: - paths = glob.glob(os.path.join(base_path, "bin", "*", "*", "*test_builtin_extensions*")) + paths = glob.glob(os.path.join(base_path, "**", "*test_builtin_extensions*"), recursive=True) for path in paths: if re.search(r"(lib)?test_builtin_extensions.?\.(dll|so)", path): return path diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LRN.py b/tests/layer_tests/tensorflow_tests/test_tf_LRN.py index 145041b981438e..fca388312767de 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LRN.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LRN.py @@ -1,5 +1,6 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os import pytest import tensorflow as tf @@ -27,6 +28,7 @@ def create_lrn_net(self, input_shape, depth_radius, bias, alpha, beta): @pytest.mark.parametrize("params", test_data_basic) 
@pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.skipif(os.getenv("GITHUB_ACTIONS") == 'true', reason="Ticket - 113362") def test_lrn_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_lrn_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SplitV.py b/tests/layer_tests/tensorflow_tests/test_tf_SplitV.py index 8d3328fe839677..d410d304c3066b 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_SplitV.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_SplitV.py @@ -1,5 +1,6 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os import pytest import tensorflow as tf @@ -33,6 +34,7 @@ def create_splitv_net(self, value_shape, size_splits_values, axis_value): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.skipif(os.getenv("GITHUB_ACTIONS") == 'true', reason="Ticket - 113359") def test_split_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_splitv_net(**params), diff --git a/tests/samples_tests/smoke_tests/common/samples_common_test_class.py b/tests/samples_tests/smoke_tests/common/samples_common_test_class.py index 395058637f361a..8ff9d22a5eeee0 100644 --- a/tests/samples_tests/smoke_tests/common/samples_common_test_class.py +++ b/tests/samples_tests/smoke_tests/common/samples_common_test_class.py @@ -144,7 +144,8 @@ def getting_samples_data_zip(url, samples_path, size_of_chunk=128): print("\nRemoving samples_smoke_tests_data.zip...") os.remove(samples_path) - except Exception: + except Exception as error: + print(error) print(f"Exception during downloading samples_smoke_tests_data.zip") class SamplesCommonTestClass(): diff --git a/tools/mo/unit_tests/mo/frontend_ngraph_test.py b/tools/mo/unit_tests/mo/frontend_ngraph_test.py index 4c44d61aeb9579..463017f95addc0 100644 --- 
a/tools/mo/unit_tests/mo/frontend_ngraph_test.py +++ b/tools/mo/unit_tests/mo/frontend_ngraph_test.py @@ -24,7 +24,7 @@ def test_no_ie_ngraph(self, mock_find): 'Consider building the Inference Engine and nGraph Python APIs from sources' in i] assert res - +@pytest.mark.skipif(os.getenv("GITHUB_ACTIONS") == 'true', reason="Ticket - 113358") def test_frontends(): setup_env() args = [sys.executable, '-m', 'pytest', @@ -33,7 +33,7 @@ def test_frontends(): status = subprocess.run(args, env=os.environ) assert not status.returncode - +@pytest.mark.skipif(os.getenv("GITHUB_ACTIONS") == 'true', reason="Ticket - 113358") def test_moc_extractor(): setup_env() args = [sys.executable, '-m', 'pytest', diff --git a/tools/mo/unit_tests/mo/utils/simple_proto_parser_test.py b/tools/mo/unit_tests/mo/utils/simple_proto_parser_test.py index 80dce8ca0fb3e9..d2917e3e512a13 100644 --- a/tools/mo/unit_tests/mo/utils/simple_proto_parser_test.py +++ b/tools/mo/unit_tests/mo/utils/simple_proto_parser_test.py @@ -172,7 +172,7 @@ def test_correct_proto_reader_from_file(self): self.assertDictEqual(result, expected_result) os.unlink(file_name) - @unittest.skipIf(sys.platform.startswith("win"), "chmod() on Windows do nor support not writable dir") + @unittest.skip("Temporary disabled since chmod() is temporary not working on Linux. 
(Windows do not support not writable dir at all)") def test_proto_reader_from_non_readable_file(self): file = tempfile.NamedTemporaryFile('wt', delete=False) file.write(correct_proto_message_1) diff --git a/tools/ovc/unit_tests/ovc/utils/cli_parser_test.py b/tools/ovc/unit_tests/ovc/utils/cli_parser_test.py index 0f9aef0ce673b1..409fcf4ee27f42 100644 --- a/tools/ovc/unit_tests/ovc/utils/cli_parser_test.py +++ b/tools/ovc/unit_tests/ovc/utils/cli_parser_test.py @@ -1255,12 +1255,12 @@ def tearDownClass(cls): def test_single_writable_dir(self): self.assertEqual(__class__.WRITABLE_DIR, writable_dir(__class__.WRITABLE_DIR)) - @unittest.skipIf(sys.platform.startswith("win"), "chmod() on Windows do nor support not writable dir") + @unittest.skip("Temporary disabled since chmod() is temporary not working on Linux. (Windows do not support not writable dir at all)") def test_single_non_writable_dir(self): with self.assertRaises(Error) as cm: writable_dir(__class__.NOT_WRITABLE_DIR) - @unittest.skipIf(sys.platform.startswith("win"), "chmod() on Windows do nor support not writable dir") + @unittest.skip("Temporary disabled since chmod() is temporary not working on Linux. 
(Windows do not support not writable dir at all)") def test_single_non_writable_sub_dir(self): with self.assertRaises(Error) as cm: writable_dir(__class__.NOT_WRITABLE_SUB_DIR) From 2663b002e8e3780468ef485768a1a8f902bceb9d Mon Sep 17 00:00:00 2001 From: Evgeny Kotov Date: Tue, 11 Jul 2023 10:27:33 +0200 Subject: [PATCH 13/21] fix transformation; add unit test (#18346) Co-authored-by: Ivan Tikhonov --- .../common_optimizations/ric_fusion.cpp | 5 +- .../preprocessing_fusion_tests.cpp | 64 +++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp index d846c5f6f9fb64..ce6e3ea616a4a7 100644 --- a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp @@ -18,6 +18,7 @@ #include #include "itt.hpp" +#include "openvino/op/util/pad_base.hpp" #include "openvino/util/log.hpp" #include "transformations/utils/utils.hpp" @@ -515,8 +516,8 @@ class PassThrough : public ov::pass::MatcherPass { public: PassThrough() { MATCHER_SCOPE(PassThrough); - auto pattern_root = - pattern::wrap_type(); + auto pattern_root = pattern:: + wrap_type(); auto callback = [=](pattern::Matcher& m) { auto root = m.get_match_root(); diff --git a/src/common/transformations/tests/common_optimizations/preprocessing_fusion_tests.cpp b/src/common/transformations/tests/common_optimizations/preprocessing_fusion_tests.cpp index ecb4d2576429f2..80db36d6718eba 100644 --- a/src/common/transformations/tests/common_optimizations/preprocessing_fusion_tests.cpp +++ b/src/common/transformations/tests/common_optimizations/preprocessing_fusion_tests.cpp @@ -18,6 +18,7 @@ #include #include "common_test_utils/ngraph_test_utils.hpp" +#include "openvino/opsets/opset12.hpp" using namespace testing; using namespace ngraph; 
@@ -205,6 +206,69 @@ TEST_F(TransformationTestsF, RICFusionHard) { comparator.enable(FunctionsComparator::CmpValues::ACCURACY); } +TEST_F(TransformationTestsF, RICFusionHardNegativePad12) { + { + auto input = create_param({-1, -1, -1, -1}); + auto relu = std::make_shared(input); + + auto input2 = create_param({-1, 3, -1, -1}); + auto split = std::make_shared(input2, Constant::create(element::i64, {}, {1}), 3); + auto concat = std::make_shared(OutputVector{split->output(2), split->output(1), split->output(0)}, 1); + auto eltwise = std::make_shared(relu, concat); + + auto pads_begin = Constant::create(element::i64, Shape{4}, {0, 0, 0, -1}); + auto pads_end = Constant::create(element::i64, Shape{4}, {0, 0, 0, -1}); + auto pad = std::make_shared(eltwise, pads_begin, pads_end, op::PadMode::CONSTANT); + + auto gconv = create_group_conv(pad, {3, 4, 1, 3, 3}); + + auto pow = std::make_shared(gconv, Constant::create(element::f32, Shape{}, {-1.0f})); + auto convert1 = std::make_shared(pow, element::f16); + auto convert2 = std::make_shared(convert1, element::f32); + + auto gconv2 = create_group_conv(convert2, {12, 1, 1, 3, 3}); + + auto conv = create_conv(gconv2, {6, 12, 3, 3}); + auto conv2 = create_conv(concat, {6, 3, 3, 3}); + + function = std::make_shared(NodeVector{conv, conv2}, ParameterVector{input, input2}); + + apply_reverse_input_channels(function, {{0, "NCHW"}}); + + manager.register_pass(); + } + { + auto input = create_param({-1, -1, -1, -1}); + auto relu = std::make_shared(input); + + auto input2 = create_param({-1, 3, -1, -1}); + auto eltwise = std::make_shared(relu, input2); + + auto pads_begin = Constant::create(element::i64, Shape{4}, {0, 0, 0, -1}); + auto pads_end = Constant::create(element::i64, Shape{4}, {0, 0, 0, -1}); + auto pad = std::make_shared(eltwise, pads_begin, pads_end, op::PadMode::CONSTANT); + + // 0 1 2 2 1 0 + // [0, 1, 2, 3]-[4, 5, 6, 7]-[8, 9, 10, 11] -> [8, 9, 10, 11]-[4, 5, 6, 7]-[0, 1, 2, 3] + auto gconv = 
create_group_conv_with_gather(pad, {3, 4, 1, 3, 3}, {2, 1, 0}); + + auto pow = std::make_shared(gconv, Constant::create(element::f32, Shape{}, {-1.0f})); + auto convert1 = std::make_shared(pow, element::f16); + auto convert2 = std::make_shared(convert1, element::f32); + + auto gconv2 = create_group_conv_with_gather(convert2, {12, 1, 1, 3, 3}, {8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}); + + auto conv = create_conv_with_gather(gconv2, {6, 12, 3, 3}, {8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}); + auto conv2 = create_conv_with_gather(input2, {6, 3, 3, 3}, {2, 1, 0}); + + function_ref = std::make_shared(NodeVector{conv, conv2}, ParameterVector{input, input2}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::ACCURACY); +} + TEST_F(TransformationTestsF, RICFusionDynamic) { { auto input = create_param({-1, -1, -1, -1}); From 827fb0234b4687856edf5ed989eac8f0826a4f4d Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Tue, 11 Jul 2023 10:38:31 +0200 Subject: [PATCH 14/21] [Shape Infer] RNN/GRU/LSTM Cell & Sequence - common shape infer and review (#18009) * rrn seq base common shape infer init * Update and add shape infer to all seq rnn based ops * Use shape_infer in lstm seq core op * Use shape_infer in rnn seq core op * Unified shape infer for rnn cell base * Update and add missing cells shape_infer functions * Use shpe_infer in rnn_cell * Update shape_infer map * shape_infer tests * Move new shape_infer to TA map * More tests and shape improvements * Introduce common base tests * Fix merge conflict * Missing setters and default ctor tests * Use RNNCellBase class instead of template type op * Update v0 LSTMSequence to use RNNCellBase as parent class * Style * V0::LSTMSequence update * Specify opset in shape infer registry * Move rank check * Output vec generation refactor * Update num_dir_validation * Tests warining fix * Test types 
adjustment * Commented code cleanup * Move test helpers to test fixture * Common default ctor tests for Cell * Update GRU shape infer tests * Update LSTM shape infer tests * Update RNN shape infer tests * File names update * Functions renaming * Cast hidden size in test * Move v0::LSTMSequence conformance test --------- Co-authored-by: Pawel Raasz --- .../include/ov_ops/augru_sequence.hpp | 3 + src/core/include/openvino/op/gru_cell.hpp | 4 + src/core/include/openvino/op/gru_sequence.hpp | 6 + .../include/openvino/op/lstm_sequence.hpp | 19 +- src/core/include/openvino/op/rnn_sequence.hpp | 3 + .../openvino/op/util/rnn_cell_base.hpp | 3 + .../include/augru_cell_shape_inference.hpp | 6 +- .../augru_sequence_shape_inference.hpp | 11 +- .../include/gru_cell_shape_inference.hpp | 97 +----- .../include/gru_sequence_shape_inference.hpp | 163 +-------- .../include/lstm_cell_shape_inference.hpp | 185 ++-------- .../include/lstm_sequence_shape_inference.hpp | 43 +++ .../include/rnn_base_shape_inference.hpp | 280 +++++++++++++++ .../include/rnn_cell_shape_inference.hpp | 23 ++ .../include/rnn_sequence_shape_inference.hpp | 19 + src/core/src/op/lstm_cell.cpp | 2 +- src/core/src/op/lstm_sequence.cpp | 326 ++--------------- src/core/src/op/rnn_cell.cpp | 71 +--- src/core/src/op/rnn_sequence.cpp | 107 +----- src/core/tests/type_prop/gru_cell.cpp | 66 ++++ src/core/tests/type_prop/lstm_cell.cpp | 22 +- src/core/tests/type_prop/lstm_sequence.cpp | 182 ++++++---- src/core/tests/type_prop/rnn_cell.cpp | 59 ++-- src/core/tests/type_prop/rnn_cell_base.cpp | 177 ++++++++++ src/core/tests/type_prop/rnn_seq_base.cpp | 284 +++++++++++++++ src/core/tests/type_prop/rnn_sequence.cpp | 87 +++-- .../utils/shape_inference/shape_inference.cpp | 10 +- .../gru_cell_shape_inference_test.cpp | 144 ++++++++ .../shape_inference_test/gru_cell_test.cpp | 123 ------- ... 
=> gru_sequence_shape_inference_test.cpp} | 121 ++++--- ...cpp => lstm_cell_shape_inference_test.cpp} | 51 ++- .../lstm_seq_shape_inference_test.cpp | 327 ++++++++++++++++++ .../rnn_cell_shape_inference_test.cpp | 110 ++++++ .../rnn_seq_shape_inference_test.cpp | 132 +++++++ .../op_impl_check/single_op_graph.cpp | 40 +-- 35 files changed, 2102 insertions(+), 1204 deletions(-) create mode 100644 src/core/shape_inference/include/lstm_sequence_shape_inference.hpp create mode 100644 src/core/shape_inference/include/rnn_base_shape_inference.hpp create mode 100644 src/core/shape_inference/include/rnn_cell_shape_inference.hpp create mode 100644 src/core/shape_inference/include/rnn_sequence_shape_inference.hpp create mode 100644 src/core/tests/type_prop/rnn_cell_base.cpp create mode 100644 src/core/tests/type_prop/rnn_seq_base.cpp create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_shape_inference_test.cpp delete mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_test.cpp rename src/plugins/intel_cpu/tests/unit/shape_inference_test/{gru_sequence_test.cpp => gru_sequence_shape_inference_test.cpp} (51%) rename src/plugins/intel_cpu/tests/unit/shape_inference_test/{lstm_cell_shape_inference.cpp => lstm_cell_shape_inference_test.cpp} (60%) create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_cell_shape_inference_test.cpp create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_seq_shape_inference_test.cpp diff --git a/src/common/transformations/include/ov_ops/augru_sequence.hpp b/src/common/transformations/include/ov_ops/augru_sequence.hpp index 9bad673b39841a..c1d93f3d25425f 100644 --- a/src/common/transformations/include/ov_ops/augru_sequence.hpp +++ b/src/common/transformations/include/ov_ops/augru_sequence.hpp @@ -43,6 +43,9 @@ class TRANSFORMATIONS_API AUGRUSequence : 
public ov::op::util::RNNCellBase { op::RecurrentSequenceDirection get_direction() const { return m_direction; } + void set_direction(const RecurrentSequenceDirection& direction) { + m_direction = direction; + } protected: op::RecurrentSequenceDirection m_direction; diff --git a/src/core/include/openvino/op/gru_cell.hpp b/src/core/include/openvino/op/gru_cell.hpp index 15543eec2b943d..de58ba04154347 100644 --- a/src/core/include/openvino/op/gru_cell.hpp +++ b/src/core/include/openvino/op/gru_cell.hpp @@ -134,6 +134,10 @@ class OPENVINO_API GRUCell : public util::RNNCellBase { return m_linear_before_reset; } + void set_linear_before_reset(bool linear_before_reset) { + m_linear_before_reset = linear_before_reset; + } + private: /// brief Add and initialize bias input to all zeros. void add_default_bias_input(); diff --git a/src/core/include/openvino/op/gru_sequence.hpp b/src/core/include/openvino/op/gru_sequence.hpp index fae54509ad0c64..220dfed4b8a81c 100644 --- a/src/core/include/openvino/op/gru_sequence.hpp +++ b/src/core/include/openvino/op/gru_sequence.hpp @@ -44,9 +44,15 @@ class OPENVINO_API GRUSequence : public util::RNNCellBase { bool get_linear_before_reset() const { return m_linear_before_reset; } + void set_linear_before_reset(bool linear_before_reset) { + m_linear_before_reset = linear_before_reset; + } op::RecurrentSequenceDirection get_direction() const { return m_direction; } + void set_direction(const RecurrentSequenceDirection& direction) { + m_direction = direction; + } protected: op::RecurrentSequenceDirection m_direction; diff --git a/src/core/include/openvino/op/lstm_sequence.hpp b/src/core/include/openvino/op/lstm_sequence.hpp index 1e7599a35ba982..f29b9db58aec43 100644 --- a/src/core/include/openvino/op/lstm_sequence.hpp +++ b/src/core/include/openvino/op/lstm_sequence.hpp @@ -29,10 +29,10 @@ namespace v0 { /// /// /// \ingroup ov_ops_cpp_api -class OPENVINO_API LSTMSequence : public Op { +class OPENVINO_API LSTMSequence : public 
util::RNNCellBase { public: - OPENVINO_OP("LSTMSequence", "opset1"); - LSTMSequence(); + OPENVINO_OP("LSTMSequence", "opset1", util::RNNCellBase); + LSTMSequence() = default; using direction = RecurrentSequenceDirection; @@ -87,11 +87,14 @@ class OPENVINO_API LSTMSequence : public Op { return m_activations; } float get_clip_threshold() const { - return m_clip_threshold; + return m_clip; } direction get_direction() const { return m_direction; } + void set_direction(const direction& dir) { + m_direction = dir; + } std::int64_t get_hidden_size() const { return m_hidden_size; } @@ -103,12 +106,7 @@ class OPENVINO_API LSTMSequence : public Op { } private: - std::vector m_activations_alpha; - std::vector m_activations_beta; - std::vector m_activations; - float m_clip_threshold; direction m_direction; - std::int64_t m_hidden_size; bool m_input_forget; LSTMWeightsFormat m_weights_format; }; @@ -166,6 +164,9 @@ class OPENVINO_API LSTMSequence : public util::RNNCellBase { direction get_direction() const { return m_direction; } + void set_direction(const direction& dir) { + m_direction = dir; + } private: direction m_direction{direction::FORWARD}; diff --git a/src/core/include/openvino/op/rnn_sequence.hpp b/src/core/include/openvino/op/rnn_sequence.hpp index 0ee5543687422f..ae61adb49cc703 100644 --- a/src/core/include/openvino/op/rnn_sequence.hpp +++ b/src/core/include/openvino/op/rnn_sequence.hpp @@ -44,6 +44,9 @@ class OPENVINO_API RNNSequence : public util::RNNCellBase { op::RecurrentSequenceDirection get_direction() const { return m_direction; } + void set_direction(const RecurrentSequenceDirection& direction) { + m_direction = direction; + } protected: op::RecurrentSequenceDirection m_direction; diff --git a/src/core/include/openvino/op/util/rnn_cell_base.hpp b/src/core/include/openvino/op/util/rnn_cell_base.hpp index f2caf0788bb10c..9afbc8a8649740 100644 --- a/src/core/include/openvino/op/util/rnn_cell_base.hpp +++ b/src/core/include/openvino/op/util/rnn_cell_base.hpp 
@@ -100,6 +100,9 @@ class OPENVINO_API RNNCellBase : public Op { std::size_t get_hidden_size() const { return m_hidden_size; } + void set_hidden_size(size_t hidden_size) { + m_hidden_size = hidden_size; + } float get_clip() const { return m_clip; } diff --git a/src/core/shape_inference/include/augru_cell_shape_inference.hpp b/src/core/shape_inference/include/augru_cell_shape_inference.hpp index 62e061f8083fd7..a320b6668fcf1d 100644 --- a/src/core/shape_inference/include/augru_cell_shape_inference.hpp +++ b/src/core/shape_inference/include/augru_cell_shape_inference.hpp @@ -3,9 +3,9 @@ // #pragma once -#include "gru_cell_shape_inference.hpp" #include "ov_ops/augru_cell.hpp" #include "ov_ops/augru_sequence.hpp" +#include "rnn_base_shape_inference.hpp" #include "utils.hpp" namespace ov { @@ -25,7 +25,9 @@ void shape_infer(const ov::op::internal::AUGRUCell* op, input_shapes.size(), "."); - rnn::gru_cell_shape_infer(op, input_shapes, output_shapes); + constexpr auto num_gates = 3; + constexpr auto num_state_nodes = 1; + output_shapes = rnn::cell_base_shape_infer(op, input_shapes, num_gates, num_state_nodes); // `A` input shape validation // [batch_size, 1] const auto& a_shape = input_shapes.back(); diff --git a/src/core/shape_inference/include/augru_sequence_shape_inference.hpp b/src/core/shape_inference/include/augru_sequence_shape_inference.hpp index be97c1ced7fdf3..f02d7499ea8c2e 100644 --- a/src/core/shape_inference/include/augru_sequence_shape_inference.hpp +++ b/src/core/shape_inference/include/augru_sequence_shape_inference.hpp @@ -3,8 +3,8 @@ // #pragma once -#include "gru_sequence_shape_inference.hpp" #include "ov_ops/augru_sequence.hpp" +#include "rnn_base_shape_inference.hpp" #include "utils.hpp" namespace ov { @@ -24,7 +24,14 @@ void shape_infer(const ov::op::internal::AUGRUSequence* op, input_shapes.size(), "."); - rnn::gru_sequence_shape_infer(op, input_shapes, output_shapes); + constexpr auto num_gates = 3; + constexpr auto num_state_nodes = 1; + 
output_shapes = rnn::seq_base_shape_infer(op, + input_shapes, + num_gates, + num_state_nodes, + op->get_direction(), + op->get_linear_before_reset()); // A input shape validation // [batch_size, seq_length, 1] const auto& a_shape = input_shapes.back(); diff --git a/src/core/shape_inference/include/gru_cell_shape_inference.hpp b/src/core/shape_inference/include/gru_cell_shape_inference.hpp index 5e68442e555e11..277801b1da8d69 100644 --- a/src/core/shape_inference/include/gru_cell_shape_inference.hpp +++ b/src/core/shape_inference/include/gru_cell_shape_inference.hpp @@ -11,94 +11,17 @@ namespace ov { namespace op { -namespace rnn { - -// Output shape layout: -// output_shapes[0]: [batch_size, hidden_size] // Rank always 2 -template -void gru_cell_shape_infer(const OpType* op, - const std::vector& input_shapes, - std::vector& output_shapes) { - NODE_VALIDATION_CHECK(op, - input_shapes.size() >= 5 && output_shapes.size() == 1, - "Incorrect number of shapes has been provided."); - - auto& y_out_shape = output_shapes[0]; - y_out_shape.resize(2); // Rank always 2 - - rnn::validate_inputs_rank(op, input_shapes, {2, 2, 2, 2, 1}); - - const auto& x_pshape = input_shapes[0]; // [batch_size, input_size] - const auto& ht_pshape = input_shapes[1]; // [batch_size, hidden_size] - const auto& w_pshape = input_shapes[2]; // [3 * hidden_size, input_size] - const auto& r_pshape = input_shapes[3]; // [3 * hidden_size, hidden_size] - const auto& b_pshape = input_shapes[4]; // if linear_before_reset [4 * hidden_size], otherwise [3 * hidden_size] - - using DimType = typename std::iterator_traits::value_type; - - // Merge batch_size dimension across all inputs to evaluate output[0] dimension - DimType merged_batch_size = x_pshape.rank().is_static() ? x_pshape[0] : DimType(); - NODE_VALIDATION_CHECK( - op, - DimType::merge(merged_batch_size, merged_batch_size, ht_pshape.rank().is_static() ? 
ht_pshape[0] : DimType()), - "Dimension `batch_size` is not matched between inputs."); - - // Set batch_size dimension - y_out_shape[0] = merged_batch_size; - - // Merge hidden_size dimension across all inputs to evaluate output dimension - // `hidden_size` attribute is not used for backward compatibility - DimType merged_hidden_size = ht_pshape.rank().is_static() ? ht_pshape[1] : DimType(); - NODE_VALIDATION_CHECK( - op, - DimType::merge(merged_hidden_size, merged_hidden_size, r_pshape.rank().is_static() ? r_pshape[1] : DimType()), - "Dimension `hidden_size` is not matched between inputs."); - - // Validate dimensions related to hidden_size for W, R, B inputs - if (merged_hidden_size.is_static()) { - constexpr auto gru_gates_count = 3; - if (w_pshape.rank().is_static()) { - NODE_VALIDATION_CHECK(op, - w_pshape[0].compatible(merged_hidden_size * gru_gates_count), - "First dimension of W input shape is required to be compatible with ", - merged_hidden_size * gru_gates_count, - ". Got shape: ", - w_pshape[0], - "."); - } - - if (r_pshape.rank().is_static()) { - NODE_VALIDATION_CHECK(op, - r_pshape[0].compatible(merged_hidden_size * gru_gates_count), - "Fisrt dimension of R input shape is required to be compatible with ", - merged_hidden_size * gru_gates_count, - ". Got shape: ", - r_pshape[0], - "."); - } - - if (b_pshape.rank().is_static()) { - auto bias_dim_multiplier = op->get_linear_before_reset() ? (gru_gates_count + 1) : gru_gates_count; - NODE_VALIDATION_CHECK(op, - b_pshape[0].compatible(merged_hidden_size * bias_dim_multiplier), - "First dimension of B input shape is required to be compatible with ", - merged_hidden_size * bias_dim_multiplier, - ". 
Got shape: ", - b_pshape[0], - "."); - } - } - - // Set hidden_size dimension - y_out_shape[1] = merged_hidden_size; -} -} // namespace rnn namespace v3 { -template -void shape_infer(const ov::op::v3::GRUCell* op, - const std::vector& input_shapes, - std::vector& output_shapes) { - rnn::gru_cell_shape_infer(op, input_shapes, output_shapes); +template +std::vector shape_infer(const GRUCell* op, const std::vector& input_shapes) { + constexpr auto num_gates = 3; + constexpr auto num_state_nodes = 1; + return rnn::cell_base_shape_infer(op, input_shapes, num_gates, num_state_nodes, op->get_linear_before_reset()); +} + +template +void shape_infer(const GRUCell* op, const std::vector& input_shapes, std::vector& output_shapes) { + output_shapes = shape_infer(op, input_shapes); } } // namespace v3 } // namespace op diff --git a/src/core/shape_inference/include/gru_sequence_shape_inference.hpp b/src/core/shape_inference/include/gru_sequence_shape_inference.hpp index a781853f013dff..e7b5a8065fb152 100644 --- a/src/core/shape_inference/include/gru_sequence_shape_inference.hpp +++ b/src/core/shape_inference/include/gru_sequence_shape_inference.hpp @@ -5,162 +5,16 @@ #include #include +#include "rnn_base_shape_inference.hpp" #include "utils.hpp" namespace ov { namespace op { -namespace rnn { -template -void validate_inputs_rank(const OpType* op, - const std::vector& input_shapes, - const std::vector& expected_ranks) { - NODE_VALIDATION_CHECK(op, input_shapes.size() >= expected_ranks.size(), "Can't validate inputs rank."); - for (size_t i = 0; i < expected_ranks.size(); ++i) { - NODE_VALIDATION_CHECK(op, - input_shapes[i].rank().compatible(expected_ranks[i]), - "Shape rank of input at ", - i, - " is incompatible. 
Expected rank: ", - expected_ranks[i], - ", actual shape: ", - input_shapes[i], - "."); - } -} - -// Output shapes layout: -// output_shapes[0]: [batch_size, num_directions, seq_length, hidden_size] // Rank always 4 -// output_shapes[1]: [batch_size, num_directions, hidden_size] // Rank always 3 -template -void gru_sequence_shape_infer(const OpType* op, - const std::vector& input_shapes, - std::vector& output_shapes) { - NODE_VALIDATION_CHECK(op, - input_shapes.size() >= 6 && output_shapes.size() == 2, - "Incorrect number of shapes has been provided."); - - auto& y_out_shape = output_shapes[0]; - auto& ho_out_shape = output_shapes[1]; - y_out_shape.resize(4); // Rank always 4 - ho_out_shape.resize(3); // Rank always 3 - - rnn::validate_inputs_rank(op, input_shapes, {3, 3, 1, 3, 3, 2}); - - const auto& x_pshape = input_shapes[0]; - const auto& ht_pshape = input_shapes[1]; - const auto& sl_pshape = input_shapes[2]; - const auto& w_pshape = input_shapes[3]; - const auto& r_pshape = input_shapes[4]; - const auto& b_pshape = input_shapes[5]; - - using DimType = typename std::iterator_traits::value_type; - - // Merge batch_size dimension across all inputs to evaluate output[0] dimension - DimType merged_batch_size = x_pshape.rank().is_static() ? x_pshape[0] : DimType(); - NODE_VALIDATION_CHECK( - op, - DimType::merge(merged_batch_size, merged_batch_size, ht_pshape.rank().is_static() ? ht_pshape[0] : DimType()) && - DimType::merge(merged_batch_size, - merged_batch_size, - sl_pshape.rank().is_static() ? sl_pshape[0] : DimType()), - "Dimension `batch_size` is not matched between inputs."); - - // Set batch_size dimension - y_out_shape[0] = merged_batch_size; - ho_out_shape[0] = merged_batch_size; - - // Merge hidden_size dimension across all inputs to evaluate output dimension - // `hidden_size` attribute is not used for backward compatibility - DimType merged_hidden_size = ht_pshape.rank().is_static() ? 
ht_pshape[2] : DimType(); - NODE_VALIDATION_CHECK(op, - DimType::merge(merged_hidden_size, - merged_hidden_size, - ht_pshape.rank().is_static() ? ht_pshape[2] : DimType()) && - DimType::merge(merged_hidden_size, - merged_hidden_size, - r_pshape.rank().is_static() ? r_pshape[2] : DimType()), - "Dimension `hidden_size` is not matched between inputs."); - - // Validate num_directions dimension across all inputs - size_t valid_num_directions; - const auto m_direction = op->get_direction(); - if (m_direction == op::RecurrentSequenceDirection::FORWARD || - m_direction == op::RecurrentSequenceDirection::REVERSE) { - valid_num_directions = 1; - } else if (m_direction == op::RecurrentSequenceDirection::BIDIRECTIONAL) { - valid_num_directions = 2; - } else { - NODE_VALIDATION_CHECK(op, false, "Attribute direction must be FORWARD or REVERSE or BIDIRECTIONAL."); - } - - DimType merged_num_directions = DimType(valid_num_directions); - NODE_VALIDATION_CHECK(op, - DimType::merge(merged_num_directions, - merged_num_directions, - ht_pshape.rank().is_static() ? ht_pshape[1] : DimType()) && - DimType::merge(merged_num_directions, - merged_num_directions, - w_pshape.rank().is_static() ? w_pshape[0] : DimType()) && - DimType::merge(merged_num_directions, - merged_num_directions, - r_pshape.rank().is_static() ? r_pshape[0] : DimType()) && - DimType::merge(merged_num_directions, - merged_num_directions, - b_pshape.rank().is_static() ? b_pshape[0] : DimType()), - "Dimension `num_directions` doesn't match to other inputs or `direction` attribute."); - - // Set num_directions dimension - y_out_shape[1] = merged_num_directions; - ho_out_shape[1] = merged_num_directions; - - // Set seq_len dimension - y_out_shape[2] = x_pshape.rank().is_static() ? 
x_pshape[1] : DimType(); - - // Validate dimensions related to hidden_size for W, R, B inputs - if (merged_hidden_size.is_static()) { - constexpr auto gru_seq_gates_count = 3; - if (w_pshape.rank().is_static()) { - NODE_VALIDATION_CHECK(op, - w_pshape[1].compatible(merged_hidden_size * gru_seq_gates_count), - "Second dimension of W input shape is required to be compatible with ", - merged_hidden_size * gru_seq_gates_count, - ". Got shape: ", - w_pshape[1], - "."); - } - - if (r_pshape.rank().is_static()) { - NODE_VALIDATION_CHECK(op, - r_pshape[1].compatible(merged_hidden_size * gru_seq_gates_count), - "Second dimension of R input shape is required to be compatible with ", - merged_hidden_size * gru_seq_gates_count, - ". Got shape: ", - r_pshape[1], - "."); - } - - if (b_pshape.rank().is_static()) { - auto bias_dim_multiplier = op->get_linear_before_reset() ? (gru_seq_gates_count + 1) : gru_seq_gates_count; - NODE_VALIDATION_CHECK(op, - b_pshape[1].compatible(merged_hidden_size * bias_dim_multiplier), - "Second dimension of B input shape is required to be compatible with ", - merged_hidden_size * bias_dim_multiplier, - ". 
Got shape: ", - b_pshape[1], - "."); - } - } - - // Set hidden_size dimension - y_out_shape[3] = merged_hidden_size; - ho_out_shape[2] = merged_hidden_size; -} -} // namespace rnn namespace v5 { -template +template void shape_infer(const ov::op::v5::GRUSequence* op, - const std::vector& input_shapes, - std::vector& output_shapes) { + const std::vector& input_shapes, + std::vector& output_shapes) { constexpr size_t expected_in_shapes_count = 6; NODE_VALIDATION_CHECK(op, input_shapes.size() == expected_in_shapes_count, @@ -170,7 +24,14 @@ void shape_infer(const ov::op::v5::GRUSequence* op, input_shapes.size(), "."); - rnn::gru_sequence_shape_infer(op, input_shapes, output_shapes); + constexpr auto num_gates = 3; + constexpr auto num_state_nodes = 1; + output_shapes = rnn::seq_base_shape_infer(op, + input_shapes, + num_gates, + num_state_nodes, + op->get_direction(), + op->get_linear_before_reset()); } } // namespace v5 } // namespace op diff --git a/src/core/shape_inference/include/lstm_cell_shape_inference.hpp b/src/core/shape_inference/include/lstm_cell_shape_inference.hpp index e39131d8fd89fa..0100f2d6329041 100644 --- a/src/core/shape_inference/include/lstm_cell_shape_inference.hpp +++ b/src/core/shape_inference/include/lstm_cell_shape_inference.hpp @@ -2,174 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 // #pragma once -#include - +#include "openvino/op/lstm_cell.hpp" +#include "openvino/op/lstm_sequence.hpp" +#include "rnn_base_shape_inference.hpp" #include "utils.hpp" namespace ov { namespace op { -namespace ShapeInferLSTM { -template -void lstm_shape_infer(const OpsType* op, - const std::vector& input_shapes, - std::vector& output_shapes, - std::size_t gates_count) { - using DimType = typename std::iterator_traits::value_type; - enum { X, initial_hidden_state, initial_cell_state, W, R, B }; - std::vector input_rank_static(6, false); - bool all_rank_dynamic = true; - bool all_rank_static = true; - // Prepare OutShape - auto& hidden_shape = output_shapes[0]; 
- auto& cell_shape = output_shapes[1]; - hidden_shape.resize(2); - cell_shape.resize(2); - - // If rank is dynamic, then output_shape is undefined - for (size_t i = 0; i < input_shapes.size() && i < 6; i++) { - input_rank_static[i] = input_shapes[i].rank().is_static(); - all_rank_dynamic = all_rank_dynamic && !input_rank_static[i]; - all_rank_static = all_rank_static && input_rank_static[i]; - } - - if (all_rank_dynamic) { - return; - } - const auto& x_pshape = input_shapes[0]; - const auto& w_pshape = input_shapes[3]; - - DimType output_batch_size; - DimType output_hidden_size; - bool is_batch_init = false; - bool is_hidden_init = false; - - // deduce batch/hidden_size - for (size_t i = 0; i < input_shapes.size() && i < 6; i++) { - const auto& input = input_shapes[i]; - if (input_rank_static[i]) { - // batch could be deduced from x, cell_state or hidden_state - if (i == X || i == initial_cell_state || i == initial_hidden_state) { - NODE_VALIDATION_CHECK(op, - (input.size() == 2), - "LSTMCell input rank is not correct for ", - i, - " input parameter. 
Current rank: ", - input.size(), - ", expected: 2."); - if (!is_batch_init) { - output_batch_size = input[0]; - is_batch_init = true; - } else { - NODE_VALIDATION_CHECK( - op, - DimType::merge(output_batch_size, output_batch_size, input[0]), - "Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state " - "inputs."); - } - if (i == initial_cell_state || i == initial_hidden_state) { - if (!is_hidden_init) { - output_hidden_size = input[1]; - is_hidden_init = true; - } else { - NODE_VALIDATION_CHECK(op, - DimType::merge(output_hidden_size, output_hidden_size, input[1]), - "Parameter hidden_size not matched for W, R, B, initial_hidden_state and " - "initial_cell_state " - "inputs."); - } - } - } else if (i == W || i == R || i == B) { - // check input dimension - if (i == B) { - NODE_VALIDATION_CHECK(op, - (input.size() == 1), - "LSTMCell input tensor dimension is not correct for ", - i, - " input parameter. Current input length: ", - input.size(), - ", expected: 1."); - if (input[0].is_static()) { - if (!is_hidden_init) { - output_hidden_size = input[0].get_length() / gates_count; - is_hidden_init = true; - } else { - NODE_VALIDATION_CHECK( - op, - DimType::merge(output_hidden_size, - output_hidden_size, - input[0].get_length() / gates_count), - "Parameter hidden_size not matched for W, R, B, initial_hidden_state and " - "initial_cell_state " - "inputs."); - } - } - } else { - NODE_VALIDATION_CHECK(op, - (input.size() == 2), - "LSTMCell input rank is not correct for ", - i, - " input parameter. 
Current rank: ", - input.size(), - ", expected: 2."); - if (input[0].is_static()) { - if (!is_hidden_init) { - output_hidden_size = input[0].get_length() / gates_count; - is_hidden_init = true; - } else { - NODE_VALIDATION_CHECK( - op, - DimType::merge(output_hidden_size, - output_hidden_size, - input[0].get_length() / gates_count), - "Parameter hidden_size not matched for W, R, B, initial_hidden_state and " - "initial_cell_state " - "inputs."); - } - } - if (i == R) { - if (!is_hidden_init) { - output_hidden_size = input[1]; - is_hidden_init = true; - } else { - NODE_VALIDATION_CHECK(op, - DimType::merge(output_hidden_size, output_hidden_size, input[1]), - "Parameter hidden_size not matched for W, R, B, initial_hidden_state " - "and initial_cell_state " - "inputs."); - } - } - } - } - } - } - // Check peepholes - if (input_shapes.size() == 7) { - const auto& p_pshape = input_shapes[6]; - NODE_VALIDATION_CHECK(op, (p_pshape.rank().compatible(1)), "LSTMCell input tensor P shall have dimension 1D."); - } - - // check input size - if (input_rank_static[X] && input_rank_static[W]) { - NODE_VALIDATION_CHECK(op, (x_pshape[1].compatible(w_pshape[1])), "LSTMCell mismatched input_size dimension."); - } - - hidden_shape[0] = output_batch_size; - hidden_shape[1] = output_hidden_size; - cell_shape[0] = output_batch_size; - cell_shape[1] = output_hidden_size; -} - -} // namespace ShapeInferLSTM namespace v0 { -using ShapeInferLSTM::lstm_shape_infer; template void shape_infer(const LSTMCell* op, const std::vector& input_shapes, std::vector& output_shapes) { NODE_VALIDATION_CHECK(op, input_shapes.size() == 7 && output_shapes.size() == 2); - const auto& p_pshape = input_shapes[6]; - - lstm_shape_infer(op, input_shapes, output_shapes, op->s_gates_count); + constexpr auto num_state_nodes = 2; + output_shapes = rnn::cell_base_shape_infer(op, input_shapes, op->s_gates_count, num_state_nodes); const auto& hidden_size = output_shapes[0][1]; + if (hidden_size.is_dynamic()) { // set 
hidden_size based on attribute + output_shapes[0][1] = op->get_hidden_size(); + output_shapes[1][1] = op->get_hidden_size(); + } + const auto& p_pshape = input_shapes[6]; if (p_pshape[0].is_static() && hidden_size.is_static()) { NODE_VALIDATION_CHECK(op, p_pshape[0].compatible(hidden_size * op->s_peepholes_count), @@ -183,11 +35,22 @@ void shape_infer(const LSTMCell* op, const std::vector& input_shapes, std::ve } // namespace v0 namespace v4 { -using ShapeInferLSTM::lstm_shape_infer; +template +std::vector shape_infer(const LSTMCell* op, const std::vector& input_shapes) { + NODE_VALIDATION_CHECK(op, input_shapes.size() == 6); + constexpr auto num_gates = 4; + constexpr auto num_state_nodes = 2; + auto output_shapes = rnn::cell_base_shape_infer(op, input_shapes, num_gates, num_state_nodes); + if (output_shapes[0][1].is_dynamic()) { // set hidden_size based on attribute + output_shapes[0][1] = op->get_hidden_size(); + output_shapes[1][1] = op->get_hidden_size(); + } + return output_shapes; +} + template void shape_infer(const LSTMCell* op, const std::vector& input_shapes, std::vector& output_shapes) { - NODE_VALIDATION_CHECK(op, input_shapes.size() == 6 && output_shapes.size() == 2); - lstm_shape_infer(op, input_shapes, output_shapes, op->s_gates_count); + output_shapes = shape_infer(op, input_shapes); } } // namespace v4 } // namespace op diff --git a/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp b/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp new file mode 100644 index 00000000000000..375b7b3bf22411 --- /dev/null +++ b/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "openvino/op/lstm_sequence.hpp" +#include "rnn_base_shape_inference.hpp" + +namespace ov { +namespace op { +namespace v0 { +template +std::vector shape_infer(const LSTMSequence* op, const std::vector& input_shapes) 
{ + constexpr auto num_gates = 4; + constexpr auto num_state_nodes = 2; + const auto output_shapes = + rnn::seq_base_shape_infer(op, input_shapes, num_gates, num_state_nodes, op->get_direction()); + // Validate rank and dimension for P input (the input doesn't exists in the next version of LSTM or other RNN based + // ops) The checks are compatible with the original restrictions of the v0::LSTMSequence + const auto& hidden_size = output_shapes[0][3]; + if (input_shapes.size() > 7 && input_shapes[7].is_static() && hidden_size.is_static()) { + const auto& p_pshape = input_shapes[7]; + NODE_VALIDATION_CHECK(op, p_pshape.rank().compatible(2), "Input tensor P should have rank equal 2."); + NODE_VALIDATION_CHECK(op, + p_pshape[1].compatible(hidden_size * (num_gates - 1)), + "Inorrect shape of P input. Second dimension is: ", + p_pshape[1], + ", expected: ", + hidden_size.get_length() * (num_gates - 1), + "."); + } + return output_shapes; +} +} // namespace v0 +namespace v5 { +template +std::vector shape_infer(const LSTMSequence* op, const std::vector& input_shapes) { + constexpr auto num_gates = 4; + constexpr auto num_state_nodes = 2; + return rnn::seq_base_shape_infer(op, input_shapes, num_gates, num_state_nodes, op->get_direction()); +} +} // namespace v5 +} // namespace op +} // namespace ov diff --git a/src/core/shape_inference/include/rnn_base_shape_inference.hpp b/src/core/shape_inference/include/rnn_base_shape_inference.hpp new file mode 100644 index 00000000000000..fcf6af6fee13d0 --- /dev/null +++ b/src/core/shape_inference/include/rnn_base_shape_inference.hpp @@ -0,0 +1,280 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "openvino/core/validation_util.hpp" +#include "openvino/op/util/rnn_cell_base.hpp" +#include "utils.hpp" + +namespace ov { +namespace op { +namespace rnn { +template +void validate_inputs_rank(const op::util::RNNCellBase* op, + const std::vector& input_shapes, + const 
std::vector& expected_ranks) { + NODE_VALIDATION_CHECK(op, input_shapes.size() >= expected_ranks.size(), "Can't validate inputs rank."); + for (size_t i = 0; i < expected_ranks.size(); ++i) { + NODE_VALIDATION_CHECK(op, + input_shapes[i].rank().compatible(expected_ranks[i]), + "Shape rank of input at ", + i, + " is incompatible. Expected rank: ", + expected_ranks[i], + ", actual shape: ", + input_shapes[i], + "."); + } +} + +// Output shape layout: +// output_shapes[0...num_state_nodes]: [batch_size, hidden_size] // Rank always 2 +template +std::vector cell_base_shape_infer(const op::util::RNNCellBase* op, + const std::vector& input_shapes, + size_t num_gates, + size_t num_state_nodes, + bool linear_before_reset = false) { + const auto num_inputs = 4 + num_state_nodes; + NODE_VALIDATION_CHECK(op, input_shapes.size() >= num_inputs, "Incorrect number of shapes has been provided."); + + std::vector output_shapes; + output_shapes.reserve(num_state_nodes); + + std::vector expected_in_ranks; + expected_in_ranks.reserve(num_inputs); + expected_in_ranks.insert(expected_in_ranks.end(), 1 + num_state_nodes, Rank(2)); + expected_in_ranks.insert(expected_in_ranks.end(), {2, 2, 1}); + + rnn::validate_inputs_rank(op, input_shapes, expected_in_ranks); + + const auto& x_pshape = input_shapes[0]; // [batch_size, input_size] + const auto& ht_pshape = input_shapes[1]; // [batch_size, hidden_size] + const auto& w_pshape = input_shapes[1 + num_state_nodes]; // [3 * hidden_size, input_size] + const auto& r_pshape = input_shapes[2 + num_state_nodes]; // [3 * hidden_size, hidden_size] + const auto& b_pshape = + input_shapes[3 + num_state_nodes]; // if linear_before_reset [4 * hidden_size], otherwise [3 * hidden_size] + + using DimType = typename TShape::value_type; + + // Merge batch_size dimension across all inputs to evaluate output[0] dimension + DimType merged_batch_size = x_pshape.rank().is_static() ? 
x_pshape[0] : DimType(); + for (size_t i = 1; i <= num_state_nodes; ++i) { + NODE_VALIDATION_CHECK(op, + DimType::merge(merged_batch_size, + merged_batch_size, + input_shapes[i].rank().is_static() ? input_shapes[i][0] : DimType()), + "Dimension `batch_size` is not matched between inputs."); + } + + // Merge hidden_size dimension across all inputs to evaluate output dimension + // `hidden_size` attribute is not used for backward compatibility + DimType merged_hidden_size = ht_pshape.rank().is_static() ? ht_pshape[1] : DimType(); + for (size_t i = 2; i <= num_state_nodes; ++i) { + if (input_shapes[i].rank().is_static()) { + NODE_VALIDATION_CHECK(op, + DimType::merge(merged_hidden_size, merged_hidden_size, input_shapes[i][1]), + "Dimension `hidden_size` is not matched between inputs."); + } + } + + if (r_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK(op, + DimType::merge(merged_hidden_size, merged_hidden_size, r_pshape[1]), + "Dimension `hidden_size` is not matched between inputs."); + } + + // Validate dimensions related to hidden_size for W, R, B inputs + if (merged_hidden_size.is_static()) { + if (w_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK(op, + w_pshape[0].compatible(merged_hidden_size * num_gates), + "First dimension of W input shape is required to be compatible with ", + merged_hidden_size * num_gates, + ". Got shape: ", + w_pshape[0], + "."); + } + + if (r_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK(op, + r_pshape[0].compatible(merged_hidden_size * num_gates), + "Fisrt dimension of R input shape is required to be compatible with ", + merged_hidden_size * num_gates, + ". Got shape: ", + r_pshape[0], + "."); + } + + if (b_pshape.rank().is_static()) { + auto bias_dim_multiplier = linear_before_reset ? 
(num_gates + 1) : num_gates; + NODE_VALIDATION_CHECK(op, + b_pshape[0].compatible(merged_hidden_size * bias_dim_multiplier), + "First dimension of B input shape is required to be compatible with ", + merged_hidden_size * bias_dim_multiplier, + ". Got shape: ", + b_pshape[0], + "."); + } + } else { + const size_t w_idx = 1 + num_state_nodes; + for (size_t i = w_idx; i < w_idx + 2; ++i) { + if (input_shapes[i].rank().is_static() && input_shapes[i][0].is_static()) { + NODE_VALIDATION_CHECK( + op, + DimType::merge(merged_hidden_size, merged_hidden_size, input_shapes[i][0] / num_gates), + "Dimension `hidden_size` is not matched between inputs."); + } + } + } + + return {num_state_nodes, TShape{merged_batch_size, merged_hidden_size}}; +} + +// Output shapes layout: +// output_shapes[0]: [batch_size, num_directions, seq_length, hidden_size] // Rank always 4 +// output_shapes[1... num_state_nodes]: [batch_size, num_directions, hidden_size] // Rank always 3 +template +std::vector seq_base_shape_infer(const op::util::RNNCellBase* op, + const std::vector& input_shapes, + size_t num_gates, + size_t num_state_nodes, + op::RecurrentSequenceDirection direction, + bool linear_before_reset = false) { + const auto num_inputs = 5 + num_state_nodes; + NODE_VALIDATION_CHECK(op, input_shapes.size() >= num_inputs, "Incorrect number of shapes has been provided."); + + std::vector output_shapes; + output_shapes.reserve(1 + num_state_nodes); + + std::vector expected_in_ranks; + expected_in_ranks.reserve(num_inputs); + expected_in_ranks.insert(expected_in_ranks.end(), 1 + num_state_nodes, Rank(3)); + expected_in_ranks.insert(expected_in_ranks.end(), {1, 3, 3, 2}); + + rnn::validate_inputs_rank(op, input_shapes, expected_in_ranks); + + const auto& x_pshape = input_shapes[0]; + const auto& ht_pshape = input_shapes[1]; + + const auto& w_pshape = input_shapes[2 + num_state_nodes]; + const auto& r_pshape = input_shapes[3 + num_state_nodes]; + const auto& b_pshape = input_shapes[4 + 
num_state_nodes]; + + using DimType = typename TShape::value_type; + + // Merge batch_size dimension across all inputs to evaluate output[0] dimension + DimType merged_batch_size = x_pshape.rank().is_static() ? x_pshape[0] : DimType(); + for (size_t i = 1; i <= 1 + num_state_nodes; ++i) { + if (input_shapes[i].rank().is_static()) { + NODE_VALIDATION_CHECK(op, + DimType::merge(merged_batch_size, merged_batch_size, input_shapes[i][0]), + "Dimension `batch_size` is not matched between inputs."); + } + } + + // Merge hidden_size dimension across all inputs to evaluate output dimension + // `hidden_size` attribute is not used for backward compatibility + DimType merged_hidden_size = ht_pshape.rank().is_static() ? ht_pshape[2] : DimType(); + for (size_t i = 2; i <= num_state_nodes; ++i) { + if (input_shapes[i].rank().is_static()) { + NODE_VALIDATION_CHECK(op, + DimType::merge(merged_hidden_size, merged_hidden_size, input_shapes[i][2]), + "Dimension `hidden_size` is not matched between inputs."); + } + } + + if (r_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK(op, + DimType::merge(merged_hidden_size, merged_hidden_size, r_pshape[2]), + "Dimension `hidden_size` is not matched between inputs."); + } + + // Validate num_directions dimension across all inputs + size_t valid_num_directions; + const auto m_direction = direction; + if (m_direction == op::RecurrentSequenceDirection::FORWARD || + m_direction == op::RecurrentSequenceDirection::REVERSE) { + valid_num_directions = 1; + } else if (m_direction == op::RecurrentSequenceDirection::BIDIRECTIONAL) { + valid_num_directions = 2; + } else { + NODE_VALIDATION_CHECK(op, false, "Attribute direction must be FORWARD or REVERSE or BIDIRECTIONAL."); + } + + bool is_num_dir_valid = true; + DimType merged_num_directions = DimType(valid_num_directions); + for (size_t i = 1; i <= num_state_nodes; ++i) { + is_num_dir_valid &= DimType::merge(merged_num_directions, + merged_num_directions, + input_shapes[i].rank().is_static() ? 
input_shapes[i][1] : DimType()); + } + + for (size_t i = 2 + num_state_nodes; i < num_inputs; ++i) { + is_num_dir_valid &= DimType::merge(merged_num_directions, + merged_num_directions, + input_shapes[i].rank().is_static() ? input_shapes[i][0] : DimType()); + } + + NODE_VALIDATION_CHECK(op, + is_num_dir_valid, + "Dimension `num_directions` doesn't match to other inputs or `direction` attribute."); + + // Validate dimensions related to hidden_size for W, R, B inputs + if (merged_hidden_size.is_static()) { + if (w_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK(op, + w_pshape[1].compatible(merged_hidden_size * num_gates), + "Second dimension of W input shape is required to be compatible with ", + merged_hidden_size * num_gates, + ". Got shape: ", + w_pshape[1], + "."); + } + + if (r_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK(op, + r_pshape[1].compatible(merged_hidden_size * num_gates), + "Second dimension of R input shape is required to be compatible with ", + merged_hidden_size * num_gates, + ". Got shape: ", + r_pshape[1], + "."); + } + + if (b_pshape.rank().is_static()) { + const auto bias_dim_multiplier = linear_before_reset ? (num_gates + 1) : num_gates; + NODE_VALIDATION_CHECK(op, + b_pshape[1].compatible(merged_hidden_size * bias_dim_multiplier), + "Second dimension of B input shape is required to be compatible with ", + merged_hidden_size * bias_dim_multiplier, + ". Got shape: ", + b_pshape[1], + "."); + } + } else { + const size_t w_idx = 2 + num_state_nodes; + for (size_t i = w_idx; i < w_idx + 2; ++i) { + if (input_shapes[i].rank().is_static() && input_shapes[i][0].is_static()) { + NODE_VALIDATION_CHECK( + op, + DimType::merge(merged_hidden_size, merged_hidden_size, input_shapes[i][1] / num_gates), + "Dimension `hidden_size` is not matched between inputs."); + } + } + } + + // Y output + output_shapes.push_back(TShape{merged_batch_size, + merged_num_directions, + x_pshape.rank().is_static() ? 
x_pshape[1] : DimType(), + merged_hidden_size}); + // Ho, Co outputs + output_shapes.insert(output_shapes.end(), + num_state_nodes, + TShape{merged_batch_size, merged_num_directions, merged_hidden_size}); + return output_shapes; +} +} // namespace rnn +} // namespace op +} // namespace ov diff --git a/src/core/shape_inference/include/rnn_cell_shape_inference.hpp b/src/core/shape_inference/include/rnn_cell_shape_inference.hpp new file mode 100644 index 00000000000000..5476718a3df02a --- /dev/null +++ b/src/core/shape_inference/include/rnn_cell_shape_inference.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "openvino/op/rnn_cell.hpp" +#include "rnn_base_shape_inference.hpp" + +namespace ov { +namespace op { +namespace v0 { +template +std::vector shape_infer(const RNNCell* op, const std::vector& input_shapes) { + constexpr auto num_gates = 1; + constexpr auto num_state_nodes = 1; + return rnn::cell_base_shape_infer(op, input_shapes, num_gates, num_state_nodes); +} +template +void shape_infer(const RNNCell* op, const std::vector& input_shapes, std::vector& output_shapes) { + output_shapes = shape_infer(op, input_shapes); +} +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/shape_inference/include/rnn_sequence_shape_inference.hpp b/src/core/shape_inference/include/rnn_sequence_shape_inference.hpp new file mode 100644 index 00000000000000..c69dddd0e5567e --- /dev/null +++ b/src/core/shape_inference/include/rnn_sequence_shape_inference.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "openvino/op/rnn_sequence.hpp" +#include "rnn_base_shape_inference.hpp" + +namespace ov { +namespace op { +namespace v5 { +template +std::vector shape_infer(const RNNSequence* op, const std::vector& input_shapes) { + constexpr auto num_gates = 1; + constexpr auto num_state_nodes = 1; + return 
rnn::seq_base_shape_infer(op, input_shapes, num_gates, num_state_nodes, op->get_direction()); +} +} // namespace v5 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/lstm_cell.cpp b/src/core/src/op/lstm_cell.cpp index eb5ec3a96caf10..1a6c46d7ad6ccc 100644 --- a/src/core/src/op/lstm_cell.cpp +++ b/src/core/src/op/lstm_cell.cpp @@ -6,9 +6,9 @@ #include #include -#include #include "itt.hpp" +#include "lstm_cell_shape_inference.hpp" #include "ngraph/attribute_visitor.hpp" #include "ngraph/op/concat.hpp" #include "ngraph/op/constant.hpp" diff --git a/src/core/src/op/lstm_sequence.cpp b/src/core/src/op/lstm_sequence.cpp index a23263d60cb895..169f10d07f9ca3 100644 --- a/src/core/src/op/lstm_sequence.cpp +++ b/src/core/src/op/lstm_sequence.cpp @@ -5,6 +5,7 @@ #include "ngraph/op/lstm_sequence.hpp" #include "itt.hpp" +#include "lstm_sequence_shape_inference.hpp" #include "ngraph/attribute_visitor.hpp" #include "ngraph/builder/autobroadcast.hpp" #include "ngraph/builder/reshape.hpp" @@ -16,17 +17,6 @@ using namespace ngraph; using namespace std; -op::v0::LSTMSequence::LSTMSequence() - : Op(), - m_activations_alpha(), - m_activations_beta(), - m_activations(), - m_clip_threshold(), - m_direction(), - m_hidden_size(), - m_input_forget(), - m_weights_format() {} - op::v0::LSTMSequence::LSTMSequence(const Output& X, const Output& initial_hidden_state, const Output& initial_cell_state, @@ -43,13 +33,13 @@ op::v0::LSTMSequence::LSTMSequence(const Output& X, const std::vector activations, const float clip_threshold, const bool input_forget) - : Op({X, initial_hidden_state, initial_cell_state, sequence_lengths, W, R, B, P}), - m_activations_alpha(activations_alpha), - m_activations_beta(activations_beta), - m_activations(activations), - m_clip_threshold(clip_threshold), + : RNNCellBase({X, initial_hidden_state, initial_cell_state, sequence_lengths, W, R, B, P}, + hidden_size, + clip_threshold, + activations, + activations_alpha, + activations_beta), 
m_direction(lstm_direction), - m_hidden_size(hidden_size), m_input_forget(input_forget), m_weights_format(weights_format) { constructor_validate_and_infer_types(); @@ -97,7 +87,7 @@ bool op::v0::LSTMSequence::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("activations", m_activations); visitor.on_attribute("activations_alpha", m_activations_alpha); visitor.on_attribute("activations_beta", m_activations_beta); - visitor.on_attribute("clip", m_clip_threshold); + visitor.on_attribute("clip", m_clip); visitor.on_attribute("direction", m_direction); visitor.on_attribute("input_forget", m_input_forget); @@ -123,7 +113,7 @@ shared_ptr op::v0::LSTMSequence::clone_with_new_inputs(const OutputVector& m_activations_alpha, m_activations_beta, m_activations, - m_clip_threshold, + m_clip, m_input_forget); } else if (new_args.size() == 7) { return make_shared(new_args.at(0), // X @@ -139,7 +129,7 @@ shared_ptr op::v0::LSTMSequence::clone_with_new_inputs(const OutputVector& m_activations_alpha, m_activations_beta, m_activations, - m_clip_threshold, + m_clip, m_input_forget); } else { OPENVINO_THROW("Incorrect number of new arguments"); @@ -148,62 +138,8 @@ shared_ptr op::v0::LSTMSequence::clone_with_new_inputs(const OutputVector& void op::v0::LSTMSequence::validate_and_infer_types() { OV_OP_SCOPE(v0_LSTMSequence_validate_and_infer_types); - for (const auto& input : inputs()) { - if (input.get_partial_shape().rank().is_dynamic()) { - set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic()); - set_output_type(1, get_input_element_type(0), ov::PartialShape::dynamic()); - set_output_type(2, get_input_element_type(0), ov::PartialShape::dynamic()); - return; - } - } - std::vector input_param{}; - - auto lstm_seq_gates_count = 4; - auto lstm_seq_peepholes_count = 3; - auto merged_batch_size = Dimension::dynamic(); - auto merged_hidden_size = Dimension::dynamic(); - auto merged_num_directions = Dimension::dynamic(); auto result_et = element::dynamic; 
- NODE_VALIDATION_CHECK(this, get_input_size() > 0, "The number of inputs of the LSTMSequence op cannot be zero."); - // Copy all inputs without peephole and initial_cell_state information for further validation - for (size_t i = 0; i < get_input_size() - 1; i++) { - // exclude initial_cell_state from the loop - if (i != 2) { - input_param.push_back(get_input_partial_shape(i)); - } - } - - // Get input partial shape for all inputs - const auto& x_pshape = get_input_partial_shape(0); - const auto& ht_pshape = get_input_partial_shape(1); - const auto& ct_pshape = get_input_partial_shape(2); - const auto& sl_pshape = get_input_partial_shape(3); - const auto& w_pshape = get_input_partial_shape(4); - const auto& r_pshape = get_input_partial_shape(5); - const auto& b_pshape = get_input_partial_shape(6); - const auto& p_pshape = get_input_partial_shape(7); - - OPENVINO_SUPPRESS_DEPRECATED_START - ngraph::op::util::validate_seq_input_rank_dimension(input_param); - OPENVINO_SUPPRESS_DEPRECATED_END - - // Validate rank and dimension for initial_cell_state input - NODE_VALIDATION_CHECK(this, - (ct_pshape.rank().is_static()), - "LSTMSequence input tensor initial_cell_state shall have static rank."); - - NODE_VALIDATION_CHECK(this, - (ct_pshape.rank().get_length() == 3), - "LSTMSequence input tensor initial_cell_state shall have dimension 3D."); - - // Validate rank and dimension for P input - NODE_VALIDATION_CHECK(this, (p_pshape.rank().is_static()), "LSTMSequence input tensor P shall have static rank."); - - NODE_VALIDATION_CHECK(this, - (p_pshape.rank().get_length() == 2), - "LSTMSequence input tensor P shall have dimension 2D."); - // Validate input types and save result for output type NODE_VALIDATION_CHECK(this, element::Type::merge(result_et, result_et, get_input_element_type(0)) && @@ -213,110 +149,21 @@ void op::v0::LSTMSequence::validate_and_infer_types() { element::Type::merge(result_et, result_et, get_input_element_type(5)) && element::Type::merge(result_et, 
result_et, get_input_element_type(6)), "Element types for X, initial_hidden_state, initial_cell_state, W, R and B inputs do " - "not " - "match."); - - // Merge batch_size dimension across all inputs to evaluate output[0] dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) && - Dimension::merge(merged_batch_size, merged_batch_size, ct_pshape[0]) && - Dimension::merge(merged_batch_size, merged_batch_size, x_pshape[0]) && - Dimension::merge(merged_batch_size, merged_batch_size, sl_pshape[0]), - "Parameter batch_size not matched in LSTMSequence."); - - // Merge hidden_size dimension across all inputs to evaluate output dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[2]) && - Dimension::merge(merged_hidden_size, merged_hidden_size, ct_pshape[2]) && - Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[2]), - "Parameter hidden_size not matched LSTMSequence."); - - // Merge num_directions dimension across all inputs to evaluate output dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_num_directions, merged_num_directions, ht_pshape[1]) && - Dimension::merge(merged_num_directions, merged_num_directions, ct_pshape[1]) && - Dimension::merge(merged_num_directions, merged_num_directions, w_pshape[0]) && - Dimension::merge(merged_num_directions, merged_num_directions, r_pshape[0]) && - Dimension::merge(merged_num_directions, merged_num_directions, b_pshape[0]), - "Parameter num_directions not matched in LSTMSequence."); - - auto valid_num_directions = 0; - if (m_direction == op::RecurrentSequenceDirection::FORWARD || - m_direction == op::RecurrentSequenceDirection::REVERSE) { - valid_num_directions = 1; - } else if (m_direction == op::RecurrentSequenceDirection::BIDIRECTIONAL) { - valid_num_directions = 2; - } else { - // Guard for potential future extension of RecurrentSequenceDirection enum - NODE_VALIDATION_CHECK(this, false, 
"Parameter direction must be FORWARD or REVERSE or BIDIRECTIONAL."); - } - - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_num_directions, merged_num_directions, valid_num_directions), - "Parameter 'num_directions' doesn't match with direction '", - m_direction, - "' in LSTMSequence. Expected ", - valid_num_directions, - ", actual ", - merged_num_directions); - - // Validate hidden_size value for W, R, B and P inputs - if (merged_hidden_size.is_static()) { - if (w_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - w_pshape[1].compatible(merged_hidden_size * lstm_seq_gates_count), - "Parameter hidden_size mistmatched in P input. Current value is: ", - w_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * lstm_seq_gates_count, - "."); - } - - if (r_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - r_pshape[1].compatible(merged_hidden_size * lstm_seq_gates_count), - "Parameter hidden_size mistmatched in R input. Current value is: ", - r_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * lstm_seq_gates_count, - "."); - } - - if (b_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - b_pshape[1].compatible(merged_hidden_size * lstm_seq_gates_count), - "Parameter hidden_size mistmatched in B input. Current value is: ", - b_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * lstm_seq_gates_count, - "."); - } - - if (p_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - p_pshape[1].compatible(merged_hidden_size * lstm_seq_peepholes_count), - "Parameter hidden_size mistmatched in P input. 
Current value is: ", - p_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * lstm_seq_peepholes_count, - "."); - } - } + "not match."); // Mark inputs which are relevant to output parameters - set_input_is_relevant_to_shape(0); - set_input_is_relevant_to_shape(1); - set_input_is_relevant_to_shape(2); - set_input_is_relevant_to_shape(3); - set_input_is_relevant_to_shape(4); - set_input_is_relevant_to_shape(5); - set_input_is_relevant_to_shape(6); + for (size_t i = 0; i <= 6; ++i) + set_input_is_relevant_to_shape(i); + + OPENVINO_SUPPRESS_DEPRECATED_START + const auto input_shapes = get_node_input_partial_shapes(*this); + OPENVINO_SUPPRESS_DEPRECATED_END + auto output_shapes = shape_infer(this, input_shapes); // Set output size, type and shape - set_output_size(3); - set_output_type(0, result_et, {merged_batch_size, merged_num_directions, x_pshape[1], merged_hidden_size}); - set_output_type(1, result_et, {merged_batch_size, merged_num_directions, merged_hidden_size}); - set_output_type(2, result_et, {merged_batch_size, merged_num_directions, merged_hidden_size}); + set_output_type(0, result_et, output_shapes[0]); + set_output_type(1, result_et, output_shapes[1]); + set_output_type(2, result_et, output_shapes[2]); } bool ngraph::op::v5::LSTMSequence::visit_attributes(AttributeVisitor& visitor) { @@ -349,48 +196,9 @@ shared_ptr op::v5::LSTMSequence::clone_with_new_inputs(const OutputVector& void op::v5::LSTMSequence::validate_and_infer_types() { OV_OP_SCOPE(v5_LSTMSequence_validate_and_infer_types); - for (const auto& input : inputs()) { - if (input.get_partial_shape().rank().is_dynamic()) { - set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic()); - set_output_type(1, get_input_element_type(0), ov::PartialShape::dynamic()); - set_output_type(2, get_input_element_type(0), ov::PartialShape::dynamic()); - return; - } - } - std::vector input_param{}; - auto lstm_seq_gates_count = 4; - auto merged_batch_size = 
Dimension::dynamic(); - auto merged_hidden_size = Dimension::dynamic(); - auto merged_num_directions = Dimension::dynamic(); auto result_et = element::dynamic; - // Copy all inputs without initial_cell_state information for further validation - for (size_t i = 0; i < get_input_size(); i++) { - // exclude initial_cell_state from the loop - if (i != 2) { - input_param.push_back(get_input_partial_shape(i)); - } - } - - // Get input partial shape for all inputs - const auto& x_pshape = get_input_partial_shape(0); - const auto& ht_pshape = get_input_partial_shape(1); - const auto& ct_pshape = get_input_partial_shape(2); - const auto& sl_pshape = get_input_partial_shape(3); - const auto& w_pshape = get_input_partial_shape(4); - const auto& r_pshape = get_input_partial_shape(5); - const auto& b_pshape = get_input_partial_shape(6); - - OPENVINO_SUPPRESS_DEPRECATED_START - ngraph::op::util::validate_seq_input_rank_dimension(input_param); - OPENVINO_SUPPRESS_DEPRECATED_END - - // Validate rank and dimension for initial_cell_state input - NODE_VALIDATION_CHECK(this, - (ct_pshape.rank().get_length() == 3), - "LSTMSequence input tensor initial_cell_state shall have dimension 3D."); - // Validate input types and save result for output type NODE_VALIDATION_CHECK(this, element::Type::merge(result_et, result_et, get_input_element_type(0)) && @@ -400,93 +208,19 @@ void op::v5::LSTMSequence::validate_and_infer_types() { element::Type::merge(result_et, result_et, get_input_element_type(5)) && element::Type::merge(result_et, result_et, get_input_element_type(6)), "Element types for X, initial_hidden_state, initial_cell_state, W, R and B inputs do " - "not " - "match."); - - // Merge batch_size dimension across all inputs to evaluate output[0] dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) && - Dimension::merge(merged_batch_size, merged_batch_size, ct_pshape[0]) && - Dimension::merge(merged_batch_size, merged_batch_size, 
x_pshape[0]) && - Dimension::merge(merged_batch_size, merged_batch_size, sl_pshape[0]), - "Parameter batch_size not matched in LSTMSequence."); - - // Merge hidden_size dimension across all inputs to evaluate output dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[2]) && - Dimension::merge(merged_hidden_size, merged_hidden_size, ct_pshape[2]) && - Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[2]), - "Parameter hidden_size not matched LSTMSequence."); - - // Merge num_directions dimension across all inputs to evaluate output dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_num_directions, merged_num_directions, ht_pshape[1]) && - Dimension::merge(merged_num_directions, merged_num_directions, ct_pshape[1]) && - Dimension::merge(merged_num_directions, merged_num_directions, w_pshape[0]) && - Dimension::merge(merged_num_directions, merged_num_directions, r_pshape[0]) && - Dimension::merge(merged_num_directions, merged_num_directions, b_pshape[0]), - "Parameter num_directions not matched in LSTMSequence."); - - auto valid_num_directions = 0; - if (m_direction == op::RecurrentSequenceDirection::FORWARD || - m_direction == op::RecurrentSequenceDirection::REVERSE) { - valid_num_directions = 1; - } else if (m_direction == op::RecurrentSequenceDirection::BIDIRECTIONAL) { - valid_num_directions = 2; - } else { - // Guard for potential future extension of RecurrentSequenceDirection enum - NODE_VALIDATION_CHECK(this, false, "Parameter direction must be FORWARD or REVERSE or BIDIRECTIONAL."); - } - - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_num_directions, merged_num_directions, valid_num_directions), - "Parameter 'num_directions' doesn't match with direction '", - m_direction, - "' in LSTMSequence. 
Expected ", - valid_num_directions, - ", actual ", - merged_num_directions); - - // Validate hidden_size value for W, R, B inputs - if (merged_hidden_size.is_static()) { - if (w_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - w_pshape[1].compatible(merged_hidden_size * lstm_seq_gates_count), - "Parameter hidden_size mistmatched in W input. Current value is: ", - w_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * lstm_seq_gates_count, - "."); - } - - if (r_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - r_pshape[1].compatible(merged_hidden_size * lstm_seq_gates_count), - "Parameter hidden_size mistmatched in R input. Current value is: ", - r_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * lstm_seq_gates_count, - "."); - } - - if (b_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - b_pshape[1].compatible(merged_hidden_size * lstm_seq_gates_count), - "Parameter hidden_size mistmatched in B input. Current value is: ", - b_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * lstm_seq_gates_count, - "."); - } - } + "not match."); // Mark inputs which are relevant to output parameters for (size_t i = 0; i <= 6; ++i) set_input_is_relevant_to_shape(i); + OPENVINO_SUPPRESS_DEPRECATED_START + const auto input_shapes = get_node_input_partial_shapes(*this); + OPENVINO_SUPPRESS_DEPRECATED_END + auto output_shapes = shape_infer(this, input_shapes); + // Set output size, type and shape - set_output_size(3); - set_output_type(0, result_et, {merged_batch_size, merged_num_directions, x_pshape[1], merged_hidden_size}); - set_output_type(1, result_et, {merged_batch_size, merged_num_directions, merged_hidden_size}); - set_output_type(2, result_et, {merged_batch_size, merged_num_directions, merged_hidden_size}); + set_output_type(0, result_et, output_shapes[0]); + set_output_type(1, result_et, output_shapes[1]); + set_output_type(2, result_et, output_shapes[2]); } diff --git 
a/src/core/src/op/rnn_cell.cpp b/src/core/src/op/rnn_cell.cpp index 97dda033aa0741..02a85c34faafbf 100644 --- a/src/core/src/op/rnn_cell.cpp +++ b/src/core/src/op/rnn_cell.cpp @@ -11,6 +11,7 @@ #include "ngraph/op/constant.hpp" #include "ngraph/shape.hpp" #include "ngraph/type/element_type.hpp" +#include "rnn_cell_shape_inference.hpp" using namespace std; using namespace ngraph; @@ -62,25 +63,8 @@ bool op::v0::RNNCell::visit_attributes(AttributeVisitor& visitor) { void op::v0::RNNCell::validate_and_infer_types() { OV_OP_SCOPE(v0_RNNCell_validate_and_infer_types); - for (const auto& input : inputs()) { - if (input.get_partial_shape().rank().is_dynamic()) { - set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic()); - return; - } - } - auto merged_batch_size = Dimension::dynamic(); - auto merged_hidden_size = Dimension::dynamic(); auto result_et = element::dynamic; - // Get input partial shape for all inputs - const auto& x_pshape = get_input_partial_shape(0); - const auto& ht_pshape = get_input_partial_shape(1); - const auto& w_pshape = get_input_partial_shape(2); - const auto& r_pshape = get_input_partial_shape(3); - const auto& b_pshape = get_input_partial_shape(4); - - validate_input_rank_dimension({x_pshape, ht_pshape, w_pshape, r_pshape, b_pshape}); - // Validate input types and save result for output type NODE_VALIDATION_CHECK(this, element::Type::merge(result_et, result_et, get_input_element_type(0)) && @@ -90,58 +74,15 @@ void op::v0::RNNCell::validate_and_infer_types() { element::Type::merge(result_et, result_et, get_input_element_type(4)), "Element types for X, initial_hidden_state, W, R and B inputs do not match."); - // Merge batch_size dimension across all inputs to evaluate output[0] dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) && - Dimension::merge(merged_batch_size, merged_batch_size, x_pshape[0]), - "Parameter batch_size not matched for X and initial_hidden_state 
inputs."); - - // Merge hidden_size dimension across all inputs to evaluate output[1] dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[1]) && - Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[1]), - "Parameter hidden_size not matched for R and initial_hidden_state inputs."); - - // Validate hidden_size value for W, B and R inputs - if (merged_hidden_size.is_static()) { - if (w_pshape[0].is_static()) { - NODE_VALIDATION_CHECK(this, - w_pshape[0].compatible(merged_hidden_size * s_gates_count), - "Parameter hidden_size mistmatched in W input. Current value is: ", - w_pshape[0].get_length(), - ", expected: ", - merged_hidden_size.get_length() * s_gates_count, - "."); - } - - if (r_pshape[0].is_static()) { - NODE_VALIDATION_CHECK(this, - r_pshape[0].compatible(merged_hidden_size * s_gates_count), - "Parameter hidden_size mistmatched in R input. Current value is: ", - r_pshape[0].get_length(), - ", expected: ", - merged_hidden_size.get_length() * s_gates_count, - "."); - } - - if (b_pshape[0].is_static()) { - NODE_VALIDATION_CHECK(this, - b_pshape[0].compatible(merged_hidden_size * s_gates_count), - "Parameter hidden_size mistmatched in B input. 
Current value is: ", - b_pshape[0].get_length(), - ", expected: ", - merged_hidden_size.get_length() * s_gates_count, - "."); - } - } - // Mark inputs which are relevant to output parameters for (size_t i = 0; i <= 4; ++i) set_input_is_relevant_to_shape(i); - // Set output size, type and shape - set_output_size(1); - set_output_type(0, result_et, {merged_batch_size, merged_hidden_size}); + OPENVINO_SUPPRESS_DEPRECATED_START + const auto input_shapes = get_node_input_partial_shapes(*this); + OPENVINO_SUPPRESS_DEPRECATED_END + auto output_shapes = shape_infer(this, input_shapes); + set_output_type(0, result_et, output_shapes[0]); } Output op::v0::RNNCell::get_default_bias_input() const { diff --git a/src/core/src/op/rnn_sequence.cpp b/src/core/src/op/rnn_sequence.cpp index e9dcd4b692dab8..8e33461510e2bb 100644 --- a/src/core/src/op/rnn_sequence.cpp +++ b/src/core/src/op/rnn_sequence.cpp @@ -11,6 +11,7 @@ #include "itt.hpp" #include "ngraph/op/util/recurrent_sequence.hpp" #include "ngraph/opsets/opset4.hpp" +#include "rnn_sequence_shape_inference.hpp" using namespace std; using namespace ngraph; @@ -41,32 +42,8 @@ op::v5::RNNSequence::RNNSequence(const Output& X, void op::v5::RNNSequence::validate_and_infer_types() { OV_OP_SCOPE(v5_RNNSequence_validate_and_infer_types); - for (const auto& input : inputs()) { - if (input.get_partial_shape().rank().is_dynamic()) { - set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic()); - set_output_type(1, get_input_element_type(0), ov::PartialShape::dynamic()); - return; - } - } - auto rnn_seq_gates_count = 1; - auto merged_batch_size = Dimension::dynamic(); - auto merged_hidden_size = Dimension::dynamic(); - auto merged_num_directions = Dimension::dynamic(); auto result_et = element::dynamic; - - auto x_pshape = get_input_partial_shape(0); - auto ht_pshape = get_input_partial_shape(1); - auto sl_pshape = get_input_partial_shape(2); - auto w_pshape = get_input_partial_shape(3); - auto r_pshape = 
get_input_partial_shape(4); - auto b_pshape = get_input_partial_shape(5); - - OPENVINO_SUPPRESS_DEPRECATED_START - ngraph::op::util::validate_seq_input_rank_dimension({x_pshape, ht_pshape, sl_pshape, w_pshape, r_pshape, b_pshape}); - OPENVINO_SUPPRESS_DEPRECATED_END - - // Validate input types and save result for output type NODE_VALIDATION_CHECK(this, element::Type::merge(result_et, result_et, get_input_element_type(0)) && element::Type::merge(result_et, result_et, get_input_element_type(1)) && @@ -76,88 +53,18 @@ void op::v5::RNNSequence::validate_and_infer_types() { "Element types for X, initial_hidden_state, W, R and B inputs do not " "match."); - // Merge batch_size dimension across all inputs to evaluate output[0] dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) && - Dimension::merge(merged_batch_size, merged_batch_size, x_pshape[0]) && - Dimension::merge(merged_batch_size, merged_batch_size, sl_pshape[0]), - "Parameter batch_size not matched in RNNSequence."); - - // Merge hidden_size dimension across all inputs to evaluate output dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[2]) && - Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[2]), - "Parameter hidden_size not matched RNNSequence."); - - // Merge num_directions dimension across all inputs to evaluate output dimension - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_num_directions, merged_num_directions, ht_pshape[1]) && - Dimension::merge(merged_num_directions, merged_num_directions, w_pshape[0]) && - Dimension::merge(merged_num_directions, merged_num_directions, r_pshape[0]) && - Dimension::merge(merged_num_directions, merged_num_directions, b_pshape[0]), - "Parameter num_directions not matched in RNNSequence."); - - auto valid_num_directions = 0; - if (m_direction == op::RecurrentSequenceDirection::FORWARD || - m_direction == 
op::RecurrentSequenceDirection::REVERSE) { - valid_num_directions = 1; - } else if (m_direction == op::RecurrentSequenceDirection::BIDIRECTIONAL) { - valid_num_directions = 2; - } else { - // Guard for potential future extension of RecurrentSequenceDirection enum - NODE_VALIDATION_CHECK(this, false, "Parameter direction must be FORWARD or REVERSE or BIDIRECTIONAL."); - } - - NODE_VALIDATION_CHECK(this, - Dimension::merge(merged_num_directions, merged_num_directions, valid_num_directions), - "Parameter 'num_directions' doesn't match with direction '", - m_direction, - "' in RNNSequence. Expected ", - valid_num_directions, - ", actual ", - merged_num_directions); - - // Validate hidden_size value for W, R, B inputs - if (merged_hidden_size.is_static()) { - if (w_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - w_pshape[1].compatible(merged_hidden_size * rnn_seq_gates_count), - "Parameter hidden_size mistmatched in W input. Current value is: ", - w_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * rnn_seq_gates_count, - "."); - } - - if (r_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - r_pshape[1].compatible(merged_hidden_size * rnn_seq_gates_count), - "Parameter hidden_size mistmatched in R input. Current value is: ", - r_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * rnn_seq_gates_count, - "."); - } - - if (b_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - b_pshape[1].compatible(merged_hidden_size * rnn_seq_gates_count), - "Parameter hidden_size mistmatched in B input. 
Current value is: ", - b_pshape[1].get_length(), - ", expected: ", - merged_hidden_size.get_length() * rnn_seq_gates_count, - "."); - } - } + OPENVINO_SUPPRESS_DEPRECATED_START + const auto input_shapes = get_node_input_partial_shapes(*this); + OPENVINO_SUPPRESS_DEPRECATED_END + auto output_shapes = shape_infer(this, input_shapes); // Mark inputs which are relevant to output parameters for (size_t i = 0; i <= 5; ++i) set_input_is_relevant_to_shape(i); // Set output size, type and shape - set_output_size(2); - set_output_type(0, result_et, {merged_batch_size, merged_num_directions, x_pshape[1], merged_hidden_size}); - set_output_type(1, result_et, {merged_batch_size, merged_num_directions, merged_hidden_size}); + set_output_type(0, result_et, output_shapes[0]); + set_output_type(1, result_et, output_shapes[1]); } bool op::v5::RNNSequence::visit_attributes(AttributeVisitor& visitor) { diff --git a/src/core/tests/type_prop/gru_cell.cpp b/src/core/tests/type_prop/gru_cell.cpp index 660d43a5b2e6c0..1fd164e3ecda5b 100644 --- a/src/core/tests/type_prop/gru_cell.cpp +++ b/src/core/tests/type_prop/gru_cell.cpp @@ -28,6 +28,72 @@ TEST(type_prop, gru_cell) { EXPECT_EQ(gru_cell->get_output_shape(0), (Shape{batch_size, hidden_size})); } +TEST(type_prop, gru_cell_with_bias) { + const size_t batch_size = 2; + const size_t input_size = 3; + const size_t hidden_size = 3; + const size_t gates_count = 3; + + const auto X = make_shared(element::f32, Shape{batch_size, input_size}); + const auto H_t = make_shared(element::f32, Shape{batch_size, hidden_size}); + const auto W = make_shared(element::f32, Shape{gates_count * hidden_size, input_size}); + const auto R = make_shared(element::f32, Shape{gates_count * hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{gates_count * hidden_size}); + + const auto gru_cell = make_shared(X, H_t, W, R, B, hidden_size); + EXPECT_EQ(gru_cell->get_output_element_type(0), element::f32); + EXPECT_EQ(gru_cell->get_output_shape(0), 
(Shape{batch_size, hidden_size})); +} + +TEST(type_prop, gru_cell_with_bias_linear_before) { + const size_t batch_size = 2; + const size_t input_size = 3; + const size_t hidden_size = 3; + const size_t gates_count = 3; + + const auto X = make_shared(element::f32, Shape{batch_size, input_size}); + const auto H_t = make_shared(element::f32, Shape{batch_size, hidden_size}); + const auto W = make_shared(element::f32, Shape{gates_count * hidden_size, input_size}); + const auto R = make_shared(element::f32, Shape{gates_count * hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{(gates_count + 1) * hidden_size}); + + const auto gru_cell = make_shared(X, + H_t, + W, + R, + B, + hidden_size, + std::vector{"sigmoid", "tanh"}, + std::vector{}, + std::vector{}, + 0.f, + true); + + EXPECT_EQ(gru_cell->get_output_element_type(0), element::f32); + EXPECT_EQ(gru_cell->get_output_shape(0), (Shape{batch_size, hidden_size})); +} + +TEST(type_prop, gru_cell_default_ctor_linear_before) { + const size_t batch_size = 2; + const size_t input_size = 3; + const size_t hidden_size = 3; + const size_t gates_count = 3; + + const auto X = make_shared(element::f32, Shape{batch_size, input_size}); + const auto H_t = make_shared(element::f32, Shape{batch_size, hidden_size}); + const auto W = make_shared(element::f32, Shape{gates_count * hidden_size, input_size}); + const auto R = make_shared(element::f32, Shape{gates_count * hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{(gates_count + 1) * hidden_size}); + + const auto gru_cell = make_shared(); + gru_cell->set_linear_before_reset(true); + gru_cell->set_arguments(OutputVector{X, H_t, W, R, B}); + gru_cell->validate_and_infer_types(); + + EXPECT_EQ(gru_cell->get_output_element_type(0), element::f32); + EXPECT_EQ(gru_cell->get_output_shape(0), (Shape{batch_size, hidden_size})); +} + TEST(type_prop, gru_cell_invalid_input) { const size_t batch_size = 2; const size_t input_size = 3; diff --git 
a/src/core/tests/type_prop/lstm_cell.cpp b/src/core/tests/type_prop/lstm_cell.cpp index 36721682c69bab..c7f376258d2109 100644 --- a/src/core/tests/type_prop/lstm_cell.cpp +++ b/src/core/tests/type_prop/lstm_cell.cpp @@ -53,9 +53,8 @@ TEST(type_prop, lstm_cell_invalid_input) { const auto lstm_cell = make_shared(X, H_t, C_t, W, R, hidden_size); FAIL() << "LSTMCell node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING( - error.what(), - std::string("Parameter hidden_size not matched for W, R, B, initial_hidden_state and initial_cell_state")); + EXPECT_HAS_SUBSTRING(error.what(), + std::string("First dimension of W input shape is required to be compatible")); } // Invalid R tensor shape. @@ -65,9 +64,7 @@ TEST(type_prop, lstm_cell_invalid_input) { const auto lstm_cell = make_shared(X, H_t, C_t, W, R, hidden_size); FAIL() << "LSTMCell node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), - std::string("Parameter hidden_size not matched for W, R, B, " - "initial_hidden_state and initial_cell_state inputs.")); + EXPECT_HAS_SUBSTRING(error.what(), std::string("Dimension `hidden_size` is not matched between inputs")); } // Invalid H_t tensor shape. @@ -77,9 +74,7 @@ TEST(type_prop, lstm_cell_invalid_input) { const auto lstm_cell = make_shared(X, H_t, C_t, W, R, hidden_size); FAIL() << "LSTMCell node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), - std::string("Parameter batch_size not matched for X, " - "initial_hidden_state or initial_cell_state inputs.")); + EXPECT_HAS_SUBSTRING(error.what(), std::string("Dimension `batch_size` is not matched between inputs")); } // Invalid C_t tensor shape. 
@@ -89,20 +84,19 @@ TEST(type_prop, lstm_cell_invalid_input) { const auto lstm_cell = make_shared(X, H_t, C_t, W, R, hidden_size); FAIL() << "LSTMCell node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), - std::string("Parameter batch_size not matched for X, " - "initial_hidden_state or initial_cell_state inputs.")); + EXPECT_HAS_SUBSTRING(error.what(), std::string("Dimension `batch_size` is not matched between inputs")); } // Invalid B tensor shape. C_t = make_shared(element::f32, Shape{batch_size, hidden_size}); auto B = make_shared(element::f32, Shape{2 * gates_count * hidden_size}); - auto P = make_shared(element::f32, Shape{3 * hidden_size}); try { const auto lstm_cell = make_shared(X, H_t, C_t, W, R, B, hidden_size); FAIL() << "LSTMCell node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size not matched for W, R, B")); + EXPECT_HAS_SUBSTRING( + error.what(), + std::string("First dimension of B input shape is required to be compatible with 12. 
Got shape: 24.")); } } diff --git a/src/core/tests/type_prop/lstm_sequence.cpp b/src/core/tests/type_prop/lstm_sequence.cpp index 75ed94049aae94..c78b1b363a6504 100644 --- a/src/core/tests/type_prop/lstm_sequence.cpp +++ b/src/core/tests/type_prop/lstm_sequence.cpp @@ -90,7 +90,7 @@ shared_ptr lstm_seq_direction_initialization(const recurre return lstm_sequence; } -shared_ptr lstm_seq_v1_tensor_initialization(const recurrent_sequence_parameters& param) { +shared_ptr lstm_seq_v0_tensor_initialization(const recurrent_sequence_parameters& param) { auto batch_size = param.batch_size; auto seq_length = param.seq_length; auto input_size = param.input_size; @@ -169,7 +169,7 @@ TEST(type_prop, lstm_sequence_forward) { EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); } -TEST(type_prop, lstm_sequence_v1_forward) { +TEST(type_prop, lstm_sequence_v0_forward) { const size_t batch_size = 8; const size_t num_directions = 1; const size_t seq_length = 6; @@ -267,7 +267,7 @@ TEST(type_prop, lstm_sequence_bidirectional) { EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); } -TEST(type_prop, lstm_sequence_v1_bidirectional) { +TEST(type_prop, lstm_sequence_v0_bidirectional) { const size_t batch_size = 24; const size_t num_directions = 2; const size_t seq_length = 12; @@ -466,33 +466,57 @@ TEST(type_prop, lstm_sequence_invalid_input_dimension) { } } -TEST(type_prop, lstm_sequence_invalid_input_dynamic_rank) { +TEST(type_prop, lstm_sequence_input_dynamic_shape_ranges) { + recurrent_sequence_parameters param; + + param.batch_size = Dimension(1, 8); + param.num_directions = Dimension(1, 2); + param.seq_length = Dimension(5, 7); + param.input_size = Dimension(64, 128); + param.hidden_size = Dimension(32, 64); + param.et = element::f32; + + auto op = lstm_seq_tensor_initialization(param); + op->validate_and_infer_types(); + + EXPECT_EQ(op->get_output_partial_shape(0), + 
(PartialShape{param.batch_size, 1, param.seq_length, param.hidden_size})); + EXPECT_EQ(op->get_output_partial_shape(1), (PartialShape{param.batch_size, 1, param.hidden_size})); + EXPECT_EQ(op->get_output_element_type(0), param.et); + EXPECT_EQ(op->get_output_element_type(1), param.et); +} + +TEST(type_prop, lstm_sequence_all_inputs_dynamic_rank) { recurrent_sequence_parameters param; param.batch_size = 24; - param.num_directions = 2; + param.num_directions = 1; param.seq_length = 12; param.input_size = 8; param.hidden_size = 256; param.et = element::f32; - auto check_dynamic_lstm = [](const shared_ptr& lstm) -> bool { - return lstm->output(0).get_partial_shape() == PartialShape::dynamic() && - lstm->output(1).get_partial_shape() == PartialShape::dynamic() && - lstm->output(2).get_partial_shape() == PartialShape::dynamic() && - lstm->output(0).get_element_type() == lstm->input(0).get_element_type(); - }; - - auto lstm_sequence = lstm_seq_tensor_initialization(param); - auto invalid_dynamic_tensor = make_shared(param.et, PartialShape::dynamic(Rank::dynamic())); + auto op = lstm_seq_tensor_initialization(param); + auto dynamic_tensor = make_shared(param.et, PartialShape::dynamic(Rank::dynamic())); + + for (size_t i = 0; i < op->get_input_size(); i++) { + auto op = lstm_seq_tensor_initialization(param); + op->set_argument(i, dynamic_tensor); + op->validate_and_infer_types(); + if (i == 0) { // X input + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{param.batch_size, param.num_directions, -1, param.hidden_size})); + } else { + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{param.batch_size, param.num_directions, param.seq_length, param.hidden_size})); + } + EXPECT_EQ(op->get_output_partial_shape(1), + (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); + EXPECT_EQ(op->get_output_partial_shape(2), + (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - // Validate invalid dynamic tensor for all 
inputs: X, initial_hidden_state, initial_cell_state - // W, R, B - for (size_t i = 0; i < lstm_sequence->get_input_size(); i++) { - lstm_sequence = lstm_seq_tensor_initialization(param); - lstm_sequence->set_argument(i, invalid_dynamic_tensor); - lstm_sequence->validate_and_infer_types(); - EXPECT_EQ(check_dynamic_lstm(lstm_sequence), true); + EXPECT_EQ(op->get_output_element_type(0), param.et); + EXPECT_EQ(op->get_output_element_type(1), param.et); } } @@ -511,7 +535,8 @@ TEST(type_prop, lstm_sequence_invalid_input_direction) { lstm_sequence->validate_and_infer_types(); FAIL() << "LSTMSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter 'num_directions' doesn't match with direction")); + EXPECT_HAS_SUBSTRING(error.what(), + "Dimension `num_directions` doesn't match to other inputs or `direction` attribute."); } } @@ -526,11 +551,12 @@ TEST(type_prop, lstm_sequence_invalid_input_direction_num_mismatch) { param.hidden_size = 256; param.et = element::f32; try { - auto gru_sequence = lstm_seq_direction_initialization(param, direction); - gru_sequence->validate_and_infer_types(); + auto op = lstm_seq_direction_initialization(param, direction); + op->validate_and_infer_types(); FAIL() << "LSTMSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter 'num_directions' doesn't match with direction")); + EXPECT_HAS_SUBSTRING(error.what(), + "Dimension `num_directions` doesn't match to other inputs or `direction` attribute."); } }; @@ -539,7 +565,7 @@ TEST(type_prop, lstm_sequence_invalid_input_direction_num_mismatch) { check_error(op::RecurrentSequenceDirection::REVERSE, 2); } -TEST(type_prop, lstm_sequence_v1_dynamic_num_directions) { +TEST(type_prop, lstm_sequence_v0_dynamic_num_directions) { recurrent_sequence_parameters param; param.batch_size = 24; @@ -549,7 +575,7 @@ 
TEST(type_prop, lstm_sequence_v1_dynamic_num_directions) { param.hidden_size = 256; param.et = element::f32; - auto lstm_sequence = lstm_seq_v1_tensor_initialization(param); + auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); lstm_sequence->validate_and_infer_types(); EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), @@ -561,7 +587,7 @@ TEST(type_prop, lstm_sequence_v1_dynamic_num_directions) { EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); } -TEST(type_prop, lstm_sequence_v1_dynamic_seq_length) { +TEST(type_prop, lstm_sequence_v0_dynamic_seq_length) { recurrent_sequence_parameters param; param.batch_size = 24; @@ -571,7 +597,7 @@ TEST(type_prop, lstm_sequence_v1_dynamic_seq_length) { param.hidden_size = 256; param.et = element::f32; - auto lstm_sequence = lstm_seq_v1_tensor_initialization(param); + auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); lstm_sequence->validate_and_infer_types(); EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), @@ -585,7 +611,7 @@ TEST(type_prop, lstm_sequence_v1_dynamic_seq_length) { EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); } -TEST(type_prop, lstm_sequence_v1_dynamic_hidden_size) { +TEST(type_prop, lstm_sequence_v0_dynamic_hidden_size) { recurrent_sequence_parameters param; param.batch_size = 24; @@ -595,7 +621,7 @@ TEST(type_prop, lstm_sequence_v1_dynamic_hidden_size) { param.hidden_size = Dimension::dynamic(); param.et = element::f32; - auto lstm_sequence = lstm_seq_v1_tensor_initialization(param); + auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); lstm_sequence->validate_and_infer_types(); EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), @@ -609,7 +635,7 @@ TEST(type_prop, lstm_sequence_v1_dynamic_hidden_size) { EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); } -TEST(type_prop, lstm_sequence_v1_dynamic_inputs) { +TEST(type_prop, lstm_sequence_v0_dynamic_inputs) { recurrent_sequence_parameters param; param.batch_size = 
Dimension::dynamic(); @@ -619,7 +645,7 @@ TEST(type_prop, lstm_sequence_v1_dynamic_inputs) { param.seq_length = Dimension::dynamic(); param.et = element::f32; - auto lstm_sequence = lstm_seq_v1_tensor_initialization(param); + auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); lstm_sequence->validate_and_infer_types(); EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), @@ -631,7 +657,7 @@ TEST(type_prop, lstm_sequence_v1_dynamic_inputs) { EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); } -TEST(type_prop, lstm_sequence_v1_invalid_input_dimension) { +TEST(type_prop, lstm_sequence_v0_invalid_input_dimension) { recurrent_sequence_parameters param; param.batch_size = 24; @@ -641,50 +667,53 @@ TEST(type_prop, lstm_sequence_v1_invalid_input_dimension) { param.hidden_size = 256; param.et = element::f32; - auto lstm_sequence = lstm_seq_v1_tensor_initialization(param); + auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); auto invalid_rank0_tensor = make_shared(param.et, PartialShape{}); // Validate invalid rank0 tensor for all inputs: X, initial_hidden_state, initial_cell_state W, // R, B for (size_t i = 0; i < lstm_sequence->get_input_size(); i++) { - lstm_sequence = lstm_seq_v1_tensor_initialization(param); + lstm_sequence = lstm_seq_v0_tensor_initialization(param); lstm_sequence->set_argument(i, invalid_rank0_tensor); ASSERT_THROW(lstm_sequence->validate_and_infer_types(), ngraph::CheckFailure) << "LSTMSequence node was created with invalid data."; } } -TEST(type_prop, lstm_sequence_v1_invalid_input_dynamic_rank) { +TEST(type_prop, lstm_sequence_v0_input_dynamic_rank) { recurrent_sequence_parameters param; - param.batch_size = 24; - param.num_directions = 2; + param.num_directions = 1; param.seq_length = 12; param.input_size = 8; param.hidden_size = 256; param.et = element::f32; - auto check_dynamic_lstm = [](const shared_ptr& lstm) -> bool { - return lstm->output(0).get_partial_shape() == PartialShape::dynamic() && - 
lstm->output(1).get_partial_shape() == PartialShape::dynamic() && - lstm->output(2).get_partial_shape() == PartialShape::dynamic() && - lstm->output(0).get_element_type() == lstm->input(0).get_element_type(); - }; - - auto lstm_sequence = lstm_seq_v1_tensor_initialization(param); - auto invalid_dynamic_tensor = make_shared(param.et, PartialShape::dynamic(Rank::dynamic())); + auto op = lstm_seq_tensor_initialization(param); + auto dynamic_tensor = make_shared(param.et, PartialShape::dynamic(Rank::dynamic())); + + for (size_t i = 0; i < op->get_input_size(); i++) { + auto op = lstm_seq_v0_tensor_initialization(param); + op->set_argument(i, dynamic_tensor); + op->validate_and_infer_types(); + if (i == 0) { // X input + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{param.batch_size, param.num_directions, -1, param.hidden_size})); + } else { + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{param.batch_size, param.num_directions, param.seq_length, param.hidden_size})); + } + EXPECT_EQ(op->get_output_partial_shape(1), + (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); + EXPECT_EQ(op->get_output_partial_shape(2), + (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - // Validate invalid dynamic tensor for all inputs: X, initial_hidden_state, initial_cell_state - // W, R, B - for (size_t i = 0; i < lstm_sequence->get_input_size(); i++) { - lstm_sequence = lstm_seq_v1_tensor_initialization(param); - lstm_sequence->set_argument(i, invalid_dynamic_tensor); - lstm_sequence->validate_and_infer_types(); - EXPECT_EQ(check_dynamic_lstm(lstm_sequence), true); + EXPECT_EQ(op->get_output_element_type(0), param.et); + EXPECT_EQ(op->get_output_element_type(1), param.et); } } -TEST(type_prop, lstm_sequence_v1_invalid_input_direction) { +TEST(type_prop, lstm_sequence_v0_invalid_input_direction) { recurrent_sequence_parameters param; param.batch_size = 24; @@ -694,20 +723,53 @@ TEST(type_prop, 
lstm_sequence_v1_invalid_input_direction) { param.hidden_size = 256; param.et = element::f32; - auto lstm_sequence = lstm_seq_v1_tensor_initialization(param); + auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); try { lstm_sequence->validate_and_infer_types(); FAIL() << "LSTMSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter 'num_directions' doesn't match with direction")); + EXPECT_HAS_SUBSTRING( + error.what(), + std::string("Dimension `num_directions` doesn't match to other inputs or `direction` attribute")); } param.num_directions = 2; // 2 is also not allowed for default 'm_direction' = FORWARD - lstm_sequence = lstm_seq_v1_tensor_initialization(param); + lstm_sequence = lstm_seq_v0_tensor_initialization(param); + try { + lstm_sequence->validate_and_infer_types(); + FAIL() << "LSTMSequence node was created with invalid data."; + } catch (const NodeValidationFailure& error) { + EXPECT_HAS_SUBSTRING( + error.what(), + std::string("Dimension `num_directions` doesn't match to other inputs or `direction` attribute")); + } +} + +TEST(type_prop, lstm_sequence_v0_invalid_input_P) { + recurrent_sequence_parameters param; + + param.batch_size = 24; + param.num_directions = 1; + param.seq_length = 12; + param.input_size = 8; + param.hidden_size = 256; + param.et = element::f32; + + auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); + auto P = make_shared(element::f32, PartialShape{param.hidden_size * 5}); + lstm_sequence->set_argument(7, P); + try { + lstm_sequence->validate_and_infer_types(); + FAIL() << "LSTMSequence node was created with invalid data."; + } catch (const NodeValidationFailure& error) { + EXPECT_HAS_SUBSTRING(error.what(), "Input tensor P should have rank equal 2"); + } + P = make_shared(element::f32, PartialShape{param.num_directions, param.hidden_size * 5}); + lstm_sequence->set_argument(7, P); try { 
lstm_sequence->validate_and_infer_types(); FAIL() << "LSTMSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter 'num_directions' doesn't match with direction")); + EXPECT_HAS_SUBSTRING(error.what(), "Inorrect shape of P input. Second dimension is: 1280, expected: 768"); } } diff --git a/src/core/tests/type_prop/rnn_cell.cpp b/src/core/tests/type_prop/rnn_cell.cpp index ba8e1f3de7fa5d..124cb601c84c76 100644 --- a/src/core/tests/type_prop/rnn_cell.cpp +++ b/src/core/tests/type_prop/rnn_cell.cpp @@ -25,6 +25,22 @@ TEST(type_prop, rnn_cell) { EXPECT_EQ(rnn_cell->get_output_shape(0), (Shape{batch_size, hidden_size})); } +TEST(type_prop, rnn_cell_with_bias) { + const size_t batch_size = 2; + const size_t input_size = 3; + const size_t hidden_size = 3; + + const auto X = make_shared(element::f32, Shape{batch_size, input_size}); + const auto H_t = make_shared(element::f32, Shape{batch_size, hidden_size}); + const auto W = make_shared(element::f32, Shape{hidden_size, input_size}); + const auto R = make_shared(element::f32, Shape{hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{hidden_size}); + + const auto rnn_cell = make_shared(X, H_t, W, R, B, hidden_size); + EXPECT_EQ(rnn_cell->get_output_element_type(0), element::f32); + EXPECT_EQ(rnn_cell->get_output_shape(0), (Shape{batch_size, hidden_size})); +} + TEST(type_prop, rnn_cell_invalid_input) { const size_t batch_size = 2; const size_t input_size = 3; @@ -40,7 +56,9 @@ TEST(type_prop, rnn_cell_invalid_input) { const auto rnn_cell = make_shared(X, H_t, W, R, hidden_size); FAIL() << "RNNCell node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in W input.")); + EXPECT_HAS_SUBSTRING( + error.what(), + std::string("First dimension of W input shape is required to be compatible with 3. 
Got shape: 6.")); } // Invalid R tensor shape. @@ -50,8 +68,7 @@ TEST(type_prop, rnn_cell_invalid_input) { const auto rnn_cell = make_shared(X, H_t, W, R, hidden_size); FAIL() << "RNNCell node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), - std::string("Parameter hidden_size not matched for R and initial_hidden_state inputs.")); + EXPECT_HAS_SUBSTRING(error.what(), std::string("Dimension `hidden_size` is not matched between inputs")); } // Invalid H_t tensor shape. @@ -61,8 +78,7 @@ TEST(type_prop, rnn_cell_invalid_input) { const auto rnn_cell = make_shared(X, H_t, W, R, hidden_size); FAIL() << "RNNCell node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), - std::string("Parameter batch_size not matched for X and initial_hidden_state inputs.")); + EXPECT_HAS_SUBSTRING(error.what(), std::string("Dimension `batch_size` is not matched between inputs")); } // Invalid B tensor shape. @@ -72,7 +88,9 @@ TEST(type_prop, rnn_cell_invalid_input) { const auto rnn_cell = make_shared(X, H_t, W, R, B, hidden_size); FAIL() << "RNNCell node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in B input.")); + EXPECT_HAS_SUBSTRING( + error.what(), + std::string("First dimension of B input shape is required to be compatible with 3. Got shape: 6.")); } } @@ -138,7 +156,7 @@ TEST(type_prop, rnn_cell_invalid_input_rank0) { << "RNNCell node was created with invalid data."; // Invalid rank0 for X tensor. 
- W = make_shared(element::f32, PartialShape{hidden_size, input_size}); + W = make_shared(element::f32, Shape{hidden_size, input_size}); X = make_shared(element::f32, PartialShape{}); ASSERT_THROW(const auto unused = make_shared(X, H_t, W, R, hidden_size), ngraph::NodeValidationFailure) @@ -159,24 +177,25 @@ TEST(type_prop, rnn_cell_invalid_input_rank0) { << "RNNCell node was created with invalid data."; // Invalid rank0 for B tensor. - R = make_shared(element::f32, PartialShape{hidden_size, hidden_size}); + R = make_shared(element::f32, Shape{hidden_size, hidden_size}); auto B = make_shared(element::f32, PartialShape{}); ASSERT_THROW(const auto unused = make_shared(X, H_t, W, R, B, hidden_size), ngraph::NodeValidationFailure) << "RNNCell node was created with invalid data."; } -TEST(type_prop, rnn_cell_invalid_input_dynamic_rank) { - const size_t batch_size = 2; - const size_t input_size = 3; +TEST(type_prop, rnn_cell_input_dynamic_rank) { + const int64_t batch_size = 2; + const int64_t input_size = 3; const size_t hidden_size = 3; + const auto hidden_size_dim = Dimension(static_cast(hidden_size)); - auto X = make_shared(element::f32, Shape{batch_size, input_size}); - auto R = make_shared(element::f32, Shape{hidden_size, hidden_size}); - auto H_t = make_shared(element::f32, Shape{batch_size, hidden_size}); + auto X = make_shared(element::f32, PartialShape{batch_size, input_size}); + auto R = make_shared(element::f32, PartialShape{hidden_size_dim, hidden_size_dim}); + auto H_t = make_shared(element::f32, PartialShape{batch_size, hidden_size}); - auto check_dynamic_rnn = [](const shared_ptr& rnn) -> bool { - return rnn->output(0).get_partial_shape() == PartialShape::dynamic() && + auto check_dynamic_rnn = [=](const shared_ptr& rnn) -> bool { + return rnn->output(0).get_partial_shape() == PartialShape{batch_size, hidden_size_dim} && rnn->output(0).get_element_type() == rnn->input(0).get_element_type(); }; // Invalid dynamic rank for W tensor. 
@@ -185,25 +204,25 @@ TEST(type_prop, rnn_cell_invalid_input_dynamic_rank) { EXPECT_EQ(check_dynamic_rnn(rnn_w), true); // Invalid dynamic rank for X tensor. - W = make_shared(element::f32, PartialShape{hidden_size, input_size}); + W = make_shared(element::f32, PartialShape{hidden_size_dim, input_size}); X = make_shared(element::f32, PartialShape::dynamic(Rank::dynamic())); auto rnn_x = make_shared(X, H_t, W, R, hidden_size); EXPECT_EQ(check_dynamic_rnn(rnn_x), true); // Invalid dynamic rank for H_t tensor. - X = make_shared(element::f32, Shape{batch_size, input_size}); + X = make_shared(element::f32, PartialShape{batch_size, input_size}); H_t = make_shared(element::f32, PartialShape::dynamic(Rank::dynamic())); auto rnn_h = make_shared(X, H_t, W, R, hidden_size); EXPECT_EQ(check_dynamic_rnn(rnn_h), true); // Invalid dynamic rank for R tensor. - H_t = make_shared(element::f32, Shape{batch_size, hidden_size}); + H_t = make_shared(element::f32, PartialShape{batch_size, hidden_size}); R = make_shared(element::f32, PartialShape::dynamic(Rank::dynamic())); auto rnn_r = make_shared(X, H_t, W, R, hidden_size); EXPECT_EQ(check_dynamic_rnn(rnn_r), true); // Invalid dynamic rank for B tensor. 
- R = make_shared(element::f32, PartialShape{hidden_size, hidden_size}); + R = make_shared(element::f32, PartialShape{hidden_size_dim, hidden_size_dim}); auto B = make_shared(element::f32, PartialShape::dynamic(Rank::dynamic())); auto rnn_b = make_shared(X, H_t, W, R, B, hidden_size); EXPECT_EQ(check_dynamic_rnn(rnn_b), true); diff --git a/src/core/tests/type_prop/rnn_cell_base.cpp b/src/core/tests/type_prop/rnn_cell_base.cpp new file mode 100644 index 00000000000000..e669970461d4ca --- /dev/null +++ b/src/core/tests/type_prop/rnn_cell_base.cpp @@ -0,0 +1,177 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "gmock/gmock.h" +#include "openvino/openvino.hpp" +#include "openvino/opsets/opset12.hpp" +#include "util/type_prop.hpp" + +namespace rnn_cell_test { +using namespace std; +using namespace ov; +using namespace op; +using namespace testing; + +struct RNNCellParams { + Dimension batch_size = 8; + Dimension input_size = 4; + Dimension hidden_size = 128; + size_t outputs_size = 1; + element::Type et = element::f32; + int64_t gates_count = 1; +}; + +template +class RNNCellTest : public TypePropOpTest { +public: + template ::value, bool>::type = true> + std::shared_ptr make_rnn_cell_based_op(RNNCellParams& p, bool use_default_ctor = false) { + const auto X = make_shared(p.et, PartialShape{p.batch_size, p.input_size}); + const auto H_t = make_shared(p.et, PartialShape{p.batch_size, p.hidden_size}); + const auto W = make_shared(p.et, PartialShape{p.hidden_size, p.input_size}); + const auto R = make_shared(p.et, PartialShape{p.hidden_size, p.hidden_size}); + const auto B = make_shared(p.et, PartialShape{p.hidden_size}); + + if (use_default_ctor) { + auto op = std::make_shared(); + op->set_hidden_size(p.hidden_size.get_max_length()); + op->set_arguments(OutputVector{X, H_t, W, R, B}); + op->validate_and_infer_types(); + return op; + } + + return 
std::make_shared(X, H_t, W, R, B, p.hidden_size.get_max_length()); + } + + template ::value, bool>::type = true> + std::shared_ptr make_rnn_cell_based_op(RNNCellParams& p, bool use_default_ctor = false) { + p.gates_count = 3; + + const auto X = make_shared(p.et, PartialShape{p.batch_size, p.input_size}); + const auto H_t = make_shared(p.et, PartialShape{p.batch_size, p.hidden_size}); + const auto W = make_shared(p.et, PartialShape{p.hidden_size * p.gates_count, p.input_size}); + const auto R = make_shared(p.et, PartialShape{p.hidden_size * p.gates_count, p.hidden_size}); + const auto B = make_shared(p.et, PartialShape{p.hidden_size * p.gates_count}); + + if (use_default_ctor) { + auto op = std::make_shared(); + op->set_hidden_size(p.hidden_size.get_max_length()); + op->set_arguments(OutputVector{X, H_t, W, R, B}); + op->validate_and_infer_types(); + return op; + } + + return std::make_shared(X, H_t, W, R, B, p.hidden_size.get_max_length()); + } + + template ::value || std::is_same::value, + bool>::type = true> + std::shared_ptr make_rnn_cell_based_op(RNNCellParams& p, bool use_default_ctor = false) { + p.gates_count = 4; + p.outputs_size = 2; + + const auto X = make_shared(p.et, PartialShape{p.batch_size, p.input_size}); + const auto H_t = make_shared(p.et, PartialShape{p.batch_size, p.hidden_size}); + const auto C_t = make_shared(p.et, PartialShape{p.batch_size, p.hidden_size}); + const auto W = make_shared(p.et, PartialShape{p.hidden_size * p.gates_count, p.input_size}); + const auto R = make_shared(p.et, PartialShape{p.hidden_size * p.gates_count, p.hidden_size}); + const auto B = make_shared(p.et, PartialShape{p.hidden_size * p.gates_count}); + + if (use_default_ctor) { + auto op = std::make_shared(); + op->set_hidden_size(p.hidden_size.get_max_length()); + auto inputs = OutputVector{X, H_t, C_t, W, R, B}; + if (ov::is_type(op)) { + const auto P = make_shared(p.et, PartialShape{p.hidden_size * (p.gates_count - 1)}); + inputs.push_back(P); + } + 
op->set_arguments(inputs); + op->validate_and_infer_types(); + return op; + } + + return std::make_shared(X, H_t, C_t, W, R, B, p.hidden_size.get_max_length()); + } +}; + +TYPED_TEST_SUITE_P(RNNCellTest); + +TYPED_TEST_P(RNNCellTest, basic_shape_infer) { + RNNCellParams params; + + auto op = this->make_rnn_cell_based_op(params); + EXPECT_EQ(op->get_output_size(), params.outputs_size); + for (size_t i = 0; i < params.outputs_size; ++i) { + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, params.hidden_size})); + } +} + +TYPED_TEST_P(RNNCellTest, default_ctor) { + RNNCellParams params; + + auto op = this->make_rnn_cell_based_op(params, true); + EXPECT_EQ(op->get_output_size(), params.outputs_size); + for (size_t i = 0; i < params.outputs_size; ++i) { + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, params.hidden_size})); + } +} + +TYPED_TEST_P(RNNCellTest, static_labels_dims_shape_infer) { + RNNCellParams params; + params.batch_size = Dimension(8); + ov::DimensionTracker::set_label(params.batch_size, 10); + params.input_size = Dimension(64); + ov::DimensionTracker::set_label(params.input_size, 11); + params.hidden_size = Dimension(128); + ov::DimensionTracker::set_label(params.hidden_size, 12); + + auto op = this->make_rnn_cell_based_op(params); + EXPECT_EQ(op->get_output_size(), params.outputs_size); + + for (size_t i = 0; i < params.outputs_size; ++i) { + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), ElementsAre(10, 12)); + } +} + +TYPED_TEST_P(RNNCellTest, interval_labels_dims_shape_infer) { + RNNCellParams params; + params.batch_size = Dimension(8, 16); + ov::DimensionTracker::set_label(params.batch_size, 10); + params.input_size = Dimension(64, 128); + ov::DimensionTracker::set_label(params.input_size, 11); + params.hidden_size = Dimension(128, 256); + 
ov::DimensionTracker::set_label(params.hidden_size, 12); + + auto op = this->make_rnn_cell_based_op(params); + EXPECT_EQ(op->get_output_size(), params.outputs_size); + + for (size_t i = 0; i < params.outputs_size; ++i) { + if (ov::is_type(op) || ov::is_type(op)) { + // For backward compatibility, if hidden_size dim is dynamic, set the value based on attribute + EXPECT_EQ(op->get_output_partial_shape(i), + (PartialShape{params.batch_size, static_cast(op->get_hidden_size())})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), ElementsAre(10, 0)); + } else { + // For backward compatibility, hidden_size attribute is ignored + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), ElementsAre(10, 12)); + } + } +} + +REGISTER_TYPED_TEST_SUITE_P(RNNCellTest, + basic_shape_infer, + default_ctor, + static_labels_dims_shape_infer, + interval_labels_dims_shape_infer); + +using RNNCellBaseTypes = Types; +INSTANTIATE_TYPED_TEST_SUITE_P(type_prop, RNNCellTest, RNNCellBaseTypes); + +} // namespace rnn_cell_test diff --git a/src/core/tests/type_prop/rnn_seq_base.cpp b/src/core/tests/type_prop/rnn_seq_base.cpp new file mode 100644 index 00000000000000..eaedf61b0fb351 --- /dev/null +++ b/src/core/tests/type_prop/rnn_seq_base.cpp @@ -0,0 +1,284 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "gmock/gmock.h" +#include "openvino/openvino.hpp" +#include "openvino/opsets/opset12.hpp" +#include "util/type_prop.hpp" + +namespace rnn_seq_test { +using namespace std; +using namespace ov; +using namespace op; +using namespace testing; + +struct RNNSeqParams { + Dimension batch_size = 8; + Dimension num_directions = 1; + Dimension seq_length = 6; + Dimension input_size = 4; + Dimension hidden_size = 128; + size_t outputs_size = 2; + 
op::RecurrentSequenceDirection direction = op::RecurrentSequenceDirection::FORWARD; + element::Type et = element::f32; + int64_t gates_count = 1; + bool linear_before_reset = false; +}; + +template +class RNNSeqBaseTest : public TypePropOpTest { +public: + template ::value, bool>::type = true> + std::shared_ptr make_rnn_seq_based_op(RNNSeqParams& p, bool use_default_ctor = false) { + const auto X = make_shared(p.et, PartialShape{p.batch_size, p.seq_length, p.input_size}); + const auto H_t = make_shared(p.et, PartialShape{p.batch_size, p.num_directions, p.hidden_size}); + const auto sequence_lengths = make_shared(p.et, PartialShape{p.batch_size}); + const auto W = + make_shared(p.et, + PartialShape{p.num_directions, p.hidden_size * p.gates_count, p.input_size}); + const auto R = + make_shared(p.et, + PartialShape{p.num_directions, p.hidden_size * p.gates_count, p.hidden_size}); + const auto B = make_shared(p.et, PartialShape{p.num_directions, p.hidden_size * p.gates_count}); + + if (use_default_ctor) { + auto op = std::make_shared(); + op->set_direction(p.direction); + op->set_hidden_size(p.hidden_size.get_max_length()); + op->set_arguments(OutputVector{X, H_t, sequence_lengths, W, R, B}); + op->validate_and_infer_types(); + return op; + } + + return std::make_shared(X, H_t, sequence_lengths, W, R, B, p.hidden_size.get_max_length(), p.direction); + } + + template ::value, bool>::type = true> + std::shared_ptr make_rnn_seq_based_op(RNNSeqParams& p, bool use_default_ctor = false) { + p.gates_count = 3; + + const auto X = make_shared(p.et, PartialShape{p.batch_size, p.seq_length, p.input_size}); + const auto H_t = make_shared(p.et, PartialShape{p.batch_size, p.num_directions, p.hidden_size}); + const auto sequence_lengths = make_shared(p.et, PartialShape{p.batch_size}); + const auto W = + make_shared(p.et, + PartialShape{p.num_directions, p.hidden_size * p.gates_count, p.input_size}); + const auto R = + make_shared(p.et, + PartialShape{p.num_directions, p.hidden_size * 
p.gates_count, p.hidden_size}); + const auto B = make_shared(p.et, PartialShape{p.num_directions, p.hidden_size * p.gates_count}); + + if (use_default_ctor) { + auto op = std::make_shared(); + op->set_direction(p.direction); + op->set_hidden_size(p.hidden_size.get_max_length()); + op->set_arguments(OutputVector{X, H_t, sequence_lengths, W, R, B}); + op->validate_and_infer_types(); + return op; + } + return std::make_shared(X, H_t, sequence_lengths, W, R, B, p.hidden_size.get_max_length(), p.direction); + } + + template < + typename T = TOp, + typename std::enable_if::value || std::is_same::value, + bool>::type = true> + std::shared_ptr make_rnn_seq_based_op(RNNSeqParams& p, bool use_default_ctor = false) { + p.gates_count = 4; + p.outputs_size = 3; + + const auto X = make_shared(p.et, PartialShape{p.batch_size, p.seq_length, p.input_size}); + const auto H_t = make_shared(p.et, PartialShape{p.batch_size, p.num_directions, p.hidden_size}); + const auto C_t = make_shared(p.et, PartialShape{p.batch_size, p.num_directions, p.hidden_size}); + const auto sequence_lengths = make_shared(p.et, PartialShape{p.batch_size}); + const auto W = + make_shared(p.et, + PartialShape{p.num_directions, p.hidden_size * p.gates_count, p.input_size}); + const auto R = + make_shared(p.et, + PartialShape{p.num_directions, p.hidden_size * p.gates_count, p.hidden_size}); + const auto B = make_shared(p.et, PartialShape{p.num_directions, p.hidden_size * p.gates_count}); + + if (use_default_ctor) { + auto op = std::make_shared(); + op->set_direction(p.direction); + op->set_hidden_size(p.hidden_size.get_max_length()); + auto inputs = OutputVector{X, H_t, C_t, sequence_lengths, W, R, B}; + if (ov::is_type(op)) { + const auto P = + make_shared(p.et, + PartialShape{p.num_directions, p.hidden_size * (p.gates_count - 1)}); + inputs.push_back(P); + } + op->set_arguments(inputs); + op->validate_and_infer_types(); + return op; + } + return std::make_shared(X, H_t, C_t, sequence_lengths, W, R, B, 
p.hidden_size.get_max_length(), p.direction); + } +}; + +TYPED_TEST_SUITE_P(RNNSeqBaseTest); + +TYPED_TEST_P(RNNSeqBaseTest, basic_shape_infer) { + RNNSeqParams params; + auto op = this->make_rnn_seq_based_op(params); + + EXPECT_EQ(op->get_output_size(), params.outputs_size); + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{params.batch_size, 1, params.seq_length, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(ov::no_label)); + for (size_t i = 1; i < params.outputs_size; ++i) { + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, 1, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), Each(ov::no_label)); + } +} + +TYPED_TEST_P(RNNSeqBaseTest, default_ctor) { + RNNSeqParams params; + auto op = this->make_rnn_seq_based_op(params, true); + + EXPECT_EQ(op->get_output_size(), params.outputs_size); + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{params.batch_size, 1, params.seq_length, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(ov::no_label)); + for (size_t i = 1; i < params.outputs_size; ++i) { + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, 1, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), Each(ov::no_label)); + } +} + +TYPED_TEST_P(RNNSeqBaseTest, default_ctor_BIDIRECTIONAL) { + RNNSeqParams params; + params.direction = op::RecurrentSequenceDirection::BIDIRECTIONAL; + params.num_directions = Dimension(2); + + auto op = this->make_rnn_seq_based_op(params, true); + + EXPECT_EQ(op->get_output_size(), params.outputs_size); + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{params.batch_size, 2, params.seq_length, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(ov::no_label)); + for (size_t i = 1; i < params.outputs_size; ++i) { + 
EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, 2, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), Each(ov::no_label)); + } +} + +TYPED_TEST_P(RNNSeqBaseTest, static_labels_dims_shape_infer) { + RNNSeqParams params; + params.batch_size = Dimension(8); + ov::DimensionTracker::set_label(params.batch_size, 10); + params.input_size = Dimension(64); + ov::DimensionTracker::set_label(params.seq_length, 11); + params.hidden_size = Dimension(128); + ov::DimensionTracker::set_label(params.hidden_size, 12); + params.num_directions = Dimension(1); + ov::DimensionTracker::set_label(params.num_directions, 13); + + auto op = this->make_rnn_seq_based_op(params); + EXPECT_EQ(op->get_output_size(), params.outputs_size); + + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{params.batch_size, 1, params.seq_length, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(10, 13, 11, 12)); + + for (size_t i = 1; i < params.outputs_size; ++i) { + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, 1, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), ElementsAre(10, 13, 12)); + } +} + +TYPED_TEST_P(RNNSeqBaseTest, interval_labels_dims_shape_infer_FORWARD) { + RNNSeqParams params; + params.batch_size = Dimension(8, 16); + ov::DimensionTracker::set_label(params.batch_size, 10); + params.input_size = Dimension(64, 128); + ov::DimensionTracker::set_label(params.seq_length, 11); + params.hidden_size = Dimension(128, 256); + ov::DimensionTracker::set_label(params.hidden_size, 12); + params.num_directions = Dimension(1, 2); + ov::DimensionTracker::set_label(params.num_directions, 13); + + auto op = this->make_rnn_seq_based_op(params); + EXPECT_EQ(op->get_output_size(), params.outputs_size); + + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{params.batch_size, 1, params.seq_length, params.hidden_size})); 
+ EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(10, 13, 11, 12)); + for (size_t i = 1; i < params.outputs_size; ++i) { + // For backward compatibility, hidden_size attribute is ignored + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, 1, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), ElementsAre(10, 13, 12)); + } +} + +TYPED_TEST_P(RNNSeqBaseTest, interval_labels_dims_shape_infer_REVERSE) { + RNNSeqParams params; + params.batch_size = Dimension(8, 16); + ov::DimensionTracker::set_label(params.batch_size, 10); + params.input_size = Dimension(64, 128); + ov::DimensionTracker::set_label(params.seq_length, 11); + params.hidden_size = Dimension(128, 256); + ov::DimensionTracker::set_label(params.hidden_size, 12); + params.num_directions = Dimension(1, 2); + ov::DimensionTracker::set_label(params.num_directions, 13); + + params.direction = op::RecurrentSequenceDirection::REVERSE; + + auto op = this->make_rnn_seq_based_op(params); + EXPECT_EQ(op->get_output_size(), params.outputs_size); + + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{params.batch_size, 1, params.seq_length, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(10, 13, 11, 12)); + for (size_t i = 1; i < params.outputs_size; ++i) { + // For backward compatibility, hidden_size attribute is ignored + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, 1, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), ElementsAre(10, 13, 12)); + } +} + +TYPED_TEST_P(RNNSeqBaseTest, interval_labels_dims_shape_infer_BIDIRECTIONAL) { + RNNSeqParams params; + params.batch_size = Dimension(8, 16); + ov::DimensionTracker::set_label(params.batch_size, 10); + params.input_size = Dimension(64, 128); + ov::DimensionTracker::set_label(params.seq_length, 11); + params.hidden_size = Dimension(128, 256); + 
ov::DimensionTracker::set_label(params.hidden_size, 12); + params.num_directions = Dimension(1, 2); + ov::DimensionTracker::set_label(params.num_directions, 13); + + params.direction = op::RecurrentSequenceDirection::BIDIRECTIONAL; + + auto op = this->make_rnn_seq_based_op(params); + EXPECT_EQ(op->get_output_size(), params.outputs_size); + + EXPECT_EQ(op->get_output_partial_shape(0), + (PartialShape{params.batch_size, 2, params.seq_length, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(10, 13, 11, 12)); + for (size_t i = 1; i < params.outputs_size; ++i) { + // For backward compatibility, hidden_size attribute is ignored + EXPECT_EQ(op->get_output_partial_shape(i), (PartialShape{params.batch_size, 2, params.hidden_size})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(i)), ElementsAre(10, 13, 12)); + } +} + +REGISTER_TYPED_TEST_SUITE_P(RNNSeqBaseTest, + default_ctor, + default_ctor_BIDIRECTIONAL, + basic_shape_infer, + static_labels_dims_shape_infer, + interval_labels_dims_shape_infer_FORWARD, + interval_labels_dims_shape_infer_REVERSE, + interval_labels_dims_shape_infer_BIDIRECTIONAL); + +using RNNSeqBaseTypes = Types; +INSTANTIATE_TYPED_TEST_SUITE_P(type_prop, RNNSeqBaseTest, RNNSeqBaseTypes); + +} // namespace rnn_seq_test diff --git a/src/core/tests/type_prop/rnn_sequence.cpp b/src/core/tests/type_prop/rnn_sequence.cpp index da15125e0f40d3..4af18811ea471f 100644 --- a/src/core/tests/type_prop/rnn_sequence.cpp +++ b/src/core/tests/type_prop/rnn_sequence.cpp @@ -68,7 +68,9 @@ TEST(type_prop, rnn_sequence_invalid_input) { make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); FAIL() << "RNNSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in W input")); + EXPECT_HAS_SUBSTRING( + error.what(), + std::string("Second dimension of W input shape is required to 
be compatible with 128. Got shape: 256")); } // Invalid R tensor shape. @@ -79,7 +81,7 @@ TEST(type_prop, rnn_sequence_invalid_input) { make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); FAIL() << "RNNSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size not matched RNNSequence.")); + EXPECT_HAS_SUBSTRING(error.what(), std::string("Dimension `hidden_size` is not matched between inputs")); } // Invalid H_t tensor shape. @@ -90,7 +92,7 @@ TEST(type_prop, rnn_sequence_invalid_input) { make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); FAIL() << "RNNSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter batch_size not matched in RNNSequence.")); + EXPECT_HAS_SUBSTRING(error.what(), std::string("Dimension `batch_size` is not matched between inputs")); } // Invalid B tensor shape. @@ -101,7 +103,9 @@ TEST(type_prop, rnn_sequence_invalid_input) { make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); FAIL() << "RNNSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in B inpu")); + EXPECT_HAS_SUBSTRING( + error.what(), + std::string("Second dimension of B input shape is required to be compatible with 128. Got shape: 256")); } // Invalid direction. 
@@ -111,7 +115,9 @@ TEST(type_prop, rnn_sequence_invalid_input) { make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); FAIL() << "RNNSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter num_directions not matched in RNNSequence.")); + EXPECT_HAS_SUBSTRING( + error.what(), + std::string("Dimension `num_directions` doesn't match to other inputs or `direction` attribute")); } // Invalid direction. @@ -122,7 +128,9 @@ TEST(type_prop, rnn_sequence_invalid_input) { make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); FAIL() << "RNNSequence node was created with invalid data."; } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter 'num_directions' doesn't match with direction")); + EXPECT_HAS_SUBSTRING( + error.what(), + std::string("Dimension `num_directions` doesn't match to other inputs or `direction` attribute")); } } @@ -295,12 +303,12 @@ TEST(type_prop, rnn_sequence_dynamic_invalid_input_rank0) { << "RNNSequence node was created with invalid data."; } -TEST(type_prop, rnn_sequence_dynamic_invalid_input_dynamic_rank) { - const size_t batch_size = 8; - const size_t num_directions = 1; - const size_t seq_length = 6; - const size_t input_size = 4; - const size_t hidden_size = 128; +TEST(type_prop, rnn_sequence_input_dynamic_rank) { + const int64_t batch_size = 8; + const int64_t num_directions = 1; + const int64_t seq_length = 6; + const int64_t input_size = 4; + const int64_t hidden_size = 128; auto X = make_shared(element::f32, PartialShape{batch_size, seq_length, input_size}); auto H_t = make_shared(element::f32, PartialShape{batch_size, num_directions, hidden_size}); @@ -312,34 +320,71 @@ TEST(type_prop, rnn_sequence_dynamic_invalid_input_dynamic_rank) { const auto direction = op::RecurrentSequenceDirection::FORWARD; - auto check_dynamic_rnn = [](const shared_ptr& rnn) -> bool 
{ - return rnn->output(0).get_partial_shape() == PartialShape::dynamic() && + auto check_dynamic_rnn = [=](const shared_ptr& rnn) -> bool { + return rnn->output(0).get_partial_shape() == + PartialShape{batch_size, num_directions, seq_length, hidden_size} && rnn->output(0).get_element_type() == rnn->input(0).get_element_type() && - rnn->output(1).get_partial_shape() == PartialShape::dynamic() && + rnn->output(1).get_partial_shape() == PartialShape{batch_size, num_directions, hidden_size} && rnn->output(1).get_element_type() == rnn->input(0).get_element_type(); }; X = make_shared(element::f32, PartialShape::dynamic(Rank::dynamic())); - auto rnn_x = make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); - EXPECT_EQ(check_dynamic_rnn(rnn_x), true); + auto rnn_x = make_shared(X, + H_t, + sequence_lengths, + W, + R, + B, + static_cast(hidden_size), + direction); + EXPECT_EQ(rnn_x->get_output_partial_shape(0), (PartialShape{batch_size, num_directions, -1, hidden_size})); + EXPECT_EQ(rnn_x->get_output_partial_shape(1), (PartialShape{batch_size, num_directions, hidden_size})); X = make_shared(element::f32, PartialShape{batch_size, seq_length, input_size}); H_t = make_shared(element::f32, PartialShape::dynamic(Rank::dynamic())); - auto rnn_h = make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); + auto rnn_h = make_shared(X, + H_t, + sequence_lengths, + W, + R, + B, + static_cast(hidden_size), + direction); EXPECT_EQ(check_dynamic_rnn(rnn_h), true); H_t = make_shared(element::f32, PartialShape{batch_size, num_directions, hidden_size}); W = make_shared(element::f32, PartialShape::dynamic(Rank::dynamic())); - auto rnn_w = make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); + auto rnn_w = make_shared(X, + H_t, + sequence_lengths, + W, + R, + B, + static_cast(hidden_size), + direction); EXPECT_EQ(check_dynamic_rnn(rnn_w), true); W = make_shared(element::f32, PartialShape{num_directions, hidden_size, input_size}); R = 
make_shared(element::f32, PartialShape::dynamic(Rank::dynamic())); - auto rnn_r = make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); + auto rnn_r = make_shared(X, + H_t, + sequence_lengths, + W, + R, + B, + static_cast(hidden_size), + direction); EXPECT_EQ(check_dynamic_rnn(rnn_r), true); R = make_shared(element::f32, PartialShape{num_directions, hidden_size, hidden_size}); B = make_shared(element::f32, PartialShape::dynamic(Rank::dynamic())); - auto rnn_b = make_shared(X, H_t, sequence_lengths, W, R, B, hidden_size, direction); + auto rnn_b = make_shared(X, + H_t, + sequence_lengths, + W, + R, + B, + static_cast(hidden_size), + direction); EXPECT_EQ(check_dynamic_rnn(rnn_b), true); } diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp index cc19444f33b9d5..e84da8c6ab33d1 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp @@ -56,6 +56,7 @@ #include "interpolate_shape_inference.hpp" #include "irdft_shape_inference.hpp" #include "lstm_cell_shape_inference.hpp" +#include "lstm_sequence_shape_inference.hpp" #include "matmul_shape_inference.hpp" #include "max_pool_shape_inference.hpp" #include "one_hot_shape_inference.hpp" @@ -72,6 +73,8 @@ #include "reorg_yolo_shape_inference.hpp" #include "reverse_sequence_shape_inference.hpp" #include "reverse_shape_inference.hpp" +#include "rnn_cell_shape_inference.hpp" +#include "rnn_sequence_shape_inference.hpp" #include "roi_align_shape_inference.hpp" #include "roi_pooling_shape_inference.hpp" #include "roll_shape_inference.hpp" @@ -682,7 +685,6 @@ const IStaticShapeInferFactory::TRegistry IStaticShapeInferFactory::registry{ // Default opset _OV_OP_SHAPE_INFER_MASK_REG(ExperimentalDetectronROIFeatureExtractor, ShapeInferTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(Proposal, ShapeInferTA, 
util::bit::mask()), - _OV_OP_SHAPE_INFER_MASK_REG(Tile, ShapeInferenceTA, util::bit::mask(1)), _OV_OP_SHAPE_INFER_VA_REG(ReduceL1, ShapeInferTA, op::util::ArithmeticReductionKeepDims, util::bit::mask(1)), _OV_OP_SHAPE_INFER_VA_REG(ReduceL2, ShapeInferTA, op::util::ArithmeticReductionKeepDims, util::bit::mask(1)), _OV_OP_SHAPE_INFER_VA_REG(ReduceLogicalAnd, ShapeInferTA, op::util::LogicalReductionKeepDims, util::bit::mask(1)), @@ -692,15 +694,21 @@ const IStaticShapeInferFactory::TRegistry IStaticShapeInferFactory::registry{ _OV_OP_SHAPE_INFER_VA_REG(ReduceMin, ShapeInferTA, op::util::ArithmeticReductionKeepDims, util::bit::mask(1)), _OV_OP_SHAPE_INFER_VA_REG(ReduceProd, ShapeInferTA, op::util::ArithmeticReductionKeepDims, util::bit::mask(1)), _OV_OP_SHAPE_INFER_VA_REG(ReduceSum, ShapeInferTA, op::util::ArithmeticReductionKeepDims, util::bit::mask(1)), + _OV_OP_SHAPE_INFER_MASK_REG(Tile, ShapeInferenceTA, util::bit::mask(1)), // Operators shape inferences for specific opset version should be specified below // opset11 _OV_OP_SHAPE_INFER_MASK_REG(opset11::Interpolate, ShapeInferPaddingTA, util::bit::mask(1, 2, 3)), + // opset5 + _OV_OP_SHAPE_INFER_MASK_REG(opset5::LSTMSequence, ShapeInferTA, util::bit::mask()), + _OV_OP_SHAPE_INFER_MASK_REG(opset5::RNNSequence, ShapeInferTA, util::bit::mask()), // opset4 _OV_OP_SHAPE_INFER_MASK_REG(opset4::Interpolate, ShapeInferPaddingTA, util::bit::mask(1, 2)), // opset1 _OV_OP_SHAPE_INFER_MASK_REG(opset1::Interpolate, ShapeInferTA, util::bit::mask(1)), + _OV_OP_SHAPE_INFER_MASK_REG(opset1::LSTMSequence, ShapeInferTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(opset1::Proposal, ShapeInferTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(opset1::Reverse, ShapeInferTA, util::bit::mask(1)), + _OV_OP_SHAPE_INFER_MASK_REG(opset1::RNNCell, ShapeInferTA, util::bit::mask()), }; #undef _OV_OP_NON_TEMPLATE_SHAPE_INFER_REG diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_shape_inference_test.cpp 
b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_shape_inference_test.cpp new file mode 100644 index 00000000000000..229178fb0f1048 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_shape_inference_test.cpp @@ -0,0 +1,144 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; + +class GRUCellV3StaticShapeInferenceTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + this->output_shapes = ShapeVector(1); + } +}; + +TEST_F(GRUCellV3StaticShapeInferenceTest, default_ctor) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t gates_count = 3; + + const auto gru = make_op(); + + input_shapes = {StaticShape{batch_size, input_size}, // X + StaticShape{batch_size, hidden_size}, // H_t + StaticShape{gates_count * hidden_size, input_size}, // W + StaticShape{gates_count * hidden_size, hidden_size}, // R + StaticShape{gates_count * hidden_size}}; // B + + shape_inference(gru.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); +} + +TEST_F(GRUCellV3StaticShapeInferenceTest, default_bias) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t gates_count = 3; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(2)); + + // Default `B` input is created as Constant by GRUCell constructor + const auto gru = make_op(X, H_t, W, R, hidden_size); + + input_shapes = {StaticShape{batch_size, input_size}, // X + StaticShape{batch_size, 
hidden_size}, // H_t + StaticShape{gates_count * hidden_size, input_size}, // W + StaticShape{gates_count * hidden_size, hidden_size}, // R + StaticShape{gates_count * hidden_size}}; // B + + shape_inference(gru.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); +} + +TEST_F(GRUCellV3StaticShapeInferenceTest, with_bias) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t gates_count = 3; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto gru = make_op(X, H_t, W, R, B, hidden_size); + + input_shapes = {StaticShape{batch_size, input_size}, // X + StaticShape{batch_size, hidden_size}, // H_t + StaticShape{gates_count * hidden_size, input_size}, // W + StaticShape{gates_count * hidden_size, hidden_size}, // R + StaticShape{gates_count * hidden_size}}; // B + + output_shapes = {StaticShape{}, StaticShape{}}; + + shape_inference(gru.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); +} + +TEST_F(GRUCellV3StaticShapeInferenceTest, linear_before) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t gates_count = 3; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto B = std::make_shared(element::f32, 
PartialShape::dynamic(1)); + + const auto gru = make_op(X, + H_t, + W, + R, + B, + hidden_size, + std::vector{"sigmoid", "tanh"}, + std::vector{}, + std::vector{}, + 0.f, + true); + + input_shapes = {StaticShape{batch_size, input_size}, // X + StaticShape{batch_size, hidden_size}, // H_t + StaticShape{gates_count * hidden_size, input_size}, // W + StaticShape{gates_count * hidden_size, hidden_size}, // R + StaticShape{(gates_count + 1) * hidden_size}}; // B + + output_shapes = {StaticShape{}, StaticShape{}}; + + shape_inference(gru.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); +} + +TEST_F(GRUCellV3StaticShapeInferenceTest, dynamic_rank_inputs) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t gates_count = 3; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic()); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic()); + const auto W = std::make_shared(element::f32, PartialShape::dynamic()); + const auto R = std::make_shared(element::f32, PartialShape::dynamic()); + const auto B = std::make_shared(element::f32, PartialShape::dynamic()); + + const auto gru = make_op(X, H_t, W, R, B, hidden_size); + + input_shapes = {StaticShape{batch_size, input_size}, // X + StaticShape{batch_size, hidden_size}, // H_t + StaticShape{gates_count * hidden_size, input_size}, // W + StaticShape{gates_count * hidden_size, hidden_size}, // R + StaticShape{gates_count * hidden_size}}; // B + + shape_inference(gru.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); +} diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_test.cpp deleted file mode 100644 index 5fdaf6680ec600..00000000000000 --- 
a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_test.cpp +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "utils.hpp" - -using namespace ov; -using namespace ov::intel_cpu; - -TEST(StaticShapeInferenceTest, GRUCellTest_default_bias) { - constexpr size_t batch_size = 2; - constexpr size_t input_size = 3; - constexpr size_t hidden_size = 5; - constexpr size_t gates_count = 3; - - const auto X = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto W = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto R = std::make_shared(element::f32, PartialShape::dynamic(2)); - - // Default `B` input is created as Constant by GRUCell contructor - const auto gru = std::make_shared(X, H_t, W, R, hidden_size); - - std::vector static_input_shapes{StaticShape{batch_size, input_size}, // X - StaticShape{batch_size, hidden_size}, // H_t - StaticShape{gates_count * hidden_size, input_size}, // W - StaticShape{gates_count * hidden_size, hidden_size}, // R - StaticShape{gates_count * hidden_size}}; // B - - std::vector static_output_shapes{StaticShape{}}; - - shape_inference(gru.get(), static_input_shapes, static_output_shapes); - EXPECT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size})); -} - -TEST(StaticShapeInferenceTest, GRUCellTest_with_bias) { - constexpr size_t batch_size = 2; - constexpr size_t input_size = 3; - constexpr size_t hidden_size = 5; - constexpr size_t gates_count = 3; - - const auto X = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto W = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto R = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto B = std::make_shared(element::f32, 
PartialShape::dynamic(1)); - const auto gru = std::make_shared(X, H_t, W, R, B, hidden_size); - - std::vector static_input_shapes{StaticShape{batch_size, input_size}, // X - StaticShape{batch_size, hidden_size}, // H_t - StaticShape{gates_count * hidden_size, input_size}, // W - StaticShape{gates_count * hidden_size, hidden_size}, // R - StaticShape{gates_count * hidden_size}}; // B - - std::vector static_output_shapes{StaticShape{}, StaticShape{}}; - - shape_inference(gru.get(), static_input_shapes, static_output_shapes); - EXPECT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size})); -} - -TEST(StaticShapeInferenceTest, GRUCellTest_linear_before) { - constexpr size_t batch_size = 2; - constexpr size_t input_size = 3; - constexpr size_t hidden_size = 5; - constexpr size_t gates_count = 3; - - const auto X = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto W = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto R = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto B = std::make_shared(element::f32, PartialShape::dynamic(1)); - - const auto gru = std::make_shared(X, - H_t, - W, - R, - B, - hidden_size, - std::vector{"sigmoid", "tanh"}, - std::vector{}, - std::vector{}, - 0.f, - true); - - std::vector static_input_shapes{StaticShape{batch_size, input_size}, // X - StaticShape{batch_size, hidden_size}, // H_t - StaticShape{gates_count * hidden_size, input_size}, // W - StaticShape{gates_count * hidden_size, hidden_size}, // R - StaticShape{(gates_count + 1) * hidden_size}}; // B - - std::vector static_output_shapes{StaticShape{}, StaticShape{}}; - - shape_inference(gru.get(), static_input_shapes, static_output_shapes); - EXPECT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size})); -} - -TEST(StaticShapeInferenceTest, GRUCellTest_dynamic_rank_inputs) { - constexpr size_t batch_size = 2; - constexpr 
size_t input_size = 3; - constexpr size_t hidden_size = 5; - constexpr size_t gates_count = 3; - - const auto X = std::make_shared(element::f32, PartialShape::dynamic()); - const auto H_t = std::make_shared(element::f32, PartialShape::dynamic()); - const auto W = std::make_shared(element::f32, PartialShape::dynamic()); - const auto R = std::make_shared(element::f32, PartialShape::dynamic()); - const auto B = std::make_shared(element::f32, PartialShape::dynamic()); - - const auto gru = std::make_shared(X, H_t, W, R, B, hidden_size); - - std::vector static_input_shapes{StaticShape{batch_size, input_size}, // X - StaticShape{batch_size, hidden_size}, // H_t - StaticShape{gates_count * hidden_size, input_size}, // W - StaticShape{gates_count * hidden_size, hidden_size}, // R - StaticShape{gates_count * hidden_size}}; // B - - std::vector static_output_shapes{StaticShape{}}; - - shape_inference(gru.get(), static_input_shapes, static_output_shapes); - EXPECT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size})); -} diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_shape_inference_test.cpp similarity index 51% rename from src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_test.cpp rename to src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_shape_inference_test.cpp index e55da4d19e937b..c42f1c8c65934c 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_shape_inference_test.cpp @@ -9,7 +9,36 @@ using namespace ov; using namespace ov::intel_cpu; -TEST(StaticShapeInferenceTest, GRUSequenceTest_FORWARD) { +class GRUSequenceV5StaticShapeInferenceTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + this->output_shapes = ShapeVector(2); + } +}; + 
+TEST_F(GRUSequenceV5StaticShapeInferenceTest, default_ctor) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 3; + + const auto gru_sequence = make_op(); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(gru_sequence.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(GRUSequenceV5StaticShapeInferenceTest, FORWARD) { constexpr size_t batch_size = 2; constexpr size_t input_size = 3; constexpr size_t hidden_size = 5; @@ -29,22 +58,19 @@ TEST(StaticShapeInferenceTest, GRUSequenceTest_FORWARD) { const auto gru_sequence = std::make_shared(X, H_t, seq_lengths, W, R, B, hidden_size, direction); - std::vector static_input_shapes{ - StaticShape{batch_size, seq_len, input_size}, // X - StaticShape{batch_size, num_directions, hidden_size}, // H_t - StaticShape{batch_size}, // seq_lengths - StaticShape{num_directions, gates_count * hidden_size, input_size}, // W - StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R - StaticShape{num_directions, gates_count * hidden_size}}; // B - - std::vector static_output_shapes{StaticShape{}, StaticShape{}}; + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * 
hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(gru_sequence.get(), static_input_shapes, static_output_shapes); - ASSERT_EQ(static_output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); - ASSERT_EQ(static_output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + shape_inference(gru_sequence.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } -TEST(StaticShapeInferenceTest, GRUSequenceTest_FORWARD_linear_before) { +TEST_F(GRUSequenceV5StaticShapeInferenceTest, FORWARD_linear_before) { constexpr size_t batch_size = 2; constexpr size_t input_size = 3; constexpr size_t hidden_size = 5; @@ -75,22 +101,19 @@ TEST(StaticShapeInferenceTest, GRUSequenceTest_FORWARD_linear_before) { 0.f, true); - std::vector static_input_shapes{ - StaticShape{batch_size, seq_len, input_size}, // X - StaticShape{batch_size, num_directions, hidden_size}, // H_t - StaticShape{batch_size}, // seq_lengths - StaticShape{num_directions, gates_count * hidden_size, input_size}, // W - StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R - StaticShape{num_directions, (gates_count + 1) * hidden_size}}; // B + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, (gates_count + 1) * hidden_size}}; // B - std::vector static_output_shapes{StaticShape{}, StaticShape{}}; - - shape_inference(gru_sequence.get(), static_input_shapes, 
static_output_shapes); - ASSERT_EQ(static_output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); - ASSERT_EQ(static_output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + shape_inference(gru_sequence.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } -TEST(StaticShapeInferenceTest, GRUSequenceTest_REVERSE) { +TEST_F(GRUSequenceV5StaticShapeInferenceTest, REVERSE) { constexpr size_t batch_size = 2; constexpr size_t input_size = 3; constexpr size_t hidden_size = 5; @@ -110,22 +133,19 @@ TEST(StaticShapeInferenceTest, GRUSequenceTest_REVERSE) { const auto gru_sequence = std::make_shared(X, H_t, seq_lengths, W, R, B, hidden_size, direction); - std::vector static_input_shapes{ - StaticShape{batch_size, seq_len, input_size}, // X - StaticShape{batch_size, num_directions, hidden_size}, // H_t - StaticShape{batch_size}, // seq_lengths - StaticShape{num_directions, gates_count * hidden_size, input_size}, // W - StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R - StaticShape{num_directions, gates_count * hidden_size}}; // B - - std::vector static_output_shapes{StaticShape{}, StaticShape{}}; + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(gru_sequence.get(), static_input_shapes, static_output_shapes); - ASSERT_EQ(static_output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); - ASSERT_EQ(static_output_shapes[1], StaticShape({batch_size, num_directions, 
hidden_size})); + shape_inference(gru_sequence.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } -TEST(StaticShapeInferenceTest, GRUSequenceTest_BIDIRECTIONAL) { +TEST_F(GRUSequenceV5StaticShapeInferenceTest, BIDIRECTIONAL) { constexpr size_t batch_size = 2; constexpr size_t input_size = 3; constexpr size_t hidden_size = 5; @@ -145,17 +165,14 @@ TEST(StaticShapeInferenceTest, GRUSequenceTest_BIDIRECTIONAL) { const auto gru_sequence = std::make_shared(X, H_t, seq_lengths, W, R, B, hidden_size, direction); - std::vector static_input_shapes{ - StaticShape{batch_size, seq_len, input_size}, // X - StaticShape{batch_size, num_directions, hidden_size}, // H_t - StaticShape{batch_size}, // seq_lengths - StaticShape{num_directions, gates_count * hidden_size, input_size}, // W - StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R - StaticShape{num_directions, gates_count * hidden_size}}; // B - - std::vector static_output_shapes{StaticShape{}, StaticShape{}}; + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(gru_sequence.get(), static_input_shapes, static_output_shapes); - ASSERT_EQ(static_output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); - ASSERT_EQ(static_output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + shape_inference(gru_sequence.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + 
EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference_test.cpp similarity index 60% rename from src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference.cpp rename to src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference_test.cpp index 330ed81d67ebe4..23d7d31515ad29 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference_test.cpp @@ -9,7 +9,33 @@ using namespace ov; using namespace ov::intel_cpu; -TEST(StaticShapeInferenceTest, LstmCellTest) { +class LSTMCellV4StaticShapeInferenceTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + this->output_shapes = ShapeVector(2); + } +}; + +TEST_F(LSTMCellV4StaticShapeInferenceTest, default_ctor) { + const size_t batch_size = 2; + const size_t input_size = 3; + const size_t hidden_size = 3; + const size_t gates_count = 4; + + const auto lstm_cell = make_op(); + + input_shapes = {StaticShape{batch_size, input_size}, + StaticShape{batch_size, hidden_size}, + StaticShape{batch_size, hidden_size}, + StaticShape{gates_count * hidden_size, input_size}, + StaticShape{gates_count * hidden_size, hidden_size}, + StaticShape{gates_count * hidden_size}}, + shape_inference(lstm_cell.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, hidden_size})); +} + +TEST_F(LSTMCellV4StaticShapeInferenceTest, basic_shape_infer) { const size_t batch_size = 2; const size_t input_size = 3; const size_t hidden_size = 3; @@ -21,21 +47,20 @@ TEST(StaticShapeInferenceTest, LstmCellTest) { const auto H_t = std::make_shared(element::f32, 
PartialShape{-1, -1}); const auto C_t = std::make_shared(element::f32, PartialShape{-1, -1}); const auto Bias = std::make_shared(element::f32, PartialShape{-1}); - const auto lstm_cell = std::make_shared(X, H_t, C_t, W, R, Bias, hidden_size); + const auto lstm_cell = make_op(X, H_t, C_t, W, R, Bias, hidden_size); - std::vector static_input_shapes = {StaticShape{batch_size, input_size}, - StaticShape{batch_size, hidden_size}, - StaticShape{batch_size, hidden_size}, - StaticShape{gates_count * hidden_size, input_size}, - StaticShape{gates_count * hidden_size, hidden_size}, - StaticShape{gates_count * hidden_size}}, - static_output_shapes = {StaticShape{}, StaticShape{}}; - shape_inference(lstm_cell.get(), static_input_shapes, static_output_shapes); - ASSERT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size})); - ASSERT_EQ(static_output_shapes[1], StaticShape({batch_size, hidden_size})); + input_shapes = {StaticShape{batch_size, input_size}, + StaticShape{batch_size, hidden_size}, + StaticShape{batch_size, hidden_size}, + StaticShape{gates_count * hidden_size, input_size}, + StaticShape{gates_count * hidden_size, hidden_size}, + StaticShape{gates_count * hidden_size}}, + shape_inference(lstm_cell.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, hidden_size})); } -TEST(StaticShapeInferenceTest, LstmCellV1Test) { +TEST(StaticShapeInferenceTest, LSTMCellV0Test) { const size_t batch_size = 2; const size_t input_size = 3; const size_t hidden_size = 3; diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp new file mode 100644 index 00000000000000..bf6ef9e3676125 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp @@ -0,0 +1,327 @@ +// Copyright (C) 2018-2023 
Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; + +class LSTMSequenceV0StaticShapeInferenceTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + this->output_shapes = ShapeVector(3); + } +}; + +TEST_F(LSTMSequenceV0StaticShapeInferenceTest, default_ctor) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 4; + + const auto op = make_op(); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size, num_directions, hidden_size}, // C_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}, // B + StaticShape{num_directions, (gates_count - 1) * hidden_size}}; // P + + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 3); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(LSTMSequenceV0StaticShapeInferenceTest, FORWARD_without_P) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 4; + + constexpr auto direction = op::RecurrentSequenceDirection::FORWARD; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = 
std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = make_op(X, H_t, C_t, seq_lengths, W, R, B, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size, num_directions, hidden_size}, // C_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 3); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(LSTMSequenceV0StaticShapeInferenceTest, FORWARD_with_P) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 4; + + constexpr auto direction = op::RecurrentSequenceDirection::FORWARD; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = 
std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto P = std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = make_op(X, H_t, C_t, seq_lengths, W, R, B, P, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size, num_directions, hidden_size}, // C_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}, // B + StaticShape{num_directions, (gates_count - 1) * hidden_size}}; // P + + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 3); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(LSTMSequenceV0StaticShapeInferenceTest, REVERSE) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 4; + + constexpr auto direction = op::RecurrentSequenceDirection::REVERSE; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = std::make_shared(element::f32, 
PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = make_op(X, H_t, C_t, seq_lengths, W, R, B, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size, num_directions, hidden_size}, // C_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes.size(), 3); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(LSTMSequenceV0StaticShapeInferenceTest, BIDIRECTIONAL) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 2; + constexpr size_t gates_count = 4; + + constexpr auto direction = op::RecurrentSequenceDirection::BIDIRECTIONAL; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const 
auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = make_op(X, H_t, C_t, seq_lengths, W, R, B, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size, num_directions, hidden_size}, // C_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes.size(), 3); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); +} + +class LSTMSequenceV5StaticShapeInferenceTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + this->output_shapes = ShapeVector(3); + } +}; + +TEST_F(LSTMSequenceV5StaticShapeInferenceTest, default_ctor) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 4; + + const auto op = std::make_shared(); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size, num_directions, hidden_size}, // C_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + + 
EXPECT_EQ(output_shapes.size(), 3); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(LSTMSequenceV5StaticShapeInferenceTest, FORWARD) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 4; + + constexpr auto direction = op::RecurrentSequenceDirection::FORWARD; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = std::make_shared(X, H_t, C_t, seq_lengths, W, R, B, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size, num_directions, hidden_size}, // C_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 3); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, 
num_directions, hidden_size})); + EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(LSTMSequenceV5StaticShapeInferenceTest, REVERSE) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 4; + + constexpr auto direction = op::RecurrentSequenceDirection::REVERSE; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = std::make_shared(X, H_t, C_t, seq_lengths, W, R, B, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size, num_directions, hidden_size}, // C_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes.size(), 3); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(LSTMSequenceV5StaticShapeInferenceTest, BIDIRECTIONAL) { + 
constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 2; + constexpr size_t gates_count = 4; + + constexpr auto direction = op::RecurrentSequenceDirection::BIDIRECTIONAL; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = std::make_shared(X, H_t, C_t, seq_lengths, W, R, B, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size, num_directions, hidden_size}, // C_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes.size(), 3); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); + EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); +} diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_cell_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_cell_shape_inference_test.cpp new file mode 100644 index 
00000000000000..8427492edf67a0 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_cell_shape_inference_test.cpp @@ -0,0 +1,110 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; + +class RNNCellV0StaticShapeInferenceTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + this->output_shapes = ShapeVector(1); + } +}; + +TEST_F(RNNCellV0StaticShapeInferenceTest, default_ctor) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t gates_count = 1; + + // Default `B` input is created as Constant by RNNCell contructor + const auto gru = make_op(); + + input_shapes = {StaticShape{batch_size, input_size}, // X + StaticShape{batch_size, hidden_size}, // H_t + StaticShape{gates_count * hidden_size, input_size}, // W + StaticShape{gates_count * hidden_size, hidden_size}, // R + StaticShape{gates_count * hidden_size}}; // B + + std::vector output_shapes; + shape_inference(gru.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); +} + +TEST_F(RNNCellV0StaticShapeInferenceTest, default_bias) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t gates_count = 1; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(2)); + + // Default `B` input is created as Constant by RNNCell contructor + const auto gru = make_op(X, H_t, W, R, hidden_size); + + input_shapes = {StaticShape{batch_size, input_size}, // X + StaticShape{batch_size, hidden_size}, // H_t + 
StaticShape{gates_count * hidden_size, input_size}, // W + StaticShape{gates_count * hidden_size, hidden_size}, // R + StaticShape{gates_count * hidden_size}}; // B + + std::vector output_shapes; + shape_inference(gru.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); +} + +TEST_F(RNNCellV0StaticShapeInferenceTest, with_bias) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t gates_count = 1; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(2)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto gru = make_op(X, H_t, W, R, B, hidden_size); + + input_shapes = {StaticShape{batch_size, input_size}, // X + StaticShape{batch_size, hidden_size}, // H_t + StaticShape{gates_count * hidden_size, input_size}, // W + StaticShape{gates_count * hidden_size, hidden_size}, // R + StaticShape{gates_count * hidden_size}}; // B + + shape_inference(gru.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); +} + +TEST_F(RNNCellV0StaticShapeInferenceTest, dynamic_rank_inputs) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t gates_count = 1; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic()); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic()); + const auto W = std::make_shared(element::f32, PartialShape::dynamic()); + const auto R = std::make_shared(element::f32, PartialShape::dynamic()); + const auto B = std::make_shared(element::f32, PartialShape::dynamic()); + + const auto gru = make_op(X, H_t, 
W, R, B, hidden_size); + + input_shapes = {StaticShape{batch_size, input_size}, // X + StaticShape{batch_size, hidden_size}, // H_t + StaticShape{gates_count * hidden_size, input_size}, // W + StaticShape{gates_count * hidden_size, hidden_size}, // R + StaticShape{gates_count * hidden_size}}; // B + + std::vector output_shapes; + shape_inference(gru.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); +} diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_seq_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_seq_shape_inference_test.cpp new file mode 100644 index 00000000000000..ef409775ab6766 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_seq_shape_inference_test.cpp @@ -0,0 +1,132 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; + +class RNNSequenceV5StaticShapeInferenceTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + this->output_shapes = ShapeVector(1); + } +}; + +TEST_F(RNNSequenceV5StaticShapeInferenceTest, default_ctor) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 1; + + const auto op = make_op(); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], 
StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(RNNSequenceV5StaticShapeInferenceTest, FORWARD) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 1; + + constexpr auto direction = op::RecurrentSequenceDirection::FORWARD; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = make_op(X, H_t, seq_lengths, W, R, B, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(RNNSequenceV5StaticShapeInferenceTest, REVERSE) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 1; + constexpr size_t gates_count = 1; + + constexpr auto direction = 
op::RecurrentSequenceDirection::REVERSE; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = make_op(X, H_t, seq_lengths, W, R, B, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); +} + +TEST_F(RNNSequenceV5StaticShapeInferenceTest, BIDIRECTIONAL) { + constexpr size_t batch_size = 2; + constexpr size_t input_size = 3; + constexpr size_t hidden_size = 5; + constexpr size_t seq_len = 4; + constexpr size_t num_directions = 2; + constexpr size_t gates_count = 1; + + constexpr auto direction = op::RecurrentSequenceDirection::BIDIRECTIONAL; + + const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); + const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); + const auto B = 
std::make_shared(element::f32, PartialShape::dynamic(2)); + + const auto op = make_op(X, H_t, seq_lengths, W, R, B, hidden_size, direction); + + input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X + StaticShape{batch_size, num_directions, hidden_size}, // H_t + StaticShape{batch_size}, // seq_lengths + StaticShape{num_directions, gates_count * hidden_size, input_size}, // W + StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R + StaticShape{num_directions, gates_count * hidden_size}}; // B + + shape_inference(op.get(), input_shapes, output_shapes); + EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); + EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); +} diff --git a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp index 3878af278fa344..288c5e8da08361 100644 --- a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp +++ b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp @@ -545,31 +545,6 @@ std::shared_ptr generate(const std::shared_ptr &node return std::make_shared(results, params, "LRN"); } -std::shared_ptr generate(const std::shared_ptr &node) { - const auto params = - ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::i32}, - {{5, 10, 10}, {5, 1, 10}, {5, 1, 10}, {5}}); - const auto W = ngraph::builder::makeConstant(ov::element::f32, {1, 40, 10}, {}, true); - const auto R = ngraph::builder::makeConstant(ov::element::f32, {1, 40, 10}, {}, true); - const auto B = ngraph::builder::makeConstant(ov::element::f32, {1, 40}, {}, true); - const auto P = ngraph::builder::makeConstant(ov::element::f32, {1, 30}, {}, true); - const int64_t hidden_size = 10; - const auto lstm_sequence = - 
std::make_shared(params[0], - params[1], - params[2], - params[3], - W, - R, - B, - P, - hidden_size, - ov::op::RecurrentSequenceDirection::FORWARD); - ov::ResultVector results{std::make_shared(lstm_sequence->output(0)), - std::make_shared(lstm_sequence->output(1)), - std::make_shared(lstm_sequence->output(2))}; - return std::make_shared(results, params, "LSTMSequence"); -} std::shared_ptr generate(const std::shared_ptr &node) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1}}); @@ -1765,6 +1740,19 @@ std::shared_ptr generateRNNCellBase(const std::shared_ptr ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), std::make_shared(RNNCellBaseNode->output(1))};; return std::make_shared(results, params, "LSTMCell4BaseGraph"); + } else if (ov::is_type(node)) { + const auto params = ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::i64}, + {{5, 10, 10}, {5, 1, 10}, {5, 1, 10}, {5}}); + const auto W = ngraph::builder::makeConstant(ov::element::f32, {1, 40, 10}, {}, true); + const auto R = ngraph::builder::makeConstant(ov::element::f32, {1, 40, 10}, {}, true); + const auto B = ngraph::builder::makeConstant(ov::element::f32, {1, 40}, {}, true); + const auto P = ngraph::builder::makeConstant(ov::element::f32, {1, 30}, {}, true); + RNNCellBaseNode = std::make_shared(params.at(0), params.at(1), params.at(2), params.at(3), + W, R, B, 10, ov::op::RecurrentSequenceDirection::FORWARD); + ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), + std::make_shared(RNNCellBaseNode->output(1)), + std::make_shared(RNNCellBaseNode->output(2))}; + return std::make_shared(results, params, "LSTMSeq1BaseGraph"); } else if (ov::is_type(node)) { const auto params = ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::i64}, {{5, 10, 10}, {5, 1, 10}, {5, 1, 10}, {5}}); @@ -1776,7 +1764,7 @@ std::shared_ptr generateRNNCellBase(const 
std::shared_ptr ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), std::make_shared(RNNCellBaseNode->output(1)), std::make_shared(RNNCellBaseNode->output(2))}; - return std::make_shared(results, params, "LSTMSeqBaseGraph"); + return std::make_shared(results, params, "LSTMSeq5BaseGraph"); } else if (ov::is_type(node)) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3}, {2, 3}}); const auto W = ngraph::builder::makeConstant(ov::element::f32, {3, 3}, {}, true); From 900163c4849be3c8a13c90e05d2da1a925df2cec Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 11 Jul 2023 10:39:29 +0200 Subject: [PATCH 15/21] [CPU] In place memory for dynamic shapes (#17741) * InPlace memory direction resolution pass * Partitioned mem mngr * Concat reshape pattern has been enabled * Enhanced in place conflicts detection * Refactor Concat * Fix Reshape isExecutable call * Split node refactoring * Gather node inPlace special case * Temporal WA to enable zero copy on Split input * Process inPlace edges in order * Fixes * Remove implicit initialization from Edge::getMemory * Allow NotAllocated edges call allocate * extract IMemory interface and implements Memory class. 
* IMemory fixes after merge * Prevent input memory modification * Minor build fixes * Fix unittest build * Fix for variadic concat * Edge reference fix * Lock based mem manager * Return value fix * Naming refactoring * Use make_unique * Split output dims check * Special processing for empty parts * Concat null memory fix * Fix downstream inplace resolution * Concat allocated edge search fix * Variadic split clean up tests * Variadic Split tests update * Fix make_unique conflict * Split inPlace tests are back * Concat inPlace dyn shapes tests * Add safety checks to isInputTensorAtPortEmpty * Fix partitioned mem mngr zero size reset * Introduce Static Memory * Gather InPlace Tests * InPlace conflict check in merge Reorder Transpose * Apply review comments * fix after rebase --------- Co-authored-by: jialipen --- src/plugins/intel_cpu/src/cpu_memory.cpp | 316 ++++++++++++----- src/plugins/intel_cpu/src/cpu_memory.h | 270 +++++++++------ .../intel_cpu/src/dnnl_postops_composer.cpp | 15 +- src/plugins/intel_cpu/src/dnnl_scratch_pad.h | 7 +- src/plugins/intel_cpu/src/edge.cpp | 288 ++++++---------- src/plugins/intel_cpu/src/edge.h | 15 +- src/plugins/intel_cpu/src/graph.cpp | 319 ++++++++++++++---- src/plugins/intel_cpu/src/graph.h | 4 + src/plugins/intel_cpu/src/graph_optimizer.cpp | 16 +- src/plugins/intel_cpu/src/infer_request.cpp | 82 ++--- src/plugins/intel_cpu/src/infer_request.h | 2 +- .../src/memory_desc/cpu_memory_desc_utils.cpp | 6 +- .../src/memory_desc/cpu_memory_desc_utils.h | 6 +- src/plugins/intel_cpu/src/memory_state.h | 2 +- src/plugins/intel_cpu/src/node.cpp | 130 ++++--- src/plugins/intel_cpu/src/node.h | 9 +- .../intel_cpu/src/nodes/adaptive_pooling.cpp | 26 +- .../intel_cpu/src/nodes/batch_to_space.cpp | 14 +- src/plugins/intel_cpu/src/nodes/bin_conv.cpp | 18 +- src/plugins/intel_cpu/src/nodes/broadcast.cpp | 26 +- src/plugins/intel_cpu/src/nodes/bucketize.cpp | 12 +- .../intel_cpu/src/nodes/color_convert.cpp | 4 +- 
.../src/nodes/common/tile_broadcast_utils.cpp | 6 +- src/plugins/intel_cpu/src/nodes/concat.cpp | 218 +++++------- src/plugins/intel_cpu/src/nodes/concat.h | 3 +- src/plugins/intel_cpu/src/nodes/conv.cpp | 57 ++-- src/plugins/intel_cpu/src/nodes/convert.cpp | 8 +- .../src/nodes/ctc_greedy_decoder.cpp | 6 +- .../src/nodes/ctc_greedy_decoder_seq_len.cpp | 10 +- src/plugins/intel_cpu/src/nodes/ctc_loss.cpp | 12 +- src/plugins/intel_cpu/src/nodes/cum_sum.cpp | 14 +- src/plugins/intel_cpu/src/nodes/cum_sum.h | 2 +- src/plugins/intel_cpu/src/nodes/deconv.cpp | 46 +-- src/plugins/intel_cpu/src/nodes/def_conv.cpp | 30 +- .../intel_cpu/src/nodes/depth_to_space.cpp | 12 +- .../intel_cpu/src/nodes/depth_to_space.h | 2 +- .../intel_cpu/src/nodes/detection_output.cpp | 20 +- src/plugins/intel_cpu/src/nodes/dft.cpp | 12 +- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 20 +- .../src/nodes/embedding_bag_offset_sum.cpp | 10 +- .../src/nodes/embedding_bag_packed_sum.cpp | 6 +- .../intel_cpu/src/nodes/embedding_bag_sum.cpp | 4 +- .../src/nodes/embedding_segments_sum.cpp | 14 +- .../src/nodes/executors/acl/acl_eltwise.cpp | 4 +- .../nodes/executors/acl/acl_interpolate.cpp | 2 +- .../src/nodes/executors/acl/acl_mvn.cpp | 4 +- .../src/nodes/executors/acl/acl_pooling.cpp | 6 +- .../src/nodes/executors/acl/acl_reduce.cpp | 4 +- .../src/nodes/executors/interpolate.cpp | 2 +- ...xperimental_detectron_detection_output.cpp | 16 +- ...ectron_generate_proposals_single_image.cpp | 12 +- ...erimental_detectron_priorgridgenerator.cpp | 4 +- ...rimental_detectron_roifeatureextractor.cpp | 8 +- .../nodes/experimental_detectron_topkrois.cpp | 6 +- .../src/nodes/extract_image_patches.cpp | 8 +- src/plugins/intel_cpu/src/nodes/eye.cpp | 2 +- src/plugins/intel_cpu/src/nodes/eye.h | 10 +- .../intel_cpu/src/nodes/fake_quantize.cpp | 53 ++- .../intel_cpu/src/nodes/fullyconnected.cpp | 32 +- src/plugins/intel_cpu/src/nodes/gather.cpp | 89 ++++- src/plugins/intel_cpu/src/nodes/gather.h | 3 + 
.../intel_cpu/src/nodes/gather_elements.cpp | 8 +- src/plugins/intel_cpu/src/nodes/gather_nd.cpp | 28 +- src/plugins/intel_cpu/src/nodes/gather_nd.h | 6 +- .../intel_cpu/src/nodes/gather_tree.cpp | 12 +- src/plugins/intel_cpu/src/nodes/gather_tree.h | 2 +- .../src/nodes/generate_proposals.cpp | 16 +- .../intel_cpu/src/nodes/grid_sample.cpp | 12 +- src/plugins/intel_cpu/src/nodes/grn.cpp | 4 +- src/plugins/intel_cpu/src/nodes/if.cpp | 10 +- src/plugins/intel_cpu/src/nodes/input.cpp | 25 +- .../intel_cpu/src/nodes/interaction.cpp | 21 +- .../intel_cpu/src/nodes/interpolate.cpp | 32 +- .../intel_cpu/src/nodes/log_softmax.cpp | 4 +- src/plugins/intel_cpu/src/nodes/lrn.cpp | 12 +- .../intel_cpu/src/nodes/mathematics.cpp | 6 +- src/plugins/intel_cpu/src/nodes/matmul.cpp | 22 +- .../intel_cpu/src/nodes/matrix_nms.cpp | 10 +- src/plugins/intel_cpu/src/nodes/memory.cpp | 24 +- src/plugins/intel_cpu/src/nodes/memory.hpp | 2 +- src/plugins/intel_cpu/src/nodes/mha.cpp | 20 +- .../intel_cpu/src/nodes/multiclass_nms.cpp | 18 +- src/plugins/intel_cpu/src/nodes/mvn.cpp | 12 +- src/plugins/intel_cpu/src/nodes/ngram.cpp | 8 +- .../src/nodes/non_max_suppression.cpp | 28 +- src/plugins/intel_cpu/src/nodes/non_zero.cpp | 8 +- src/plugins/intel_cpu/src/nodes/normalize.cpp | 8 +- src/plugins/intel_cpu/src/nodes/one_hot.cpp | 14 +- src/plugins/intel_cpu/src/nodes/pad.cpp | 38 +-- src/plugins/intel_cpu/src/nodes/pad.h | 12 +- src/plugins/intel_cpu/src/nodes/pooling.cpp | 14 +- src/plugins/intel_cpu/src/nodes/priorbox.cpp | 16 +- .../src/nodes/priorbox_clustered.cpp | 18 +- src/plugins/intel_cpu/src/nodes/proposal.cpp | 10 +- .../intel_cpu/src/nodes/psroi_pooling.cpp | 12 +- src/plugins/intel_cpu/src/nodes/range.cpp | 8 +- src/plugins/intel_cpu/src/nodes/rdft.cpp | 16 +- src/plugins/intel_cpu/src/nodes/reduce.cpp | 22 +- src/plugins/intel_cpu/src/nodes/reference.cpp | 8 +- .../intel_cpu/src/nodes/region_yolo.cpp | 10 +- src/plugins/intel_cpu/src/nodes/reorder.cpp | 87 +++-- 
src/plugins/intel_cpu/src/nodes/reorder.h | 2 +- .../intel_cpu/src/nodes/reorg_yolo.cpp | 4 +- src/plugins/intel_cpu/src/nodes/reshape.cpp | 30 +- .../intel_cpu/src/nodes/reverse_sequence.cpp | 8 +- .../intel_cpu/src/nodes/reverse_sequence.h | 2 +- src/plugins/intel_cpu/src/nodes/rnn.cpp | 34 +- src/plugins/intel_cpu/src/nodes/roi_align.cpp | 20 +- .../intel_cpu/src/nodes/roi_pooling.cpp | 36 +- src/plugins/intel_cpu/src/nodes/roi_pooling.h | 6 +- src/plugins/intel_cpu/src/nodes/roll.cpp | 12 +- src/plugins/intel_cpu/src/nodes/roll.h | 2 +- .../intel_cpu/src/nodes/scatter_update.cpp | 20 +- src/plugins/intel_cpu/src/nodes/shapeof.cpp | 2 +- .../intel_cpu/src/nodes/shuffle_channels.cpp | 12 +- src/plugins/intel_cpu/src/nodes/softmax.cpp | 8 +- .../intel_cpu/src/nodes/space_to_batch.cpp | 20 +- .../intel_cpu/src/nodes/space_to_depth.cpp | 12 +- src/plugins/intel_cpu/src/nodes/split.cpp | 168 ++++----- src/plugins/intel_cpu/src/nodes/split.h | 2 +- .../intel_cpu/src/nodes/strided_slice.cpp | 28 +- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 16 +- .../intel_cpu/src/nodes/tensoriterator.cpp | 70 ++-- src/plugins/intel_cpu/src/nodes/tile.cpp | 12 +- src/plugins/intel_cpu/src/nodes/topk.cpp | 30 +- src/plugins/intel_cpu/src/nodes/transpose.cpp | 44 +-- src/plugins/intel_cpu/src/nodes/unique.cpp | 28 +- .../intel_cpu/src/partitioned_mem_mgr.cpp | 33 ++ .../intel_cpu/src/partitioned_mem_mgr.h | 40 +++ src/plugins/intel_cpu/src/utils/blob_dump.cpp | 8 +- src/plugins/intel_cpu/src/utils/blob_dump.h | 5 +- .../src/utils/debug_capabilities.cpp | 4 +- .../intel_cpu/src/utils/general_utils.h | 9 + .../intel_cpu/src/utils/node_dumper.cpp | 3 +- .../shape_inference_ngraph.cpp | 2 +- src/plugins/intel_cpu/src/weights_cache.hpp | 2 +- .../functional/single_layer_tests/concat.cpp | 73 +++- .../functional/single_layer_tests/gather.cpp | 104 ++++++ .../functional/single_layer_tests/split.cpp | 126 ++++--- .../single_layer_tests/variadic_split.cpp | 114 ++++++- 
.../src/concat_reshape_concat.cpp | 147 ++++++++ .../intel_cpu/tests/unit/dnnl_memory_test.cpp | 19 +- .../tests/unit/nodes/reorder_node_test.cpp | 26 +- 143 files changed, 2593 insertions(+), 1757 deletions(-) create mode 100644 src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp create mode 100644 src/plugins/intel_cpu/src/partitioned_mem_mgr.h create mode 100644 src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 62d528315932b8..6c34123a65b046 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -32,14 +32,51 @@ namespace { } } } + + void transferData(const IMemory& src, const IMemory& dst, bool ftz) { + node::Reorder::reorderData(src, dst); + + auto localPrim = dst.getPrimitive(); + auto desc = localPrim.get_desc(); + dnnl::impl::memory_desc_wrapper wrapper(desc.get()); + + if (ftz + && src.getDataType() == memory::data_type::f32 + && !wrapper.is_wino_desc() + // WA: to avoid zero filling auxiliary information + && !wrapper.is_rnn_packed_desc() + && dst.getDataType() != memory::data_type::bf16) { + // Internal blobs don't have strides yet. 
+ auto *memData = static_cast(dst.getData()); + memData += wrapper.offset0(); + setSubnormalsToZero(memData, dst.getSize() / sizeof(float)); + } + } + } // namespace -Memory::Memory(const dnnl::engine& eng) : - eng(eng), mgrHandle(std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), this), dnnlMemHandle(this) {} -Memory::Memory(const dnnl::engine& eng, std::unique_ptr mngr) : - eng(eng), mgrHandle(std::make_shared(std::move(mngr)), this), dnnlMemHandle(this) {} +Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) : + m_eng(eng), + m_pMemDesc(desc), + m_mgrHandle(std::make_shared(make_unique()), this), + dnnlMemHandle(this) { + create(m_pMemDesc, data, pads_zeroing); + } + +Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data, bool pads_zeroing) : + Memory::Memory(eng, desc.clone(), data, pads_zeroing) {} -size_t Memory::GetSize() const { +Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr) : + m_eng(eng), m_pMemDesc(desc), m_mgrHandle(mngr, this), dnnlMemHandle(this) { + bool memAllocated = m_mgrHandle->getRawPtr(); + + create(desc, nullptr, !memAllocated); + } + +Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryMngrPtr mngr) : + Memory::Memory(eng, desc.clone(), mngr) {} + +size_t Memory::getSize() const { auto size = getDesc().getCurrentMemSize(); if (size == MemoryDesc::UNDEFINED_SIZE) { IE_THROW() << "Can't get memory size for undefined shape"; @@ -47,111 +84,62 @@ size_t Memory::GetSize() const { return size; } -void Memory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) { - Create(desc.clone(), data, pads_zeroing); +void Memory::create(const MemoryDesc &desc, const void *data, bool pads_zeroing) { + create(desc.clone(), data, pads_zeroing); } -void Memory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroing) { - pMemDesc = desc; - padsZeroing = pads_zeroing; +void Memory::create(MemoryDescPtr desc, 
const void* data, bool pads_zeroing) { + m_pMemDesc = desc; + m_padsZeroing = pads_zeroing; dnnlMemHandle.resetDnnlPrim(); - size_t memSize = 0; - if (pMemDesc->isDefined()) { - memSize = pMemDesc->getCurrentMemSize(); + if (!m_pMemDesc->isDefined()) { + return; } - + auto memSize = m_pMemDesc->getCurrentMemSize(); if (nullptr != data) { - mgrHandle->setExtBuff(const_cast(data), memSize); + m_mgrHandle->setExtBuff(const_cast(data), memSize); } else { - mgrHandle->resize(memSize); + m_mgrHandle->resize(memSize); } } -void Memory::SetData(const Memory& src, bool ftz) const { - node::Reorder::reorderData(src, *this); - - auto localPrim = GetPrimitive(); - auto desc = localPrim.get_desc(); - dnnl::impl::memory_desc_wrapper wrapper(desc.get()); - - if (ftz - && src.GetDataType() == memory::data_type::f32 - && !wrapper.is_wino_desc() - // WA: to avoid zero filling auxiliary information - && !wrapper.is_rnn_packed_desc() - && GetDataType() != memory::data_type::bf16) { - // Internal blobs haven't strides yet. 
- auto *memData = static_cast(GetData()); - memData += wrapper.offset0(); - setSubnormalsToZero(memData, GetSize() / sizeof(float)); - } +void Memory::load(const IMemory& src, bool ftz) const { + transferData(src, *this, ftz); } -void Memory::FillZero() { - void* dataPtr = GetData(); +void Memory::nullify() { + void* dataPtr = getData(); if (dataPtr != nullptr) memset(dataPtr, 0, getDesc().getCurrentMemSize()); } -void *Memory::GetPtr() const { - auto ptr = static_cast(GetData()); - ptr += pMemDesc->getOffsetPadding() * pMemDesc->getPrecision().size(); - return ptr; -} - void Memory::redefineDesc(MemoryDescPtr desc) { if (!desc->hasDefinedMaxSize()) { IE_THROW() << "Can not reset descriptor, memory upper bound is unknown."; } - this->Create(desc, nullptr, false); + this->create(desc, nullptr, false); } template<> -DnnlMemoryDescPtr Memory::GetDescWithType() const { - return MemoryDescUtils::convertToDnnlMemoryDesc(pMemDesc); +DnnlMemoryDescPtr IMemory::getDescWithType() const { + return MemoryDescUtils::convertToDnnlMemoryDesc(getDescPtr()); } -void Memory::setDataHandle(void *data) { - if (!mgrHandle->hasExtBuffer()) { - mgrHandle = DnnlMemMngrHandle( - std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), - this); - } - - size_t maxMemSize = pMemDesc->isDefined() ? 
pMemDesc->getCurrentMemSize() : 0; - mgrHandle->setExtBuff(data, maxMemSize); - if (dnnlMemHandle.isInit()) { - auto prim = dnnlMemHandle.getPrim(); - prim.set_data_handle(mgrHandle->getRawPtr()); // for pads zeroing, to preserve dnnl::memory::set_data_handle behaviour - } +template<> +BlockedMemoryDescPtr IMemory::getDescWithType() const { + return MemoryDescUtils::convertToBlockedMemoryDesc(getDescPtr()); } void Memory::update() { if (dnnlMemHandle.isInit()) { auto prim = dnnlMemHandle.getPrim(); - prim.set_data_handle_no_pads_proc(mgrHandle->getRawPtr()); + prim.set_data_handle_no_pads_proc(m_mgrHandle->getRawPtr()); } } -void Memory::Create(const MemoryDesc &desc, DnnlMemoryMngrPtr memMgr) { - Create(desc.clone(), memMgr); -} - -void Memory::Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr) { - mgrHandle = DnnlMemMngrHandle(memMgr, this); - bool memAllocated = mgrHandle->getRawPtr(); - - Create(desc, nullptr, !memAllocated); -} - -template<> -BlockedMemoryDescPtr Memory::GetDescWithType() const { - return MemoryDescUtils::convertToBlockedMemoryDesc(pMemDesc); -} - -dnnl::memory Memory::GetPrimitive() const { +dnnl::memory Memory::getPrimitive() const { return dnnlMemHandle.getPrim(); } @@ -179,7 +167,7 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const { // // ======================== auto data = m_memObjPtr->getDataNoThrow(); - auto pads_zeroing = m_memObjPtr->padsZeroing; + auto pads_zeroing = m_memObjPtr->m_padsZeroing; if (data != nullptr) { if (pads_zeroing) m_prim.set_data_handle(data); @@ -190,34 +178,59 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const { return m_prim; } +bool Memory::isAllocated() const noexcept { + if (m_mgrHandle->getRawPtr()) { + return true; + } + if (!m_pMemDesc) { + return false; + } + if (!(m_pMemDesc->isDefined())) { + return true; + } + if (m_pMemDesc->getCurrentMemSize() == 0) { + return true; + } + return false; +} + +void* Memory::getData() const { + void* data = getDataNoThrow(); + if (data == nullptr 
&& + m_pMemDesc->getShape().isStatic() && + m_pMemDesc->getShape().getElementsCount() != 0) + IE_THROW() << "Memory has not been allocated"; + return data; +} + void* MemoryMngrWithReuse::getRawPtr() const noexcept { - return _data.get(); + return m_data.get(); } void MemoryMngrWithReuse::setExtBuff(void *ptr, size_t size) { - _useExternalStorage = true; - _memUpperBound = size; - _data = decltype(_data)(ptr, release); + m_useExternalStorage = true; + m_memUpperBound = size; + m_data = decltype(m_data)(ptr, release); } bool MemoryMngrWithReuse::resize(size_t size) { constexpr int cacheLineSize = 64; bool sizeChanged = false; - if (size > _memUpperBound) { + if (size > m_memUpperBound) { void *ptr = dnnl::impl::malloc(size, cacheLineSize); if (!ptr) { IE_THROW() << "Failed to allocate " << size << " bytes of memory"; } - _memUpperBound = size; - _useExternalStorage = false; - _data = decltype(_data)(ptr, destroy); + m_memUpperBound = size; + m_useExternalStorage = false; + m_data = decltype(m_data)(ptr, destroy); sizeChanged = true; } return sizeChanged; } bool MemoryMngrWithReuse::hasExtBuffer() const noexcept { - return _useExternalStorage; + return m_useExternalStorage; } void MemoryMngrWithReuse::release(void *ptr) {} @@ -227,16 +240,16 @@ void MemoryMngrWithReuse::destroy(void *ptr) { } void* DnnlMemoryMngr::getRawPtr() const noexcept { - return _pMemMngr->getRawPtr(); + return m_pMemMngr->getRawPtr(); } void DnnlMemoryMngr::setExtBuff(void *ptr, size_t size) { - _pMemMngr->setExtBuff(ptr, size); + m_pMemMngr->setExtBuff(ptr, size); notifyUpdate(); } bool DnnlMemoryMngr::resize(size_t size) { - bool sizeChanged = _pMemMngr->resize(size); + bool sizeChanged = m_pMemMngr->resize(size); if (sizeChanged) { notifyUpdate(); } @@ -244,27 +257,144 @@ bool DnnlMemoryMngr::resize(size_t size) { } bool DnnlMemoryMngr::hasExtBuffer() const noexcept { - return _pMemMngr->hasExtBuffer(); + return m_pMemMngr->hasExtBuffer(); } void DnnlMemoryMngr::registerMemory(Memory* 
memPtr) { if (memPtr) { - _setMemPtrs.insert(memPtr); + m_setMemPtrs.insert(memPtr); } } void DnnlMemoryMngr::unregisterMemory(Memory* memPtr) { if (memPtr) { - _setMemPtrs.erase(memPtr); + m_setMemPtrs.erase(memPtr); } } void DnnlMemoryMngr::notifyUpdate() { - for (auto& item : _setMemPtrs) { + for (auto& item : m_setMemPtrs) { if (item) { item->update(); } } } + +StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) : + m_eng(eng), m_pMemDesc(desc) { + if (!m_pMemDesc->isDefined()) { + IE_THROW() << "Can not create StaticMemory object. The memory desc is undefined"; + } + + m_size = m_pMemDesc->getCurrentMemSize(); + + auto dnnl_desc = MemoryDescUtils::convertToDnnlMemoryDesc(m_pMemDesc); + + if (data) { + m_pMemMngr = std::make_shared(const_cast(data), m_size); + } else { + m_pMemMngr = std::make_shared(m_size); + } + + // ======================== + // Equivalent of constructor memory(const primitive_desc &desc, void *hdl) + // but with ability to skip pads zeroing. 
+ m_prim = memory(dnnl_desc->getDnnlDesc(), m_eng, DNNL_MEMORY_NONE); + // + // ======================== + if (pads_zeroing) + m_prim.set_data_handle(m_pMemMngr->getRawPtr()); + else + m_prim.set_data_handle_no_pads_proc(m_pMemMngr->getRawPtr()); +} + +StaticMemory::StaticMemory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data, bool pads_zeroing) : + StaticMemory::StaticMemory(eng, desc.clone(), data, pads_zeroing) {} + +bool StaticMemory::isAllocated() const noexcept { + return 0 == m_size || getData() != nullptr; +} + +const MemoryDesc& StaticMemory::getDesc() const { + return *m_pMemDesc; +} + +MemoryDescPtr StaticMemory::getDescPtr() const { + return m_pMemDesc; +} + +void* StaticMemory::getData() const { + return m_pMemMngr->getRawPtr(); +} + +size_t StaticMemory::getSize() const { + return m_size; +} + +const Shape& StaticMemory::getShape() const { + return m_pMemDesc->getShape(); +} + +const VectorDims& StaticMemory::getStaticDims() const { + return getShape().getStaticDims(); +} + +void StaticMemory::redefineDesc(MemoryDescPtr desc) { + IE_THROW(Unexpected) << "Memory descriptor may not be modified in StaticMemory object"; +} + +void StaticMemory::load(const IMemory& src, bool ftz) const { + transferData(src, *this, ftz); +} + +MemoryMngrPtr StaticMemory::getMemoryMngr() const { + return m_pMemMngr; +} + +//oneDNN specifics for backward compatibility +dnnl::memory StaticMemory::getPrimitive() const { + return m_prim; +} + +void StaticMemory::nullify() { + void* dataPtr = getData(); + if (dataPtr != nullptr) + memset(dataPtr, 0, getSize()); +} + +StaticMemory::StaticMemoryMngr::StaticMemoryMngr(size_t size) : m_size(size) { + memMngrImpl.resize(m_size); +} + +StaticMemory::StaticMemoryMngr::StaticMemoryMngr(void* data, size_t size) : m_size(size) { + memMngrImpl.setExtBuff(data, m_size); +} + +void* StaticMemory::StaticMemoryMngr::getRawPtr() const noexcept { + return memMngrImpl.getRawPtr(); +} + +void 
StaticMemory::StaticMemoryMngr::setExtBuff(void* ptr, size_t size) { + IE_THROW(Unexpected) << "StaticMemoryMngr may not be modified"; +} + +bool StaticMemory::StaticMemoryMngr::resize(size_t size) { + if (size != m_size) { + IE_THROW(Unexpected) << "StaticMemoryMngr may not resize the memory"; + } + return false; +} + +bool StaticMemory::StaticMemoryMngr::hasExtBuffer() const noexcept { + return memMngrImpl.hasExtBuffer(); +} + +void StaticMemory::StaticMemoryMngr::registerMemory(Memory* memPtr) { + //do nothing +} + +void StaticMemory::StaticMemoryMngr::unregisterMemory(Memory* memPtr) { + //do nothing +} } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index a49c30eee420a7..856772c922d430 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -74,50 +74,56 @@ class IMemoryMngr { */ class MemoryMngrWithReuse : public IMemoryMngr { public: - MemoryMngrWithReuse() : _data(nullptr, release) {} + MemoryMngrWithReuse() : m_data(nullptr, release) {} void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; bool hasExtBuffer() const noexcept override; private: - bool _useExternalStorage = false; - size_t _memUpperBound = 0ul; - std::unique_ptr _data; + bool m_useExternalStorage = false; + size_t m_memUpperBound = 0ul; + std::unique_ptr m_data; static void release(void *ptr); static void destroy(void *ptr); }; +class IMemoryMngrObserver : public IMemoryMngr { +public: + virtual void registerMemory(Memory* memPtr) = 0; + virtual void unregisterMemory(Memory* memPtr) = 0; +}; + /** * @brief A proxy object that additionally implements observer pattern */ -class DnnlMemoryMngr : public IMemoryMngr { +class DnnlMemoryMngr : public IMemoryMngrObserver { public: - explicit DnnlMemoryMngr(std::unique_ptr mngr) : _pMemMngr(std::move(mngr)) {} + explicit 
DnnlMemoryMngr(std::unique_ptr mngr) : m_pMemMngr(std::move(mngr)) {} void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; bool hasExtBuffer() const noexcept override; - void registerMemory(Memory* memPtr); - void unregisterMemory(Memory* memPtr); + void registerMemory(Memory* memPtr) override; + void unregisterMemory(Memory* memPtr) override; private: void notifyUpdate(); private: - std::unordered_set _setMemPtrs; - std::unique_ptr _pMemMngr; + std::unordered_set m_setMemPtrs; + std::unique_ptr m_pMemMngr; }; -using DnnlMemoryMngrPtr = std::shared_ptr; -using DnnlMemoryMngrCPtr = std::shared_ptr; +using MemoryMngrPtr = std::shared_ptr; +using MemoryMngrCPtr = std::shared_ptr; class DnnlMemMngrHandle { public: - DnnlMemMngrHandle(DnnlMemoryMngrPtr pMgr, Memory* pMem) : _pMgr(pMgr), _pMem(pMem) { - if (_pMgr) { - _pMgr->registerMemory(_pMem); + DnnlMemMngrHandle(MemoryMngrPtr pMgr, Memory* pMem) : m_pMgr(pMgr), m_pMem(pMem) { + if (m_pMgr) { + m_pMgr->registerMemory(m_pMem); } } @@ -125,139 +131,184 @@ class DnnlMemMngrHandle { DnnlMemMngrHandle& operator= (const DnnlMemMngrHandle&) = delete; DnnlMemMngrHandle(DnnlMemMngrHandle&& source) { - std::swap(_pMgr, source._pMgr); - std::swap(_pMem, source._pMem); + std::swap(m_pMgr, source.m_pMgr); + std::swap(m_pMem, source.m_pMem); } DnnlMemMngrHandle& operator= (DnnlMemMngrHandle&& rhs) { - std::swap(_pMgr, rhs._pMgr); - std::swap(_pMem, rhs._pMem); + std::swap(m_pMgr, rhs.m_pMgr); + std::swap(m_pMem, rhs.m_pMem); return *this; } ~DnnlMemMngrHandle() { - if (_pMgr) { - _pMgr->unregisterMemory(_pMem); + if (m_pMgr) { + m_pMgr->unregisterMemory(m_pMem); } } - DnnlMemoryMngrPtr get() const { - return _pMgr; + MemoryMngrPtr get() const { + return m_pMgr; } - DnnlMemoryMngrPtr::element_type* operator->() const noexcept { - return _pMgr.get(); + MemoryMngrPtr::element_type* operator->() const noexcept { + return m_pMgr.get(); } private: - 
DnnlMemoryMngrPtr _pMgr = nullptr; - Memory* _pMem = nullptr; + MemoryMngrPtr m_pMgr = nullptr; + Memory* m_pMem = nullptr; }; -class Memory { +class IMemory { public: - explicit Memory(const dnnl::engine& eng); - Memory(const dnnl::engine& eng, std::unique_ptr mngr); + virtual ~IMemory() = default; - Memory(const Memory&) = delete; - Memory& operator= (const Memory&) = delete; + virtual bool isAllocated() const noexcept = 0; - Memory(Memory&&) = delete; - Memory& operator= (Memory&&) = delete; + virtual const MemoryDesc& getDesc() const = 0; + virtual MemoryDescPtr getDescPtr() const = 0; - dnnl::memory GetPrimitive() const; + virtual void* getData() const = 0; // pointer to the actual memory - bool isAllocated() const noexcept { - if (mgrHandle->getRawPtr()) { - return true; - } - if (!pMemDesc) { - return false; - } - if (!(pMemDesc->isDefined())) { - return true; - } - if (pMemDesc->getCurrentMemSize() == 0) { - return true; - } - return false; - } + virtual size_t getSize() const = 0; // in bytes + virtual const Shape& getShape() const = 0; + virtual const VectorDims& getStaticDims() const = 0; - /** - * @brief Resets the memory manager to a new one created with the provided raw memory - */ - void setDataHandle(void* data); + // Redefines descriptor. The memory descriptor will be replaced with the new one. + // Memory will not be reallocated if the new tensor size is less or equal the upper bound. + // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. 
+ virtual void redefineDesc(MemoryDescPtr desc) = 0; - const MemoryDesc& getDesc() const { - return *pMemDesc; - } + virtual void load(const IMemory& src, bool ftz = true) const = 0; - MemoryDescPtr getDescPtr() const { - return pMemDesc; + virtual MemoryMngrPtr getMemoryMngr() const = 0; + + //oneDNN specifics for backward compatibility + virtual dnnl::memory getPrimitive() const = 0; + dnnl::memory::data_type getDataType() const { + return DnnlExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision()); } + virtual void nullify() = 0; + template ::value && !std::is_reference::value, int>::type = 0, typename std::enable_if::value, int>::type = 0> - std::shared_ptr GetDescWithType() const; + std::shared_ptr getDescWithType() const; +}; - /** - * Return handler of buffer. Real data may starts from some other offset - * @return - */ - void* GetData() const { - void* data = getDataNoThrow(); - if (data == nullptr && - pMemDesc->getShape().isStatic() && - pMemDesc->getShape().getElementsCount() != 0) - IE_THROW() << "Memory has not been allocated"; - return data; - } +class StaticMemory final : public IMemory { +public: + class StaticMemoryMngr : public IMemoryMngrObserver { + public: + explicit StaticMemoryMngr(size_t size); + StaticMemoryMngr(void* data, size_t size); + void* getRawPtr() const noexcept override; + void setExtBuff(void* ptr, size_t size) override; + bool resize(size_t size) override; + bool hasExtBuffer() const noexcept override; + void registerMemory(Memory* memPtr) override; + void unregisterMemory(Memory* memPtr) override; - /** - * Return raw pointer on first element - * Like a GetData() but offset is applied. 
- * @return - */ - void* GetPtr() const; + private: + size_t m_size = 0; + MemoryMngrWithReuse memMngrImpl; + }; - dnnl::memory::data_type GetDataType() const { - return DnnlExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision()); + using MemMngrPtr = std::shared_ptr; + +public: + StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); + StaticMemory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); + + StaticMemory(const StaticMemory&) = delete; + StaticMemory& operator= (const StaticMemory&) = delete; + + StaticMemory(Memory&&) = delete; + StaticMemory& operator= (StaticMemory&&) = delete; + + bool isAllocated() const noexcept override; + + const MemoryDesc& getDesc() const override; + MemoryDescPtr getDescPtr() const override; + + void* getData() const override; // pointer to the actual memory + + size_t getSize() const override; // in bytes + const Shape& getShape() const override; + const VectorDims& getStaticDims() const override; + + // Always throws since a static memory descriptor should not be modified + void redefineDesc(MemoryDescPtr desc) override; + + void load(const IMemory& src, bool ftz = true) const override; + + MemoryMngrPtr getMemoryMngr() const override; + + //oneDNN specifics for backward compatibility + dnnl::memory getPrimitive() const override; + + void nullify() override; + +private: + dnnl::engine m_eng; + MemoryDescPtr m_pMemDesc; + size_t m_size; + dnnl::memory m_prim; + MemMngrPtr m_pMemMngr; +}; + +class Memory : public IMemory { +public: + Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); + Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); + Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr); + Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryMngrPtr mbgr); + + 
Memory(const Memory&) = delete; + Memory& operator= (const Memory&) = delete; + + Memory(Memory&&) = delete; + Memory& operator= (Memory&&) = delete; + + dnnl::memory getPrimitive() const override; + + bool isAllocated() const noexcept override; + + const MemoryDesc& getDesc() const override { + return *m_pMemDesc; } - size_t GetSize() const; + MemoryDescPtr getDescPtr() const override { + return m_pMemDesc; + } + + void* getData() const override; - const Shape& GetShape() const { + size_t getSize() const override; + + const Shape& getShape() const override { return getDesc().getShape(); } - void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); - void Create(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); - - void Create(const MemoryDesc& desc, DnnlMemoryMngrPtr memMgr); - void Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr); + const VectorDims& getStaticDims() const override { + return getDesc().getShape().getStaticDims(); + } // Redefines descriptor. The memory descriptor will be replaced with the new one. // Memory will not be reallocated if the new tensor size is less or equal the upper bound. // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. 
- void redefineDesc(MemoryDescPtr desc); + void redefineDesc(MemoryDescPtr desc) override; - void SetData(const Memory& memory, bool ftz = true) const; - void FillZero(); - - const VectorDims& getStaticDims() const { - return getDesc().getShape().getStaticDims(); - } + void load(const IMemory& src, bool ftz = true) const override; + void nullify() override; dnnl::engine getEngine() const { - return eng; - } - - bool isUsedExternalStorage() const { - return mgrHandle->hasExtBuffer(); + return m_eng; } - DnnlMemoryMngrPtr getDnnlMemoryMngr() const { - return mgrHandle.get(); + MemoryMngrPtr getMemoryMngr() const override { + return m_mgrHandle.get(); } private: @@ -266,11 +317,14 @@ class Memory { private: void update(); + void create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); + void create(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); + private: - MemoryDescPtr pMemDesc; - dnnl::engine eng; - DnnlMemMngrHandle mgrHandle; - bool padsZeroing = true; + dnnl::engine m_eng; + MemoryDescPtr m_pMemDesc; + DnnlMemMngrHandle m_mgrHandle; + bool m_padsZeroing = true; class DnnlMemPrimHandle { public: explicit DnnlMemPrimHandle(const Memory* memObjPtr): m_memObjPtr(memObjPtr) {} @@ -287,12 +341,12 @@ class Memory { } dnnlMemHandle; void* getDataNoThrow() const noexcept { - return mgrHandle->getRawPtr(); + return m_mgrHandle->getRawPtr(); } }; -using MemoryPtr = std::shared_ptr; -using MemoryCPtr = std::shared_ptr; +using MemoryPtr = std::shared_ptr; +using MemoryCPtr = std::shared_ptr; } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp index 0ad586e1bc863c..9bd51f665f9a02 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp @@ -59,9 +59,8 @@ void DnnlPostOpsComposer::updateWeiScales() { attr.set_scales_mask(DNNL_ARG_WEIGHTS, 
wei_scale_mask); DnnlBlockedMemoryDesc memoryDesc(InferenceEngine::Precision::FP32, Shape({wei_scale_values.size()})); - auto mem = std::make_shared(engine); - mem->Create(memoryDesc); - memcpy(mem->GetPtr(), wei_scale_values.data(), wei_scale_values.size() * sizeof(float)); + auto mem = std::make_shared(engine, memoryDesc); + memcpy(mem->getData(), wei_scale_values.data(), wei_scale_values.size() * sizeof(float)); args[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS] = mem; } @@ -73,9 +72,8 @@ void DnnlPostOpsComposer::updateDestScales() { attr.set_scales_mask(DNNL_ARG_DST, 0); DnnlBlockedMemoryDesc memoryDesc(InferenceEngine::Precision::FP32, Shape({1})); - auto mem = std::make_shared(engine); - mem->Create(memoryDesc); - memcpy(mem->GetPtr(), &dst_scale_val, sizeof(float)); + auto mem = std::make_shared(engine, memoryDesc); + memcpy(mem->getData(), &dst_scale_val, sizeof(float)); args[DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST] = mem; } @@ -92,9 +90,8 @@ void DnnlPostOpsComposer::appendBinary(const dnnl::algorithm alg, const std::vec ops.append_binary(alg, memoryDesc.getDnnlDesc()); // copy the data as args - auto mem = std::make_shared(engine); - mem->Create(memoryDesc); - memcpy(mem->GetPtr(), data.data(), data.size() * sizeof(float)); + auto mem = std::make_shared(engine, memoryDesc); + memcpy(mem->getData(), data.data(), data.size() * sizeof(float)); args[DNNL_ARG_ATTR_MULTIPLE_POST_OP(ops.len() - 1) | DNNL_ARG_SRC_1] = mem; } diff --git a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h index 79157ec5bbd709..552ede454e659d 100644 --- a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h +++ b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h @@ -14,17 +14,16 @@ namespace ov { namespace intel_cpu { class DnnlScratchPad { - DnnlMemoryMngrPtr mgrPtr; + MemoryMngrPtr mgrPtr; dnnl::engine eng; public: DnnlScratchPad(dnnl::engine eng) : eng(eng) { - mgrPtr = std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); + mgrPtr = 
std::make_shared(make_unique()); } MemoryPtr createScratchPadMem(const MemoryDescPtr& md) { - auto mem = std::make_shared(eng); - mem->Create(md, mgrPtr); + auto mem = std::make_shared(eng, md, mgrPtr); return mem; } }; diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 73e76566678c31..b18af69ec03ce6 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -82,7 +82,6 @@ void Edge::collectConsumers(std::vector& result) const { } bool Edge::enforceReorder() { - bool canBeInPlaceConflicts = false; auto parentNode = getParent(); auto parentSPD = parentNode->getSelectedPrimitiveDescriptor(); auto childNode = getChild(); @@ -90,79 +89,29 @@ bool Edge::enforceReorder() { if (!parentSPD || !childSPD) IE_THROW() << "Cannot make a decision about reorder. Primitive descriptors weren't selected."; - auto childCanChangeMem = [](const Edge& edge) { - bool result = false; - int outNumber = edge.getOutputNum(); - if (auto childSPD = edge.getChild()->getSelectedPrimitiveDescriptor()) { - result = childSPD->getConfig().outConfs.empty(); - for (const auto& conf : childSPD->getConfig().outConfs) { - if (conf.inPlace() == outNumber && outNumber >= 0) - result = true; - } - } - return result; - }; + bool in_place = inPlace(); - const auto& detectInPlaceChildrenNum = [&childCanChangeMem](const std::vector& edges) -> size_t { - size_t count = 0; - for (const auto& edge : edges) { - if (childCanChangeMem(*edge)) { - count++; - } + if (in_place) { + if (inPlace(LOOK_DOWN) && inPlace(LOOK_UP)) { + return true; } - return count; - }; + } - bool in_place = inPlace(); int inNumber = getInputNum(); - const auto portChildEdges = parentNode->getChildEdgesAtPort(inNumber); - if (childCanChangeMem(*this) && portChildEdges.size() > 1) { - if (childNode->getType() == Type::Convolution) { - auto execIndex = childNode->getExecIndex(); - for (auto pEdgePeer : portChildEdges) { - if (pEdgePeer.get() == this) + + if 
(portChildEdges.size() > 1) { + if (in_place) { + for (auto& p_edge_peer : portChildEdges) { + if (p_edge_peer.get() == this) continue; - std::vector vecConsumers; - pEdgePeer->collectConsumers(vecConsumers); - - for (auto node : vecConsumers) { - if (node->getExecIndex() >= execIndex) { - canBeInPlaceConflicts = true; - break; - } + if (p_edge_peer->inPlace(LOOK_DOWN)) { + return true; } - if (canBeInPlaceConflicts) break; - } - } else if (in_place && detectInPlaceChildrenNum(portChildEdges) > 1) { - canBeInPlaceConflicts = true; - } - } - - if (!canBeInPlaceConflicts && in_place && !parentNode->getChildEdges().empty()) { - for (auto& p_edge_peer : portChildEdges) { - if (p_edge_peer.get() == this) - continue; - if (p_edge_peer->getChild()->getType() != Type::Reorder && p_edge_peer->inPlace(LOOK_DOWN)) { - canBeInPlaceConflicts = true; - break; } } } - if (in_place) { - int outNumber = getOutputNum(); - if (inNumber >= 0 && static_cast(inNumber) < parentSPD->getConfig().outConfs.size() && - parentSPD->getConfig().outConfs[inNumber].inPlace() >= 0 && outNumber >= 0 && - static_cast(outNumber) < childSPD->getConfig().inConfs.size() && - childSPD->getConfig().inConfs[outNumber].inPlace() >= 0) - canBeInPlaceConflicts = true; - } - - if (canBeInPlaceConflicts) { - return true; - } - // In case the parent node is an input constant, the memory is unaligned and the child primitive isa is SSE, // we have to insert reorder since the vast majority of arithmetic and data processing instructions in legacy SSE isa requires // the memory address in the operands must be aligned on 16-byte boundary. 
@@ -170,7 +119,7 @@ bool Edge::enforceReorder() { Type::Input == parentNode->getType() && parentNode->isConstant()) { if (auto pInputNode = std::dynamic_pointer_cast(parentNode)) { - auto rawMemPtr = pInputNode->getMemoryPtr()->GetData(); + auto rawMemPtr = pInputNode->getMemoryPtr()->getData(); bool isAligned = (reinterpret_cast(rawMemPtr) & 15) == 0; if (!isAligned) { return true; @@ -287,10 +236,9 @@ Edge::ReorderStatus Edge::needReorder() { } void Edge::reuse(MemoryPtr ptr) { - if (status != Status::NeedAllocation) - return; + OPENVINO_ASSERT(ptr != nullptr, "Attempt to reuse initialized memory in " + name()); memoryPtr = ptr; - status = Status::Allocated; + changeStatus(Status::Allocated); DEBUG_LOG(*this, " memoryPtr=", memoryPtr); } @@ -303,10 +251,7 @@ int Edge::getOutputNum() const { return child_port; } -void Edge::allocateCommon(const std::function& allocate) { - if (status != Status::NeedAllocation) - return; - +void Edge::allocateCommon(const std::function& allocate) { if (memoryPtr) IE_THROW() << "Unexpected behaviour: status == NeedAllocation but memory is already allocated."; @@ -315,29 +260,28 @@ void Edge::allocateCommon(const std::functiongetEngine())); - - allocate(memoryPtr, inputDesc); + memoryPtr = allocate(inputDesc); DEBUG_LOG(*this, " memoryPtr=", memoryPtr); status = Status::Allocated; } void Edge::allocate(const void* mem_ptr) { - auto allocateFunc = [=](const MemoryPtr& memoryPtr, const MemoryDesc& inputDesc) { - memoryPtr->Create(inputDesc, mem_ptr, false); // no pads zeroing + auto allocateFunc = [=](const MemoryDesc& inputDesc) -> MemoryPtr { + auto parentPtr = getParent(); + return std::make_shared(parentPtr->getEngine(), inputDesc, mem_ptr, false); // no pads zeroing }; allocateCommon(allocateFunc); } -void Edge::allocate(DnnlMemoryMngrPtr memMngr) { +void Edge::allocate(MemoryMngrPtr memMngr) { if (!memMngr) { IE_THROW(Unexpected) << "Memory manager ptr is NULL"; } - auto allocateFunc = [=](const MemoryPtr& memoryPtr, const 
MemoryDesc& inputDesc) { - memoryPtr->Create(inputDesc, memMngr); + auto allocateFunc = [=](const MemoryDesc& inputDesc) -> MemoryPtr { + auto parentPtr = getParent(); + return std::make_shared(parentPtr->getEngine(), inputDesc, memMngr); }; allocateCommon(allocateFunc); @@ -360,7 +304,12 @@ void Edge::externalAllocate(WeightsSharing::Ptr weightsCache) { if (weightsCache) { auto alloc = [this] () { - allocate(); + auto allocateFunc = [this](const MemoryDesc& inputDesc) -> MemoryPtr { + auto parentPtr = getParent(); + return std::make_shared(parentPtr->getEngine(), inputDesc, nullptr, false); // no pads zeroing + }; + + allocateCommon(allocateFunc); return memoryPtr; }; @@ -381,11 +330,14 @@ void Edge::changeStatus(Edge::Status state) { if (state == Status::Validated) { IE_THROW() << "Incorrect behaviour! Use method validate()"; } - if (status != Status::Uninitialized && state == Status::NeedAllocation) + if (Status::Validated == this->status) { + IE_THROW() << "Unexpected attempt of memory change on edge: " << name(); + } + if (this->status != Status::Uninitialized && state == Status::NeedAllocation) return; - if (status == Status::NotAllocated) + if (this->status == Status::NotAllocated) memoryFromEdge.reset(); - status = state; + this->status = state; } PortDescBaseCPtr Edge::getInputPortDesc() const { @@ -463,27 +415,13 @@ const MemoryDesc& Edge::getDesc() const { return getInputDesc(); } -const Memory &Edge::getMemory() { - return *getMemoryPtr(); +const IMemory &Edge::getMemory() { + auto memPtr = getMemoryPtr(); + IE_ASSERT(memPtr != nullptr) << " Dereferencing NULL memory in edge: " << name(); + return *memPtr; } -MemoryPtr &Edge::getMemoryPtr() { - if (status == Status::NotAllocated) { - memoryPtr.reset(new Memory(getParent()->getEngine())); - const auto &desc = getDesc(); - auto sharedEdge = getSharedEdge(); - auto sharedEdgeParent = sharedEdge->getParent(); - if (sharedEdgeParent->isConstant()) { - memoryPtr->Create(desc, 
sharedEdge->getMemoryPtr()->GetData()); - DEBUG_LOG(*this, " const sharedEdge with ", *sharedEdge); - } else { - memoryPtr->Create(desc, sharedEdge->getMemoryPtr()->getDnnlMemoryMngr()); - DEBUG_LOG(*this, " sharedEdge with ", *sharedEdge); - } - memoryFromEdge.reset(); - changeStatus(Status::Allocated); - } - +MemoryPtr Edge::getMemoryPtr() const { return memoryPtr; } @@ -496,11 +434,11 @@ void Edge::sharedMemFrom(const EdgePtr &edge) { void Edge::validate() { if (status == Status::Validated) return; - getMemory(); + getParent(); getChild(); - if (status != Status::Allocated) { + if (status != Status::Allocated || !memoryPtr) { IE_THROW() << "Error memory is not allocated!"; } status = Status::Validated; @@ -534,22 +472,6 @@ void Edge::init() { } sharedMemFrom(edgePtr); } - - auto port = getInputNum(); - if (port < 0) - return; - auto edges_at_same_port = getParent()->getChildEdgesAtPort(static_cast(port)); - for (auto edge : edges_at_same_port) { - if (edge->getStatus() != Status::NeedAllocation && edge->getStatus() != Status::Uninitialized) { - if (edge->getSharedEdge() != edgePtr) - IE_THROW() << "Unsupported behavior. 
Cannot mark edge " - << getParent()->getChildEdgeAt(0)->getParent()->getName() << "->" - << getParent()->getChildEdgeAt(0)->getChild()->getName() << " as not allocated!"; - } else { - if (edge != edgePtr) - edge->sharedMemFrom(edgePtr); - } - } } /** @@ -559,95 +481,101 @@ void Edge::init() { * @return root of view-on-memory subgraph */ EdgePtr Edge::getBaseEdge(int look) { - auto parentConfig = getParent()->getSelectedPrimitiveDescriptor()->getConfig(); - auto childConfig = getChild()->getSelectedPrimitiveDescriptor()->getConfig(); - int inputNum = getInputNum(); - int outputNum = getOutputNum(); + const int inputNum = getInputNum(); + const int outputNum = getOutputNum(); - if (childConfig.inConfs[outputNum].inPlace() >= 0 && parentConfig.outConfs[inputNum].inPlace() >= 0) { - // in case of parentConfig requiring upstream-inplace and childConfig supports downstream-inplace - // must further check whether childConfig also supports upstream inplace, - // if so, we can safely inplace as upstream - auto down_stream_inplace = childConfig.inConfs[outputNum].inPlace(); - int up_stream_inplace = -1; - if (down_stream_inplace >= 0) - up_stream_inplace = childConfig.outConfs[down_stream_inplace].inPlace(); - - if ((up_stream_inplace >= 0) && (look & LOOK_UP)) { - look = LOOK_UP; - } else { - DEBUG_LOG(*this, " Danger: Inplace assumption will be broken!"); - inputNum = getInputNum(); - return getParent()->getChildEdgeAt(inputNum); - } - } + const int parentInPlacePort = getParent()->inPlaceOutPort(inputNum); + const int childInPlacePort = getChild()->inPlaceInputPort(outputNum); - if (childConfig.inConfs[outputNum].inPlace() >= 0 && (look & LOOK_DOWN)) { - int next_port_idx = childConfig.inConfs[outputNum].inPlace(); - if (childConfig.outConfs[next_port_idx].inPlace() >= 0) { - childConfig.outConfs[next_port_idx].inPlace(-1); - getChild()->initDescriptor(childConfig); - } + IE_ASSERT(!(parentInPlacePort >=0 && childInPlacePort >= 0)) << + "Unresolved in place memory 
conflict detected on edge: " << name(); - auto ch_edges = getChild()->getChildEdgesAtPort(next_port_idx); + if ((childInPlacePort >= 0) && (look & LOOK_DOWN)) { + auto ch_edges = getChild()->getChildEdgesAtPort(childInPlacePort); auto &next_ch_edge = ch_edges[0]; // Multiple connection to some out port // Will try to find inplace consumer for (auto &ch_edge : ch_edges) { - auto &chch_conf = ch_edge->getChild()->getSelectedPrimitiveDescriptor()->getConfig(); - - if (chch_conf.inConfs[ch_edge->getOutputNum()].inPlace() >= 0) { + if (ch_edge->getChild()->inPlaceInputPort(ch_edge->getOutputNum()) >= 0) { next_ch_edge = ch_edge; // To align with upstream-inplace, we stop searching once found the first inplace consumer break; } } - return next_ch_edge->getBaseEdge(LOOK_DOWN); - } else if (parentConfig.outConfs[inputNum].inPlace() >= 0 && (look & LOOK_UP)) { - int next_port_idx = parentConfig.outConfs[inputNum].inPlace(); - if (parentConfig.inConfs[next_port_idx].inPlace() >= 0) { - parentConfig.inConfs[next_port_idx].inPlace(-1); - getParent()->initDescriptor(parentConfig); - } - return getParent()->getParentEdgesAtPort(next_port_idx)[0]->getBaseEdge(LOOK_UP); + return next_ch_edge; + } else if (parentInPlacePort >= 0 && (look & LOOK_UP)) { + return getParent()->getParentEdgesAtPort(parentInPlacePort)[0]; } - auto edges_for_same_port = getParent()->getChildEdgesAtPort(inputNum); - if (!(look & LOOK_NO_RECURRENT)) { - for (auto edge : edges_for_same_port) { - if (edge.get() != this) { - auto base = edge->getBaseEdge(LOOK_BOTH | LOOK_NO_RECURRENT); - // Return once found the first inplace consumer - if (base != edge && base != edges_for_same_port[0]) return base; - } + auto edgesForSamePort = getParent()->getChildEdgesAtPort(inputNum); + for (auto edge : edgesForSamePort) { + if (edge.get() != this) { + // Return once found the first inplace consumer + if (edge->inPlace() && edge != edgesForSamePort[0]) return edge; } } - return edges_for_same_port[0]; + return 
edgesForSamePort[0]; } bool Edge::inPlace(LOOK look) const { - auto parentSPD = getParent()->getSelectedPrimitiveDescriptor(); - auto childSPD = getChild()->getSelectedPrimitiveDescriptor(); - if (!parentSPD || !childSPD) - IE_THROW() << "Cannot make a decision about reorder. Primitive descriptors weren't selected."; int inputNum = getInputNum(); int outputNum = getOutputNum(); - if (inputNum >= static_cast(parentSPD->getConfig().outConfs.size())) - inputNum = 0; - if (outputNum >= static_cast(childSPD->getConfig().inConfs.size())) - outputNum = 0; - if (look & LOOK_UP) { - if (parentSPD->getConfig().outConfs[inputNum].inPlace() >= 0) + if (getParent()->inPlaceOutPort(inputNum) >= 0) return true; } if (look & LOOK_DOWN) { - if (childSPD->getConfig().inConfs[outputNum].inPlace() >= 0) + if (getChild()->inPlaceInputPort(outputNum) >= 0) return true; } return false; } +NodePtr Edge::modifiedInPlace() const { + auto childNode = getChild(); + if (!childNode || !childNode->isInPlace()) { + return nullptr; + } + // check if the children nodes are able to modify the memory + auto childPort = getOutputNum(); + auto inPlaceInputPort = childNode->inPlaceInputPort(childPort); + if (inPlaceInputPort >= 0) { + if (childNode->isExecutable()) { + // Node can modify the memory + return childNode; + } + for (auto&& edge : childNode->getChildEdgesAtPort(inPlaceInputPort)) { + // continue searching + if (auto result = edge->modifiedInPlace()) { + return result; + } + } + } + // check backward dependency + if (auto childSPD = childNode->getSelectedPrimitiveDescriptor()) { + auto& outConfs = childSPD->getConfig().outConfs; + for (size_t i = 0; i < outConfs.size(); ++i) { + const auto& conf = outConfs[i]; + if (childPort < 0 || conf.inPlace() != childPort) { + continue; + } + if (childNode->isExecutable()) { + // Node can modify the memory + return childNode; + } + for (auto&& edge : childNode->getChildEdgesAtPort(i)) { + // continue searching + if (auto result = edge->modifiedInPlace()) 
{ + return result; + } + } + } + } + + // nothing has been found + return nullptr; +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index 0292f9767f46f3..aa934379cf9343 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -43,15 +43,18 @@ class Edge { No = 2 }; + enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN }; + inline Status getStatus() const noexcept { return status; } void changeStatus(Status state); + bool inPlace(LOOK look = LOOK_BOTH) const; void init(); void allocate(const void* mem_ptr = nullptr); - void allocate(DnnlMemoryMngrPtr memMngr); + void allocate(MemoryMngrPtr memMngr); void externalAllocate(WeightsSharing::Ptr weightsCache); void reuse(MemoryPtr ptr); void validate(); @@ -60,10 +63,11 @@ class Edge { const std::shared_ptr getParent() const; const std::shared_ptr getChild() const; - const Memory& getMemory(); - MemoryPtr& getMemoryPtr(); + const IMemory& getMemory(); + MemoryPtr getMemoryPtr() const; ReorderStatus needReorder(); + std::shared_ptr modifiedInPlace() const; bool isDropped() const; bool isUseExternalMemory() const; @@ -103,11 +107,8 @@ class Edge { void collectConsumers(std::vector>& result) const; - enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN, LOOK_NO_RECURRENT = 4 }; - EdgePtr getBaseEdge(int look = LOOK_BOTH); - bool inPlace(LOOK look = LOOK_BOTH) const; - void allocateCommon(const std::function& allocate); + void allocateCommon(const std::function& allocate); friend class Graph; }; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index d12f75bac986f9..185f3384c9c758 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -354,6 +354,8 @@ void Graph::InitGraph() { InitDescriptors(); + ResolveInplaceDirections(); + InitOptimalPrimitiveDescriptors(); InitEdges(); @@ -361,30 +363,7 @@ void 
Graph::InitGraph() { optimizer.ApplyImplSpecificGraphOptimizations(*this); SortTopologically(); - bool haveDynNodes = false; - for (size_t i = 0; i < graphNodes.size(); ++i) { - const auto& node = graphNodes[i]; - if (node->isDynamicNode()) { - haveDynNodes = true; - if (node->outputShapeDataDependency() || - // WA: for convolution plus sum(broadcast). Due to the fact that a convolution with sum use the same memory for second sum term and the output - // tensors (inPlace) resizing the output tensor, may lead to reallocation of this second term memory and possible data lost. The reallocation - // may happen when the second term shape is broadcasted to the output tensor shape. To avoid the data loss, we have a special processing for - // such cases inside the convolution node, but it works properly only when dynamic shapes inference, preparation and execution a called - // for this node sequentially. - (node->getType() == Type::Convolution && node->isInPlace())) { - syncNodesInds.insert({node.get(), i}); - } - } - } - - // In case of dynamic shapes, tensors may be resized due to the shapes variations. - // If the input tensor is included to memory reuse, it means that its memory manager is shared with other tensors in the graph, which in turn may cause data - // loss when one of the tensors down the graph requests mem resize, while the input data have not been yet read by the consumers. To avoid such situations - // we disable io mem reuse for the case of dynamic shapes. - if (haveDynNodes) { - this->reuse_io_tensors = false; - } + const bool hasDynNodes = ProcessDynNodes(); Allocate(); @@ -398,7 +377,7 @@ void Graph::InitGraph() { ExtractExecutableNodes(); - status = haveDynNodes ? Status::ReadyDynamic : Status::ReadyStatic; + status = hasDynNodes ? 
Status::ReadyDynamic : Status::ReadyStatic; } void Graph::InitNodes() { @@ -454,6 +433,15 @@ void Graph::InitDescriptors() { } } +void Graph::ResolveInplaceDirections() { + OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Graph::ResolveInplaceDirections"); + + for (auto& node : graphNodes) { + resolveInPlaceDirection(node); + } +} + + void Graph::InitOptimalPrimitiveDescriptors() { OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Graph::InitOptimalPrimitiveDescriptors"); for (auto &node : graphNodes) { @@ -625,6 +613,41 @@ void Graph::InitEdges() { updateEdge(i); } } + + // secondary pass to eliminate complex in-place conflicts + auto needReorder = [](const EdgePtr& edge) -> bool { + int inNumber = edge->getInputNum(); + const auto portChildEdges = edge->getParent()->getChildEdgesAtPort(inNumber); + if (portChildEdges.size() > 1) { + if (auto modifyingNode = edge->modifiedInPlace()) { + auto execIndex = modifyingNode->getExecIndex(); + for (auto pEdgePeer : portChildEdges) { + if (pEdgePeer == edge) + continue; + std::vector vecConsumers; + pEdgePeer->collectConsumers(vecConsumers); + + for (auto node : vecConsumers) { + if (node->getExecIndex() >= execIndex) { + return true; + } + } + } + } + } + return false; + }; + + numberOfEdges = graphEdges.size(); //update the total number + + for (ptrdiff_t i = 0; i < numberOfEdges; i++) { + auto edge = graphEdges[i]; + if (needReorder(edge)) { + constexpr bool optimizedReorder = false; + insertReorder(edge, optimizedReorder); + updateEdge(i); + } + } } static inline bool isConstOutput(EdgePtr edge) { @@ -679,40 +702,38 @@ static edge_clusters_t findEdgeClusters(const std::vector & graphEdges) void Graph::AllocateWithReuse() { edge_clusters_t edge_clusters = findEdgeClusters(graphEdges); - size_t edge_clusters_count = edge_clusters.size(); + size_t remaining_edge_clusters_count = edge_clusters.size(); - for (size_t i = 0; i < edge_clusters_count;) { + for (size_t i = 0; i < remaining_edge_clusters_count;) { auto &cluster = 
edge_clusters[i]; bool erase = false; for (auto &edge : cluster) { - if (edge->getStatus() == Edge::Status::NeedAllocation - && edge->getParent()->isConstant()) { - if (edge->getParent()->getType() == Type::Input) { - auto constNode = std::static_pointer_cast(edge->getParent()); - edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); - } else { - edge->externalAllocate(context->getWeightsCache()); - } - erase = true; + if (edge->getStatus() != Edge::Status::NeedAllocation || !edge->getParent()->isConstant()) { + continue; + } + if (edge->getParent()->getType() == Type::Input) { + auto constNode = std::static_pointer_cast(edge->getParent()); + edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); + } else { + edge->externalAllocate(context->getWeightsCache()); } + erase = true; } if (erase) { - std::swap(edge_clusters[i], edge_clusters[edge_clusters_count - 1]); - --edge_clusters_count; + std::swap(edge_clusters[i], edge_clusters[remaining_edge_clusters_count - 1]); + --remaining_edge_clusters_count; } else { ++i; } } - edge_clusters.resize(edge_clusters_count); - const int64_t alignment = 32; // 32 bytes std::vector definedBoxes; std::vector undefinedBoxes; - for (size_t i = 0; i < edge_clusters.size(); i++) { - MemorySolver::Box box = {std::numeric_limits::max(), 0, 0, static_cast(i)}; + for (size_t i = 0; i < remaining_edge_clusters_count; i++) { + MemorySolver::Box box = { std::numeric_limits::max(), 0, 0, static_cast(i) }; int64_t boxSize = 0; for (auto &edge : edge_clusters[i]) { int e_start = edge->getParent()->execIndex; @@ -761,13 +782,12 @@ void Graph::AllocateWithReuse() { MemorySolver staticMemSolver(definedBoxes); size_t total_size = static_cast(staticMemSolver.solve()) * alignment; - memWorkspace = std::make_shared(getEngine()); - memWorkspace->Create(DnnlBlockedMemoryDesc(InferenceEngine::Precision::I8, Shape(InferenceEngine::SizeVector{total_size}))); + memWorkspace = std::make_shared(getEngine(), 
DnnlBlockedMemoryDesc(InferenceEngine::Precision::I8, Shape(InferenceEngine::SizeVector{total_size}))); if (edge_clusters.empty()) return; - auto* workspace_ptr = static_cast(memWorkspace->GetData()); + auto* workspace_ptr = static_cast(memWorkspace->getData()); for (auto& box : definedBoxes) { int count = 0; @@ -782,7 +802,7 @@ void Graph::AllocateWithReuse() { // shapes {0}. And it is implisitly converted into {1} tensor. // Zeroing of input data allow pass tests. if (edge->getParent()->type == Type::Input && edge->hasDefinedMaxSize()) - edge->getMemoryPtr()->FillZero(); + edge->getMemoryPtr()->nullify(); count++; } @@ -844,7 +864,7 @@ void Graph::AllocateWithReuse() { } for (auto& group : groups) { auto grpMemMngr = - std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); + std::make_shared(make_unique()); for (auto& box : group) { for (auto& edge : edge_clusters[box.id]) { if (edge->getStatus() == Edge::Status::NeedAllocation) { @@ -854,6 +874,36 @@ void Graph::AllocateWithReuse() { } } } + + // Resolve all other edges with status NotAllocated and in-place + for (auto& cluster : edge_clusters) { + for (auto& edge : cluster) { + if (edge->getStatus() != Edge::Status::NotAllocated) { + continue; + } + std::vector edges_to_process; + edges_to_process.push_back(edge); + for (auto next_edge = edge->getSharedEdge(std::nothrow); + next_edge; + next_edge = next_edge->getSharedEdge(std::nothrow)) { + edges_to_process.push_back(next_edge); + } + std::for_each(edges_to_process.rbegin(), edges_to_process.rend(), [](const EdgePtr& edge) { + if (edge->getStatus() == Edge::Status::NotAllocated) { + if (edge->inPlace(Edge::LOOK_DOWN)) { + edge->getChild()->resolveInPlaceEdges(Edge::LOOK_DOWN); + } else if (edge->inPlace(Edge::LOOK_UP)) { + edge->getParent()->resolveInPlaceEdges(Edge::LOOK_UP); + } else { + auto sharedEdge = edge->getSharedEdge(); + auto sharedEdgeParent = sharedEdge->getParent(); + edge->allocate(sharedEdge->getMemoryPtr()->getMemoryMngr()); + 
DEBUG_LOG(*edge, " sharedEdge with ", *sharedEdge); + } + } + }); + } + } } void Graph::Allocate() { @@ -868,12 +918,42 @@ void Graph::Allocate() { AllocateWithReuse(); // Resolve all other edges with status NotAllocated and in-place - for (auto& node : graphNodes) node->resolveInPlaceEdges(); + //for (auto& node : graphNodes) node->resolveInPlaceEdges(); // Check all getters. Should work. for (auto& edge : graphEdges) edge->validate(); } +bool Graph::ProcessDynNodes() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ProcessDynNodes"); + + bool result = false; + for (size_t i = 0; i < graphNodes.size(); ++i) { + const auto& node = graphNodes[i]; + if (node->isDynamicNode()) { + result = true; + if (node->outputShapeDataDependency() || + // WA: for convolution plus sum(broadcast). Due to the fact that a convolution with sum use the same memory for second sum term and the output + // tensors (inPlace) resizing the output tensor, may lead to reallocation of this second term memory and possible data lost. The reallocation + // may happen when the second term shape is broadcasted to the output tensor shape. To avoid the data loss, we have a special processing for + // such cases inside the convolution node, but it works properly only when dynamic shapes inference, preparation and execution a called + // for this node sequentially. + (node->getType() == Type::Convolution && node->isInPlace())) { + syncNodesInds.insert({node.get(), i}); + } + } + } + + // In case of dynamic shapes, tensors may be resized due to the shapes variations. + // If the input tensor is included to memory reuse, it means that its memory manager is shared with other tensors in the graph, which in turn may cause data + // loss when one of the tensors down the graph requests mem resize, while the input data have not been yet read by the consumers. To avoid such situations + // we disable io mem reuse for the case of dynamic shapes. 
+ if (result) { + this->reuse_io_tensors = false; + } + return result; +} + void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) { if (!IsReady()) IE_THROW()<< "Wrong state. Topology not ready."; @@ -885,15 +965,14 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob:: const auto& outDims = node->getOutputShapeAtPort(0); const void *ext_data_ptr = in->cbuffer(); - void *inter_data_ptr = childEdge->getMemory().GetData(); + void *inter_data_ptr = childEdge->getMemory().getData(); if (ext_data_ptr != inter_data_ptr) { auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc()); - Memory ext_mem(getEngine()); - ext_mem.Create(ext_tdesc, ext_data_ptr, false); + Memory ext_mem(getEngine(), ext_tdesc, ext_data_ptr, false); - childEdge->getMemory().SetData(ext_mem, false); + childEdge->getMemory().load(ext_mem, false); } // todo: make sure 'name' exists in this map... @@ -918,7 +997,7 @@ void Graph::PullOutputData(BlobMap &out) { auto name = outputMap.first; auto node = outputMap.second; auto parentEdge = node->getParentEdgeAt(0); - const Memory& intr_blob = parentEdge->getMemory(); + const auto& intr_blob = parentEdge->getMemory(); const auto ext_blob_map = out.find(name); const auto ext_blob = ext_blob_map->second; @@ -960,12 +1039,12 @@ void Graph::PullOutputData(BlobMap &out) { auto srcPrec = actualDesc.getPrecision(); auto dstPrec = expectedDesc.getPrecision(); - if (!getConfig().isLegacyApi && srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.GetSize()) + if (!getConfig().isLegacyApi && srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.getSize()) IE_THROW() << "Output blob byte size is not equal network output byte size (" << ext_blob->byteSize() - << "!=" << intr_blob.GetSize() << ")."; + << "!=" << intr_blob.getSize() << ")."; void *ext_blob_ptr = ext_blob->buffer(); - void *intr_blob_ptr = intr_blob.GetData(); + void *intr_blob_ptr = intr_blob.getData(); // That 
is the same memory. No need to copy if (ext_blob_ptr == intr_blob_ptr) continue; @@ -976,12 +1055,10 @@ void Graph::PullOutputData(BlobMap &out) { auto outBlobDesc = expectedDesc.getLayout() == InferenceEngine::Layout::ANY ? DnnlBlockedMemoryDesc(expectedDesc.getPrecision(), Shape(expectedDesc.getDims())) : MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc); - Memory outBloMem(getEngine()); - outBloMem.Create(outBlobDesc, ext_blob_ptr, false); - - outBloMem.SetData(intr_blob, false); + Memory outBloMem(getEngine(), outBlobDesc, ext_blob_ptr, false); + outBloMem.load(intr_blob, false); } else { - size_t size_to_copy = intr_blob.GetDescWithType()->getPaddedElementsCount(); + size_t size_to_copy = intr_blob.getDescWithType()->getPaddedElementsCount(); cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy); } @@ -1562,6 +1639,7 @@ bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPo node->initSupportedPrimitiveDescriptors(); node->filterSupportedPrimitiveDescriptors(); node->selectOptimalPrimitiveDescriptor(); + resolveInPlaceDirection(node); node->initOptimalPrimitiveDescriptor(); } @@ -1675,5 +1753,124 @@ std::shared_ptr Graph::dump() const { return dump_graph_as_ie_ngraph_net(*this); } +void Graph::resolveInPlaceDirection(const NodePtr& node) const { + enum InplaceDirectionType {UP, DOWN, CYCLIC, NONE}; + enum PortType {INPUT, OUTPUT}; + + auto inPlaceDirection = [](const NodePtr& node, PortType portType, int portNum) -> InplaceDirectionType { + if (PortType::INPUT == portType) { + auto inPlaceInpPort = node->inPlaceInputPort(portNum); + if (inPlaceInpPort >= 0) { + auto inPlaceOutPort = node->inPlaceOutPort(inPlaceInpPort); + if (inPlaceOutPort == inPlaceInpPort) { + return InplaceDirectionType::CYCLIC; + } else if (inPlaceOutPort < 0) { + return InplaceDirectionType::DOWN; + } else { + IE_THROW() << "Non trivial inPlace memory dependency has been detected"; + } + } + // the requested port has a negative 
inPlace tag, let's check whether it is referenced from the output + auto& config = node->getSelectedPrimitiveDescriptor()->getConfig(); + for (auto& portConf : config.outConfs) { + if (portConf.inPlace() == portNum) { + return InplaceDirectionType::UP; + } + } + } else if (PortType::OUTPUT == portType) { + auto inPlaceOutPort = node->inPlaceOutPort(portNum); + if (inPlaceOutPort >= 0) { + auto inPlaceInpPort = node->inPlaceInputPort(inPlaceOutPort); + if (inPlaceOutPort == inPlaceInpPort) { + return InplaceDirectionType::CYCLIC; + } else if (inPlaceInpPort < 0) { + return InplaceDirectionType::UP; + } else { + IE_THROW() << "Non trivial inPlace memory dependency has been detected"; + } + } + // the requested port has a negative inPlace tag, let's check whether it is referenced from the input + auto& config = node->getSelectedPrimitiveDescriptor()->getConfig(); + for (auto& portConf : config.inConfs) { + if (portConf.inPlace() == portNum) { + return InplaceDirectionType::DOWN; + } + } + } + return InplaceDirectionType::NONE; + }; + + auto& inpEdges = node->getParentEdges(); + for (auto& wEdge : inpEdges) { + if (auto pEdge = wEdge.lock()) { + auto inpPort = pEdge->getOutputNum(); + auto inPlaceInpPort = node->inPlaceInputPort(inpPort); + if (inPlaceInpPort < 0 || inPlaceDirection(node, PortType::INPUT, inpPort) != InplaceDirectionType::CYCLIC) { + continue; + } + // inPlace memory cyclic dependency detected, need to resolve + // let's check the parent node first + auto pParent = pEdge->getParent(); + auto parentInPlaceDirection = inPlaceDirection(pParent, PortType::OUTPUT, pEdge->getInputNum()); + if (parentInPlaceDirection == InplaceDirectionType::UP) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.inConfs[inpPort].inPlace(-1); + node->initDescriptor(config); + } else if (parentInPlaceDirection == InplaceDirectionType::DOWN) { + //search if siblings already have downstream direction + auto downstreamPeers = [&] { + for (auto& 
peerEdge : pParent->getChildEdgesAtPort(pEdge->getInputNum())) { + auto peerNode = peerEdge->getChild(); + if (peerNode == node) continue; + if (inPlaceDirection(peerNode, PortType::INPUT, peerEdge->getOutputNum()) == InplaceDirectionType::DOWN) { + return true; + } + } + return false; + }(); + if (downstreamPeers) { + // when there is a downstream peer we have to resolve upstream inplace for the node + // to avoid inplace conflict + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.inConfs[inpPort].inPlace(-1); + node->initDescriptor(config); + } else { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.outConfs[inPlaceInpPort].inPlace(-1); + node->initDescriptor(config); + } + } else { + // the parent node does not use inPlace memory, let's check children + std::function searchNonCyclicDirection; + searchNonCyclicDirection = [&](const NodePtr& node, int portIdx) -> InplaceDirectionType { + auto& childEdges = node->getChildEdgesAtPort(portIdx); + for (auto& edge : childEdges) { + auto pChild = edge->getChild(); + auto result = inPlaceDirection(pChild, PortType::INPUT, edge->getOutputNum()); + if (InplaceDirectionType::UP == result || InplaceDirectionType::DOWN == result) { + return result; + } else if (InplaceDirectionType::CYCLIC == result) { + return searchNonCyclicDirection(pChild, pChild->inPlaceInputPort(edge->getOutputNum())); + } + } + return InplaceDirectionType::NONE; + }; + auto result = searchNonCyclicDirection(node, inPlaceInpPort); + if (one_of(result, InplaceDirectionType::UP, InplaceDirectionType::NONE)) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.inConfs[inpPort].inPlace(-1); + node->initDescriptor(config); + } else if (InplaceDirectionType::DOWN == result) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.outConfs[inPlaceInpPort].inPlace(-1); + node->initDescriptor(config); + } else { + IE_THROW() << "A node without an inPlace 
memory cyclic dependency has not been found"; + } + } + } + } +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 581910ae4a9f9b..f2b9cae7ecda47 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -226,8 +226,10 @@ class Graph { void InitGraph(); void InitNodes(); void InitDescriptors(); + void ResolveInplaceDirections(); void InitOptimalPrimitiveDescriptors(); void InitEdges(); + bool ProcessDynNodes(); void Allocate(); void AllocateWithReuse(); void ExtractExecutableNodes(); @@ -256,6 +258,8 @@ class Graph { GraphContext::CPtr context; void EnforceInferencePrecision(); + void EnforceBF16(); + void resolveInPlaceDirection(const NodePtr& node) const; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 6cd217fc065c28..8952b09ea6f9af 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -242,7 +242,7 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph &graph) { if (scalesBlob == nullptr) IE_THROW() << "Cannot cast to TBlob internal scales blob"; - auto scalesData = static_cast(scalesBlob->GetPtr()); + auto scalesData = static_cast(scalesBlob->getData()); if (scalesData == nullptr) IE_THROW() << "scalesBlob has not allocated buffer"; auto scalesDims = getNormalizedDimsBySize(scales->getOutputShapeAtPort(0).getDims(), @@ -768,7 +768,7 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { if (zeroPointsBlob == nullptr) IE_THROW() << "Cannot cast to TBlob internal zero points blob"; - auto zeroPointsData = static_cast(zeroPointsBlob->GetPtr()); + auto zeroPointsData = static_cast(zeroPointsBlob->getData()); if (zeroPointsData == nullptr) IE_THROW() << "zeroPointsBlob has not allocated buffer"; @@ -798,7 +798,7 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { 
if (weightsBlob == nullptr) IE_THROW() << "Cannot cast to TBlob internal weights blob"; - auto weightsPtr = static_cast(weightsBlob->GetPtr()); + auto weightsPtr = static_cast(weightsBlob->getData()); if (weightsPtr == nullptr) IE_THROW() << "weightsBlob has not allocated buffer"; @@ -2243,6 +2243,15 @@ void GraphOptimizer::MergeTransposeAndReorder(Graph &graph) { } } + // to prevent inPlace conflict we must check that the memory reference is unidirectional or + // inPlace memory is not used + const auto parentInPlace = parentNode->getParentEdgeAt(0)->inPlace(Edge::LOOK_UP); + const auto& childEdges = childNode->getChildEdgesAtPort(0); + const auto childInPlace = std::any_of(childEdges.begin(), childEdges.end(), + [](const EdgePtr& edge){ return edge->inPlace(Edge::LOOK_DOWN); }); + + bool isOptimized = !(parentInPlace && childInPlace); + graph.DropNode(parentNode); graph.DropNode(childNode); @@ -2269,7 +2278,6 @@ void GraphOptimizer::MergeTransposeAndReorder(Graph &graph) { IE_THROW() << "Transpose node '" << parentNode->getName() << "' has invalid edges."; } - bool isOptimized = true; std::vector srcPerm; auto configReorder = [&]() { // transposeNode support blocked input & non-blocked output, in the case, the reorder diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index b16b2a232349b4..fc54c77a2992d7 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -105,7 +105,7 @@ void InferRequestBase::PushStates() { auto cur_state_mem = cur_node->getStore(); auto data_ptr = state->GetState()->cbuffer().as(); auto data_size = state->GetState()->byteSize(); - auto cur_state_mem_buf = static_cast(cur_state_mem->GetPtr()); + auto cur_state_mem_buf = static_cast(cur_state_mem->getData()); cpu_memcpy(cur_state_mem_buf, data_ptr, data_size); } @@ -127,7 +127,7 @@ void InferRequestBase::PullStates() { auto cur_state_mem = cur_node->getStore(); auto data_ptr = 
state->GetState()->cbuffer().as(); auto data_size = state->GetState()->byteSize(); - auto cur_state_mem_buf = static_cast(cur_state_mem->GetPtr()); + auto cur_state_mem_buf = static_cast(cur_state_mem->getData()); cpu_memcpy(data_ptr, cur_state_mem_buf, data_size); } @@ -193,8 +193,12 @@ std::map InferRequestB return perfMap; } -static inline void changeEdgePtr(const EdgePtr &edge, void *newPtr) { - edge->getMemoryPtr()->setDataHandle(newPtr); +static inline void changeEdgePtr(const EdgePtr &edge, InferenceEngine::Blob::Ptr blob) { + auto size = blob->byteSize(); + auto& mem = edge->getMemory(); + auto memMngr = mem.getMemoryMngr(); + IE_ASSERT(memMngr); + memMngr->setExtBuff(blob->buffer(), size); } void InferRequestBase::changeDefaultPtr() { @@ -203,10 +207,10 @@ void InferRequestBase::changeDefaultPtr() { auto input = inputNodesMap.find(it.first); if (input != inputNodesMap.end()) { NodePtr inputNodePtr = input->second; - if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetData() == it.second) + if (inputNodePtr->getChildEdgeAt(0)->getMemory().getData() == static_cast(it.second->buffer())) continue; auto& childEdges = inputNodePtr->getChildEdges(); - // Input cannot be in-place with other primitives + // Perform checks that the user's memory will not be modified bool canBeInPlace = true; for (auto& childEdge : childEdges) { auto ce = childEdge.lock(); @@ -220,39 +224,22 @@ void InferRequestBase::changeDefaultPtr() { break; } - if (child->getType() == Type::Concatenation) { - auto concat = dynamic_cast(child.get()); - if (concat && concat->isOptimized()) { - canBeInPlace = false; - break; - } - } - - // Cannot be in-place before split because split is using different ptrs without offsets - if (child->getType() == Type::Split) { + // the input memory should be referenced by the children, otherwise it should be written to a + // specific location + if (ce->inPlace(Edge::LOOK_DOWN)) { canBeInPlace = false; break; } - if (child->isInPlace()) { + if (auto result = 
ce->modifiedInPlace()) { canBeInPlace = false; break; } - auto& edges = child->getChildEdges(); - for (auto& edge : edges) { - auto e = edge.lock(); - if (!e) - IE_THROW() << "Node " << child->getName() << " contains empty child edge"; - - if (e->getMemory().GetData() == ce->getMemory().GetData()) { - canBeInPlace = false; - break; - } - } - - if (!canBeInPlace) + if (child->getType() == Type::Concatenation && child->isInPlace()) { + canBeInPlace = false; break; + } } if (canBeInPlace) { for (auto& edge : childEdges) { @@ -263,7 +250,6 @@ void InferRequestBase::changeDefaultPtr() { changeEdgePtr(e, it.second); } } - continue; } @@ -271,11 +257,11 @@ void InferRequestBase::changeDefaultPtr() { auto output = outputNodesMap.find(it.first); if (output != outputNodesMap.end()) { auto parentEdge = output->second->getParentEdgeAt(0); - if (parentEdge->getMemory().GetData() == it.second) + if (parentEdge->getMemory().getData() == static_cast(it.second->buffer())) continue; bool canBeInPlace = true; - void* defaultPtr = parentEdge->getMemory().GetData(); + void* defaultPtr = parentEdge->getMemory().getData(); // Cannot be in-place after concat because concat is using different ptrs without offsets auto parent = parentEdge->getParent(); NodePtr previousParent; @@ -292,7 +278,7 @@ void InferRequestBase::changeDefaultPtr() { if (!e) IE_THROW() << "Node " << parent->getName() << " contains empty parent edge"; - if (e->getMemory().GetData() == defaultPtr) { + if (e->getMemory().getData() == defaultPtr) { parent = e->getParent(); break; } @@ -360,16 +346,16 @@ void LegacyInferRequest::changeDefaultPtr() { for (auto &it : inMap) { const auto &name = it.first; auto itr = externalPtr.find(name); - if (itr != externalPtr.end() && itr->second != _inputs[name]->buffer()) { - itr->second = _inputs[name]->buffer(); + if (itr != externalPtr.end() && !(itr->second->buffer() == _inputs[name]->buffer())) { + itr->second = _inputs[name]; } } const auto &outMap = graph->outputNodesMap; for 
(auto &it : outMap) { const auto &name = it.first; auto itr = externalPtr.find(name); - if (itr != externalPtr.end() && itr->second != _outputs[name]->buffer()) { - itr->second = _outputs[name]->buffer(); + if (itr != externalPtr.end() && !(itr->second->buffer() == _outputs[name]->buffer())) { + itr->second = _outputs[name]; } } InferRequestBase::changeDefaultPtr(); @@ -436,7 +422,7 @@ void LegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine: auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory()); if (data->getTensorDesc() == pBlobDesc && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } else if (externalPtr.find(name) != externalPtr.end()) { externalPtr.erase(name); } @@ -469,7 +455,7 @@ void LegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine: auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory()); if (data->getTensorDesc() == pBlobDesc) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } else if (externalPtr.find(name) != externalPtr.end()) { externalPtr.erase(name); } @@ -514,7 +500,7 @@ InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name) _inputs[name]->allocate(); if (pBlob->getTensorDesc() == desc && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { - externalPtr[name] = _inputs[name]->buffer(); + externalPtr[name] = _inputs[name]; } } data = _inputs[name]; @@ -576,7 +562,7 @@ InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name) _outputs[name] = data; if (!externalPtr.count(name) && data->getTensorDesc() == pBlobDesc) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } } data = _outputs[name]; @@ -692,8 +678,8 @@ void InferRequest::SetBlob(const std::string& 
name, const InferenceEngine::Blob: blobDesc.getDims()); } if (actualDesc->isCompatible(MemoryDescUtils::convertToCpuBlockedMemoryDesc(blobDesc)) && - graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { - externalPtr[name] = data->buffer(); + graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { + externalPtr[name] = data; } else if (externalPtr.find(name) != externalPtr.end()) { externalPtr.erase(name); } @@ -725,7 +711,7 @@ void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob: const auto &desc = graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory().getDesc(); if (!isDynamic && blobDesc == MemoryDescUtils::convertToTensorDesc(desc)) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } else if (externalPtr.find(name) != externalPtr.end()) { externalPtr.erase(name); } @@ -772,8 +758,8 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { if (!isDynamic && desc == MemoryDescUtils::convertToTensorDesc(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) && - graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { - externalPtr[name] = _inputs[name]->buffer(); + graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { + externalPtr[name] = _inputs[name]; } } else { IE_THROW() << "Blob with name: " << name << " exists in CPU plugin graph, but absents in network inputs"; @@ -832,7 +818,7 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { _outputs[name] = data; if (!isDynamic && !externalPtr.count(name) && data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc())) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } } else { IE_THROW() << "Blob with name: " << name << " exists in CPU plugin graph, but absents in network outputs"; diff --git 
a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h index e7abea883803c7..dc1b34a9f4e469 100644 --- a/src/plugins/intel_cpu/src/infer_request.h +++ b/src/plugins/intel_cpu/src/infer_request.h @@ -56,7 +56,7 @@ class InferRequestBase : public InferenceEngine::IInferRequestInternal { virtual void PushInputData() = 0; Graph* graph = nullptr; - std::unordered_map externalPtr; + std::unordered_map externalPtr; private: void PushStates(); diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp index cc17e6acd83293..999f2b98199615 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp @@ -89,16 +89,16 @@ BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDes } } -InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const Memory &mem) { +InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const IMemory &mem) { // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor auto& memDesc = mem.getDesc(); InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); - return make_blob_with_precision(desc, mem.GetData()); + return make_blob_with_precision(desc, mem.getData()); } -InferenceEngine::TensorDesc MemoryDescUtils::interpretAsBlobDesc(const Memory &mem) { +InferenceEngine::TensorDesc MemoryDescUtils::interpretAsBlobDesc(const IMemory &mem) { auto& memDesc = mem.getDesc(); InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h index 02d637d2010c8f..f30a34ecfd11ad 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h +++ 
b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h @@ -19,7 +19,7 @@ class DnnlMemoryDesc; class BlockedMemoryDesc; class DnnlBlockedMemoryDesc; class CpuBlockedMemoryDesc; -class Memory; +class IMemory; class MemoryDescUtils { public: @@ -65,14 +65,14 @@ class MemoryDescUtils { * @param desc Memory from which will be created InferenceEngine::Blob * @return pointer to InferenceEngine::Blob */ - static InferenceEngine::Blob::Ptr interpretAsBlob(const Memory& mem); + static InferenceEngine::Blob::Ptr interpretAsBlob(const IMemory& mem); /** * @brief Creates InferenceEngine::TensorDesc from Memory with the memory reuse * @param desc Memory from which will be created InferenceEngine::Blob * @return InferenceEngine::TensorDesc */ - static InferenceEngine::TensorDesc interpretAsBlobDesc(const Memory& mem); + static InferenceEngine::TensorDesc interpretAsBlobDesc(const IMemory& mem); /** * @brief Converts MemoryDesc to InferenceEngine::TensorDesc diff --git a/src/plugins/intel_cpu/src/memory_state.h b/src/plugins/intel_cpu/src/memory_state.h index 4ac9246747da61..286de6a5353b12 100644 --- a/src/plugins/intel_cpu/src/memory_state.h +++ b/src/plugins/intel_cpu/src/memory_state.h @@ -21,7 +21,7 @@ class VariableState : public InferenceEngine::IVariableStateInternal { : InferenceEngine::IVariableStateInternal{name} { state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->getDesc())); state->allocate(); - cpu_memcpy(state->buffer(), storage->GetData(), storage->GetSize()); + cpu_memcpy(state->buffer(), storage->getData(), storage->getSize()); } void Reset() override; diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index fd35aea6c8cc49..cdd343c126277c 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -5,6 +5,7 @@ #include "node.h" #include "edge.h" #include "extension_mngr.h" +#include "partitioned_mem_mgr.h" #include "itt.h" #include "caseless.hpp" @@ -368,33 
+369,49 @@ bool Node::canBeInPlace() const { return true; } -void Node::resolveInPlaceEdges() { +void Node::resolveInPlaceEdges(Edge::LOOK look) { const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (!selected_pd) IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); - for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { - auto parentEdge = getParentEdgeAt(i); + if (look & Edge::LOOK_DOWN) { + for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { + auto inplaceOutIndx = selected_pd->getConfig().inConfs[i].inPlace(); - if (parentEdge->getStatus() != Edge::Status::NotAllocated || selected_pd->getConfig().inConfs[i].inPlace() < 0) - continue; + if (inplaceOutIndx < 0) + continue; - auto memMgr = parentEdge->getMemory().getDnnlMemoryMngr(); - parentEdge->getMemoryPtr().reset(new Memory(getEngine())); - parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMgr); + auto parentEdge = getParentEdgeAt(i); + IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); - parentEdge->changeStatus(Edge::Status::Allocated); + //search for already allocated edge + const auto& childEdges = getChildEdgesAtPort(inplaceOutIndx); + auto itr = std::find_if(childEdges.begin(), childEdges.end(), [](const EdgePtr& edge) { return edge->getStatus() == Edge::Status::Allocated; }); + IE_ASSERT(itr != childEdges.end()) << " Could not find an allocated edge to resolve in-place for node: " << getName(); + + auto baseMemMngr = (*itr)->getMemory().getMemoryMngr(); + auto memMngr = std::make_shared(baseMemMngr); + auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); + parentEdge->reuse(newMem); + } } - for (size_t i = 0; i < getChildEdges().size() && i < 
selected_pd->getConfig().outConfs.size(); i++) { - auto childEdge = getChildEdgeAt(i); + if (look & Edge::LOOK_UP) { + for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) { + auto inplaceInpIndx = selected_pd->getConfig().outConfs[i].inPlace(); - if (childEdge->getStatus() != Edge::Status::NotAllocated || selected_pd->getConfig().outConfs[i].inPlace() < 0) - continue; + if (inplaceInpIndx < 0) + continue; - auto memMgr = childEdge->getMemory().getDnnlMemoryMngr(); - childEdge->getMemoryPtr().reset(new Memory(getEngine())); - childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMgr); + auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); + auto memMngr = std::make_shared(baseMemMngr); + const auto& childEdges = getChildEdgesAtPort(i); - childEdge->changeStatus(Edge::Status::Allocated); + for (auto& childEdge : childEdges) { + IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << + " Unexpected inplace resolve call to an allocated edge: " << childEdge->name(); + auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + childEdge->reuse(newMem); + } + } } } @@ -801,11 +818,9 @@ void Node::prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx) { // TODO [DS]: internal blobs should be removed or rewritten using Memory object auto newDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlob->getTensorDesc()); - Memory memory{ engine }; - memory.Create(newDesc, internalBlob->buffer()); + Memory memory{engine, newDesc, internalBlob->buffer()}; - MemoryPtr _ptr = std::make_shared(engine); - _ptr->Create(intDesc); + MemoryPtr _ptr = std::make_shared(engine, intDesc); node::Reorder::reorderData(memory, *_ptr, context->getParamsCache()); return _ptr; }; @@ -857,17 +872,14 @@ MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { if (!edgeMem) IE_THROW() << "Cannot 
get const weights edgeMem for node " << getName() << "."; - auto constDnnlMemOutDesc = edgeMem->GetDescWithType(); + auto constDnnlMemOutDesc = edgeMem->getDescWithType(); auto weightSrcDesc = constDnnlMemOutDesc->getDnnlDesc(); weightSrcDesc = weightSrcDesc.reshape(weightDesc->getDnnlDesc().get_dims()); auto create = [&] () { auto newSrcDesc = DnnlExtensionUtils::makeDescriptor(weightSrcDesc); - Memory srcMemory{ getEngine() }; - srcMemory.Create(newSrcDesc, edgeMem->GetData()); - - MemoryPtr _ptr = std::make_shared(getEngine()); - _ptr->Create(weightDesc); + Memory srcMemory{ getEngine(), newSrcDesc, edgeMem->getData() }; + MemoryPtr _ptr = std::make_shared(getEngine(), weightDesc); node::Reorder::reorderData(srcMemory, *_ptr, context->getParamsCache()); return _ptr; @@ -882,8 +894,8 @@ MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { auto weightCache = context->getWeightsCache(); if (weightCache != nullptr) { const std::string string_hash = getName() + "_" + format - + "_" + std::to_string(edgeMem->GetSize()) - + "_" + std::to_string(reinterpret_cast(edgeMem->GetData())); + + "_" + std::to_string(edgeMem->getSize()) + + "_" + std::to_string(reinterpret_cast(edgeMem->getData())); ptr = *weightCache->findOrCreate(string_hash, create); } else { @@ -895,7 +907,7 @@ MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { return ptr; } -bool Node::isInPlace() { +bool Node::isInPlace() const { if (inplace == InPlaceType::Unknown) { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) @@ -1181,7 +1193,7 @@ void Node::appendPostOpArgs(const dnnl::primitive_attr& attr, std::unordered_map& primArgs, const std::unordered_map& postOpsArgs) { for (auto & entry : postOpsArgs) { - primArgs[entry.first] = entry.second->GetPrimitive(); + primArgs[entry.first] = entry.second->getPrimitive(); } } @@ -1228,7 +1240,7 @@ std::vector Node::getInputPrecisions() const { for (size_t i = 0; i < getParentEdges().size(); i++) { auto 
parentEdge = getParentEdgeAt(i); if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) { - inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->getDataType()))); } } return inputPrecisions; @@ -1239,7 +1251,7 @@ std::vector Node::getOutputPrecisions() const { for (size_t i = 0; i < getChildEdges().size(); i++) { auto childEdge = getChildEdgeAt(i); if (childEdge && childEdge->getStatus() == Edge::Status::Validated) { - outputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((childEdge->getMemoryPtr()->GetDataType()))); + outputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((childEdge->getMemoryPtr()->getDataType()))); } } return outputPrecisions; @@ -1400,11 +1412,11 @@ std::pair, std::vector> Node::getScalesAndShifts(const IE_THROW() << "Cannot cast " << constInput->getName() << " to Input"; } auto constBlob = constInputNode->getMemoryPtr(); - const auto elementsCount = constBlob->GetDescWithType()->getPaddedElementsCount(); + const auto elementsCount = constBlob->getDescWithType()->getPaddedElementsCount(); buffer.resize(elementsCount); - cpu_convert(constBlob->GetPtr(), + cpu_convert(constBlob->getData(), &buffer[0], - DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()), + DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->getDataType()), Precision::FP32, elementsCount); }; @@ -1458,14 +1470,32 @@ bool Node::isInputTensorAtPortEmpty(size_t port) const { if (inputShapes.size() <= port) { IE_THROW() << "Incorrect input port number for node " << getName(); } - return getParentEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims(); + + if (inputShapes[port].hasZeroDims()) { + return true; + } + auto edge = getParentEdgesAtPort(port)[0]; + if (one_of(edge->getStatus(), Edge::Status::Allocated, Edge::Status::Validated)) { + auto&& 
mem = edge->getMemory(); + if (mem.isAllocated()) { + return mem.getShape().hasZeroDims(); + } + } + return false; } bool Node::isOutputTensorAtPortEmpty(size_t port) const { if (outputShapes.size() <= port) { IE_THROW() << "Incorrect output port number for node " << getName(); } - return getChildEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims(); + if (outputShapes[port].isStatic()) { + return outputShapes[port].hasZeroDims(); + } + auto&& mem = getChildEdgesAtPort(port)[0]->getMemory(); + if (mem.isAllocated()) { + return mem.getShape().hasZeroDims(); + } + return false; } bool Node::hasEmptyInputTensors() const { @@ -1670,5 +1700,29 @@ void Node::fuseDQScales(const float* scaleData, const size_t scaleSize) { DQScales.resize(1); } +int Node::inPlaceInputPort(int portIdx) const { + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); + if (!selected_pd) + IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); + + const auto& conf = selected_pd->getConfig(); + + IE_ASSERT(portIdx >= 0 && portIdx < static_cast(conf.inConfs.size())) << + "Wrong portIndx: " << portIdx << " acceptable interval: [0, " << conf.inConfs.size() << ")"; + + return conf.inConfs[portIdx].inPlace(); +} +int Node::inPlaceOutPort(int portIdx) const { + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); + if (!selected_pd) + IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); + + const auto& conf = selected_pd->getConfig(); + + IE_ASSERT(portIdx >= 0 && portIdx < static_cast(conf.outConfs.size())) << + "Wrong portIndx: " << portIdx << " acceptable interval: [0, " << conf.outConfs.size() << ")"; + + return conf.outConfs[portIdx].inPlace(); +} } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index f1f1f5ca22f500..4cfe9c7d708660 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -188,6 +188,9 @@ class 
Node { const std::vector getParentEdgesAtPort(size_t idx) const; const std::vector getChildEdgesAtPort(size_t idx) const; + int inPlaceInputPort(int portIdx) const; + int inPlaceOutPort(int portIdx) const; + bool isDropped() { return (isEdgesEmpty(childEdges) && isEdgesEmpty(parentEdges)); } @@ -196,7 +199,7 @@ class Node { return engine; } - bool isInPlace(); + bool isInPlace() const; // must be called only after Graph::InitEdges() virtual bool isExecutable() const { @@ -354,7 +357,7 @@ class Node { PerfCount &PerfCounter() { return perfCounter; } - void resolveInPlaceEdges(); + virtual void resolveInPlaceEdges(Edge::LOOK look = Edge::LOOK_BOTH); virtual void execute(dnnl::stream strm) = 0; void updateShapes(); @@ -598,7 +601,7 @@ class Node { Const, NoConst }; - InPlaceType inplace = InPlaceType::Unknown; + mutable InPlaceType inplace = InPlaceType::Unknown; ConstantType constant = ConstantType::Unknown; std::vector internalBlobs; std::vector internalBlobMemory; diff --git a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp index 78efb60a2a1235..30bbc45a009e00 100644 --- a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp @@ -43,7 +43,7 @@ class AdaptivePoolingShapeInfer : public ShapeInferEmptyPads { VectorDims outputDims(inputRank); outputDims[0] = inputDims[0]; outputDims[1] = inputDims[1]; - auto newSpatialDimsPtr = reinterpret_cast(data_dependency.at(1)->GetPtr()); + auto newSpatialDimsPtr = reinterpret_cast(data_dependency.at(1)->getData()); for (size_t i = 0; i < spatialDimsSize; i++) { outputDims[i + 2] = newSpatialDimsPtr[i]; } @@ -136,7 +136,7 @@ void AdaptivePooling::getSupportedDescriptors() { } bool AdaptivePooling::needShapeInfer() const { - const auto newSpatialDimsPtr = reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr()); + const auto newSpatialDimsPtr = 
reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->getData()); for (int i = 0; i < spatialDimsCount; i++) { if (static_cast(spatialDimsValue[i]) != newSpatialDimsPtr[i]) { for (size_t j = 0; j < spatialDimsValue.size(); j++) { @@ -184,8 +184,8 @@ void AdaptivePooling::executeDynamicImpl(dnnl::stream strm) { } void AdaptivePooling::execute(dnnl::stream strm) { - auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType(); - auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType(); + auto inputPrec = getParentEdgeAt(0)->getMemory().getDataType(); + auto outputPrec = getChildEdgeAt(0)->getMemory().getDataType(); if (!(inputPrec == dnnl_f32 && outputPrec == dnnl_f32)) IE_THROW() << errorPrefix << "doesn't support demanded precisions"; @@ -194,22 +194,22 @@ void AdaptivePooling::execute(dnnl::stream strm) { int *indexDst = nullptr; if (algorithm == Algorithm::AdaptivePoolingMax) { - indexDst = reinterpret_cast(getChildEdgeAt(1)->getMemoryPtr()->GetPtr()); + indexDst = reinterpret_cast(getChildEdgeAt(1)->getMemoryPtr()->getData()); } auto isPlainFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::ncsp); auto isTailCFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::nspc); auto isBlkFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::nCsp16c) || srcMemory0.getDesc().hasLayoutType(LayoutType::nCsp8c); - auto srcBlockDesc = srcMemory0.GetDescWithType(); + auto srcBlockDesc = srcMemory0.getDescWithType(); int blockSize = isBlkFmt ? 
srcBlockDesc->getBlockDims().back() : 1; - const auto *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *srcPooledSpatialShapes = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + const auto *srcPooledSpatialShapes = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); + auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); - if (static_cast(srcMemory1.GetShape().getElementsCount()) != spatialDimsCount) - IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.GetShape().getElementsCount() + if (static_cast(srcMemory1.getShape().getElementsCount()) != spatialDimsCount) + IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.getShape().getElementsCount() << ") inconsistent with pooling vector size (" << spatialDimsCount << ")"; auto inputDimVector = srcMemory0.getStaticDims(); @@ -226,14 +226,14 @@ void AdaptivePooling::execute(dnnl::stream strm) { const int iHW = IH * IW; const int oDHW = OD * OH * OW, oHW = OH * OW; - const int chPadding = blockSize * (isBlkFmt ? srcBlockDesc->getBlockDims()[1] : srcMemory0.GetShape().getStaticDims()[1]); + const int chPadding = blockSize * (isBlkFmt ? srcBlockDesc->getBlockDims()[1] : srcMemory0.getShape().getStaticDims()[1]); const int blockCount = (isTailCFmt ? 
1 : chPadding / blockSize); auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << errorPrefix << "doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); auto srcStrides = srcBlockDesc->getStrides(); - auto dstStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType()->getStrides(); + auto dstStrides = getChildEdgesAtPort(0)[0]->getMemory().getDescWithType()->getStrides(); // unified strides array const size_t tailDimsOffset = (isTailCFmt ? -1 : 0); diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp index 804f79d507d70d..c0d4aff13cfffd 100644 --- a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp @@ -102,26 +102,26 @@ static std::vector getShape5D(const SizeVector &shape) { template void BatchToSpace::batchToSpaceKernel() { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetShape().getRank(); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); + size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->getShape().getRank(); blockShapeIn.clear(); for (size_t i = 0; i < dataRank; i++) { blockShapeIn.push_back(*(blockShapesPtr + i)); } - const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); + const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->getData()); cropsBeginIn.clear(); for (size_t i = 0; i < dataRank; i++) { cropsBeginIn.push_back(*(padsBeginPtr + i)); } - auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + 
auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); const auto &inDims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims(); const auto &outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); - auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(0)->getMemory().getDescWithType(); const bool blocked = srcDesc->hasLayoutType(LayoutType::nCsp8c) || srcDesc->hasLayoutType(LayoutType::nCsp16c); const auto dimsSize = inDims.size(); @@ -139,7 +139,7 @@ void BatchToSpace::batchToSpaceKernel() { blockShape.erase(blockShape.begin() + 1); } - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().getDescWithType(); const size_t blockSize = blocked ? dstDesc->getBlockDims().back() : 1lu; const size_t blockCountInput = srcDesc->getBlockDims()[1]; diff --git a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp index d6518e49022ed4..ab8694b6669511 100644 --- a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp @@ -1297,27 +1297,27 @@ void BinaryConvolution::executeReference(const uint8_t* src, const uint8_t* weig } void BinaryConvolution::execute(dnnl::stream strm) { - auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); - auto &weightsMemory = getParentEdgeAt(1)->getMemoryPtr(); - auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemory = getParentEdgeAt(0)->getMemoryPtr(); + auto weightsMemory = getParentEdgeAt(1)->getMemoryPtr(); + auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); - auto src = reinterpret_cast(srcMemory->GetPtr()); - auto weights = reinterpret_cast(weightsMemory->GetPtr()); - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto src = reinterpret_cast(srcMemory->getData()); + auto weights = reinterpret_cast(weightsMemory->getData()); + auto dst = reinterpret_cast(dstMemory->getData()); - auto srcDesc = 
getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(0)->getMemory().getDescWithType(); std::vector srcStride(srcDesc->getStrides().size()); for (size_t i = 0; i < srcStride.size(); i++) { srcStride[srcDesc->getOrder()[i]] = srcDesc->getStrides()[i]; } - auto weiDesc = getParentEdgeAt(1)->getMemory().GetDescWithType(); + auto weiDesc = getParentEdgeAt(1)->getMemory().getDescWithType(); std::vector weightsStride(weiDesc->getShape().getRank()); for (size_t i = 0; i < weightsStride.size(); i++) { weightsStride[weiDesc->getOrder()[i]] = weiDesc->getStrides()[i]; } - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().getDescWithType(); std::vector dstStride(dstDesc->getStrides().size()); for (size_t i = 0; i < dstStride.size(); i++) { dstStride[dstDesc->getOrder()[i]] = dstDesc->getStrides()[i]; diff --git a/src/plugins/intel_cpu/src/nodes/broadcast.cpp b/src/plugins/intel_cpu/src/nodes/broadcast.cpp index 7881bfa9c4befe..2293e36850aada 100644 --- a/src/plugins/intel_cpu/src/nodes/broadcast.cpp +++ b/src/plugins/intel_cpu/src/nodes/broadcast.cpp @@ -117,21 +117,21 @@ bool Broadcast::needPrepareParams() const { void Broadcast::prepareParams() { if (!constMap[TARGET_SHAPE_IDX]) { const auto& targetShapeMem = getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory(); - const int32_t* targetShapeData = reinterpret_cast(targetShapeMem.GetPtr()); + const int32_t* targetShapeData = reinterpret_cast(targetShapeMem.getData()); targetShape.assign(targetShapeData, targetShapeData + targetShapeMem.getStaticDims()[0]); } if (broadcastType == EXPLICIT && !constMap[AXES_MAPPING_IDX]) { const auto& axesMapMem = getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory(); - const int32_t* axesMapData = reinterpret_cast(axesMapMem.GetPtr()); + const int32_t* axesMapData = reinterpret_cast(axesMapMem.getData()); axesMapping.assign(axesMapData, axesMapData + axesMapMem.getStaticDims()[0]); } - const auto& 
srcDims = getParentEdgesAtPort(INPUT_DATA_IDX)[0]->getMemory().GetShape().getStaticDims(); + const auto& srcDims = getParentEdgesAtPort(INPUT_DATA_IDX)[0]->getMemory().getShape().getStaticDims(); repeats.assign(targetShape.begin(), targetShape.end()); const auto ndims = repeats.size(); - auto srcBlockedDims = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetDescWithType()->getBlockDims(); - auto dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims(); + auto srcBlockedDims = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getDescWithType()->getBlockDims(); + auto dstBlockedDims = getChildEdgeAt(0)->getMemory().getDescWithType()->getBlockDims(); if (broadcastType == NUMPY) { for (size_t i = 0lu; i < srcDims.size(); i++) { @@ -162,7 +162,7 @@ bool Broadcast::needShapeInfer() const { if (targetShape.empty()) { return true; } - const int32_t* targetShapeData = reinterpret_cast(getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory().GetPtr()); + const int32_t* targetShapeData = reinterpret_cast(getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory().getData()); for (size_t i = 0lu; i < targetShape.size(); i++) { if (targetShape[i] != targetShapeData[i]) { return true; @@ -173,7 +173,7 @@ bool Broadcast::needShapeInfer() const { if (axesMapping.empty()) { return true; } - const int32_t* axesMappingData = reinterpret_cast(getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory().GetPtr()); + const int32_t* axesMappingData = reinterpret_cast(getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory().getData()); for (size_t i = 0lu; i < axesMapping.size(); i++) { if (axesMapping[i] != axesMappingData[i]) { return true; @@ -203,10 +203,10 @@ void Broadcast::execute(dnnl::stream strm) { void Broadcast::plainExecute(dnnl::stream strm) { VectorDims srcDims = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getStaticDims(); const auto& dstDims = getChildEdgeAt(0)->getMemory().getStaticDims(); - const auto& dataSrcRank = 
getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetShape().getRank(); - const auto& dataDstRank = getChildEdgeAt(0)->getMemory().GetShape().getRank(); + const auto& dataSrcRank = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getShape().getRank(); + const auto& dataDstRank = getChildEdgeAt(0)->getMemory().getShape().getRank(); - auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getDescWithType(); VectorDims srcStrides = srcDesc->getStrides(); const size_t dataSize = srcDesc->getPrecision().size(); @@ -215,7 +215,7 @@ void Broadcast::plainExecute(dnnl::stream strm) { if (!srcStrides.size()) srcStrides = VectorDims(1, 1); - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().getDescWithType(); VectorDims dstStrides = dstDesc->getStrides(); VectorDims srcAligned(dataDstRank); VectorDims srcStridesAligned(dataDstRank); @@ -231,8 +231,8 @@ void Broadcast::plainExecute(dnnl::stream strm) { } const size_t workAmountDst = dstStrides[0] * dstDims[0]; - const auto *srcData = reinterpret_cast(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr()->GetPtr()); - auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr()->getData()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); parallel_nt(0, [&](const int ithr, const int nthr) { size_t i = 0lu, srcIdx = 0lu, start = 0lu, end = 0lu; diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.cpp b/src/plugins/intel_cpu/src/nodes/bucketize.cpp index 3098121f75049b..2073b82e0ff04c 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.cpp +++ b/src/plugins/intel_cpu/src/nodes/bucketize.cpp @@ -177,9 +177,9 @@ void Bucketize::execute(dnnl::stream strm) { } void Bucketize::prepareParams() { - auto& inputTensorMemPtr = 
getParentEdgeAt(INPUT_TENSOR_PORT)->getMemoryPtr(); - auto& inputBinsMemPtr = getParentEdgeAt(INPUT_BINS_PORT)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto inputTensorMemPtr = getParentEdgeAt(INPUT_TENSOR_PORT)->getMemoryPtr(); + auto inputBinsMemPtr = getParentEdgeAt(INPUT_BINS_PORT)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate."; if (!inputTensorMemPtr || !inputTensorMemPtr->isAllocated()) @@ -213,9 +213,9 @@ bool Bucketize::isExecutable() const { template void Bucketize::bucketize() { - const auto *input_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *boundaries_data = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - auto *output_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const auto *input_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + const auto *boundaries_data = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); + auto *output_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); if (!with_bins) { memset(output_data, 0, num_values * sizeof(T_IND)); diff --git a/src/plugins/intel_cpu/src/nodes/color_convert.cpp b/src/plugins/intel_cpu/src/nodes/color_convert.cpp index d4ae67bea38cba..951db703a96a31 100644 --- a/src/plugins/intel_cpu/src/nodes/color_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/color_convert.cpp @@ -1026,11 +1026,11 @@ InferenceEngine::Precision ColorConvert::Converter::outputPrecision(size_t idx) } const void * ColorConvert::Converter::input(size_t idx) const { - return _node->getParentEdgeAt(idx)->getMemoryPtr()->GetPtr(); + return _node->getParentEdgeAt(idx)->getMemoryPtr()->getData(); } void * ColorConvert::Converter::output(size_t idx) const { - return _node->getChildEdgeAt(idx)->getMemoryPtr()->GetPtr(); + return 
_node->getChildEdgeAt(idx)->getMemoryPtr()->getData(); } const VectorDims & ColorConvert::Converter::inputDims(size_t idx) const { diff --git a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp index 0b3d5f524de896..e7921c24abd8e0 100644 --- a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp @@ -247,8 +247,8 @@ void TileBroadcastCommon::broadcastScalar(const char *srcData, char *dstData, si } void TileBroadcastCommon::optimizedExecute(const MemoryPtr& srcMemory, const MemoryPtr& dstMemory) { - auto srcData = reinterpret_cast(srcMemory->GetPtr()); - auto dstData = reinterpret_cast(dstMemory->GetPtr()); + auto srcData = reinterpret_cast(srcMemory->getData()); + auto dstData = reinterpret_cast(dstMemory->getData()); if (srcMemory->getStaticDims() == dstMemory->getStaticDims()) { const auto prc = dstMemory->getDesc().getPrecision(); @@ -260,7 +260,7 @@ void TileBroadcastCommon::optimizedExecute(const MemoryPtr& srcMemory, const Mem if (optimizedParams.dstStrides[0] == optimizedParams.dims[5] * optimizedParams.dstStrides[5]) { size_t data_size = optimizedParams.dstStrides[5]; size_t elt_cnt = optimizedParams.dims[5]; - auto srcData_i32 = reinterpret_cast(srcMemory->GetPtr()); + auto srcData_i32 = reinterpret_cast(srcMemory->getData()); if (data_size == 1) { memset(dstData, srcData[0], elt_cnt); } else if (data_size == 4 && srcData_i32[0] == 0) { diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index efafb27f15d93d..633f40cea00fa5 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -22,6 +22,7 @@ #include "common/cpu_memcpy.h" #include "common/blocked_desc_creator.h" #include +#include using namespace dnnl; using namespace InferenceEngine; @@ -33,7 +34,7 @@ namespace { } bool Concat::isExecutable() 
const { - return !hasEmptyOutputTensors() && !isOptimized(); + return !isInPlace() && !hasEmptyOutputTensors(); } bool Concat::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { @@ -87,12 +88,11 @@ void Concat::getSupportedDescriptors() { } // we need the first dims before axis to be 1 to avoid the reorder in the edge between the first parent and this concat - // TODO [DS]: inplace - if (!isDynamicNode()) { - const auto& childDims = outputShapes[0].getStaticDims(); - if (std::all_of(childDims.begin(), childDims.begin() + axis, [](size_t dim) { return dim == 1; })) - canBeInPlace = true; - } + + const auto& childDims = outputShapes[0].getDims(); + if (childDims[axis] != Shape::UNDEFINED_DIM && + std::all_of(childDims.begin(), childDims.begin() + axis, [](size_t dim) { return dim == 1; })) + canBeInPlace = true; } void Concat::initSupportedPrimitiveDescriptors() { @@ -119,7 +119,8 @@ void Concat::initSupportedPrimitiveDescriptors() { const auto& dstShape = getOutputShapeAtPort(0); std::vector tdCreatorTypes = {LayoutType::ncsp, LayoutType::nspc}; - // check if blocked layouts are available the channels size should be evenly divided by the block size to avoid slow oneDNN ref implementation + // check if blocked layouts are available the channels size should be evenly divided by the block size to avoid slow oneDNN ref implementation and allow + // inPlace memory usage if possible if (dstShape.getRank() > channelAxis) { for (auto& item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c)}) { const VectorDims &blkDims = dstShape.getDims(); @@ -159,12 +160,7 @@ void Concat::initSupportedPrimitiveDescriptors() { config.inConfs[i].inPlace(-1); config.inConfs[i].constant(false); auto desc = itr->second->createSharedDesc(inputPrecision, getInputShapeAtPort(i)); - // TODO [DS]: inplace - if (isDynamicNode()) { - config.inConfs[i].setMemDesc(desc); - } else { - config.inConfs[i].setMemDesc(desc, 
BlockedMemoryDesc::EMPTY_MASK); - } + config.inConfs[i].setMemDesc(desc); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); if (itr->first != LayoutType::nspc) { @@ -179,46 +175,14 @@ void Concat::initSupportedPrimitiveDescriptors() { } } - // TODO [DS]: inplace if (!canBeInPlace || std::any_of(inputShapes.begin(), inputShapes.end(), [](const Shape& shape) { return shape.hasZeroDims(); })) return; // Optimized inplace case for (auto refPdIndex : pdIndexesToReuse) { - const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); - auto config = refConfig; - - auto denseOutDesc = refConfig.outConfs[0].getMemDesc()->as(); - const auto &order = denseOutDesc->getOrder(); - const auto &blkDims = denseOutDesc->getBlockDims(); - auto numOfDim = blkDims.size(); - - SizeVector offsets(numOfDim, 0lu); - SizeVector strides(numOfDim); - strides.back() = 1lu; - size_t offset = Shape::UNDEFINED_DIM; - BlockedMemoryDesc::CmpMask mask = BlockedMemoryDesc::SKIP_OFFSET_MASK; // any offset - - for (size_t i = 2; i <= numOfDim; i++) { - if (numOfDim - i < axis) { - strides[numOfDim - i] = Shape::UNDEFINED_DIM; - mask.reset(numOfDim - i); // any strides on certain axis - } else { - strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1]; - } - } - - const auto outDesc = std::dynamic_pointer_cast(refConfig.outConfs[0].getMemDesc()); - config.outConfs[0].setMemDesc(outDesc, mask); - - for (size_t i = 0; i < getParentEdges().size(); i++) { - const auto& srcBlkDims = refConfig.inConfs[i].getMemDesc()->as()->getBlockDims(); - const auto& shape = refConfig.inConfs[i].getMemDesc()->getShape(); - - const auto inDesc = std::make_shared(inputPrecision, shape, srcBlkDims, order, offset, offsets, strides); - + auto config = supportedPrimitiveDescriptors[refPdIndex].getConfig();; + for (size_t i = 0; i < config.inConfs.size(); i++) { config.inConfs[i].inPlace(0); - config.inConfs[i].setMemDesc(inDesc, mask); } 
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -344,31 +308,27 @@ bool Concat::created() const { return getType() == Type::Concatenation; } -bool Concat::isOptimized() const { - return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].inPlace() >= 0; -} - bool Concat::needPrepareParams() const { - if (canOptimizeNspc) { + if (canOptimizeNspc || isInPlace()) { return false; } return inputShapesModified(); } void Concat::prepareParams() { - if (canOptimizeNspc || isOptimized()) + if (canOptimizeNspc || isInPlace()) return; const auto& dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate."; - auto dstMemDesc = dstMemPtr->GetDescWithType(); + auto dstMemDesc = dstMemPtr->getDescWithType(); if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; const auto& outputStrides = dstMemDesc->getStrides(); size_t curConcatOffset = 0; - const size_t elemSize = DnnlExtensionUtils::sizeOfDataType(dstMemPtr->GetDataType()); + const size_t elemSize = DnnlExtensionUtils::sizeOfDataType(dstMemPtr->getDataType()); const auto& src0BlkMemDesc = getParentEdgesAtPort(0)[0]->getMemoryPtr()->getDescPtr()->as(); const auto& outputOrder = src0BlkMemDesc->getOrder(); for (size_t i = 0; i < outputOrder.size(); i++) { @@ -408,10 +368,10 @@ void Concat::prepareParams() { dstOffset[i] = outputStrides[reorderedAxis] * curConcatOffset * elemSize; curConcatOffset += inputShape[reorderedAxis]; } else { - if (srcMemPtr->GetShape().hasZeroDims()) { + if (srcMemPtr->getShape().hasZeroDims()) { continue; } - auto desc = srcMemPtr->GetDescWithType()->getDnnlDesc(); + auto desc = srcMemPtr->getDescWithType()->getDnnlDesc(); const auto& dims = srcMemPtr->getStaticDims(); for (size_t j = 0; j < dims.size(); j++) { @@ -422,7 +382,7 @@ void Concat::prepareParams() { } if (!canExecRef) { 
- auto desc = dstMemPtr->GetDescWithType()->getDnnlDesc(); + auto desc = dstMemPtr->getDescWithType()->getDnnlDesc(); const auto& dims = dstMemPtr->getStaticDims(); for (size_t i = 0; i < dims.size(); i++) { @@ -455,7 +415,7 @@ void Concat::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - if (!isOptimized()) { + if (!isInPlace()) { Node::initOptimalPrimitiveDescriptor(); auto config = selected_pd->getConfig(); if (!isConfigDefined(config)) { @@ -472,64 +432,6 @@ void Concat::initOptimalPrimitiveDescriptor() { } } - auto config = selected_pd->getConfig(); - if (!isDynamicNode() && !isConfigDefined(config)) { - for (size_t i = 0; i < config.outConfs.size(); i++) { - int num = getChildEdgeAt(i)->getOutputNum(); - if (num >= 0) { - auto childConf = getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; - childConf.setMemDesc(childConf.getMemDesc()->cloneWithNewPrecision(config.outConfs[i].getMemDesc()->getPrecision())); - - if (getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()) { - if (!childConf.getMemDesc()->isDefined() && childConf.inPlace() >= 0) - getChildEdgeAt(i)->getChild()->initOptimalPrimitiveDescriptor(); - - if (childConf.getMemDesc()->isDefined() && config.outConfs[i].getPortDesc()->isCompatible(*childConf.getPortDesc())) { - config.outConfs[i].setMemDesc(childConf.getMemDesc()); - continue; - } - } - } - - // reset mask - config.outConfs[i].setMemDesc(config.outConfs[i].getMemDesc()); - } - auto firstOutBlockingDesc = config.outConfs[0].getMemDesc()->as(); - size_t offset = 0; - for (size_t i = 0; i < config.inConfs.size(); i++) { - auto oldDesc = config.inConfs[i].getMemDesc(); - auto inpBlockingDesc = oldDesc->as(); - - config.inConfs[i].setMemDesc( - std::make_shared( - inpBlockingDesc->getPrecision(), - inpBlockingDesc->getShape(), - inpBlockingDesc->getBlockDims(), - inpBlockingDesc->getOrder(), - 
firstOutBlockingDesc->getOffsetPadding() + offset, - firstOutBlockingDesc->getOffsetPaddingToData(), - firstOutBlockingDesc->getStrides()), - BlockedMemoryDesc::FULL_MASK); - size_t axisSize = 1; - - auto firstInpBlockingDesc = config.inConfs[0].getMemDesc()->as(); - if (firstInpBlockingDesc->hasLayoutType(LayoutType::nspc)) { - // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for blocked - size_t realAxis = inverseOrder(firstInpBlockingDesc->getOrder(), axis); - for (size_t j = realAxis; j < inpBlockingDesc->getBlockDims().size(); j++) { - size_t jj = firstInpBlockingDesc->getOrder()[j]; - axisSize *= inpBlockingDesc->getBlockDims()[jj]; - } - } else { - // This works for nchw and nchw8c/nchw16c - for (size_t j = axis; j < inpBlockingDesc->getBlockDims().size(); j++) { - axisSize *= inpBlockingDesc->getBlockDims()[j]; - } - } - offset += axisSize; - } - initDescriptor(config); - } //block layout may have axis greater than rank, disable ref_concat auto primDesc = getSelectedPrimitiveDescriptor(); auto memDesc = primDesc->getConfig().outConfs[0].getMemDesc()->as(); @@ -547,7 +449,7 @@ void Concat::initOptimalPrimitiveDescriptor() { } void Concat::execute(dnnl::stream strm) { - if (isOptimized()) { + if (isInPlace()) { return; } @@ -559,16 +461,16 @@ void Concat::execute(dnnl::stream strm) { if (canExecRef) { execRef(); } else { - const Memory& dst_memory = getChildEdgeAt(0)->getMemory(); + const auto& dst_memory = getChildEdgeAt(0)->getMemory(); const size_t num_src = getParentEdges().size(); - std::unordered_map mem_ags {{DNNL_ARG_DST, dst_memory.GetPrimitive()}}; + std::unordered_map mem_ags {{DNNL_ARG_DST, dst_memory.getPrimitive()}}; size_t nonZeroInShapes = 0; for (size_t i = 0; i < num_src; i++) { const auto& srcMem = getParentEdgesAtPort(i)[0]->getMemory(); - if (srcMem.GetShape().hasZeroDims()) { + if (srcMem.getShape().hasZeroDims()) { continue; } - mem_ags[DNNL_ARG_MULTIPLE_SRC + nonZeroInShapes] = 
srcMem.GetPrimitive(); + mem_ags[DNNL_ARG_MULTIPLE_SRC + nonZeroInShapes] = srcMem.getPrimitive(); nonZeroInShapes++; } prim.execute(strm, mem_ags); @@ -580,10 +482,10 @@ InferenceEngine::Precision Concat::getRuntimePrecision() const { } void Concat::execNspcSpecCase() { - const Memory& dst_memory = getChildEdgeAt(0)->getMemory(); + const auto& dst_memory = getChildEdgeAt(0)->getMemory(); const size_t num_src = getParentEdges().size(); - uint8_t* dst_ptr = reinterpret_cast(dst_memory.GetData()); - const size_t dataSize = DnnlExtensionUtils::sizeOfDataType(dst_memory.GetDataType()); + uint8_t* dst_ptr = reinterpret_cast(dst_memory.getData()); + const size_t dataSize = DnnlExtensionUtils::sizeOfDataType(dst_memory.getDataType()); std::vector channelsDataSize; size_t channels_size = 0; @@ -593,14 +495,14 @@ void Concat::execNspcSpecCase() { size_t nonZeroInShapes = 0; int firstNonZeroEdge = -1; for (size_t i = 0; i < num_src; i++) { - const Memory& src_mem = getParentEdgesAtPort(i)[0]->getMemory(); - if (src_mem.GetShape().hasZeroDims()) { + const auto& src_mem = getParentEdgesAtPort(i)[0]->getMemory(); + if (src_mem.getShape().hasZeroDims()) { continue; } const size_t num_channels = src_mem.getStaticDims()[channelAxis]; channelsDataSize.push_back(num_channels * dataSize); - src_ptrs.push_back(reinterpret_cast(src_mem.GetData())); + src_ptrs.push_back(reinterpret_cast(src_mem.getData())); dst_ptrs.push_back(dst_ptr + channels_size); channels_size += num_channels * dataSize; @@ -611,7 +513,7 @@ void Concat::execNspcSpecCase() { nonZeroInShapes++; } - const size_t iter_count = getParentEdgeAt(firstNonZeroEdge)->getMemory().GetSize() / channelsDataSize[0]; + const size_t iter_count = getParentEdgeAt(firstNonZeroEdge)->getMemory().getSize() / channelsDataSize[0]; parallel_for(iter_count, [&](int i) { const size_t dst_off = i * channels_size; @@ -623,14 +525,14 @@ void Concat::execNspcSpecCase() { void Concat::execRef() { const size_t numSrc = getParentEdges().size(); - 
const Memory& dstMemory = getChildEdgeAt(0)->getMemory(); - const size_t elemSize = DnnlExtensionUtils::sizeOfDataType(dstMemory.GetDataType()); + const auto& dstMemory = getChildEdgeAt(0)->getMemory(); + const size_t elemSize = DnnlExtensionUtils::sizeOfDataType(dstMemory.getDataType()); const auto dstMemBlkDesc = dstMemory.getDescPtr()->as(); const auto& outputShape = dstMemBlkDesc->getBlockDims(); - uint8_t* dstPtr = reinterpret_cast(dstMemory.GetData()); + uint8_t* dstPtr = reinterpret_cast(dstMemory.getData()); for (size_t i = 0; i < numSrc; i++) { - const Memory& srcMem = getParentEdgesAtPort(i)[0]->getMemory(); - srcPtrs[i] = reinterpret_cast(srcMem.GetPtr()); + const auto& srcMem = getParentEdgesAtPort(i)[0]->getMemory(); + srcPtrs[i] = reinterpret_cast(srcMem.getData()); } size_t outputStrides[MAX_RANK_REF] = {0}; @@ -723,6 +625,52 @@ void Concat::execRef() { } } +void Concat::resolveInPlaceEdges(Edge::LOOK look) { + if (!(look & Edge::LOOK_DOWN) || !isInPlace()) { + Node::resolveInPlaceEdges(look); + return; + } + + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + auto& config = selected_pd->getConfig(); + size_t numberOfInputs = config.inConfs.size(); + size_t inplaceOutIndx = selected_pd->getConfig().inConfs[0].inPlace(); + auto baseDim = outputShapes.front().getDims()[axis]; + IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << " Concat node: " << getName() << " can't use inPlace memory with concatenation on dynamic dimension"; + + auto& edges = getChildEdgesAtPort(inplaceOutIndx); + auto itr = std::find_if(edges.begin(), edges.end(), [](const EdgePtr& edge) { return edge->getStatus() == Edge::Status::Allocated; }); + IE_ASSERT(itr != edges.end()) << " Could not find allocated child edge for concat node: " << getName(); + + auto baseMemMngr = (*itr)->getMemory().getMemoryMngr(); + IE_ASSERT(baseMemMngr != nullptr) << " NULL base memory manager in concat node: " 
<< getName(); + + ptrdiff_t offset = 0; + for (size_t i = 0; i < numberOfInputs; ++i) { + auto partDim = inputShapes[i].getDims()[axis]; + IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << " Concat node: " << getName() << " can't use inPlace memory with concatenation on dynamic dimension"; + + auto parentEdge = getParentEdgeAt(i); + + IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); + + auto memDesc = selected_pd->getConfig().inConfs[i].getMemDesc(); + MemoryPtr newMem; + if (partDim != 0) { + auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); + newMem = std::make_shared(getEngine(), memDesc, memMngr); + } else { + // empty tensor, no need to reference a part, default memory is enough + newMem = std::make_shared(getEngine(), memDesc); + } + + parentEdge->reuse(newMem); + offset += partDim; + } +} + } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h index 32831bcede332a..e9a4c9e764a7b3 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.h +++ b/src/plugins/intel_cpu/src/nodes/concat.h @@ -26,8 +26,7 @@ class Concat : public Node { bool created() const override; void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override { execute(strm); } - - bool isOptimized() const; + void resolveInPlaceEdges(Edge::LOOK look) override; InferenceEngine::Precision getRuntimePrecision() const override; diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index b4d5ba5ab53b57..bb685b3d2b3775 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -904,9 +904,8 @@ void Convolution::addZeroPoints(dnnl::primitive_attr& attr) { attr.set_zero_points_mask(DNNL_ARG_SRC, 0); if (!stockInputZeroPointsMemPtr) { - stockInputZeroPointsMemPtr.reset(new 
Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::I32, {inputZeroPoints.size()}); - stockInputZeroPointsMemPtr->Create(memoryDesc, inputZeroPoints.data()); + stockInputZeroPointsMemPtr = std::make_shared(getEngine(), memoryDesc, inputZeroPoints.data()); } } @@ -915,9 +914,8 @@ void Convolution::addLegacyZeroPoints(dnnl::primitive_attr& attr) { DEBUG_LOG(getName(), ": Set legacy input zero points"); attr.set_input_zero_points(legacyInputZeroPoints.size(), 1 << 1 /*through C dim*/); if (!legacyInputZeroPointsMemPtr) { - legacyInputZeroPointsMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::U8, {legacyInputZeroPoints.size()}); - legacyInputZeroPointsMemPtr->Create(memoryDesc, legacyInputZeroPoints.data()); + legacyInputZeroPointsMemPtr.reset(new Memory(getEngine(), memoryDesc, legacyInputZeroPoints.data())); } } @@ -926,9 +924,8 @@ void Convolution::addLegacyZeroPoints(dnnl::primitive_attr& attr) { attr.set_weights_zero_points(legacyWeightsZeroPoints.size(), 1 << 1 /*through C dim*/); if (!legacyWeightsZeroPointsMemPtr) { - legacyWeightsZeroPointsMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {legacyWeightsZeroPoints.size()}); - legacyWeightsZeroPointsMemPtr->Create(memoryDesc, legacyWeightsZeroPoints.data()); + legacyWeightsZeroPointsMemPtr = std::make_shared(getEngine(), memoryDesc, legacyWeightsZeroPoints.data()); } } @@ -937,9 +934,8 @@ void Convolution::addLegacyZeroPoints(dnnl::primitive_attr& attr) { attr.set_output_compensations(legacyOutputCompensation.size(), 1 << 1 /*through C dim*/); if (!legacyOutputCompensationMemPtr) { - legacyOutputCompensationMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::I32, {legacyOutputCompensation.size()}); - legacyOutputCompensationMemPtr->Create(memoryDesc, legacyOutputCompensation.data()); + legacyOutputCompensationMemPtr = std::make_shared(getEngine(), memoryDesc, legacyOutputCompensation.data()); } } 
} @@ -1079,11 +1075,11 @@ bool Convolution::canFuse(const NodePtr& node) const { } dnnl::memory Convolution::getWeights() const { - return getParentEdgeAt(1)->getMemory().GetPrimitive(); + return getParentEdgeAt(1)->getMemory().getPrimitive(); } dnnl::memory Convolution::getBias() const { - return getParentEdgeAt(2)->getMemory().GetPrimitive(); + return getParentEdgeAt(2)->getMemory().getPrimitive(); } InferenceEngine::Precision Convolution::getRuntimePrecision() const { @@ -1093,7 +1089,7 @@ InferenceEngine::Precision Convolution::getRuntimePrecision() const { for (size_t i = 0; i < std::min(getParentEdges().size(), inputsNumLimit); i++) { auto parentEdge = getParentEdgeAt(i); if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) { - inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->getDataType()))); } } @@ -1184,7 +1180,7 @@ InferenceEngine::Blob::Ptr Convolution::createInternalBlob(InferenceEngine::Size if (blb == nullptr) IE_THROW() << "Cannot get const blob for node " << getName() << "."; - auto const elementsCount = blb->GetDescWithType()->getPaddedElementsCount(); + auto const elementsCount = blb->getDescWithType()->getPaddedElementsCount(); InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, dims, getWeightsLayoutByDims(dims, isGrouped)); @@ -1195,9 +1191,9 @@ InferenceEngine::Blob::Ptr Convolution::createInternalBlob(InferenceEngine::Size IE_THROW() << "Created internal blob and const blob has different size for node: " << getName() << "."; } - cpu_convert(blb->GetPtr(), + cpu_convert(blb->getData(), internalBlob->buffer(), - DnnlExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()), + DnnlExtensionUtils::DataTypeToIEPrecision(blb->getDataType()), internalBlob->getTensorDesc().getPrecision(), elementsCount); @@ -1225,12 +1221,12 @@ void 
Convolution::prepareParams() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; - DnnlMemoryDescCPtr inMemoryDesc = srcMemPtr->GetDescWithType(); - DnnlMemoryDescCPtr weightMemoryDesc = wghMemPtr->GetDescWithType(); - DnnlMemoryDescCPtr outMemoryDesc = dstMemPtr->GetDescWithType(); + DnnlMemoryDescCPtr inMemoryDesc = srcMemPtr->getDescWithType(); + DnnlMemoryDescCPtr weightMemoryDesc = wghMemPtr->getDescWithType(); + DnnlMemoryDescCPtr outMemoryDesc = dstMemPtr->getDescWithType(); DnnlMemoryDescCPtr biasDesc; if (biasMemPtr) { - biasDesc = biasMemPtr->GetDescWithType(); + biasDesc = biasMemPtr->getDescWithType(); } auto initPrimitiveAttr = [&]() { @@ -1378,8 +1374,8 @@ void Convolution::prepareParams() { if (!execPtr) IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; - primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive(); - primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_SRC] = srcMemPtr->getPrimitive(); + primArgs[DNNL_ARG_DST] = dstMemPtr->getPrimitive(); if (key.constWeight) { // const weight preparation/reordering needs to be done once at next execution @@ -1388,15 +1384,15 @@ void Convolution::prepareParams() { auto it = primArgs.find(DNNL_ARG_WEIGHTS); if (it == primArgs.end() || !prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) { - primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->GetPrimitive(); + primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->getPrimitive(); } } else { // non-const weight will be reordered by executor on every exec - primArgs[DNNL_ARG_WEIGHTS] = wghMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_WEIGHTS] = wghMemPtr->getPrimitive(); } if (withBiases) { - primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_BIAS] = biasMemPtr->getPrimitive(); } if (preferLegacyZeroPoint) @@ -1407,7 +1403,7 @@ void 
Convolution::prepareParams() { Node::appendPostOpArgs(*pAttrLocal, primArgs, convPostOpsArgs[preferLegacyPostOps]); auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->getPrimitive(); #ifdef CPU_DEBUG_CAPS if (result.second == CacheEntryBase::LookUpStatus::Miss) { @@ -1454,7 +1450,8 @@ void Convolution::executeDynamicImpl(dnnl::stream strm) { const size_t sumPortNum = getParentEdges().size() - 1; const auto& sumInpMem = getParentEdgesAtPort(sumPortNum).front()->getMemory(); auto inp1 = subgraph->getInput(1); - inp1->getChildEdgesAtPort(0).front()->getMemoryPtr()->setDataHandle(sumInpMem.GetData()); + auto inp1Mem = inp1->getChildEdgesAtPort(0).front()->getMemoryPtr(); + inp1Mem->getMemoryMngr()->setExtBuff(sumInpMem.getData(), sumInpMem.getSize()); subgraph->infer(); @@ -1462,7 +1459,7 @@ void Convolution::executeDynamicImpl(dnnl::stream strm) { const auto& outMem = out->getParentEdgesAtPort(0).front()->getMemory(); auto convOutMem = getChildEdgesAtPort(0).front()->getMemoryPtr(); Node::redefineOutputMemory({outMem.getStaticDims()}); - convOutMem->SetData(outMem); + convOutMem->load(outMem); } } @@ -1539,20 +1536,20 @@ void Convolution::addFusedNode(const NodePtr &fusingNode) { void Convolution::appendLegacyZeroPointsArgs() { if (legacyInputZeroPointsMemPtr != nullptr) { - primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC] = legacyInputZeroPointsMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC] = legacyInputZeroPointsMemPtr->getPrimitive(); } if (legacyWeightsZeroPointsMemPtr != nullptr) { - primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS] = legacyWeightsZeroPointsMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS] = legacyWeightsZeroPointsMemPtr->getPrimitive(); } if (legacyOutputCompensationMemPtr != nullptr) { - primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_DST] = 
legacyOutputCompensationMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_DST] = legacyOutputCompensationMemPtr->getPrimitive(); } } void Convolution::appendZeroPointsArgs() { if (stockInputZeroPointsMemPtr != nullptr) { - primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC] = stockInputZeroPointsMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC] = stockInputZeroPointsMemPtr->getPrimitive(); } } diff --git a/src/plugins/intel_cpu/src/nodes/convert.cpp b/src/plugins/intel_cpu/src/nodes/convert.cpp index 9015e2d805f10d..388a4418ecff6e 100644 --- a/src/plugins/intel_cpu/src/nodes/convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/convert.cpp @@ -138,14 +138,14 @@ void Convert::execute(dnnl::stream strm) { auto& parentMem = getParentEdgeAt(0)->getMemory(); auto& childMem = getChildEdgeAt(0)->getMemory(); - const auto parentPaddElemCount = parentMem.GetDescWithType()->getPaddedElementsCount(); - const auto childPaddElemCount = childMem.GetDescWithType()->getPaddedElementsCount(); + const auto parentPaddElemCount = parentMem.getDescWithType()->getPaddedElementsCount(); + const auto childPaddElemCount = childMem.getDescWithType()->getPaddedElementsCount(); if (parentPaddElemCount != childPaddElemCount) IE_THROW() << errorPrefix << " has different elements number in input and output buffers"; - void* srcPtr = parentMem.GetPtr(); - void* dstPtr = childMem.GetPtr(); + void* srcPtr = parentMem.getData(); + void* dstPtr = childMem.getData(); cpu_convert(srcPtr, dstPtr, diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp index ae0a1187890873..494fb6176dd65e 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp @@ -70,9 +70,9 @@ void CTCGreedyDecoder::initSupportedPrimitiveDescriptors() { } void CTCGreedyDecoder::execute(dnnl::stream strm) { - const float* probabilities = 
reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetPtr()); - const float* sequenceMask = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); - float* outputSequences = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const float* probabilities = reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->getData()); + const float* sequenceMask = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->getData()); + float* outputSequences = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); const size_t T = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[0]; const size_t B = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[1]; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp index 34262e85f7fd2d..be695d85b8c6f2 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp @@ -75,10 +75,10 @@ void CTCGreedyDecoderSeqLen::initSupportedPrimitiveDescriptors() { } void CTCGreedyDecoderSeqLen::execute(dnnl::stream strm) { - const float* probabilities = reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetPtr()); - const int* sequenceLengths = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); - int* decodedClasses = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->GetPtr()); - int* decodedClassesLength = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_LENGTH_INDEX)[0]->getMemoryPtr()->GetPtr()); + const float* probabilities = reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->getData()); + const int* sequenceLengths = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->getData()); + int* decodedClasses = 
reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->getData()); + int* decodedClassesLength = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_LENGTH_INDEX)[0]->getMemoryPtr()->getData()); const size_t B = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[0];; const size_t T = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[1];; @@ -87,7 +87,7 @@ void CTCGreedyDecoderSeqLen::execute(dnnl::stream strm) { int blankIndex = C - 1; if (inputShapes.size() > BLANK_INDEX) - blankIndex = (reinterpret_cast(getParentEdgeAt(BLANK_INDEX)->getMemoryPtr()->GetPtr()))[0]; + blankIndex = (reinterpret_cast(getParentEdgeAt(BLANK_INDEX)->getMemoryPtr()->getData()))[0]; size_t workAmount = 0; for (size_t b = 0; b < B; b++) { diff --git a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp index 1acfd6f2bd7fc7..66753d2ebfe05b 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp @@ -67,11 +67,11 @@ void CTCLoss::executeDynamicImpl(dnnl::stream strm) { void CTCLoss::execute(dnnl::stream strm) { StatusCode returnCode = OK; - const float* logits = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const int* logitsLength = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - const int* labels = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - const int* labelsLength = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr()); - float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const float* logits = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + const int* logitsLength = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); + const int* labels = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->getData()); + const int* labelsLength = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->getData()); + float* dstData = 
reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); const auto &inDims = getParentEdgeAt(0)->getMemory().getStaticDims(); const size_t batchNum = inDims[0]; @@ -80,7 +80,7 @@ void CTCLoss::execute(dnnl::stream strm) { int blankIndex = classesNum - 1; if (inputShapes.size() > 4) { - blankIndex = reinterpret_cast(getParentEdgeAt(4)->getMemoryPtr()->GetPtr())[0]; + blankIndex = reinterpret_cast(getParentEdgeAt(4)->getMemoryPtr()->getData())[0]; } std::vector decodedTargetLenB(batchNum, 0); diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp index 64d2af3a9429c1..65d3a55ada0cb4 100644 --- a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp @@ -109,9 +109,9 @@ void CumSum::execute(dnnl::stream strm) { template void CumSum::exec() { - const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); - auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const VectorDims strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType()->getStrides(); + const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->getData()); + auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); + const VectorDims strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().getDescWithType()->getStrides(); if (reverse) { if (exclusive) { @@ -226,18 +226,18 @@ inline size_t CumSum::getStartOffset(const std::vector &forStartOffset, return startOffset; } -size_t CumSum::getAxis(const Memory& _axis, const Memory& _data) const { +size_t CumSum::getAxis(const IMemory& _axis, const IMemory& _data) const { const auto& axisPrecision = _axis.getDesc().getPrecision(); - const int64_t dataShapeSize = static_cast(_data.GetShape().getRank()); + const int64_t dataShapeSize = static_cast(_data.getShape().getRank()); int64_t axisValueFromBlob = 0; switch (axisPrecision) { case 
Precision::I32 : { - const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); + const auto *axisPtr = reinterpret_cast(_axis.getData()); axisValueFromBlob = static_cast(axisPtr[0]); break; } case Precision::I64 : { - const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); + const auto *axisPtr = reinterpret_cast(_axis.getData()); axisValueFromBlob = axisPtr[0]; break; } diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.h b/src/plugins/intel_cpu/src/nodes/cum_sum.h index 44b1f7a8e6d1df..eee2da8c085472 100644 --- a/src/plugins/intel_cpu/src/nodes/cum_sum.h +++ b/src/plugins/intel_cpu/src/nodes/cum_sum.h @@ -38,7 +38,7 @@ class CumSum : public Node { inline size_t getStartOffset(const std::vector &forStartOffset, const std::vector& strides) const; - size_t getAxis(const Memory& _axis, const Memory& _data) const; + size_t getAxis(const IMemory& _axis, const IMemory& _data) const; enum { CUM_SUM_DATA, AXIS, numOfInputs }; bool exclusive; diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index 94808aa6fc71be..874deddbefdd40 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -235,7 +235,7 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz if (!blb) IE_THROW() << "Cannot get const weights blob for node " << getName() << "."; - auto const blbSize = blb->GetSize(); + auto const blbSize = blb->getSize(); // WA: In int8 case, we are processing weights using internal blob. 
InferenceEngine::SizeVector dimsForBlockedDesc{dims}; @@ -251,7 +251,7 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz orderForBlockedDesc.push_back(i); BlockingDesc blkDesc(dimsForBlockedDesc, orderForBlockedDesc); - InferenceEngine::TensorDesc tensorDesc(DnnlExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()), dims, blkDesc); + InferenceEngine::TensorDesc tensorDesc(DnnlExtensionUtils::DataTypeToIEPrecision(blb->getDataType()), dims, blkDesc); Blob::Ptr internalBlob = InferenceEngine::make_shared_blob(tensorDesc); internalBlob->allocate(); @@ -264,7 +264,7 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz if (intBuffSize < offset) { IE_THROW() << "Cannot create internal buffer. Buffer can be overrun."; } - cpu_memcpy_s(data, intBuffSize, blb->GetPtr(), blbSize); + cpu_memcpy_s(data, intBuffSize, blb->getData(), blbSize); return internalBlob; } @@ -579,8 +579,7 @@ VectorDims Deconvolution::shapeInferInternal(const VectorDims &inDims, std::vect outSpDimsVecShape = {outSpDims.size()}; inputShapesRefs.push_back(std::cref(outSpDimsVecShape)); CpuBlockedMemoryDesc desc(Precision::I32, Shape(outSpDimsVecShape)); - auto mem = std::make_shared(getEngine()); - mem->Create(desc, outSpDims.data()); + auto mem = std::make_shared(getEngine(), desc, outSpDims.data()); inputValues[i] = mem; break; } @@ -762,13 +761,14 @@ void Deconvolution::createPrimitive() { } else { inDims = getInputShapeAtPort(0).getStaticDims(); outDims = getOutputShapeAtPort(0).getStaticDims(); - inDesc = getParentEdgesAtPort(0).front()->getMemory().GetDescWithType(); - outDesc = getChildEdgesAtPort(0).front()->getMemory().GetDescWithType(); + + inDesc = getParentEdgesAtPort(0).front()->getMemory().getDescWithType(); + outDesc = getChildEdgesAtPort(0).front()->getMemory().getDescWithType(); } dnnl::memory::desc dnnlBiasDesc; if (withBiases) { - DnnlMemoryDescPtr biasDesc = 
getParentEdgesAtPort(biasPort).front()->getMemory().GetDescWithType(); + DnnlMemoryDescPtr biasDesc = getParentEdgesAtPort(biasPort).front()->getMemory().getDescWithType(); dnnlBiasDesc = biasDesc->getDnnlDesc(); } @@ -807,8 +807,8 @@ void Deconvolution::prepareParams() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; - auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().GetDescWithType(); - auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().GetDescWithType(); + auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().getDescWithType(); + auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().getDescWithType(); AttrPtr pAttrLocal; if (isDynamicNode()) { @@ -831,15 +831,15 @@ void Deconvolution::prepareParams() { DnnlMemoryDescCPtr biasDesc; if (isInt8) { - wghDesc = internalBlobMemory.front()->GetDescWithType(); + wghDesc = internalBlobMemory.front()->getDescWithType(); if (withBiases) { biasMemPtr = getParentEdgesAtPort(biasPort)[0]->getMemoryPtr(); if (!biasMemPtr || !biasMemPtr->isAllocated()) IE_THROW() << "Bias memory memory didn't allocate."; - biasDesc = biasMemPtr->GetDescWithType(); + biasDesc = biasMemPtr->getDescWithType(); } } else { - wghDesc = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType(); + wghDesc = getParentEdgesAtPort(1).front()->getMemory().getDescWithType(); } DeconvKey key = {inMemoryDesc, @@ -954,20 +954,20 @@ void Deconvolution::prepareParams() { if (execPtr) { if (key.isInt8) { - primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive(); - primArgs[DNNL_ARG_WEIGHTS] = internalBlobMemory.front()->GetPrimitive(); - primArgs[DNNL_ARG_DST]= dstMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_SRC] = srcMemPtr->getPrimitive(); + primArgs[DNNL_ARG_WEIGHTS] = internalBlobMemory.front()->getPrimitive(); + primArgs[DNNL_ARG_DST]= dstMemPtr->getPrimitive(); if (withBiases) - primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive(); + 
primArgs[DNNL_ARG_BIAS] = biasMemPtr->getPrimitive(); } else { - primArgs[DNNL_ARG_DIFF_DST] = srcMemPtr->GetPrimitive(); - primArgs[DNNL_ARG_WEIGHTS] = wghMemPtr->GetPrimitive(); - primArgs[DNNL_ARG_DIFF_SRC] = dstMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_DIFF_DST] = srcMemPtr->getPrimitive(); + primArgs[DNNL_ARG_WEIGHTS] = wghMemPtr->getPrimitive(); + primArgs[DNNL_ARG_DIFF_SRC] = dstMemPtr->getPrimitive(); } Node::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs); auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->getPrimitive(); #ifdef CPU_DEBUG_CAPS if (result.second == CacheEntryBase::LookUpStatus::Miss) { auto pd = execPtr->getPrimitiveDesc(); @@ -1052,7 +1052,7 @@ InferenceEngine::Precision Deconvolution::getRuntimePrecision() const { for (size_t i = 0; i < std::min(getParentEdges().size(), inputsNumLimit); i++) { auto parentEdge = getParentEdgeAt(i); if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) { - inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->getDataType()))); } } @@ -1107,7 +1107,7 @@ std::vector Deconvolution::readOutputSpatialDims() const { if (shapeMemPtr->getStaticDims()[0] != spDimsNum) { IE_THROW() << "Can't read output spatial dims, beause 'output_shape' input has incorrect number of elements"; } - const int32_t *outShapePtr = reinterpret_cast(shapeMemPtr->GetPtr()); + const int32_t *outShapePtr = reinterpret_cast(shapeMemPtr->getData()); std::vector outSpDims(outShapePtr, outShapePtr + shapeMemPtr->getStaticDims()[0]); return outSpDims; } diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp index 5f15520a870807..d8dd6bb1a6b586 100644 --- 
a/src/plugins/intel_cpu/src/nodes/def_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp @@ -1160,10 +1160,10 @@ void DeformableConvolution::DefConvRefExecutor::exec(const float* src, const flo } void DeformableConvolution::prepareParams() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); - auto& offMemPtr = getParentEdgeAt(OFF_ID)->getMemoryPtr(); - auto& weiMemPtr = getParentEdgeAt(WEI_ID)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto offMemPtr = getParentEdgeAt(OFF_ID)->getMemoryPtr(); + auto weiMemPtr = getParentEdgeAt(WEI_ID)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate destination memory"; @@ -1175,7 +1175,7 @@ void DeformableConvolution::prepareParams() { IE_THROW() << errorPrefix << " did not allocate weights memory"; if (getOriginalInputsNumber() > 3) { - auto& modMemPtr = getParentEdgeAt(MOD_ID)->getMemoryPtr(); + auto modMemPtr = getParentEdgeAt(MOD_ID)->getMemoryPtr(); if (!modMemPtr || !modMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate modulations memory"; } @@ -1190,15 +1190,15 @@ void DeformableConvolution::prepareParams() { updatePadding(); std::vector> descVector { - getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType(), - getParentEdgeAt(OFF_ID)->getMemory().GetDescWithType(), - getParentEdgeAt(WEI_ID)->getMemory().GetDescWithType() + getParentEdgeAt(DATA_ID)->getMemory().getDescWithType(), + getParentEdgeAt(OFF_ID)->getMemory().getDescWithType(), + getParentEdgeAt(WEI_ID)->getMemory().getDescWithType() }; if (withModulation) { - descVector.push_back(getParentEdgeAt(MOD_ID)->getMemory().GetDescWithType()); + descVector.push_back(getParentEdgeAt(MOD_ID)->getMemory().getDescWithType()); } - descVector.push_back(getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType()); + 
descVector.push_back(getChildEdgesAtPort(0)[0]->getMemory().getDescWithType()); DefConvKey key = { descVector, @@ -1278,15 +1278,15 @@ void DeformableConvolution::execute(dnnl::stream strm) { auto &srcMemory2 = getParentEdgeAt(2)->getMemory(); auto &dstMemory = getChildEdgeAt(0)->getMemory(); - const auto *src = reinterpret_cast(srcMemory0.GetPtr()); - const auto *offsets = reinterpret_cast(srcMemory1.GetPtr()); - const auto *weights = reinterpret_cast(srcMemory2.GetPtr()); + const auto *src = reinterpret_cast(srcMemory0.getData()); + const auto *offsets = reinterpret_cast(srcMemory1.getData()); + const auto *weights = reinterpret_cast(srcMemory2.getData()); float* modulation = nullptr; if (inputsNumber > 3) { - modulation = reinterpret_cast(getParentEdgeAt(3)->getMemory().GetPtr()); + modulation = reinterpret_cast(getParentEdgeAt(3)->getMemory().getData()); } - float *dst = reinterpret_cast(dstMemory.GetPtr()); + float *dst = reinterpret_cast(dstMemory.getData()); auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp index 2c6927285f1642..6ed81784948925 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp @@ -161,8 +161,8 @@ void DepthToSpace::initSupportedPrimitiveDescriptors() { } void DepthToSpace::createPrimitive() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) THROW_ERROR << "has not allocated destination memory"; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -185,7 +185,7 @@ void DepthToSpace::createPrimitive() { } void DepthToSpace::prepareParams() { - attrs.srcBlockedDims = 
getParentEdgeAt(0)->getMemoryPtr()->GetDescWithType()->getBlockDims(); + attrs.srcBlockedDims = getParentEdgeAt(0)->getMemoryPtr()->getDescWithType()->getBlockDims(); auto builder = [](const DepthToSpaceAttrs& key) -> std::shared_ptr { return std::make_shared(key); }; @@ -288,12 +288,12 @@ DepthToSpace::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs permuteKernel = std::unique_ptr(new PermuteKernel(params)); } -void DepthToSpace::DepthToSpaceExecutor::exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB) { +void DepthToSpace::DepthToSpaceExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const int MB) { if (!permuteKernel) IE_THROW() << "Could not execute. Kernel for Transpose node was not compiled."; - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemPtr->getData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->getData()); permuteKernel->execute(srcData, dstData, MB); } diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.h b/src/plugins/intel_cpu/src/nodes/depth_to_space.h index 4088379944cb08..6fce5829dc7c4e 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.h +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.h @@ -46,7 +46,7 @@ class DepthToSpace : public Node { DepthToSpaceAttrs attrs; struct DepthToSpaceExecutor { DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs); - void exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB); + void exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const int MB); ~DepthToSpaceExecutor() = default; private: diff --git a/src/plugins/intel_cpu/src/nodes/detection_output.cpp b/src/plugins/intel_cpu/src/nodes/detection_output.cpp index 82eae313fd168f..e6f5e77b83a958 100644 --- a/src/plugins/intel_cpu/src/nodes/detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/detection_output.cpp @@ -93,14 +93,14 @@ 
DetectionOutput::DetectionOutput(const std::shared_ptr& op, const } void DetectionOutput::prepareParams() { - const auto& idPriorDims = getParentEdgeAt(ID_PRIOR)->getMemory().GetShape().getStaticDims(); - const auto &idConfDims = getParentEdgeAt(ID_CONF)->getMemory().GetShape().getStaticDims(); + const auto& idPriorDims = getParentEdgeAt(ID_PRIOR)->getMemory().getShape().getStaticDims(); + const auto &idConfDims = getParentEdgeAt(ID_CONF)->getMemory().getShape().getStaticDims(); priorsNum = static_cast(idPriorDims.back() / priorSize); isPriorsPerImg = idPriorDims.front() != 1; classesNum = static_cast(idConfDims.back() / priorsNum); locNumForClasses = isShareLoc ? 1 : classesNum; - const auto& idLocDims = getParentEdgeAt(ID_LOC)->getMemory().GetShape().getStaticDims(); + const auto& idLocDims = getParentEdgeAt(ID_LOC)->getMemory().getShape().getStaticDims(); if (priorsNum * locNumForClasses * 4 != static_cast(idLocDims[1])) IE_THROW() << errorPrefix << "has incorrect number of priors, which must match number of location predictions (" << priorsNum * locNumForClasses * 4 << " vs " @@ -169,15 +169,15 @@ void DetectionOutput::executeDynamicImpl(dnnl::stream strm) { } void DetectionOutput::execute(dnnl::stream strm) { - float *dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + float *dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); - const float *locData = reinterpret_cast(getParentEdgeAt(ID_LOC)->getMemoryPtr()->GetPtr()); - const float *confData = reinterpret_cast(getParentEdgeAt(ID_CONF)->getMemoryPtr()->GetPtr()); - const float *priorData = reinterpret_cast(getParentEdgeAt(ID_PRIOR)->getMemoryPtr()->GetPtr()); + const float *locData = reinterpret_cast(getParentEdgeAt(ID_LOC)->getMemoryPtr()->getData()); + const float *confData = reinterpret_cast(getParentEdgeAt(ID_CONF)->getMemoryPtr()->getData()); + const float *priorData = reinterpret_cast(getParentEdgeAt(ID_PRIOR)->getMemoryPtr()->getData()); const 
float *ARMConfData = inputShapes.size() > 3 ? - reinterpret_cast(getParentEdgeAt(ID_ARM_CONF)->getMemoryPtr()->GetPtr()) : nullptr; + reinterpret_cast(getParentEdgeAt(ID_ARM_CONF)->getMemoryPtr()->getData()) : nullptr; const float *ARMLocData = inputShapes.size() > 4 ? - reinterpret_cast(getParentEdgeAt(ID_ARM_LOC)->getMemoryPtr()->GetPtr()) : nullptr; + reinterpret_cast(getParentEdgeAt(ID_ARM_LOC)->getMemoryPtr()->getData()) : nullptr; float *reorderedConfData = reorderedConf.data(); int *reorderedConfDataIndices = reinterpret_cast(reorderedConf.data()); @@ -845,7 +845,7 @@ inline void DetectionOutput::generateOutput(float* reorderedConfData, int* indic else dstDataSize = imgNum * classesNum * priorsNum * DETECTION_SIZE * sizeof(float); - if (static_cast(dstDataSize) > getChildEdgesAtPort(0)[0]->getMemory().GetSize()) { + if (static_cast(dstDataSize) > getChildEdgesAtPort(0)[0]->getMemory().getSize()) { IE_THROW() << errorPrefix << OUT_OF_BOUNDS; } memset(dstData, 0, dstDataSize); diff --git a/src/plugins/intel_cpu/src/nodes/dft.cpp b/src/plugins/intel_cpu/src/nodes/dft.cpp index 8501272d8224c2..ee2e3cb48fe89b 100644 --- a/src/plugins/intel_cpu/src/nodes/dft.cpp +++ b/src/plugins/intel_cpu/src/nodes/dft.cpp @@ -239,13 +239,13 @@ void DFT::execute(dnnl::stream strm) { const auto inputDataEdge = getParentEdgeAt(DATA_INDEX); const auto outputDataEdge = getChildEdgeAt(0); - const auto src = reinterpret_cast(inputDataEdge->getMemoryPtr()->GetPtr()); - auto dst = reinterpret_cast(outputDataEdge->getMemoryPtr()->GetPtr()); + const auto src = reinterpret_cast(inputDataEdge->getMemoryPtr()->getData()); + auto dst = reinterpret_cast(outputDataEdge->getMemoryPtr()->getData()); - const auto inputRank = inputDataEdge->getMemory().GetShape().getRank(); + const auto inputRank = inputDataEdge->getMemory().getShape().getRank(); - const auto& inputStrides = inputDataEdge->getMemory().GetDescWithType()->getStrides(); - const auto& outputStrides = 
outputDataEdge->getMemory().GetDescWithType()->getStrides(); + const auto& inputStrides = inputDataEdge->getMemory().getDescWithType()->getStrides(); + const auto& outputStrides = outputDataEdge->getMemory().getDescWithType()->getStrides(); size_t nComplexMaxFFT = 0; for (size_t axis : axes) { @@ -542,7 +542,7 @@ void DFT::prepareParams() { std::vector DFT::getAxes() const { auto axesEdge = getParentEdgeAt(AXES_INDEX); - const auto* axesStartPtr = reinterpret_cast(axesEdge->getMemoryPtr()->GetPtr()); + const auto* axesStartPtr = reinterpret_cast(axesEdge->getMemoryPtr()->getData()); auto axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getMemory().getStaticDims()[0]); for (auto& axis : axes) { if (axis < 0) { diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 725fb3c05a4b69..1f74c4f70a2c1a 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -2219,10 +2219,10 @@ void Eltwise::createPrimitive() { start_offset_in.resize(inputNum); for (size_t i = 0; i < inputNum; i++) { - const auto desc = getParentEdgeAt(i)->getMemory().GetDescWithType(); + const auto desc = getParentEdgeAt(i)->getMemory().getDescWithType(); start_offset_in[i] = desc->getOffsetPadding() * desc->getPrecision().size(); } - const auto desc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + const auto desc = getChildEdgeAt(0)->getMemory().getDescWithType(); start_offset_out = desc->getOffsetPadding() * desc->getPrecision().size(); for (size_t i = 0; i < inputNum; ++i) { @@ -2249,7 +2249,7 @@ void Eltwise::prepareParams() { return; } - auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto outBlockingDesc = getChildEdgeAt(0)->getMemory().getDescWithType(); const auto &outOrder = outBlockingDesc->getOrder(); const auto ¤tOutBlkDims = outBlockingDesc->getBlockDims(); @@ -2265,7 +2265,7 @@ void Eltwise::prepareParams() { size_t outRank = 
currentOutBlkDims.size(); for (size_t i = 0; i < inputNum; i++) { - auto inBlockingDesc = getParentEdgeAt(i)->getMemory().GetDescWithType(); + auto inBlockingDesc = getParentEdgeAt(i)->getMemory().getDescWithType(); currentInBlkDims[i] = inBlockingDesc->getBlockDims(); size_t inRank = currentInBlkDims[i].size(); @@ -2379,7 +2379,7 @@ void Eltwise::prepareParams() { bool Eltwise::needPrepareParams() const { for (size_t i = 0; i < getParentEdges().size(); i++) { - if (getParentEdgesAtPort(i)[0]->getMemory().GetDescWithType()->getBlockDims() != currentInBlkDims[i]) + if (getParentEdgesAtPort(i)[0]->getMemory().getDescWithType()->getBlockDims() != currentInBlkDims[i]) return true; } return false; @@ -2394,8 +2394,8 @@ void Eltwise::execute(dnnl::stream strm) { jit_eltwise_call_args_ptrs args_ptrs = {}; VectorDims dims_out = implType == EltwiseImplType::optimizedShapeAgnostic ? execParams.outDims : execPtr->getOutDims(); for (size_t i = 0; i < memPtrs.size() - 1; i++) - args_ptrs.src_ptr[i] = reinterpret_cast(memPtrs[i]->GetData()) + start_offset_in[i]; - args_ptrs.dst_ptr = reinterpret_cast(memPtrs.back()->GetData()) + start_offset_out; + args_ptrs.src_ptr[i] = reinterpret_cast(memPtrs[i]->getData()) + start_offset_in[i]; + args_ptrs.dst_ptr = reinterpret_cast(memPtrs.back()->getData()) + start_offset_out; args_ptrs.post_op_data = fqDataPtrs.data(); @@ -2471,10 +2471,8 @@ void Eltwise::fuseInto(NodePtr& parentNode) { void Eltwise::appendMemory(const std::vector &data, MemoryPtr &memPtr, std::vector& postOpsMem) { if (!memPtr) { - memPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {data.size()}); - memPtr->Create(memoryDesc, data.data()); - + memPtr = std::make_shared(getEngine(), memoryDesc, data.data()); postOpsMem.push_back(memPtr); } } @@ -2754,7 +2752,7 @@ InferenceEngine::Precision Eltwise::getRuntimePrecision() const { for (size_t i = 0; i < getParentEdges().size(); i++) { auto parentEdge = getParentEdgeAt(i); if (parentEdge 
&& parentEdge->getStatus() == Edge::Status::Validated && !parentEdge->getParent()->isConstant()) { - inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->getDataType()))); } } diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp index 18f765ad195de0..3268c8b4f937ef 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp @@ -81,11 +81,11 @@ void EmbeddingBagOffsetSum::prepareParams() { } void EmbeddingBagOffsetSum::initFromInputs() { - indicesData_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); - offsetsData_ = reinterpret_cast(getParentEdgeAt(OFFSETS_IDX)->getMemoryPtr()->GetPtr()); + indicesData_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->getData()); + offsetsData_ = reinterpret_cast(getParentEdgeAt(OFFSETS_IDX)->getMemoryPtr()->getData()); if (getParentEdges().size() > DEFAULT_INDEX_IDX) { - defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->GetPtr()); + defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->getData()); } } @@ -131,10 +131,10 @@ bool EmbeddingBagOffsetSum::isExecutable() const { } void EmbeddingBagOffsetSum::execute(dnnl::stream strm) { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); const uint8_t* weightsData = nullptr; if (_withWeights) - weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); + weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->getData()); const auto &inputMem = 
getParentEdgeAt(0)->getMemory(); EmbeddingBagSum::execute(srcData, weightsData, inputMem.getDesc().getPrecision(), diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp index 8fde7972a18148..9e9b3df94587ad 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp @@ -75,7 +75,7 @@ void EmbeddingBagPackedSum::prepareParams() { } void EmbeddingBagPackedSum::initFromInputs() { - _indices = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); + _indices = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->getData()); } void EmbeddingBagPackedSum::getIndices(size_t embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { @@ -99,10 +99,10 @@ bool EmbeddingBagPackedSum::isExecutable() const { } void EmbeddingBagPackedSum::execute(dnnl::stream strm) { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); const uint8_t* weightsData = nullptr; if (_withWeights) - weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); + weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->getData()); const auto &inputMem = getParentEdgeAt(0)->getMemory(); EmbeddingBagSum::execute(srcData, weightsData, inputMem.getDesc().getPrecision(), diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp index 13b19a45d1ad0a..9215e5b5ecc090 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp @@ -53,8 +53,8 @@ void EmbeddingBagSum::processData(const T* srcData, const T* weightsData, initFromInputs(); - const size_t outputBagsNum = 
outMemory->GetShape().getStaticDims()[0]; - auto *dstData = reinterpret_cast(outMemory->GetPtr()); + const size_t outputBagsNum = outMemory->getShape().getStaticDims()[0]; + auto *dstData = reinterpret_cast(outMemory->getData()); auto threadBody = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); diff --git a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp index e0fee5ee8b2c4c..91e41c68842579 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp @@ -83,14 +83,14 @@ void EmbeddingSegmentsSum::prepareParams() { } void EmbeddingSegmentsSum::initFromInputs() { - indices_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); - indicesSize_ = getParentEdgeAt(INDICES_IDX)->getMemory().GetShape().getElementsCount(); + indices_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->getData()); + indicesSize_ = getParentEdgeAt(INDICES_IDX)->getMemory().getShape().getElementsCount(); - segmentIds_ = reinterpret_cast(getParentEdgeAt(SEGMENT_ID_IDX)->getMemoryPtr()->GetPtr()); + segmentIds_ = reinterpret_cast(getParentEdgeAt(SEGMENT_ID_IDX)->getMemoryPtr()->getData()); lastNumSegments_ = getNumSegments(); if (getParentEdges().size() > DEFAULT_INDEX_IDX) { - defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->GetPtr()); + defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->getData()); } } @@ -123,7 +123,7 @@ void EmbeddingSegmentsSum::getIndices(size_t embIndex, const int*& indices, size } int32_t EmbeddingSegmentsSum::getNumSegments() const { - return reinterpret_cast(getParentEdgesAtPort(NUM_SEGMENTS_IDX)[0]->getMemory().GetPtr())[0]; + return reinterpret_cast(getParentEdgesAtPort(NUM_SEGMENTS_IDX)[0]->getMemory().getData())[0]; } bool EmbeddingSegmentsSum::needShapeInfer() const { @@ -147,10 +147,10 @@ bool 
EmbeddingSegmentsSum::isExecutable() const { } void EmbeddingSegmentsSum::execute(dnnl::stream strm) { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); const uint8_t* weightsData = nullptr; if (_withWeights) - weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); + weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->getData()); const auto &inputMem = getParentEdgeAt(0)->getMemory(); EmbeddingBagSum::execute(srcData, weightsData, inputMem.getDesc().getPrecision(), diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp index 77a86d2f6b1994..f6c3119456e88d 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp @@ -465,10 +465,10 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto void AclEltwiseExecutor::exec(const std::vector &src, const std::vector &dst, const void *post_ops_data_) { for (size_t i = 0; i < src.size(); i++) { - srcTensors[i].allocator()->import_memory(src[i]->GetPtr()); + srcTensors[i].allocator()->import_memory(src[i]->getData()); } for (size_t i = 0; i < dst.size(); i++) { - dstTensors[i].allocator()->import_memory(dst[i]->GetPtr()); + dstTensors[i].allocator()->import_memory(dst[i]->getData()); } exec_func(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_interpolate.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_interpolate.cpp index 400c06981c7867..5b6beb0ec3fa59 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_interpolate.cpp @@ -93,7 +93,7 @@ bool ov::intel_cpu::ACLInterpolateExecutor::init(const InterpolateAttrs &interpo void 
ov::intel_cpu::ACLInterpolateExecutor::exec(const std::vector& src, const std::vector& dst, const void *post_ops_data_) { auto in_ptr_ = padPreprocess(src, dst); srcTensor.allocator()->import_memory(const_cast(reinterpret_cast(in_ptr_))); - dstTensor.allocator()->import_memory(dst[0]->GetPtr()); + dstTensor.allocator()->import_memory(dst[0]->getData()); acl_scale->run(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp index 84ae23e31e98c8..18f9961c67966a 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp @@ -63,8 +63,8 @@ bool AclMVNExecutor::init(const MVNAttrs& mvnAttrs, } void AclMVNExecutor::exec(const std::vector& src, const std::vector& dst, const void *post_ops_data_) { - srcTensor.allocator()->import_memory(src[0]->GetPtr()); - dstTensor.allocator()->import_memory(dst[0]->GetPtr()); + srcTensor.allocator()->import_memory(src[0]->getData()); + dstTensor.allocator()->import_memory(dst[0]->getData()); mvn->run(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp index 4e152e3987abfc..c63fe50173918b 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp @@ -173,9 +173,9 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, } void AclPoolingExecutor::exec(const std::vector& src, const std::vector& dst, std::unordered_map postOpsArgs) { - srcTensor.allocator()->import_memory(src[0]->GetPtr()); - dstTensor.allocator()->import_memory(dst[0]->GetPtr()); - if (dst.size() > 1u) indTensor.allocator()->import_memory(dst[1]->GetPtr()); + srcTensor.allocator()->import_memory(src[0]->getData()); + dstTensor.allocator()->import_memory(dst[0]->getData()); + if (dst.size() > 1u) 
indTensor.allocator()->import_memory(dst[1]->getData()); exec_func(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp index b0fa90831bb7f9..10d7641fcb1535 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp @@ -92,8 +92,8 @@ bool AclReduceExecutor::init(const ReduceAttrs& reduceAttrs, } void AclReduceExecutor::exec(const std::vector& src, const std::vector& dst, const void *post_ops_data_) { - srcTensor.allocator()->import_memory(src[0]->GetPtr()); - dstTensor.allocator()->import_memory(dst[0]->GetPtr()); + srcTensor.allocator()->import_memory(src[0]->getData()); + dstTensor.allocator()->import_memory(dst[0]->getData()); exec_func(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/executors/interpolate.cpp index 09571941a409d1..01a8bffa205e77 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/interpolate.cpp @@ -451,7 +451,7 @@ inline SizeVector getBlockND(const SizeVector& shape) { } const uint8_t* ov::intel_cpu::InterpolateExecutor::padPreprocess(const std::vector& src, const std::vector& dst) { - const uint8_t *src_data_origin = reinterpret_cast(src[0]->GetData()); + const uint8_t *src_data_origin = reinterpret_cast(src[0]->getData()); const auto &srcDim = src[0]->getStaticDims(); const auto &dstDim = dst[0]->getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp index 8f420de0deef61..f44787e693176d 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp @@ -277,14 +277,14 @@ void 
ExperimentalDetectronDetectionOutput::execute(dnnl::stream strm) { assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims()[1])); assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getMemory().getStaticDims()[1])); - const auto* boxes = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); - const auto* deltas = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); - const auto* scores = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->GetPtr()); - const auto* im_info = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->GetPtr()); - - auto* output_boxes = reinterpret_cast(getChildEdgesAtPort(OUTPUT_BOXES)[0]->getMemoryPtr()->GetPtr()); - auto* output_scores = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); - auto* output_classes = reinterpret_cast(getChildEdgesAtPort(OUTPUT_CLASSES)[0]->getMemoryPtr()->GetPtr()); + const auto* boxes = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->getData()); + const auto* deltas = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->getData()); + const auto* scores = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->getData()); + const auto* im_info = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->getData()); + + auto* output_boxes = reinterpret_cast(getChildEdgesAtPort(OUTPUT_BOXES)[0]->getMemoryPtr()->getData()); + auto* output_scores = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->getData()); + auto* output_classes = reinterpret_cast(getChildEdgesAtPort(OUTPUT_CLASSES)[0]->getMemoryPtr()->getData()); const float img_H = im_info[0]; const float img_W = im_info[1]; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp index 
bad80a534ba2c8..8caef748256957 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp @@ -349,13 +349,13 @@ void ExperimentalDetectronGenerateProposalsSingleImage::execute(dnnl::stream str IE_THROW() << "'Deltas' blob size for ONNXProposal is incompatible with 'scores' blob size!"; // Prepare memory - const float *p_deltas_item = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); - const float *p_scores_item = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->GetPtr()); - const float *p_anchors_item = reinterpret_cast(getParentEdgeAt(INPUT_ANCHORS)->getMemoryPtr()->GetPtr()); - const float *p_img_info_cpu = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->GetPtr()); + const float *p_deltas_item = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->getData()); + const float *p_scores_item = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->getData()); + const float *p_anchors_item = reinterpret_cast(getParentEdgeAt(INPUT_ANCHORS)->getMemoryPtr()->getData()); + const float *p_img_info_cpu = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->getData()); - float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); - float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); + float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->getData()); + float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->getData()); const int anchors_num = scoreDims[0]; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp index a0a3ecde0a92c8..09a94c1d7bd265 100644 --- 
a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp @@ -70,8 +70,8 @@ void ExperimentalDetectronPriorGridGenerator::execute(dnnl::stream strm) { const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getMemory().getStaticDims()[3]) / layer_width; const float step_h = stride_h_ ? stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getMemory().getStaticDims()[2]) / layer_height; - const auto *bottom_data_0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto *top_data_0 = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); + const auto *bottom_data_0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + auto *top_data_0 = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->getData()); for (int h = 0; h < layer_height; ++h) { for (int w = 0; w < layer_width; ++w) { diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp index 8f6d67d30f3b93..f5b1d5275c17ab 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp @@ -335,11 +335,11 @@ void ExperimentalDetectronROIFeatureExtractor::execute(dnnl::stream strm) { const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getMemory().getStaticDims()[1]; const int feaxels_per_roi = pooled_height_ * pooled_width_ * channels_num; - auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); - auto *output_rois_features = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_FEATURES)[0]->getMemoryPtr()->GetPtr()); + auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->getData()); + auto *output_rois_features = 
reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_FEATURES)[0]->getMemoryPtr()->getData()); float *output_rois = nullptr; if (OUTPUT_ROIS < outputShapes.size()) { - output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); + output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->getData()); } std::vector level_ids(num_rois, 0); @@ -357,7 +357,7 @@ void ExperimentalDetectronROIFeatureExtractor::execute(dnnl::stream strm) { const int level_rois_offset = rois_per_level[i]; const int level_rois_num = rois_per_level[i + 1] - level_rois_offset; if (level_rois_num > 0) { - auto *featuremap = reinterpret_cast(getParentEdgeAt(INPUT_FEATURES_START + i)->getMemoryPtr()->GetPtr()); + auto *featuremap = reinterpret_cast(getParentEdgeAt(INPUT_FEATURES_START + i)->getMemoryPtr()->getData()); const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getMemory().getStaticDims()[2]; const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getMemory().getStaticDims()[3]; ROIAlignForward_cpu_kernel(feaxels_per_roi * level_rois_num, diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp index 45cb3ad579119e..194839a6570447 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp @@ -67,9 +67,9 @@ void ExperimentalDetectronTopKROIs::execute(dnnl::stream strm) { const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0]; const int top_rois_num = (std::min)(max_rois_num_, input_rois_num); - auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); - auto *input_probs = reinterpret_cast(getParentEdgeAt(INPUT_PROBS)->getMemoryPtr()->GetPtr()); - auto *output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); + 
auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->getData()); + auto *input_probs = reinterpret_cast(getParentEdgeAt(INPUT_PROBS)->getMemoryPtr()->getData()); + auto *output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->getData()); std::vector idx(input_rois_num); iota(idx.begin(), idx.end(), 0); diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp index 6550d55b4d85a3..e2ec372dce6eb5 100644 --- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp +++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp @@ -419,10 +419,10 @@ void ExtractImagePatches::initSupportedPrimitiveDescriptors() { void ExtractImagePatches::execute(dnnl::stream strm) { if (execPtr) { - auto src = getParentEdgeAt(0)->getMemoryPtr()->GetPtr(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr(); - const auto inStrides = getParentEdgeAt(0)->getMemory().GetDescWithType()->getStrides(); - const auto outStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType()->getStrides(); + auto src = getParentEdgeAt(0)->getMemoryPtr()->getData(); + auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData(); + const auto inStrides = getParentEdgeAt(0)->getMemory().getDescWithType()->getStrides(); + const auto outStrides = getChildEdgesAtPort(0)[0]->getMemory().getDescWithType()->getStrides(); execPtr->exec(src, dst, inStrides, outStrides); } else { IE_THROW() << "Can't execute extract image patches node. 
Primitive wasn't created"; diff --git a/src/plugins/intel_cpu/src/nodes/eye.cpp b/src/plugins/intel_cpu/src/nodes/eye.cpp index 1332942655ebd4..747e89bdc1ed11 100644 --- a/src/plugins/intel_cpu/src/nodes/eye.cpp +++ b/src/plugins/intel_cpu/src/nodes/eye.cpp @@ -108,7 +108,7 @@ void Eye::executeSpecified() { auto outPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!outPtr || !outPtr ->isAllocated()) THROW_ERROR << errorPrefix << "Destination memory didn't allocate."; - T *dst = reinterpret_cast(outPtr->GetPtr()); + T *dst = reinterpret_cast(outPtr->getData()); const size_t batchVolume = getBatchVolume(getBatchShape()); const size_t spatialCount = colNum * rowNum; diff --git a/src/plugins/intel_cpu/src/nodes/eye.h b/src/plugins/intel_cpu/src/nodes/eye.h index 0bf22bf8a1a03d..a6139c3f4b901c 100644 --- a/src/plugins/intel_cpu/src/nodes/eye.h +++ b/src/plugins/intel_cpu/src/nodes/eye.h @@ -45,7 +45,7 @@ class Eye : public Node { auto rowMem = getParentEdgeAt(ROWS_NUM)->getMemoryPtr(); if (rowMem == nullptr) IE_THROW() << errorPrefix << " doesn't contain row_count data"; - const int *rowPtr = reinterpret_cast(rowMem->GetPtr()); + const int *rowPtr = reinterpret_cast(rowMem->getData()); return rowPtr[0]; } @@ -53,7 +53,7 @@ class Eye : public Node { auto colMem = getParentEdgeAt(COLS_NUM)->getMemoryPtr(); if (colMem == nullptr) IE_THROW() << errorPrefix << " doesn't contain col_count data"; - const int *colPtr = reinterpret_cast(colMem->GetPtr()); + const int *colPtr = reinterpret_cast(colMem->getData()); return colPtr[0]; } @@ -61,15 +61,15 @@ class Eye : public Node { auto diagIndMem = getParentEdgeAt(DIAGONAL_INDEX)->getMemoryPtr(); if (diagIndMem == nullptr) IE_THROW() << errorPrefix << " doesn't contain diag_index data"; - const int *diagIndexPtr = reinterpret_cast(diagIndMem->GetPtr()); + const int *diagIndexPtr = reinterpret_cast(diagIndMem->getData()); return diagIndexPtr[0]; } inline const std::vector getBatchShape() const { if (withBatchShape) { - const int 
batchShapeSize = static_cast(getParentEdgeAt(BATCH_SHAPE)->getMemoryPtr()->GetShape().getElementsCount()); + const int batchShapeSize = static_cast(getParentEdgeAt(BATCH_SHAPE)->getMemoryPtr()->getShape().getElementsCount()); std::vector batchShape(batchShapeSize); - const int *batchShapePtr = reinterpret_cast(getParentEdgeAt(BATCH_SHAPE)->getMemoryPtr()->GetPtr()); + const int *batchShapePtr = reinterpret_cast(getParentEdgeAt(BATCH_SHAPE)->getMemoryPtr()->getData()); batchShape.assign(batchShapePtr, batchShapePtr + batchShapeSize); return batchShape; } else { diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp index 37fee1e7ff30f8..283a6555be3fef 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp @@ -1391,7 +1391,7 @@ bool FakeQuantize::needPrepareParams() const { void FakeQuantize::prepareParams() { if (isBinarization()) { - const size_t axisSize = getParentEdgeAt(0)->getMemory().GetShape().getStaticDims()[getAxis()]; + const size_t axisSize = getParentEdgeAt(0)->getMemory().getShape().getStaticDims()[getAxis()]; const size_t newPaddedSize = rnd_up(axisSize, 16); IE_ASSERT(newPaddedSize != 0); @@ -1415,8 +1415,7 @@ void FakeQuantize::prepareParams() { } if (internalBlobMemory.empty() || needUpdThr) { - auto binarizationThresholdsDataMem = std::make_shared(getEngine()); - binarizationThresholdsDataMem->Create(weightsDataDesc, getBinarizationTresholdsPtr()); + auto binarizationThresholdsDataMem = std::make_shared(getEngine(), weightsDataDesc, getBinarizationTresholdsPtr()); if (internalBlobMemory.empty()) { internalBlobMemory.push_back(binarizationThresholdsDataMem); } else { @@ -1425,8 +1424,7 @@ void FakeQuantize::prepareParams() { } if (internalBlobMemory.size() == (numBinFqIntBlob - 1) || needUpdMask) { - auto binarizationMaskDataMem = std::make_shared(getEngine()); - binarizationMaskDataMem->Create(weightsDataDesc, 
getBinarizationOutputMaskPtr()); + auto binarizationMaskDataMem = std::make_shared(getEngine(), weightsDataDesc, getBinarizationOutputMaskPtr()); if (internalBlobMemory.size() == (numBinFqIntBlob - 1)) { internalBlobMemory.push_back(binarizationMaskDataMem); } else { @@ -1486,16 +1484,16 @@ void FakeQuantize::createPrimitive() { } void FakeQuantize::executeReference() { - auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); - auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemory = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); - auto src = reinterpret_cast(srcMemory->GetPtr()); + auto src = reinterpret_cast(srcMemory->getData()); auto srcDims = srcMemory->getStaticDims(); auto dstDims = dstMemory->getStaticDims(); - auto s_str = srcMemory->GetDescWithType()->getStrides(); - auto d_str = dstMemory->GetDescWithType()->getStrides(); + auto s_str = srcMemory->getDescWithType()->getStrides(); + auto d_str = dstMemory->getDescWithType()->getStrides(); const int N = srcDims[0]; const int C = srcDims.size() > 1 ? 
srcDims[1] : 1; @@ -1516,13 +1514,13 @@ void FakeQuantize::executeReference() { } d_str[1] = tmp; - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto dst = reinterpret_cast(dstMemory->getData()); const int nbits = 8; const int CB = impl::utils::div_up(C, nbits); - auto thresholds = reinterpret_cast(internalBlobMemory[0]->GetData()); - auto output_mask = reinterpret_cast(internalBlobMemory[1]->GetData()); + auto thresholds = reinterpret_cast(internalBlobMemory[0]->getData()); + auto output_mask = reinterpret_cast(internalBlobMemory[1]->getData()); parallel_nd(N, CB, D, H, W, [&](dim_t n, dim_t cb, dim_t d, dim_t h, dim_t w) { uint8_t bin_val = 0x00; @@ -1552,7 +1550,7 @@ void FakeQuantize::executeReference() { dst[dst_off / nbits] = bin_val; }); } else { - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto dst = reinterpret_cast(dstMemory->getData()); parallel_nd(N, C, D, H, W, [&](dim_t n, dim_t c, dim_t d, dim_t h, dim_t w) { size_t src_off = srcDims.size() == 5 ? @@ -1596,18 +1594,18 @@ void FakeQuantize::executeReference() { } void FakeQuantize::executeBinarization(const std::unique_ptr &pKernel) const { #if defined(OPENVINO_ARCH_X86_64) - const auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); - auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemory = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); - auto src = reinterpret_cast(srcMemory->GetPtr()); - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto src = reinterpret_cast(srcMemory->getData()); + auto dst = reinterpret_cast(dstMemory->getData()); - auto thresholds = reinterpret_cast(internalBlobMemory[0]->GetData()); - auto output_mask = reinterpret_cast(internalBlobMemory[1]->GetData()); + auto thresholds = reinterpret_cast(internalBlobMemory[0]->getData()); + auto output_mask = reinterpret_cast(internalBlobMemory[1]->getData()); auto src_dims = srcMemory->getStaticDims(); - auto srcMemDesc = srcMemory->GetDescWithType(); + auto 
srcMemDesc = srcMemory->getDescWithType(); std::vector s_str = srcMemDesc->getStrides(); size_t tmp = s_str[s_str.size() - 1]; for (int i = s_str.size() - 1; i > 1; i--) { @@ -1638,11 +1636,11 @@ void FakeQuantize::executeBinarization(const std::unique_ptr &pKernel) const { #if defined(OPENVINO_ARCH_X86_64) - auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); - auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemory = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); - auto src = reinterpret_cast(srcMemory->GetPtr()); - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto src = reinterpret_cast(srcMemory->getData()); + auto dst = reinterpret_cast(dstMemory->getData()); auto& srcDesc = srcMemory->getDesc(); auto srcDims = srcDesc.getShape().getStaticDims(); @@ -1655,7 +1653,7 @@ void FakeQuantize::executeQuantization(const std::unique_ptrGetDescWithType(); + auto srcMemDesc = srcMemory->getDescWithType(); auto s_str = srcMemDesc->getStrides(); if (is_blk_format) { @@ -1838,9 +1836,8 @@ void FakeQuantize::initializePostOpDataLegacy(const VectorDims &dims, const size void FakeQuantize::appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector& postOpsMem) { if (!memPtr) { - memPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {dataSize}); - memPtr->Create(memoryDesc, data); + memPtr = std::make_shared(getEngine(), memoryDesc, data); postOpsMem.push_back(memPtr); } diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index d6b28ccddb414a..c194502864eab7 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -312,11 +312,11 @@ void FullyConnected::prepareParams() { DnnlMemoryDescPtr weightDesc = MemoryDescUtils::convertToDnnlMemoryDesc(weightDescIP); DnnlMemoryDescCPtr biasDesc = nullptr; if (biasMemPtr) { - biasDesc 
= biasMemPtr->GetDescWithType(); + biasDesc = biasMemPtr->getDescWithType(); } - DnnlMemoryDescCPtr inDesc = srcMemPtr->GetDescWithType(); - DnnlMemoryDescCPtr outDesc = dstMemPtr->GetDescWithType(); + DnnlMemoryDescCPtr inDesc = srcMemPtr->getDescWithType(); + DnnlMemoryDescCPtr outDesc = dstMemPtr->getDescWithType(); useConv1x1 = canBeExecutedInConv1x1(); FCKey key = {inDesc, @@ -398,19 +398,19 @@ void FullyConnected::prepareParams() { if (execPtr) { if (execPtr->getSrcDesc()->isCompatible(*inDesc)) { - primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_SRC] = srcMemPtr->getPrimitive(); } else { - primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getDnnlSrcDesc(), engine, srcMemPtr->GetData()); + primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getDnnlSrcDesc(), engine, srcMemPtr->getData()); } if (execPtr->getDstDesc()->isCompatible(*outDesc)) { - primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_DST] = dstMemPtr->getPrimitive(); } else { - primArgs[DNNL_ARG_DST] = dnnl::memory(execPtr->getDnnlDstDesc(), engine, dstMemPtr->GetData()); + primArgs[DNNL_ARG_DST] = dnnl::memory(execPtr->getDnnlDstDesc(), engine, dstMemPtr->getData()); } if (!prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) { - primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->GetPrimitive(); + primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->getPrimitive(); } // changed shapes may also cause the kernel type changed selected_pd->setImplementationType(execPtr->getImplementationType()); @@ -422,11 +422,11 @@ void FullyConnected::prepareParams() { useConv1x1 = execPtr->getImplementationType() == brgconv_avx512_1x1; if (withBiases) { - primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_BIAS] = biasMemPtr->getPrimitive(); } auto schratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - primArgs[DNNL_ARG_SCRATCHPAD] = 
schratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = schratchpadMem->getPrimitive(); #ifdef CPU_DEBUG_CAPS if (result.second == CacheEntryBase::LookUpStatus::Miss) { auto pd = execPtr->getPrimitiveDesc(); @@ -449,10 +449,10 @@ void FullyConnected::execute(dnnl::stream strm) { auto param = primArgs.find(argType); if (param != primArgs.end()) { if (argType == DNNL_ARG_SRC && (getInputShapeAtPort(DATA_ID).getRank() == 3 || useConv1x1)) { - primArgs.at(argType).set_data_handle(getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); + primArgs.at(argType).set_data_handle(getParentEdgesAtPort(0)[0]->getMemoryPtr()->getData()); } if (argType == DNNL_ARG_DST && (getOutputShapeAtPort(0).getRank() == 3 || useConv1x1)) { - primArgs.at(argType).set_data_handle(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); + primArgs.at(argType).set_data_handle(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); } } }; @@ -744,7 +744,7 @@ InferenceEngine::Precision FullyConnected::getRuntimePrecision() const { for (size_t i = 0; i < std::min(getParentEdges().size(), inputsNumLimit); i++) { auto parentEdge = getParentEdgeAt(i); if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) { - inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->getDataType()))); } } @@ -840,7 +840,7 @@ bool FullyConnected::canBeExecutedInConv1x1() const { getOriginalInputPrecisionAtPort(DATA_ID) == InferenceEngine::Precision::FP32 && one_of(inRank, 2u, 3u) && weightRank == 2) { auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); - DnnlMemoryDescCPtr outDesc = dstMemPtr->GetDescWithType(); + DnnlMemoryDescCPtr outDesc = dstMemPtr->getDescWithType(); // brg convolution does not support stride dnnl::impl::memory_desc_wrapper wrapped(outDesc->getDnnlDesc().get()); if (wrapped.offset0() == 0) @@ -905,8 +905,8 @@ 
bool FullyConnected::useSparseWeightsDecompression() { if (blb == nullptr) IE_THROW() << "Cannot get const blob for node " << getName() << "."; - auto weightsData = reinterpret_cast(blb->GetPtr()); - auto elementsCount = blb->GetDescWithType()->getPaddedElementsCount(); + auto weightsData = reinterpret_cast(blb->getData()); + auto elementsCount = blb->getDescWithType()->getPaddedElementsCount(); size_t zerosCounts = 0; for (size_t i = 0; i < elementsCount; i++) { if (weightsData[i] == 0) { diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 16982f80ab065a..06314ca17c6f5e 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -12,6 +12,7 @@ #include #include "kernels/x64/gather_uni_kernel.hpp" #include "utils/shape_inference/shape_inference_cpu.hpp" +#include using namespace InferenceEngine; using namespace dnnl::impl::cpu; @@ -61,7 +62,7 @@ class GatherShapeInfer : public ShapeInferEmptyPads { IE_THROW() << "Unsupported precision " << data_dependency.at(GATHER_AXIS)->getDesc().getPrecision() << " for axis tensor."; } - m_axis = reinterpret_cast(data_dependency.at(GATHER_AXIS)->GetPtr())[0]; + m_axis = reinterpret_cast(data_dependency.at(GATHER_AXIS)->getData())[0]; } if (m_axis < 0) @@ -162,6 +163,10 @@ Gather::Gather(const std::shared_ptr& op, const GraphContext::CPtr con if (axis < 0 || axis >= dataSrcRank || batchDims > axis) THROW_ERROR << "has incorrect input parameter axis value: " << axis; } + + if (auto indices = ov::as_type(op->get_input_node_ptr(GATHER_INDICES))) { + constIndices = indices->cast_vector(); + } } void Gather::initSupportedPrimitiveDescriptors() { @@ -201,9 +206,28 @@ void Gather::initSupportedPrimitiveDescriptors() { {LayoutType::ncsp, Precision::I32, isAxisInputConst}}, {{LayoutType::ncsp, dataPrecision}}, ref_any); + + // Let's check for the special inPlace memory use case + // in place only makes sense when we split by dense blocks 
since strided tensors are not supported by most nodes + + const auto& parentdDims = inputShapes[0].getDims(); + if (isAxisInputConst && + 0 == batchDims && + 1 == constIndices.size() && + parentdDims[axis] != Shape::UNDEFINED_DIM && + std::all_of(parentdDims.begin(), parentdDims.begin() + axis, [](size_t dim) { return dim == 1; })) { + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32, isAxisInputConst}}, + {{LayoutType::ncsp, dataPrecision, false, GATHER_DATA}}, + unknown); + } } void Gather::createPrimitive() { + if (isInPlace()) { + return; + } #if defined(OPENVINO_ARCH_X86_64) uint64_t idxElPerVec = 1; if (!isDynamicNode()) { @@ -274,24 +298,27 @@ void Gather::createPrimitive() { } bool Gather::needPrepareParams() const { + if (isInPlace()) { + return false; + } bool result = inputShapesModified(); if (!isAxisInputConst) - result = result || axis != (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetPtr()))[0]; + result = result || axis != (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->getData()))[0]; return result; } void Gather::prepareParams() { - auto& dataMemPtr = getParentEdgeAt(GATHER_DATA)->getMemoryPtr(); + auto dataMemPtr = getParentEdgeAt(GATHER_DATA)->getMemoryPtr(); if (!dataMemPtr || !dataMemPtr->isAllocated()) THROW_ERROR << " has not allocated input data memory."; - auto& idxMemPtr = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr(); + auto idxMemPtr = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr(); if (!idxMemPtr || !idxMemPtr->isAllocated()) THROW_ERROR << " has not allocated input indices memory."; if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << " has unidentified preferable primitive descriptor."; if (!isAxisInputConst) { - axis = (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetPtr()))[0]; + axis = (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->getData()))[0]; if (axis < 0) axis 
+= dataSrcRank; if (axis < 0 || axis >= dataSrcRank || batchDims > axis) @@ -338,9 +365,9 @@ void Gather::prepareParams() { void Gather::execute(dnnl::stream strm) { #if defined(OPENVINO_ARCH_X86_64) if (jitKernel && jitKernel->isSupportedConfiguration(afterAxisSize)) { - const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr(); - const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr(); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->getData(); + const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->getData(); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); const uint64_t dataElPerVec = jitKernel->getDataElPerVec(); @@ -394,9 +421,9 @@ void Gather::execute(dnnl::stream strm) { void Gather::executeDynamicImpl(dnnl::stream strm) { #if defined(OPENVINO_ARCH_X86_64) if (jitKernel && jitKernel->isSupportedConfiguration(afterAxisSize)) { - const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr(); - const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr(); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->getData(); + const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->getData(); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); const uint64_t dataElPerVec = jitKernel->getDataElPerVec(); @@ -521,9 +548,9 @@ void Gather::initShortParams(threadExecParams& p, const uint64_t start) { } void Gather::execReference() { - const int32_t* srcIndices = reinterpret_cast(getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr()); - const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr()); - uint8_t* dstData = 
reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const int32_t* srcIndices = reinterpret_cast(getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->getData()); + const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->getData()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); const size_t dstAfterBatchSize = betweenBatchAndAxisSize * specIdxAndAfterAxSizeB; parallel_for2d(beforeBatchSize, specIndicesSize, [&](const size_t b, const size_t j) { @@ -556,6 +583,40 @@ bool Gather::created() const { return getType() == Type::Gather; } +bool Gather::isExecutable() const { + return !isInPlace() && Node::isExecutable(); +} + +void Gather::resolveInPlaceEdges(Edge::LOOK look) { + if (!(look & Edge::LOOK_UP) || !isInPlace()) { + Node::resolveInPlaceEdges(look); + return; + } + + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + constexpr size_t outputPort = 0; + + auto& config = selected_pd->getConfig(); + size_t inplaceInpIndx = selected_pd->getConfig().outConfs[outputPort].inPlace(); + auto baseDim = inputShapes.front().getDims()[axis]; + IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Gather node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; + auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); + auto index = constIndices.at(0); + ptrdiff_t offset = index < 0 ? 
baseDim + index : index; + const auto& childEdges = getChildEdgesAtPort(outputPort); + for (auto& childEdge : childEdges) { + IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected edge status in node: " << + getName() << " with type " << getTypeStr(); + + auto memMngr = std::make_shared(baseMemMngr, baseDim, offset); + auto newMem = std::make_shared(getEngine(), config.outConfs[outputPort].getMemDesc(), memMngr); + + childEdge->reuse(newMem); + } +} + } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather.h b/src/plugins/intel_cpu/src/nodes/gather.h index d89c94f437f7c4..f03a08832a66f5 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.h +++ b/src/plugins/intel_cpu/src/nodes/gather.h @@ -24,6 +24,8 @@ class Gather : public Node { void createPrimitive() override; void execute(dnnl::stream strm) override; bool created() const override; + bool isExecutable() const override; + void resolveInPlaceEdges(Edge::LOOK look) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; @@ -79,6 +81,7 @@ class Gather : public Node { uint64_t totalWork = 0lu; std::vector execParamsPerThread; + std::vector constIndices; static constexpr size_t GATHER_DATA = 0; static constexpr size_t GATHER_INDICES = 1; diff --git a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp index 143bf5352386ef..14128bac6be73f 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp @@ -103,11 +103,11 @@ void GatherElements::executeDynamicImpl(dnnl::stream strm) { template void GatherElements::directExecution() { - const auto *srcData = reinterpret_cast(getParentEdgeAt(dataIndex_)->getMemoryPtr()->GetPtr()); - const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->GetPtr()); - auto *dstData = 
reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(dataIndex_)->getMemoryPtr()->getData()); + const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->getData()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); - const int outSize = getChildEdgesAtPort(0)[0]->getMemory().GetShape().getElementsCount(); + const int outSize = getChildEdgesAtPort(0)[0]->getMemory().getShape().getElementsCount(); auto threadBody = [&](const int ithr, const int nthr) { int start(0lu), end(0lu); splitter(outSize, nthr, ithr, start, end); diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp index 5310bb3030870a..c029869faec4bb 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp @@ -84,9 +84,9 @@ void GatherND::initSupportedPrimitiveDescriptors() { } void GatherND::prepareParams() { - auto& srcMemPtr = getParentEdgeAt(GATHERND_DATA)->getMemoryPtr(); - auto& idxMemPtr = getParentEdgeAt(GATHERND_INDEXES)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(GATHERND_DATA)->getMemoryPtr(); + auto idxMemPtr = getParentEdgeAt(GATHERND_INDEXES)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) THROW_ERROR << " has not allocated input memory of 'data'."; if (!idxMemPtr || !idxMemPtr->isAllocated()) @@ -97,8 +97,8 @@ void GatherND::prepareParams() { THROW_ERROR << " has unidentified preferable primitive descriptor."; attrs.srcDims = srcMemPtr->getStaticDims(); - attrs.srcStrides = srcMemPtr->GetDescWithType()->getStrides(); - attrs.dstElementCount = dstMemPtr->GetShape().getElementsCount(); + attrs.srcStrides = srcMemPtr->getDescWithType()->getStrides(); + attrs.dstElementCount = dstMemPtr->getShape().getElementsCount(); attrs.sliceRank = 
idxMemPtr->getStaticDims().back(); execPtr = std::make_shared(attrs); } @@ -136,7 +136,7 @@ void GatherND::execute(dnnl::stream strm) { getChildEdgeAt(0)->getMemoryPtr()); } -void GatherND::GatherNDExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr) { +void GatherND::GatherNDExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) { if (dataLength > 1) { gatherBlocks(srcMemPtr, idxMemPtr, dstMemPtr); return; @@ -149,10 +149,10 @@ void GatherND::GatherNDExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPt OV_CASE(sizeof(PrecisionTrait::value_type), PrecisionTrait::value_type)); } -void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr) { - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - const int32_t* indices = reinterpret_cast(idxMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); +void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) { + const uint8_t* srcData = reinterpret_cast(srcMemPtr->getData()); + const int32_t* indices = reinterpret_cast(idxMemPtr->getData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->getData()); parallel_nt(0, [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); @@ -186,10 +186,10 @@ void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const } template -void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr) { - const dataType* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - const int32_t* indices = reinterpret_cast(idxMemPtr->GetPtr()); - dataType* dstData = reinterpret_cast(dstMemPtr->GetPtr()); +void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) { + const dataType* 
srcData = reinterpret_cast(srcMemPtr->getData()); + const int32_t* indices = reinterpret_cast(idxMemPtr->getData()); + dataType* dstData = reinterpret_cast(dstMemPtr->getData()); parallel_nt(0, [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.h b/src/plugins/intel_cpu/src/nodes/gather_nd.h index d74aed1503fdc4..0fec5e23337354 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.h +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.h @@ -43,12 +43,12 @@ class GatherND : public Node { struct GatherNDExecutor { GatherNDExecutor(const GatherNDAttributes& attrs); ~GatherNDExecutor() = default; - void exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr); + void exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr); private: template - void gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr); - void gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr); + void gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr); + void gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr); size_t batchSize = 1lu; size_t cycles = 1lu; diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp index e7805dfe779d16..b1267436508a36 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp @@ -140,12 +140,12 @@ GatherTree::GatherTreeExecutor::GatherTreeExecutor(const VectorDims& stepIdxDims template void GatherTree::GatherTreeExecutor::exec(const MemoryPtr& stepIdxMemPtr, const MemoryPtr& parentIdxMemPtr, - const MemoryPtr& maxSeqLenMemPtr, const MemoryPtr& endTokenMemPtr, MemoryPtr& dstMemPtr) { - const auto *stepIdx = reinterpret_cast(stepIdxMemPtr->GetPtr()); - const auto 
*parentIdx = reinterpret_cast(parentIdxMemPtr->GetPtr()); - const auto *maxSeqLen = reinterpret_cast(maxSeqLenMemPtr->GetPtr()); - const auto endToken = (reinterpret_cast(endTokenMemPtr->GetPtr()))[0]; - auto *finalIdx = reinterpret_cast(dstMemPtr->GetPtr()); + const MemoryPtr& maxSeqLenMemPtr, const MemoryPtr& endTokenMemPtr, const MemoryPtr& dstMemPtr) { + const auto *stepIdx = reinterpret_cast(stepIdxMemPtr->getData()); + const auto *parentIdx = reinterpret_cast(parentIdxMemPtr->getData()); + const auto *maxSeqLen = reinterpret_cast(maxSeqLenMemPtr->getData()); + const auto endToken = (reinterpret_cast(endTokenMemPtr->getData()))[0]; + auto *finalIdx = reinterpret_cast(dstMemPtr->getData()); bool incorrectResult = false; parallel_for2d(batchSize, beamWidth, [&](size_t batch, size_t beam) { diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.h b/src/plugins/intel_cpu/src/nodes/gather_tree.h index e0f8bc38997928..d67dc088f9242e 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.h +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.h @@ -38,7 +38,7 @@ class GatherTree : public Node { const MemoryPtr& parentIdxMemPtr, const MemoryPtr& maxSeqLenMemPtr, const MemoryPtr& endTokenMemPtr, - MemoryPtr& dstMemPtr); + const MemoryPtr& dstMemPtr); private: const int32_t maxTime; diff --git a/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp b/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp index 9ec70581699304..9e9e5bf2ffee65 100644 --- a/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp +++ b/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp @@ -362,10 +362,10 @@ void GenerateProposals::execute(dnnl::stream strm) { } // Prepare memory - const float *p_deltas_item = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); - const float *p_scores_item = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->GetPtr()); - const float *p_anchors_item = 
reinterpret_cast(getParentEdgeAt(INPUT_ANCHORS)->getMemoryPtr()->GetPtr()); - const float *p_img_info_cpu = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->GetPtr()); + const float *p_deltas_item = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->getData()); + const float *p_scores_item = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->getData()); + const float *p_anchors_item = reinterpret_cast(getParentEdgeAt(INPUT_ANCHORS)->getMemoryPtr()->getData()); + const float *p_img_info_cpu = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->getData()); const int anchors_num = scoreDims[1]; @@ -453,12 +453,12 @@ void GenerateProposals::execute(dnnl::stream strm) { } // copy to out memory redefineOutputMemory({VectorDims{total_num_rois, 4}, VectorDims{total_num_rois}, VectorDims{batch_size}}); - float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); - float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); - uint8_t* p_roi_num_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_NUM)[0]->getMemoryPtr()->GetPtr()); + float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->getData()); + float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->getData()); + uint8_t* p_roi_num_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_NUM)[0]->getMemoryPtr()->getData()); memcpy(p_roi_item, &roi_item[0], roi_item.size() * sizeof(float)); memcpy(p_roi_score_item, &score_item[0], score_item.size() * sizeof(float)); - memcpy(p_roi_num_item, &roi_num[0], getChildEdgesAtPort(OUTPUT_ROI_NUM)[0]->getMemoryPtr()->GetSize()); + memcpy(p_roi_num_item, &roi_num[0], getChildEdgesAtPort(OUTPUT_ROI_NUM)[0]->getMemoryPtr()->getSize()); } catch (const std::exception &e) { std::string errorMsg = e.what(); IE_THROW() << errorMsg; diff --git 
a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index 638512653e3b0e..798b04078352bf 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -179,13 +179,13 @@ void GridSample::createPrimitive() { } void GridSample::prepareParams() { - auto& dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr(); + auto dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr(); if (!dataMemPtr || !dataMemPtr->isAllocated()) THROW_ERROR << " has not allocated input data memory."; - auto& gridMemPtr = getParentEdgeAt(IN_GRID)->getMemoryPtr(); + auto gridMemPtr = getParentEdgeAt(IN_GRID)->getMemoryPtr(); if (!gridMemPtr || !gridMemPtr->isAllocated()) THROW_ERROR << " has not allocated input grid memory."; - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) THROW_ERROR << " has not allocated output memory."; if (getSelectedPrimitiveDescriptor() == nullptr) @@ -262,9 +262,9 @@ void GridSample::prepareParams() { } void GridSample::execute(dnnl::stream strm) { - const void* srcData = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr(); - const uint8_t* gridData = reinterpret_cast(getParentEdgeAt(IN_GRID)->getMemoryPtr()->GetPtr()); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const void* srcData = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getData(); + const uint8_t* gridData = reinterpret_cast(getParentEdgeAt(IN_GRID)->getMemoryPtr()->getData()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); auto threadBody = [&](const int ithr, const int nthr) { const auto& p = execParamsPerThread[ithr]; diff --git a/src/plugins/intel_cpu/src/nodes/grn.cpp b/src/plugins/intel_cpu/src/nodes/grn.cpp index 44fe953d601027..7d873de3316923 100644 --- a/src/plugins/intel_cpu/src/nodes/grn.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/grn.cpp @@ -94,8 +94,8 @@ void GRN::executeDynamicImpl(dnnl::stream strm) { } void GRN::execute(dnnl::stream strm) { - const float* src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const float* src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); parallel_for3d(N, H, W, [&](int b, int h, int w) { double variance = 0; diff --git a/src/plugins/intel_cpu/src/nodes/if.cpp b/src/plugins/intel_cpu/src/nodes/if.cpp index e4b28cf4b08326..69b3c7baab710c 100644 --- a/src/plugins/intel_cpu/src/nodes/if.cpp +++ b/src/plugins/intel_cpu/src/nodes/if.cpp @@ -21,7 +21,7 @@ If::PortMapHelper::PortMapHelper(const MemoryPtr &from, const std::dequegetDesc().isDefined()) - size = srcMemPtr->GetSize(); + size = srcMemPtr->getSize(); } void If::PortMapHelper::execute(dnnl::stream& strm) { @@ -29,7 +29,7 @@ void If::PortMapHelper::execute(dnnl::stream& strm) { // after subgraph inference we should redefine out memory of 'If' redefineTo(); - cpu_memcpy(dstMemPtrs.front()->GetPtr(), srcMemPtr->GetPtr(), size); + cpu_memcpy(dstMemPtrs.front()->getData(), srcMemPtr->getData(), size); } void If::PortMapHelper::redefineTo() { @@ -41,7 +41,7 @@ void If::PortMapHelper::redefineTo() { dstMemPtrs[j]->redefineDesc(memDesc); } - size = srcMemPtr->GetSize(); + size = srcMemPtr->getSize(); } } @@ -190,7 +190,7 @@ void If::prepareBeforeMappers(const bool isThen, const dnnl::engine& eng) { auto &inputMems = isThen ? inputMemThen : inputMemElse; auto &beforeMappers = isThen ? 
beforeThenMappers : beforeElseMappers; for (auto& map_rule : inputPortMap) { - auto &fromMem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); + auto fromMem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &toMems = inputMems[map_rule.to]; beforeMappers.emplace_back(std::make_shared(fromMem, toMems, eng)); @@ -217,7 +217,7 @@ std::deque If::getToMemories(const Node* node, const size_t port) con } void If::execute(dnnl::stream strm) { - const bool condition = static_cast((reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()))[0]); + const bool condition = static_cast((reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()))[0]); auto& beforeMappers = condition ? beforeThenMappers : beforeElseMappers; auto& afterMappers = condition ? afterThenMappers : afterElseMappers; diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 86f866ca49f36e..e153a55b011ace 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -266,22 +266,21 @@ void Input::cloneBlobIfRequired() { } auto cloneBlob = [&, this] () { - Memory memory{ getEngine() }; + MemoryPtr memory; // CVS-74980 // oneDNN always allocate 1byte for element type with bitWidth < 8 (u4,u1...) 
// but ngraph Constant uses actual bitWidth for data storage allocation // in that case we make a copy to avoid overflow if (constOp->get_byte_size() >= memDesc.getCurrentMemSize()) { - memory.Create(memDesc, constOp->get_data_ptr()); + memory = std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()); } else { - memory.Create(memDesc); - memcpy(memory.GetPtr(), constOp->get_data_ptr(), constOp->get_byte_size()); + memory = std::make_shared(getEngine(), memDesc); + memcpy(memory->getData(), constOp->get_data_ptr(), constOp->get_byte_size()); } - MemoryPtr ptr = MemoryPtr(new Memory(getEngine())); - ptr->Create(memDesc); - ptr->SetData(memory, needFlushDenormalsToZero); + MemoryPtr ptr = std::make_shared(getEngine(), memDesc); + ptr->load(*memory.get(), needFlushDenormalsToZero); return ptr; }; @@ -366,15 +365,13 @@ void Input::cloneBlobIfRequired() { auto weightCache = context->getWeightsCache(); if (weightCache) { MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob); - memoryPtr = std::const_pointer_cast(ptr); + memoryPtr = std::const_pointer_cast(ptr); // IRs already have all subnormals flushed to zero, but in // read_model scenario with directly loaded original model still can have subnormals } else if (isBlobAligned() && (!needFlushDenormalsToZero || !hasSubnormals()) && !isWA()) { - auto ptr = new Memory(getEngine()); - ptr->Create(memDesc, constOp->get_data_ptr()); - memoryPtr = MemoryCPtr(ptr); + memoryPtr = std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()); } else { - memoryPtr = std::const_pointer_cast(cloneBlob()); + memoryPtr = std::const_pointer_cast(cloneBlob()); } } @@ -434,13 +431,13 @@ void Input::initSupportedPrimitiveDescriptors() { void Input::createPrimitive() { for (size_t i = 0; i < getChildEdges().size(); i++) { - auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't 
allocate for node " << getName() << " to node " << getChildEdgeAt(i)->getChild()->getName() << "."; } for (size_t i = 0; i < getParentEdges().size(); i++) { - auto &srcMemPtr = getParentEdgeAt(i)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(i)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate for node " << getName() << " from node " << getParentEdgeAt(i)->getParent()->getName() << "."; diff --git a/src/plugins/intel_cpu/src/nodes/interaction.cpp b/src/plugins/intel_cpu/src/nodes/interaction.cpp index 47f4afdeb47cb9..df92b2bb162956 100644 --- a/src/plugins/intel_cpu/src/nodes/interaction.cpp +++ b/src/plugins/intel_cpu/src/nodes/interaction.cpp @@ -238,21 +238,21 @@ static inline void flat_triangle(const uint8_t* in, uint8_t* out, size_t size, s void Interaction::execRef(dnnl::stream strm) { using namespace dnnl; - uint8_t* outFeaturesPtr = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + uint8_t* outFeaturesPtr = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); std::vector inputPtrs(inputSizes); for (uint32_t n = 0; n < inputSizes; n++) { - auto inPtr = reinterpret_cast(getParentEdgeAt(n)->getMemoryPtr()->GetPtr()); + auto inPtr = reinterpret_cast(getParentEdgeAt(n)->getMemoryPtr()->getData()); inputPtrs[n] = inPtr; } - std::unordered_map mem_ags{{DNNL_ARG_SRC, inputMemPtr->GetPrimitive()}, - {DNNL_ARG_WEIGHTS, inputMemPtr->GetPrimitive()}, - {DNNL_ARG_DST, outputMemPtr->GetPrimitive()}}; + std::unordered_map mem_ags{{DNNL_ARG_SRC, inputMemPtr->getPrimitive()}, + {DNNL_ARG_WEIGHTS, inputMemPtr->getPrimitive()}, + {DNNL_ARG_DST, outputMemPtr->getPrimitive()}}; float* scales = fqScales.empty() ? 
nullptr : fqScales.data(); for (int64_t start = 0; start < static_cast(batchSize); start++) { - cat(reinterpret_cast(inputMemPtr->GetPtr()), inputPtrs, featureSizes, start, dataPrecision.size()); + cat(reinterpret_cast(inputMemPtr->getData()), inputPtrs, featureSizes, start, dataPrecision.size()); prim.execute(strm, mem_ags); - flat_triangle(reinterpret_cast(outputMemPtr->GetPtr()), - reinterpret_cast(flatMemPtr->GetPtr()), + flat_triangle(reinterpret_cast(outputMemPtr->getData()), + reinterpret_cast(flatMemPtr->getData()), inputSizes, dataPrecision.size()); // in1 dense feature @@ -266,7 +266,7 @@ void Interaction::execRef(dnnl::stream strm) { } if (moveInteractKernel) { jit_move_scale_call_args interArgs; - interArgs.p_in = flatMemPtr->GetPtr(); + interArgs.p_in = flatMemPtr->getData(); interArgs.p_out = outFeaturesPtr + (start * outputFeaturesLen + featureSize) * outputDataType.size(); interArgs.p_scales = scales; (*moveInteractKernel)(&interArgs); @@ -306,8 +306,7 @@ void Interaction::prepareParams() { featureSizes.assign(inputSizes, featureSize); auto initMemoryPtr = [&](const InferenceEngine::Precision &prc, const intel_cpu::Shape& shape, MemoryPtr& ptr) { - ptr = std::make_shared(getEngine()); - ptr->Create(intel_cpu::DnnlBlockedMemoryDesc(prc, shape)); + ptr = std::make_shared(getEngine(), intel_cpu::DnnlBlockedMemoryDesc(prc, shape)); }; initMemoryPtr(dataPrecision, intel_cpu::Shape{inputSizes, featureSize}, inputMemPtr); initMemoryPtr(dataPrecision, intel_cpu::Shape{inputShapes.size(), inputShapes.size()}, outputMemPtr); diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index 1ffa599fc7c8bb..a71f0df5b5f9b1 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp @@ -2182,7 +2182,7 @@ bool Interpolate::needShapeInfer() const { if (lastScales.empty()) { return true; } - const float *scales = 
reinterpret_cast(getParentEdgesAtPort(get_scale_id())[0]->getMemory().GetPtr()); + const float *scales = reinterpret_cast(getParentEdgesAtPort(get_scale_id())[0]->getMemory().getData()); for (size_t i = 0; i < lastScales.size(); i++) { if (lastScales[i] != scales[i]) { return true; @@ -2192,7 +2192,7 @@ bool Interpolate::needShapeInfer() const { if (lastSizes.empty()) { return true; } - const int32_t *sizes = reinterpret_cast(getParentEdgesAtPort(TARGET_SHAPE_ID)[0]->getMemory().GetPtr()); + const int32_t *sizes = reinterpret_cast(getParentEdgesAtPort(TARGET_SHAPE_ID)[0]->getMemory().getData()); for (size_t i = 0; i < lastSizes.size(); i++) { if (sizes[i] != lastSizes[i]) { return true; @@ -2208,10 +2208,10 @@ void Interpolate::executeDynamicImpl(dnnl::stream strm) { const size_t port = shapeCalcMode == InterpolateShapeCalcMode::sizes ? TARGET_SHAPE_ID : get_scale_id(); const auto &memory = getParentEdgesAtPort(port)[0]->getMemory(); if (shapeCalcMode == InterpolateShapeCalcMode::scales) { - const float *scales = reinterpret_cast(memory.GetPtr()); + const float *scales = reinterpret_cast(memory.getData()); lastScales.assign(scales, scales + memory.getDesc().getShape().getElementsCount()); } else { - const int32_t *sizes = reinterpret_cast(memory.GetPtr()); + const int32_t *sizes = reinterpret_cast(memory.getData()); lastSizes.assign(sizes, sizes + memory.getDesc().getShape().getElementsCount()); } } @@ -2238,26 +2238,26 @@ void Interpolate::prepareParams() { IE_THROW() << "Can't prepare params for Interpolate node with name: " << getName() << ", because input/output dims aren't defined"; } - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate destination memory"; - auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); if (!srcMemPtr || 
!srcMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate input memory"; if (shapeCalcMode == InterpolateShapeCalcMode::sizes) { - auto& tsMemPtr = getParentEdgeAt(TARGET_SHAPE_ID)->getMemoryPtr(); + auto tsMemPtr = getParentEdgeAt(TARGET_SHAPE_ID)->getMemoryPtr(); if (!tsMemPtr || !tsMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate target shape memory"; } else { - auto& scaleMemPtr = getParentEdgeAt(get_scale_id())->getMemoryPtr(); + auto scaleMemPtr = getParentEdgeAt(get_scale_id())->getMemoryPtr(); if (!scaleMemPtr || !scaleMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate scales memory"; } if (isAxesSpecified) { - auto &axesMemPtr = getParentEdgeAt(get_axis_id())->getMemoryPtr(); + auto axesMemPtr = getParentEdgeAt(get_axis_id())->getMemoryPtr(); if (!axesMemPtr || !axesMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate axes memory"; } @@ -2288,7 +2288,7 @@ void Interpolate::prepareParams() { if (shapeCalcMode == InterpolateShapeCalcMode::scales) { if (!isScaleConstant) { const auto& scalesMem = getParentEdgesAtPort(get_scale_id())[0]->getMemory(); - const float* scalesData = reinterpret_cast(scalesMem.GetPtr()); + const float* scalesData = reinterpret_cast(scalesMem.getData()); scales.assign(scalesData, scalesData + scalesMem.getStaticDims()[0]); } } @@ -2353,8 +2353,8 @@ void Interpolate::prepareParams() { } void Interpolate::createPrimitive() { - auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate input memory"; if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -2443,12 +2443,12 @@ std::vector Interpolate::getScales(const VectorDims &srcDimPad, const Vec } void Interpolate::execute(dnnl::stream strm) 
{ - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); if (execPtr) { - uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); - const uint8_t *src_data_origin = reinterpret_cast(srcMemPtr->GetData()); + uint8_t *dst_data = reinterpret_cast(dstMemPtr->getData()); + const uint8_t *src_data_origin = reinterpret_cast(srcMemPtr->getData()); const uint8_t *src_data = nullptr; std::vector srcPadded; if (hasPad) { diff --git a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp index d69eac583eaae8..95bd8d576fccf1 100644 --- a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp @@ -87,8 +87,8 @@ void LogSoftmax::executeDynamicImpl(dnnl::stream strm) { } void LogSoftmax::execute(dnnl::stream strm) { - const float *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const float *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); if (isLastDim) { parallel_for(axisStep, [&](size_t i) { diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp b/src/plugins/intel_cpu/src/nodes/lrn.cpp index dfe704dfff0442..2dd2ebdd515871 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.cpp +++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp @@ -163,8 +163,8 @@ std::shared_ptr Lrn::getSrcMemDesc(const dnnl::primitive_desc &prim_ } void Lrn::prepareParams() { - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!srcMemPtr || 
!srcMemPtr->isAllocated()) IE_THROW() << errorPrefix << " input memory did not allocate"; if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -174,7 +174,7 @@ void Lrn::prepareParams() { if (selected_pd == nullptr) IE_THROW() << errorPrefix << "preferable primitive descriptor did not set"; - auto inpDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto inpDesc = getParentEdgeAt(0)->getMemory().getDescWithType(); dnnl::primitive_attr attr; attr.set_scratchpad_mode(dnnl::scratchpad_mode::user); @@ -212,9 +212,9 @@ void Lrn::prepareParams() { auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); - primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive(); - primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->getPrimitive(); + primArgs[DNNL_ARG_SRC] = srcMemPtr->getPrimitive(); + primArgs[DNNL_ARG_DST] = dstMemPtr->getPrimitive(); #ifdef CPU_DEBUG_CAPS if (result.second == CacheEntryBase::LookUpStatus::Miss) { auto pd = execPtr->getPrimitiveDesc(); diff --git a/src/plugins/intel_cpu/src/nodes/mathematics.cpp b/src/plugins/intel_cpu/src/nodes/mathematics.cpp index bb639ac31b6cbd..926e09fd9770d6 100644 --- a/src/plugins/intel_cpu/src/nodes/mathematics.cpp +++ b/src/plugins/intel_cpu/src/nodes/mathematics.cpp @@ -71,9 +71,9 @@ void Math::executeDynamicImpl(dnnl::stream strm) { } void Math::execute(dnnl::stream strm) { - size_t dataSize = getChildEdgesAtPort(0)[0]->getMemory().GetShape().getElementsCount(); - const float *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + size_t dataSize = getChildEdgesAtPort(0)[0]->getMemory().getShape().getElementsCount(); + const float *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); switch 
(getAlgorithm()) { case Algorithm::MathAbs: diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 430fe562a7489b..e987c42d1b040b 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -575,9 +575,9 @@ InferenceEngine::Precision MatMul::getRuntimePrecision() const { } void MatMul::prepareParams() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& src0MemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto& src1MemPtr = getParentEdgeAt(1)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto src0MemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto src1MemPtr = getParentEdgeAt(1)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate destination memory"; if (!src0MemPtr || !src0MemPtr->isAllocated() || !src1MemPtr || !src1MemPtr->isAllocated()) @@ -611,14 +611,14 @@ void MatMul::prepareParams() { src1TransposedDesc = inDataDesc[1]; } - auto dstDnnlDesc = dstMemPtr->GetDescWithType(); + auto dstDnnlDesc = dstMemPtr->getDescWithType(); DnnlMemoryDescPtr dnnlBiasMemDesc = nullptr; if (withBiases) { - auto& biasMemory = getParentEdgeAt(2)->getMemoryPtr(); + auto biasMemory = getParentEdgeAt(2)->getMemoryPtr(); if (!biasMemory || !biasMemory->isAllocated()) IE_THROW() << errorPrefix << " did not allocate bias memory"; - dnnlBiasMemDesc = biasMemory->GetDescWithType(); + dnnlBiasMemDesc = biasMemory->getDescWithType(); } MatMulKey key = {src0TransposedDesc, src1TransposedDesc, dnnlBiasMemDesc, @@ -669,12 +669,12 @@ void MatMul::prepareParams() { auto schratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - primArgs[DNNL_ARG_SCRATCHPAD] = schratchpadMem->GetPrimitive(); - primArgs[DNNL_ARG_SRC_0] = src0MemPtr->GetPrimitive(); - primArgs[DNNL_ARG_WEIGHTS_0] = src1MemPtr->GetPrimitive(); - primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = 
schratchpadMem->getPrimitive(); + primArgs[DNNL_ARG_SRC_0] = src0MemPtr->getPrimitive(); + primArgs[DNNL_ARG_WEIGHTS_0] = src1MemPtr->getPrimitive(); + primArgs[DNNL_ARG_DST] = dstMemPtr->getPrimitive(); if (withBiases) - primArgs[DNNL_ARG_BIAS] = getParentEdgeAt(2)->getMemoryPtr()->GetPrimitive(); + primArgs[DNNL_ARG_BIAS] = getParentEdgeAt(2)->getMemoryPtr()->getPrimitive(); appendPostOpArgs(*attr, primArgs, postOpsArgs); #ifdef CPU_DEBUG_CAPS diff --git a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp index fd7f2ad6d08afd..1e910d04e76834 100644 --- a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp @@ -302,8 +302,8 @@ void MatrixNms::executeDynamicImpl(dnnl::stream strm) { } void MatrixNms::execute(dnnl::stream strm) { - const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); - const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->getData()); + const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->getData()); InferenceEngine::parallel_for2d(m_numBatches, m_numClasses, [&](size_t batchIdx, size_t classIdx) { if (classIdx == static_cast(m_backgroundClass)) { @@ -380,9 +380,9 @@ void MatrixNms::execute(dnnl::stream strm) { size_t totalBox = std::accumulate(m_numPerBatch.begin(), m_numPerBatch.end(), size_t(0)); redefineOutputMemory({{totalBox, 6}, {totalBox, 1}, {m_numBatches}}); } - float* selectedOutputs = reinterpret_cast(selectedOutputsMemPtr->GetPtr()); - int* selectedIndices = reinterpret_cast(selectedIndicesMemPtr->GetPtr()); - int* validOutputs = reinterpret_cast(validOutputsMemPtr->GetPtr()); + float* selectedOutputs = reinterpret_cast(selectedOutputsMemPtr->getData()); + int* selectedIndices = reinterpret_cast(selectedIndicesMemPtr->getData()); + int* validOutputs = 
reinterpret_cast(validOutputsMemPtr->getData()); for (size_t i = 0; i < m_numPerBatch.size(); i++) validOutputs[i] = static_cast(m_numPerBatch[i]); diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 20cb3f3b961c4a..8ce17c8d21e4c9 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -106,7 +106,7 @@ bool MemoryInput::isSupportedOperation(const std::shared_ptr } MemoryInput::MemoryInput(const std::shared_ptr& op, const GraphContext::CPtr ctx) - : Input(op, ctx), MemoryNode(op), dataStore(new Memory{ctx->getEngine()}) { + : Input(op, ctx), MemoryNode(op) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -119,11 +119,11 @@ MemoryInput::MemoryInput(const std::shared_ptr& op, const GraphCon void MemoryInput::createPrimitive() { Input::createPrimitive(); - dataStore->Create(getChildEdgeAt(0)->getMemory().getDesc()); + dataStore = std::make_shared(getEngine(), getChildEdgeAt(0)->getMemory().getDesc()); // default memory state is zero filled if (dataStore->getDesc().hasDefinedMaxSize()) - dataStore->FillZero(); + dataStore->nullify(); } /** @@ -133,12 +133,12 @@ void MemoryInput::createPrimitive() { * @param src source memory object */ inline -static void simple_copy(const Memory& dst, const Memory& src) { - auto srcPtr = static_cast(src.GetPtr()); - auto dstPtr = static_cast(dst.GetPtr()); - if (src.GetDataType() == dst.GetDataType()) { - auto srcSizeInByte = src.GetSize(); - auto dstSizeInByte = dst.GetSize(); +static void simple_copy(const IMemory& dst, const IMemory& src) { + auto srcPtr = static_cast(src.getData()); + auto dstPtr = static_cast(dst.getData()); + if (src.getDataType() == dst.getDataType()) { + auto srcSizeInByte = src.getSize(); + auto dstSizeInByte = dst.getSize(); IE_ASSERT(srcSizeInByte == dstSizeInByte) << "MemoryNode objects are not compatible. 
Has different sizes."; @@ -157,16 +157,16 @@ MemoryPtr MemoryInput::getStore() { return dataStore; } -void MemoryInput::storeState(const Memory &new_state) { +void MemoryInput::storeState(const IMemory &new_state) { // TODO: Should be next one call: - // dataStore.SetData(new_state, false); + // dataStore.load(new_state, false); // But because of performance reason we use simple manual copy simple_copy(*dataStore, new_state); } void MemoryInput::execute(dnnl::stream strm) { // TODO: Should be simple call of: - // dst_mem.SetData(dataStore, false); + // dst_mem.load(dataStore, false); // But because of performance reason we use simple manual copy simple_copy(getChildEdgeAt(0)->getMemory(), *dataStore); } diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index 4414cb0dde68fa..2035f3e8651064 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -101,7 +101,7 @@ class MemoryInput : public Input, public MemoryNode { void createPrimitive() override; void setInputNode(Node* node) override {} - void storeState(const Memory& mem); + void storeState(const IMemory& mem); MemoryPtr getStore(); private: MemoryPtr dataStore; diff --git a/src/plugins/intel_cpu/src/nodes/mha.cpp b/src/plugins/intel_cpu/src/nodes/mha.cpp index c3d80501929c2a..d4405be5d468d0 100644 --- a/src/plugins/intel_cpu/src/nodes/mha.cpp +++ b/src/plugins/intel_cpu/src/nodes/mha.cpp @@ -902,11 +902,11 @@ void MHA::prepareParams() { return new_vec; }; - const auto memDescTranspose0In0 = getParentEdgeAt(0)->getMemoryPtr()->GetDescWithType(); - const auto memDescTranspose1In0 = getParentEdgeAt(1)->getMemoryPtr()->GetDescWithType(); - const auto memDescAddIn1 = getParentEdgeAt(2)->getMemoryPtr()->GetDescWithType(); - const auto memDescTranspose2In0 = getParentEdgeAt(3)->getMemoryPtr()->GetDescWithType(); - const auto memDescOut = getChildEdgeAt(0)->getMemoryPtr()->GetDescWithType(); + const auto 
memDescTranspose0In0 = getParentEdgeAt(0)->getMemoryPtr()->getDescWithType(); + const auto memDescTranspose1In0 = getParentEdgeAt(1)->getMemoryPtr()->getDescWithType(); + const auto memDescAddIn1 = getParentEdgeAt(2)->getMemoryPtr()->getDescWithType(); + const auto memDescTranspose2In0 = getParentEdgeAt(3)->getMemoryPtr()->getDescWithType(); + const auto memDescOut = getChildEdgeAt(0)->getMemoryPtr()->getDescWithType(); dimsTranspose0In0 = memDescTranspose0In0->getBlockDims(); dimsTranspose1In0 = memDescTranspose1In0->getBlockDims(); @@ -1215,11 +1215,11 @@ void MHA::callBrgemm(brgemmCtx& ctx, std::unique_ptr& brgKernel template void MHA::mhaImpl() { - const uint8_t* pTranspose0In0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const uint8_t* pTranspose1In0 = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - const float* pAddIn1 = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - const uint8_t* pTranspose2In0 = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr()); - uint8_t* pout = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* pTranspose0In0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + const uint8_t* pTranspose1In0 = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); + const float* pAddIn1 = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->getData()); + const uint8_t* pTranspose2In0 = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->getData()); + uint8_t* pout = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); auto outPrcSize = outputPrecision.size(); diff --git a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp index c0c6429d4fa44a..87803b746fcf10 100644 --- a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp @@ -203,8 +203,8 @@ void MultiClassNms::executeDynamicImpl(dnnl::stream strm) { } void 
MultiClassNms::execute(dnnl::stream strm) { - const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); - const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->getData()); + const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->getData()); auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims(); auto dims_scores = getParentEdgeAt(NMS_SCORES)->getMemory().getStaticDims(); @@ -219,14 +219,14 @@ void MultiClassNms::execute(dnnl::stream strm) { auto selectedIndicesMemPtr = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr(); auto validOutputsMemPtr = getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr(); - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType()->getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType()->getStrides(); + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().getDescWithType()->getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().getDescWithType()->getStrides(); int* roisnum = nullptr; VectorDims roisnumStrides; if (has_roinum) { - roisnum = reinterpret_cast(getParentEdgeAt(NMS_ROISNUM)->getMemoryPtr()->GetPtr()); - roisnumStrides = getParentEdgeAt(NMS_ROISNUM)->getMemory().GetDescWithType()->getStrides(); + roisnum = reinterpret_cast(getParentEdgeAt(NMS_ROISNUM)->getMemoryPtr()->getData()); + roisnumStrides = getParentEdgeAt(NMS_ROISNUM)->getMemory().getDescWithType()->getStrides(); } if ((m_nmsEta >= 0) && (m_nmsEta < 1)) { @@ -328,9 +328,9 @@ void MultiClassNms::execute(dnnl::stream strm) { size_t totalBox = std::accumulate(m_selected_num.begin(), m_selected_num.end(), size_t(0)); redefineOutputMemory({{totalBox, 6}, {totalBox, 1}, {m_numBatches}}); } - int* selected_indices = reinterpret_cast(selectedIndicesMemPtr->GetPtr()); - 
float* selected_outputs = reinterpret_cast(selectedOutputsMemPtr->GetPtr()); - int* selected_num = reinterpret_cast(validOutputsMemPtr->GetPtr()); + int* selected_indices = reinterpret_cast(selectedIndicesMemPtr->getData()); + float* selected_outputs = reinterpret_cast(selectedOutputsMemPtr->getData()); + int* selected_num = reinterpret_cast(validOutputsMemPtr->getData()); auto _flattened_index = [](int batch_idx, int box_idx, int num_box) { return batch_idx * num_box + box_idx; diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index b8f25e79ac41a3..298d1351bf36a1 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -1347,8 +1347,8 @@ void MVN::MVNRefExecutor::exec(const uint8_t *src_data, uint8_t *dst_data, const } void MVN::prepareParams() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -1460,12 +1460,12 @@ void MVN::executeDynamicImpl(dnnl::stream strm) { } void MVN::execute(dnnl::stream strm) { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (execPtr) { - uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); - uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); + uint8_t *dst_data = reinterpret_cast(dstMemPtr->getData()); + uint8_t *src_data = reinterpret_cast(srcMemPtr->getData()); execPtr->exec(src_data, dst_data, postOpsDataPtrs.data()); } else if (aclExecPtr) { aclExecPtr->exec({srcMemPtr}, {dstMemPtr}, postOpsDataPtrs.data()); diff --git 
a/src/plugins/intel_cpu/src/nodes/ngram.cpp b/src/plugins/intel_cpu/src/nodes/ngram.cpp index 53192ef396e930..917f3f1ef7066b 100644 --- a/src/plugins/intel_cpu/src/nodes/ngram.cpp +++ b/src/plugins/intel_cpu/src/nodes/ngram.cpp @@ -104,7 +104,7 @@ void Ngram::prepareParams() { idcesShapeSize = std::accumulate(srcIndicesDims.begin(), srcIndicesDims.end(), 1, std::multiplies()); numOutElems = std::accumulate(outDims.begin(), outDims.end(), 1, std::multiplies()); - idcesStride = getParentEdgeAt(1)->getMemoryPtr()->GetDescWithType()->getStrides()[0]; + idcesStride = getParentEdgeAt(1)->getMemoryPtr()->getDescWithType()->getStrides()[0]; numIdces = srcIndicesDims[0]; windowStride = srcDataDims[1]; @@ -115,7 +115,7 @@ void Ngram::prepareParams() { template std::vector Ngram::computeBatchLenghts() { - auto* srcIndices = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + auto* srcIndices = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); std::vector batchLenghts{0}; batchLenghts.reserve(numIdces + 1); @@ -130,8 +130,8 @@ std::vector Ngram::computeBatchLenghts() { } void Ngram::execute(dnnl::stream strm) { - auto* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + auto* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); std::vector batchLenghts; if (idcesPrecision == InferenceEngine::Precision::I32) { diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp index 86a502e4333981..baeeb5ff091547 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp @@ -723,20 +723,20 @@ void NonMaxSuppression::createJitKernel() { void NonMaxSuppression::executeDynamicImpl(dnnl::stream strm) { if 
(hasEmptyInputTensors() || (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS && - reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0] == 0)) { + reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->getData())[0] == 0)) { redefineOutputMemory({{0, 3}, {0, 3}, {1}}); - *reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()) = 0; + *reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->getData()) = 0; return; } execute(strm); } void NonMaxSuppression::execute(dnnl::stream strm) { - const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); - const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->getData()); + const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->getData()); if (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS) { - maxOutputBoxesPerClass = reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0]; + maxOutputBoxesPerClass = reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->getData())[0]; } maxOutputBoxesPerClass = std::min(maxOutputBoxesPerClass, numBoxes); @@ -746,20 +746,20 @@ void NonMaxSuppression::execute(dnnl::stream strm) { } if (inputShapes.size() > NMS_IOUTHRESHOLD) - iouThreshold = reinterpret_cast(getParentEdgeAt(NMS_IOUTHRESHOLD)->getMemoryPtr()->GetPtr())[0]; + iouThreshold = reinterpret_cast(getParentEdgeAt(NMS_IOUTHRESHOLD)->getMemoryPtr()->getData())[0]; if (inputShapes.size() > NMS_SCORETHRESHOLD) - scoreThreshold = reinterpret_cast(getParentEdgeAt(NMS_SCORETHRESHOLD)->getMemoryPtr()->GetPtr())[0]; + scoreThreshold = reinterpret_cast(getParentEdgeAt(NMS_SCORETHRESHOLD)->getMemoryPtr()->getData())[0]; if (inputShapes.size() > NMS_SOFTNMSSIGMA) - softNMSSigma = 
reinterpret_cast(getParentEdgeAt(NMS_SOFTNMSSIGMA)->getMemoryPtr()->GetPtr())[0]; + softNMSSigma = reinterpret_cast(getParentEdgeAt(NMS_SOFTNMSSIGMA)->getMemoryPtr()->getData())[0]; scale = 0.0f; if (softNMSSigma > 0.0) { scale = -0.5f / softNMSSigma; } - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType()->getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType()->getStrides(); + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().getDescWithType()->getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().getDescWithType()->getStrides(); const auto maxNumberOfBoxes = maxOutputBoxesPerClass * numBatches * numClasses; std::vector filtBoxes(maxNumberOfBoxes); @@ -804,10 +804,10 @@ void NonMaxSuppression::execute(dnnl::stream strm) { redefineOutputMemory({newDims, newDims, {1}}); } - int selectedIndicesStride = indicesMemPtr->GetDescWithType()->getStrides()[0]; + int selectedIndicesStride = indicesMemPtr->getDescWithType()->getStrides()[0]; - int *selectedIndicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); - float *selectedScoresPtr = reinterpret_cast(scoresMemPtr->GetPtr()); + int *selectedIndicesPtr = reinterpret_cast(indicesMemPtr->getData()); + float *selectedScoresPtr = reinterpret_cast(scoresMemPtr->getData()); size_t idx = 0lu; for (; idx < validOutputs; idx++) { @@ -827,7 +827,7 @@ void NonMaxSuppression::execute(dnnl::stream strm) { std::fill(selectedScoresPtr, selectedScoresPtr + (maxNumberOfBoxes - idx) * selectedIndicesStride, -1.f); } - int *valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); + int *valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->getData()); *valid_outputs = static_cast(validOutputs); } diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.cpp b/src/plugins/intel_cpu/src/nodes/non_zero.cpp index 1abbedaa259f4b..cbb0b134211359 100644 --- 
a/src/plugins/intel_cpu/src/nodes/non_zero.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_zero.cpp @@ -131,9 +131,9 @@ void NonZero::execute(dnnl::stream strm) { template void NonZero::executeSpecified() { const T zero = 0; - const T *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const T *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - Shape inShape = getParentEdgeAt(0)->getMemory().GetShape(); + Shape inShape = getParentEdgeAt(0)->getMemory().getShape(); size_t inRank = inShape.getRank(); std::vector nonZeroCounts = getNonZeroElementsCount(src, inShape); std::vector destIndices(nonZeroCounts.size()); @@ -148,7 +148,7 @@ void NonZero::executeSpecified() { VectorDims newDims{inRank, totalNonZeroCount}; redefineOutputMemory({newDims}); } - int* dst = reinterpret_cast(dstMemPtr->GetPtr()); + int* dst = reinterpret_cast(dstMemPtr->getData()); if (totalNonZeroCount == 0) return; @@ -365,7 +365,7 @@ void NonZero::executeSpecified() { } default: { size_t inSize = inShape.getElementsCount(); - auto srcStrides = getParentEdgeAt(0)->getMemory().GetDescWithType()->getStrides(); + auto srcStrides = getParentEdgeAt(0)->getMemory().getDescWithType()->getStrides(); parallel_nt(threadsCount, [&](int ithr, int nthr) { size_t& colIndex = destIndices[ithr]; diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index f8025c90d238af..fd25752af59af3 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -874,8 +874,8 @@ void NormalizeL2::setPostOps(dnnl::primitive_attr& kernel_attrs, const VectorDim } void NormalizeL2::createPrimitive() { - auto& dstMemPtr = getChildEdgeAt(DATA)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(DATA)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA)->getMemoryPtr(); if 
(!dstMemPtr || !dstMemPtr->isAllocated()) THROW_ERROR << "can't get destination memory"; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -937,8 +937,8 @@ void NormalizeL2::execute(dnnl::stream strm) { if (!execPtr) THROW_ERROR << "doesn't have a compiled executor."; - const uint8_t *src_ptr = reinterpret_cast(getParentEdgeAt(DATA)->getMemoryPtr()->GetPtr()); - uint8_t *dst_ptr = reinterpret_cast(getChildEdgeAt(DATA)->getMemoryPtr()->GetPtr()); + const uint8_t *src_ptr = reinterpret_cast(getParentEdgeAt(DATA)->getMemoryPtr()->getData()); + uint8_t *dst_ptr = reinterpret_cast(getChildEdgeAt(DATA)->getMemoryPtr()->getData()); execPtr->exec(src_ptr, dst_ptr, postOpsDataPtrs.data()); } diff --git a/src/plugins/intel_cpu/src/nodes/one_hot.cpp b/src/plugins/intel_cpu/src/nodes/one_hot.cpp index a3ee3b715b27df..5eefbf0131324c 100644 --- a/src/plugins/intel_cpu/src/nodes/one_hot.cpp +++ b/src/plugins/intel_cpu/src/nodes/one_hot.cpp @@ -33,7 +33,7 @@ class OneHotShapeInfer : public ShapeInferEmptyPads { Result infer( const std::vector>& input_shapes, const std::unordered_map& data_dependency) override { - auto depth = reinterpret_cast(data_dependency.at(1)->GetPtr())[0]; + auto depth = reinterpret_cast(data_dependency.at(1)->getData())[0]; auto result = input_shapes.front().get(); result.insert(result.begin() + m_axis, depth); @@ -132,7 +132,7 @@ OneHot::OneHot(const std::shared_ptr& op, const GraphContext::CPtr } bool OneHot::needShapeInfer() const { - const auto depthNodePtr = reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr()); + const auto depthNodePtr = reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->getData()); if (depth != static_cast(depthNodePtr[0])) { depth = depthNodePtr[0]; return true; @@ -162,11 +162,11 @@ void OneHot::initSupportedPrimitiveDescriptors() { template void OneHot::one_hot(size_t prefix_size, size_t suffix_size) { - const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto 
*dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); - const out_type on_value = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr())[0]; - const out_type off_value = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr())[0]; + const out_type on_value = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->getData())[0]; + const out_type off_value = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->getData())[0]; // fill the output with off_value std::size_t dst_size = prefix_size * depth * suffix_size; @@ -198,7 +198,7 @@ void OneHot::execute(dnnl::stream strm) { for (size_t i = 0; i < actual_axis; ++i) prefix_size *= input_dims[i]; - std::size_t suffix_size = getParentEdgeAt(0)->getMemory().GetShape().getElementsCount() / prefix_size; + std::size_t suffix_size = getParentEdgeAt(0)->getMemory().getShape().getElementsCount() / prefix_size; OneHotContext ctx = {this, prefix_size, suffix_size}; OV_SWITCH(intel_cpu, OneHotExecute, ctx, output_precision.size(), diff --git a/src/plugins/intel_cpu/src/nodes/pad.cpp b/src/plugins/intel_cpu/src/nodes/pad.cpp index 49af401e0fe6eb..9066c6a855b6cd 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.cpp +++ b/src/plugins/intel_cpu/src/nodes/pad.cpp @@ -227,8 +227,8 @@ void Pad::PadExecutor::paramsInitialization(const PadAttrs& attrs, IE_THROW() << errorPrefix << "has not allocated source memory."; if (!srcMemPtr || !srcMemPtr->isAllocated()) IE_THROW() << errorPrefix << "has not allocated destination memory."; - const auto srcBlockMemDesc = srcMemPtr->GetDescWithType(); - const auto dstBlockMemDesc = dstMemPtr->GetDescWithType(); + const auto srcBlockMemDesc = srcMemPtr->getDescWithType(); + const auto dstBlockMemDesc = dstMemPtr->getDescWithType(); const auto& srcDims = srcBlockMemDesc->getBlockDims(); const auto& 
dstDims = dstBlockMemDesc->getBlockDims(); @@ -239,7 +239,7 @@ void Pad::PadExecutor::paramsInitialization(const PadAttrs& attrs, auto fillingInParameters = [&](VectorIdxs& parameter, const size_t type, const size_t size, const int value) { - const int* ptr = reinterpret_cast(srcMemory[type]->GetPtr()); + const int* ptr = reinterpret_cast(srcMemory[type]->getData()); parameter.resize(size); for (size_t i = 0; i < size; i++) { parameter[i] = static_cast(ptr[i]); @@ -251,7 +251,7 @@ void Pad::PadExecutor::paramsInitialization(const PadAttrs& attrs, if (params.attrs.padsEnd.empty()) fillingInParameters(params.attrs.padsEnd, PADS_END_ID, srcDims.size(), 0); if (!params.attrs.constPadValue) - params.attrs.padValue = reinterpret_cast(srcMemory[PAD_VALUE_ID]->GetPtr())[0]; + params.attrs.padValue = reinterpret_cast(srcMemory[PAD_VALUE_ID]->getData())[0]; // pads are constant, so we can calculate new collapsing pads for first target dimensions and use it for the next // dimensions to avoid permanent identical pad calculations const size_t blockSize = srcMemPtr->getDesc().hasLayoutType(LayoutType::nCsp16c) @@ -369,7 +369,7 @@ void Pad::PadExecutor::innerParamsInitialization() { std::min(params.attrs.padsEnd[params.nDimsForWork], 0)) * params.shift; } -void Pad::PadExecutor::exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { +void Pad::PadExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { if (zeroInputDimsCase) { padConstant(srcMemPtr, dstMemPtr); } else { @@ -419,7 +419,7 @@ static inline void parallel_step(size_t nDims, const VectorDims& dims, std::vect } } -void Pad::PadExecutor::padConstant(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { +void Pad::PadExecutor::padConstant(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { if (params.attrs.padValue == 0 && !zeroInputDimsCase) { padConstantZero(srcMemPtr, dstMemPtr); return; @@ -439,11 +439,11 @@ void Pad::PadExecutor::padConstant(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { } template -void 
Pad::PadExecutor::padConstantCommon(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { - T* dstData = reinterpret_cast(dstMemPtr->GetPtr()); +void Pad::PadExecutor::padConstantCommon(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { + T* dstData = reinterpret_cast(dstMemPtr->getData()); const T value = static_cast(params.attrs.padValue); if (zeroInputDimsCase) { - const auto workAmount = dstMemPtr->GetDescWithType()->getPaddedElementsCount(); + const auto workAmount = dstMemPtr->getDescWithType()->getPaddedElementsCount(); parallel_for(workAmount, [&](size_t i) { dstData[i] = value; }); @@ -451,7 +451,7 @@ void Pad::PadExecutor::padConstantCommon(MemoryPtr& srcMemPtr, MemoryPtr& dstMem return; } - const T* srcData = reinterpret_cast(srcMemPtr->GetPtr()); + const T* srcData = reinterpret_cast(srcMemPtr->getData()); parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; @@ -488,9 +488,9 @@ void Pad::PadExecutor::padConstantCommon(MemoryPtr& srcMemPtr, MemoryPtr& dstMem }); } -void Pad::PadExecutor::padConstantZero(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); +void Pad::PadExecutor::padConstantZero(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { + const uint8_t* srcData = reinterpret_cast(srcMemPtr->getData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->getData()); parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; @@ -529,9 +529,9 @@ void Pad::PadExecutor::padConstantZero(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPt }); } -void Pad::PadExecutor::padEdge(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); +void Pad::PadExecutor::padEdge(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { + const uint8_t* srcData = 
reinterpret_cast(srcMemPtr->getData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->getData()); parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; @@ -570,9 +570,9 @@ void Pad::PadExecutor::padEdge(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { }); } -void Pad::PadExecutor::padReflectOrSymmetric(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const bool isSymmetric) { - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); +void Pad::PadExecutor::padReflectOrSymmetric(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const bool isSymmetric) { + const uint8_t* srcData = reinterpret_cast(srcMemPtr->getData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->getData()); const size_t shift = isSymmetric ? 1 : 0; const size_t endSrcShift = (params.srcDimsForReflectOrSymmetric[params.nDimsForWork] - params.srcODims[params.nDimsForWork]) * params.shift; diff --git a/src/plugins/intel_cpu/src/nodes/pad.h b/src/plugins/intel_cpu/src/nodes/pad.h index d19fa9c1f30d3d..5f670643d19499 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.h +++ b/src/plugins/intel_cpu/src/nodes/pad.h @@ -57,15 +57,15 @@ class Pad : public Node { const std::vector& srcMemory, const std::vector& dstMemory, const std::string& errorPrefix); - void exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); + void exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); ~PadExecutor() = default; private: - void padConstant(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); - template void padConstantCommon(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); - void padConstantZero(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); - void padEdge(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); - void padReflectOrSymmetric(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const bool isSymmetric = false); + void padConstant(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); + template void padConstantCommon(const 
MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); + void padConstantZero(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); + void padEdge(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); + void padReflectOrSymmetric(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const bool isSymmetric = false); void paramsInitialization(const PadAttrs& attrs, const std::vector& srcMemory, const std::vector& dstMemory); diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index 3a53a43df71d9e..1bd288697bef34 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -388,8 +388,8 @@ void Pooling::prepareParams() { } if (useACL) { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -410,8 +410,8 @@ void Pooling::prepareParams() { *attr); selected_pd->setImplementationType(execPtr->getImplType()); } else { - auto inDesc = getParentEdgesAtPort(0)[0]->getMemory().GetDescWithType(); - auto outDesc = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType(); + auto inDesc = getParentEdgesAtPort(0)[0]->getMemory().getDescWithType(); + auto outDesc = getChildEdgesAtPort(0)[0]->getMemory().getDescWithType(); if (isDynamicNode()) { if (poolingAttrs.auto_pad) { @@ -468,9 +468,9 @@ void Pooling::prepareParams() { } auto scratchpadMem = getScratchPadMem(dnnlExecPtr->getScratchPadDesc()); - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); - primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = 
scratchpadMem->getPrimitive(); + primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->getPrimitive(); + primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->getPrimitive(); Node::appendPostOpArgs(*attr, primArgs, postOpsArgs); diff --git a/src/plugins/intel_cpu/src/nodes/priorbox.cpp b/src/plugins/intel_cpu/src/nodes/priorbox.cpp index 963d39cfe98c9d..f1832c37bd7348 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox.cpp @@ -33,7 +33,7 @@ class PriorBoxShapeInfer : public ShapeInferEmptyPads { Result infer( const std::vector>& input_shapes, const std::unordered_map& data_dependency) override { - const int* in_data = reinterpret_cast(data_dependency.at(0)->GetPtr()); + const int* in_data = reinterpret_cast(data_dependency.at(0)->getData()); const int H = in_data[0]; const int W = in_data[1]; const auto output = static_cast(4 * H * W * m_number_of_priors); @@ -152,13 +152,13 @@ PriorBox::PriorBox(const std::shared_ptr& op, const GraphContext:: } bool PriorBox::needShapeInfer() const { - auto& memory = getChildEdgeAt(0)->getMemoryPtr(); - if (memory->GetShape().isDynamic()) { + auto memory = getChildEdgeAt(0)->getMemoryPtr(); + if (memory->getShape().isDynamic()) { return true; } - const auto& outputShape = memory->GetShape().getStaticDims(); - const int* in_data = reinterpret_cast(memory->GetPtr()); + const auto& outputShape = memory->getShape().getStaticDims(); + const int* in_data = reinterpret_cast(memory->getData()); const int h = in_data[0]; const int w = in_data[1]; const auto output = static_cast(4 * h * w * number_of_priors); @@ -189,18 +189,18 @@ void PriorBox::createPrimitive() { } void PriorBox::execute(dnnl::stream strm) { - const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); const int H = in_data[0]; const int W = in_data[1]; - const int* in_image = 
reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + const int* in_image = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); const int IH = in_image[0]; const int IW = in_image[1]; const int OH = 4 * H * W * number_of_priors; const int OW = 1; - float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); float step_ = step; auto min_size_ = min_size; diff --git a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp index bcd24b2f23e602..2ab36190b85d13 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp @@ -32,7 +32,7 @@ class PriorBoxClusteredShapeInfer : public ShapeInferEmptyPads { Result infer( const std::vector>& input_shapes, const std::unordered_map& data_dependency) override { - const int* in_data = reinterpret_cast(data_dependency.at(0)->GetPtr()); + const int* in_data = reinterpret_cast(data_dependency.at(0)->getData()); const int H = in_data[0]; const int W = in_data[1]; const auto output = static_cast(4 * H * W * m_number_of_priors); @@ -106,13 +106,13 @@ PriorBoxClustered::PriorBoxClustered(const std::shared_ptr& op, co } bool PriorBoxClustered::needShapeInfer() const { - auto& memory = getChildEdgeAt(0)->getMemoryPtr(); - if (memory->GetShape().isDynamic()) { + auto memory = getChildEdgeAt(0)->getMemoryPtr(); + if (memory->getShape().isDynamic()) { return true; } - const auto& outputShape = memory->GetShape().getStaticDims(); - const int* in_data = reinterpret_cast(memory->GetPtr()); + const auto& outputShape = memory->getShape().getStaticDims(); + const int* in_data = reinterpret_cast(memory->getData()); const int h = in_data[0]; const int w = in_data[1]; const auto output = static_cast(4 * h * w * number_of_priors); @@ -143,11 +143,11 @@ void PriorBoxClustered::createPrimitive() { } 
void PriorBoxClustered::execute(dnnl::stream strm) { - const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); const int layer_height = in_data[0]; const int layer_width = in_data[1]; - const int* in_image = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + const int* in_image = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); int img_height = in_image[0]; int img_width = in_image[1]; @@ -158,8 +158,8 @@ void PriorBoxClustered::execute(dnnl::stream strm) { step_h = static_cast(img_height) / layer_height; } - float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto& out_shape = getChildEdgeAt(0)->getMemory().GetShape().getStaticDims(); + float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); + const auto& out_shape = getChildEdgeAt(0)->getMemory().getShape().getStaticDims(); size_t var_size = variances.size(); parallel_for2d(layer_height, layer_width, [&](int64_t h, int64_t w) { diff --git a/src/plugins/intel_cpu/src/nodes/proposal.cpp b/src/plugins/intel_cpu/src/nodes/proposal.cpp index 6f713d5d8ae1a3..c0c77977461e68 100644 --- a/src/plugins/intel_cpu/src/nodes/proposal.cpp +++ b/src/plugins/intel_cpu/src/nodes/proposal.cpp @@ -164,13 +164,13 @@ void Proposal::executeDynamicImpl(dnnl::stream strm) { void Proposal::execute(dnnl::stream strm) { try { - const float* probabilitiesData = reinterpret_cast(getParentEdgeAt(PROBABILITIES_IN_IDX)->getMemoryPtr()->GetPtr()); - const float* anchorsData = reinterpret_cast(getParentEdgeAt(ANCHORS_IN_IDX)->getMemoryPtr()->GetPtr()); - const float* imgInfoData = reinterpret_cast(getParentEdgeAt(IMG_INFO_IN_IDX)->getMemoryPtr()->GetPtr()); - float* outRoiData = reinterpret_cast (getChildEdgesAtPort(ROI_OUT_IDX)[0]->getMemoryPtr()->GetPtr()); + const float* probabilitiesData = 
reinterpret_cast(getParentEdgeAt(PROBABILITIES_IN_IDX)->getMemoryPtr()->getData()); + const float* anchorsData = reinterpret_cast(getParentEdgeAt(ANCHORS_IN_IDX)->getMemoryPtr()->getData()); + const float* imgInfoData = reinterpret_cast(getParentEdgeAt(IMG_INFO_IN_IDX)->getMemoryPtr()->getData()); + float* outRoiData = reinterpret_cast (getChildEdgesAtPort(ROI_OUT_IDX)[0]->getMemoryPtr()->getData()); float* outProbData = nullptr; if (store_prob) - outProbData = reinterpret_cast (getChildEdgesAtPort(PROBABILITIES_OUT_IDX)[0]->getMemoryPtr()->GetPtr()); + outProbData = reinterpret_cast (getChildEdgesAtPort(PROBABILITIES_OUT_IDX)[0]->getMemoryPtr()->getData()); auto inProbDims = getParentEdgeAt(0)->getMemory().getStaticDims(); const size_t imgInfoSize = getParentEdgeAt(2)->getMemory().getStaticDims()[0]; diff --git a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp index ba96d219926f4c..8677428de8870b 100644 --- a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp @@ -482,12 +482,12 @@ void PSROIPooling::executeBilinearDeformable(const inputType *srcData, outputTyp template void PSROIPooling::executeSpecified() { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); - auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(0)->getMemory().getDescWithType(); + auto dstDesc = 
getChildEdgeAt(0)->getMemory().getDescWithType(); int realRois = 0; for (; realRois < nn; realRois++) { @@ -503,7 +503,7 @@ void PSROIPooling::executeSpecified() { int channelsEachClass = outputDim; if (!noTrans) { const auto mem = getParentEdgeAt(2)->getMemoryPtr(); - bottomTrans = reinterpret_cast(mem->GetPtr()); + bottomTrans = reinterpret_cast(mem->getData()); numClasses = static_cast(mem->getStaticDims()[1]) / 2; channelsEachClass /= numClasses; } diff --git a/src/plugins/intel_cpu/src/nodes/range.cpp b/src/plugins/intel_cpu/src/nodes/range.cpp index 7fd0e2a74f7bbd..c7b47e55449a21 100644 --- a/src/plugins/intel_cpu/src/nodes/range.cpp +++ b/src/plugins/intel_cpu/src/nodes/range.cpp @@ -118,9 +118,9 @@ size_t Range::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t *stepPtr) stopPtr = &limit; if (stepPtr == nullptr) stepPtr = δ - *startPtr = reinterpret_cast(getParentEdgeAt(RANGE_START)->getMemoryPtr()->GetPtr())[0]; - *stopPtr = reinterpret_cast(getParentEdgeAt(RANGE_LIMIT)->getMemoryPtr()->GetPtr())[0]; - *stepPtr = reinterpret_cast(getParentEdgeAt(RANGE_DELTA)->getMemoryPtr()->GetPtr())[0]; + *startPtr = reinterpret_cast(getParentEdgeAt(RANGE_START)->getMemoryPtr()->getData())[0]; + *stopPtr = reinterpret_cast(getParentEdgeAt(RANGE_LIMIT)->getMemoryPtr()->getData())[0]; + *stepPtr = reinterpret_cast(getParentEdgeAt(RANGE_DELTA)->getMemoryPtr()->getData())[0]; const data_t span = *stopPtr - *startPtr; const data_t step = *stepPtr; if (std::is_same::value) { @@ -140,7 +140,7 @@ InferenceEngine::StatusCode Range::rangeKernel() { VectorDims newOutputShape {work_amount_dst}; redefineOutputMemory({newOutputShape}); } - data_t* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + data_t* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); parallel_nt(0, [&](const int ithr, const int nthr) { size_t iwork = 0, end = 0; splitter(work_amount_dst, nthr, ithr, iwork, end); diff --git 
a/src/plugins/intel_cpu/src/nodes/rdft.cpp b/src/plugins/intel_cpu/src/nodes/rdft.cpp index de8d493df71f5f..c16232953a57ff 100644 --- a/src/plugins/intel_cpu/src/nodes/rdft.cpp +++ b/src/plugins/intel_cpu/src/nodes/rdft.cpp @@ -157,13 +157,13 @@ void RDFT::execute(dnnl::stream strm) { const auto& inputShape = inputMem.getStaticDims(); const auto& outputShape = outputMem.getStaticDims(); - auto inputPtr = reinterpret_cast(inputMem.GetPtr()); - auto outputPtr = reinterpret_cast(outputMem.GetPtr()); + auto inputPtr = reinterpret_cast(inputMem.getData()); + auto outputPtr = reinterpret_cast(outputMem.getData()); auto rank = inputShape.size() - inverse; - const auto& inputStrides = inputMem.GetDescWithType()->getStrides(); - const auto& outputStrides = outputMem.GetDescWithType()->getStrides(); + const auto& inputStrides = inputMem.getDescWithType()->getStrides(); + const auto& outputStrides = outputMem.getDescWithType()->getStrides(); executor->execute(inputPtr, outputPtr, twiddles, rank, @@ -187,7 +187,7 @@ void RDFT::prepareParams() { if (axes.size() != newAxesSize) { axes.resize(newAxesSize); } - auto axesPtr = reinterpret_cast(axesMem->GetPtr()); + auto axesPtr = reinterpret_cast(axesMem->getData()); auto inputRank = inputShapes[DATA_INDEX].getRank() - inverse; for (size_t i = 0; i < axes.size(); i++) { axes[i] = axesPtr[i] < 0 ? 
axesPtr[i] + inputRank : axesPtr[i]; @@ -213,7 +213,7 @@ void RDFT::prepareParams() { if (signalSizes.size() != newSize) { signalSizes.resize(newSize); } - const auto& signalSizesPtr = reinterpret_cast(signalSizesMem->GetPtr()); + const auto& signalSizesPtr = reinterpret_cast(signalSizesMem->getData()); for (size_t i = 0; i < newSize; i++) { signalSizes[i] = signalSizesPtr[i]; } @@ -232,7 +232,7 @@ bool RDFT::axesChanged() const { if (axes.size() != axesMem->getStaticDims()[0]) { return true; } - auto axesPtr = reinterpret_cast(axesMem->GetPtr()); + auto axesPtr = reinterpret_cast(axesMem->getData()); auto inputRank = inputShapes[DATA_INDEX].getRank() - inverse; for (size_t i = 0; i < axes.size(); i++) { auto newAxis = axesPtr[i] < 0 ? axesPtr[i] + inputRank : axesPtr[i]; @@ -267,7 +267,7 @@ bool RDFT::signalSizesChanged() const { if (signalSizes.size() != newSize || signalSizes.size() != axes.size()) { return true; } - const auto& signalSizesPtr = reinterpret_cast(signalSizesMem->GetPtr()); + const auto& signalSizesPtr = reinterpret_cast(signalSizesMem->getData()); for (size_t i = 0; i < newSize; i++) { if (signalSizesPtr[i] != signalSizes[i]) { return true; diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index ce52820fd1b4ea..9c440c09ef426d 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -1942,9 +1942,9 @@ void Reduce::prepareParams() { reduce_axes = raw_axes; } - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); const SizeVector &dst_dims = dstMemPtr->getDesc().getShape().getDims(); - dst_size = dstMemPtr->GetSize(); + dst_size = dstMemPtr->getSize(); calc_process_dst_dims(reduce_axes, dst_dims); if (jit_mode) { set_reduce_dim_flags(); @@ -1990,8 +1990,8 @@ void Reduce::createPrimitive() { if (!isExecutable()) { return; } - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = 
getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " has not allocated destination memory."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -2083,11 +2083,11 @@ void Reduce::executeDynamicImpl(dnnl::stream strm) { } void Reduce::execute(dnnl::stream strm) { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); - const uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t *src_data = reinterpret_cast(srcMemPtr->getData()); + uint8_t *dst_data = reinterpret_cast(dstMemPtr->getData()); if (jit_mode) { if (is_hybrid_layout) { @@ -2130,8 +2130,8 @@ void Reduce::reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr, size_t dst_siz if (is_hybrid_layout) { uint8_t *proc_ptr = out_ptr; - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - out_ptr = reinterpret_cast(dstMemPtr->GetPtr()); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + out_ptr = reinterpret_cast(dstMemPtr->getData()); if (layout == ReduceLayoutType::reduce_nspc) { nspc2ncsp(proc_ptr, out_ptr); } else { @@ -2976,7 +2976,7 @@ void Reduce::reduce_ref_process(const float *in_ptr, float *out_ptr, float init_ reduced_dims_work_amount *= src_dims[i]; reduced_dims_work_amount /= work_amount_dst; - SizeVector src_strides = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDescWithType()->getStrides(); + SizeVector src_strides = getParentEdgeAt(REDUCE_DATA)->getMemory().getDescWithType()->getStrides(); parallel_nt(0, [&](const int ithr, const int nthr) { int j; size_t i, start = 0, end = 0; diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp 
b/src/plugins/intel_cpu/src/nodes/reference.cpp index 2356a56ae37028..490610b4f0c822 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -102,11 +102,11 @@ void Reference::executeDynamicImpl(dnnl::stream strm) { for (size_t i = 0; i < outputShapes.size(); ++i) { auto memory = getChildEdgesAtPort(i)[0]->getMemoryPtr(); auto& tensor = outputs[i]; - if (memory->GetSize() != tensor.get_byte_size()) { + if (memory->getSize() != tensor.get_byte_size()) { IE_THROW(Unexpected) << "Output tensor data size mismatch occurred during the inference of a node with type " << getTypeStr() << " and name " << getName() << " on output port number " << i; } - cpu_memcpy(memory->GetData(), tensor.data(), tensor.get_byte_size()); + cpu_memcpy(memory->getData(), tensor.data(), tensor.get_byte_size()); } } } @@ -122,7 +122,7 @@ bool Reference::needShapeInfer() const { ov::TensorVector Reference::prepareInputs() const { ov::TensorVector inputs; for (size_t i = 0; i < inputShapes.size(); i++) { - void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetPtr(); + void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().getData(); inputs.push_back(ov::Tensor(ngraphOp->get_input_element_type(i), getParentEdgesAtPort(i)[0]->getMemory().getStaticDims(), srcDataPtr)); } @@ -132,7 +132,7 @@ ov::TensorVector Reference::prepareInputs() const { ov::TensorVector Reference::prepareOutputs() const { ov::TensorVector outputs; for (size_t i = 0; i < outputShapes.size(); i++) { - void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().GetPtr(); + void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().getData(); outputs.push_back(ov::Tensor(ngraphOp->get_output_element_type(i), getChildEdgesAtPort(i)[0]->getMemory().getStaticDims(), dstDataPtr)); } diff --git a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp index 942d9eec06bb2c..97b3a911b99aa6 100644 --- 
a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp @@ -380,7 +380,7 @@ inline void RegionYolo::calculate_logistic(size_t start_index, int count, uint8_ } void RegionYolo::execute(dnnl::stream strm) { - const auto &inShape = getParentEdgeAt(0)->getMemory().GetShape(); + const auto &inShape = getParentEdgeAt(0)->getMemory().getShape(); const auto &inDims = inShape.getStaticDims(); size_t B = (inShape.getRank() > 0) ? inDims[0] : 1; size_t IC = (inShape.getRank() > 1) ? inDims[1] : 1; @@ -403,15 +403,15 @@ void RegionYolo::execute(dnnl::stream strm) { output_size = B * IH * IW * mask_size * (classes + coords + 1); } - if (output_size != getChildEdgeAt(0)->getMemoryPtr()->GetShape().getElementsCount()) + if (output_size != getChildEdgeAt(0)->getMemoryPtr()->getShape().getElementsCount()) IE_THROW() << "Incorrect layer configuration or output dimensions. " << output_size << " != " - << getChildEdgeAt(0)->getMemoryPtr()->GetShape().getElementsCount(); + << getChildEdgeAt(0)->getMemoryPtr()->getShape().getElementsCount(); size_t inputs_size = IH * IW * num_ * (classes + coords + 1); size_t total_size = 2 * IH * IW; - const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); cpu_convert(src_data, dst_data, getParentEdgeAt(0)->getMemory().getDesc().getPrecision(), getChildEdgeAt(0)->getMemory().getDesc().getPrecision(), output_size); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index ef2d4ca4d67727..f8a9de782c2c09 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -120,8 +120,8 @@ void Reorder::prepareParams() { if (isOptimized) 
return; - auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -176,8 +176,8 @@ void Reorder::prepareParams() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - createReorderPrimitive(srcMemPtr->GetDescWithType()->getDnnlDesc(), srcMemPtr->GetData(), - dstMemPtr->GetDescWithType()->getDnnlDesc(), dstMemPtr->GetData()); + createReorderPrimitive(srcMemPtr->getDescWithType()->getDnnlDesc(), srcMemPtr->getData(), + dstMemPtr->getDescWithType()->getDnnlDesc(), dstMemPtr->getData()); } } @@ -190,13 +190,11 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, IE_THROW() << "Preferable primitive descriptor is not set."; const auto engine = getEngine(); - src_blocked = std::make_shared(engine); - src_blocked->Create(DnnlExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); + src_blocked = std::make_shared(engine, DnnlExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); - dst_blocked = std::make_shared(engine); - dst_blocked->Create(DnnlExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); + dst_blocked = std::make_shared(engine, DnnlExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); - auto src_desc = src_blocked->GetPrimitive().get_desc(); + auto src_desc = src_blocked->getPrimitive().get_desc(); if (!src_permutation.empty()) { // reorder requires exact matching of logical dimensions between src & dst // sometime we have to permute source's logical dimensions to satisfy @@ -206,7 +204,7 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, src_desc = src_desc.permute_axes(src_permutation); } - auto dst_desc = dst_blocked->GetPrimitive().get_desc(); 
+ auto dst_desc = dst_blocked->getPrimitive().get_desc(); // TODO: We should keep shape consistency for const and expected shape for node. // If it requires reshape operation it should explicitly injected into graph. @@ -220,16 +218,16 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, // useful in situations when rank in IR does not much rank that is required by the oneDNN primitive, // but the input tensor can be reshaped (e.g. weights for grouped convolutions, biases etc.) if (src_blocked->getDesc().hasLayoutType(LayoutType::ncsp) && - src_blocked->GetShape().getRank() != dst_blocked->GetShape().getRank()) { + src_blocked->getShape().getRank() != dst_blocked->getShape().getRank()) { const auto newDims = dst_blocked->getStaticDims(); const auto newFormat = DnnlExtensionUtils::GetPlainFormatByRank(newDims.size()); auto newDesc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(newDims), - src_blocked->GetDataType(), + src_blocked->getDataType(), newFormat); - src_blocked->Create(DnnlExtensionUtils::makeDescriptor(newDesc), srcPtr, false); + src_blocked = std::make_shared(getEngine(), DnnlExtensionUtils::makeDescriptor(newDesc), srcPtr, false); - src_desc = src_blocked->GetPrimitive().get_desc(); + src_desc = src_blocked->getPrimitive().get_desc(); } auto result = getReorderPrim(context->getParamsCache(), getEngine(), src_desc, dst_desc); @@ -241,8 +239,8 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, selectedPD->setImplementationType( parse_impl_name(DnnlExtensionUtils::query_impl_info_str(prim.get_primitive_desc()))); - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->getPrimitive(); + auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->getPrimitive(); primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}}; #ifdef CPU_DEBUG_CAPS @@ -267,8 +265,8 @@ void 
Reorder::optimizedNcsp2Nspc() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - auto inDims = parentEdge->getMemory().GetShape().getStaticDims(); - const auto dstStrides = childEdge->getMemoryPtr()->GetDescWithType()->getStrides(); + auto inDims = parentEdge->getMemory().getShape().getStaticDims(); + const auto dstStrides = childEdge->getMemoryPtr()->getDescWithType()->getStrides(); const size_t ndims = inDims.size(); const size_t DIM0 = inDims[0]; const size_t DIM1 = inDims[1]; @@ -276,8 +274,8 @@ void Reorder::optimizedNcsp2Nspc() { const size_t DIM3 = inDims[ndims - 2]; const size_t DIM4 = inDims[ndims - 1]; - auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); - auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); + auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->getData()); + auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->getData()); const size_t src_batch_stride = DIM1 * DIM2 * DIM3 * DIM4; const size_t dst_batch_stride = dstStrides[0]; @@ -301,7 +299,7 @@ void Reorder::optimizedNspc2Ncsp() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - auto inDims = parentEdge->getMemory().GetShape().getStaticDims(); + auto inDims = parentEdge->getMemory().getShape().getStaticDims(); const size_t ndims = inDims.size(); const size_t DIM0 = inDims[0]; const size_t DIM1 = inDims[1]; @@ -309,10 +307,10 @@ void Reorder::optimizedNspc2Ncsp() { const size_t DIM3 = inDims[ndims - 2]; const size_t DIM4 = inDims[ndims - 1]; - auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); - auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); + auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->getData()); + auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->getData()); - const auto dstStrides = childEdge->getMemoryPtr()->GetDescWithType()->getStrides(); + const auto dstStrides = 
childEdge->getMemoryPtr()->getDescWithType()->getStrides(); const size_t block_size = DIM2 * DIM3 * DIM4; const size_t src_batch_stride = block_size * DIM1; const size_t dst_batch_stride = dstStrides[0]; @@ -330,8 +328,8 @@ void Reorder::optimizedNspc2Ncsp() { void Reorder::execute(dnnl::stream strm) { if (isOptimized) { DEBUG_LOG("#", getExecIndex(), " Reorder ", getName(), " is Optimized.", - " input @", getParentEdgeAt(0)->getMemory().GetData(), - " output @", getChildEdgeAt(0)->getMemory().GetData()); + " input @", getParentEdgeAt(0)->getMemory().getData(), + " output @", getChildEdgeAt(0)->getMemory().getData()); return; } @@ -340,8 +338,8 @@ void Reorder::execute(dnnl::stream strm) { } else if (canUseNcsp2Nspc) { optimizedNcsp2Nspc(); } else { - src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData()); - dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData()); + // src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData()); + // dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData()); if (prim) { prim.execute(strm, primArgs); @@ -366,47 +364,46 @@ std::string Reorder::getReorderArgs(const MemoryDesc &parentDesc, const MemoryDe return inArgs + "_" + outArgs; } -void Reorder::reorderData(const Memory &input, const Memory &output, MultiCachePtr cache) { +void Reorder::reorderData(const IMemory &input, const IMemory &output, MultiCachePtr cache) { if (!input.getDesc().isDefined() || !output.getDesc().isDefined()) IE_THROW() << "Can't reorder data with dynamic shapes"; - if (input.GetShape().hasZeroDims() || output.GetShape().hasZeroDims()) { + if (input.getShape().hasZeroDims() || output.getShape().hasZeroDims()) { return; } if (input.getDesc().isCompatible(output.getDesc())) { - auto srcPtr = static_cast(input.GetPtr()); - auto dstPtr = static_cast(output.GetPtr()); + auto srcPtr = static_cast(input.getData()); + auto dstPtr = static_cast(output.getData()); - auto copySize = output.GetSize(); + auto copySize = 
output.getSize(); cpu_memcpy(dstPtr, srcPtr, copySize); } else { dnnl::reorder reorder; std::vector tmpBuff; - auto srcMemory = input.GetPrimitive(); - auto dstMemory = output.GetPrimitive(); - auto engine = output.getEngine(); + auto srcMemory = input.getPrimitive(); + auto dstMemory = output.getPrimitive(); + auto engine = dstMemory.get_engine(); // try directly reorder reorder = getReorderPrim(cache, dstMemory.get_engine(), srcMemory.get_desc(), dstMemory.get_desc()); if (!reorder) { // try precision conversion then do the reorder - if (output.GetDataType() != input.GetDataType() && Convert::isSupportedDesc(input.getDesc()) && + if (output.getDataType() != input.getDataType() && Convert::isSupportedDesc(input.getDesc()) && Convert::isSupportedDesc(output.getDesc())) { //we probably could not make the reorder because there is no one supporting this precision conversion //lets try to convert data first using cpu_convert - auto data = static_cast(input.GetPtr()); - tmpBuff.resize(input.GetSize()); + auto data = static_cast(input.getData()); + tmpBuff.resize(input.getSize()); - const auto outPrc = DnnlExtensionUtils::DataTypeToIEPrecision(output.GetDataType()); - cpu_convert(data, tmpBuff.data(), DnnlExtensionUtils::DataTypeToIEPrecision(input.GetDataType()), - outPrc, input.GetSize() / input.getDesc().getPrecision().size()); + const auto outPrc = DnnlExtensionUtils::DataTypeToIEPrecision(output.getDataType()); + cpu_convert(data, tmpBuff.data(), DnnlExtensionUtils::DataTypeToIEPrecision(input.getDataType()), + outPrc, input.getSize() / input.getDesc().getPrecision().size()); - Memory tmpMem(engine); auto tmpDesc = input.getDesc().cloneWithNewPrecision(outPrc); - tmpMem.Create(std::move(tmpDesc), tmpBuff.data()); + Memory tmpMem(engine, std::move(tmpDesc), tmpBuff.data()); - srcMemory = tmpMem.GetPrimitive(); + srcMemory = tmpMem.getPrimitive(); reorder = getReorderPrim(cache, dstMemory.get_engine(), srcMemory.get_desc(), dstMemory.get_desc()); } if (!reorder) { 
diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index f9517912855366..ef8e508fa08123 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -61,7 +61,7 @@ class Reorder : public Node { static std::string getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc); - static void reorderData(const Memory &input, const Memory &output, MultiCachePtr cache = nullptr); + static void reorderData(const IMemory &input, const IMemory &output, MultiCachePtr cache = nullptr); private: dnnl::reorder::primitive prim; diff --git a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp index c331d7b0adb64c..153a789ae88d33 100644 --- a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp @@ -59,8 +59,8 @@ void ReorgYolo::executeDynamicImpl(dnnl::stream strm) { } void ReorgYolo::execute(dnnl::stream strm) { - const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + auto *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->getData()); const auto &inDims = getParentEdgeAt(0)->getMemory().getStaticDims(); int IW = (inDims.size() > 3) ? 
inDims[3] : 1; diff --git a/src/plugins/intel_cpu/src/nodes/reshape.cpp b/src/plugins/intel_cpu/src/nodes/reshape.cpp index 095bbfcb6614fc..58b59b0dbfa2ab 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.cpp +++ b/src/plugins/intel_cpu/src/nodes/reshape.cpp @@ -46,7 +46,7 @@ class ReshapeShapeInfer : public ShapeInferEmptyPads { const auto& inputShape = input_shapes[RESHAPE_SRC].get(); const size_t inputShapeSize = inputShape.size(); const auto memPtr = data_dependency.at(RESHAPE_PATTERN); - const auto data = memPtr->GetPtr(); + const auto data = memPtr->getData(); const auto& dims = memPtr->getStaticDims(); const auto outputPatternSize = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); std::vector outPattern = ov::get_raw_data_as( @@ -109,7 +109,7 @@ class SqueezeShapeInfer : public ShapeInferEmptyPads { outputShape.reserve(inputShapeSize); if (itr != data_dependency.end()) { const auto memPtr = data_dependency.at(SQUEEZE_PATTERN); - const auto data = memPtr->GetPtr(); + const auto data = memPtr->getData(); const auto& dims = memPtr->getStaticDims(); const auto outputPatternSize = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); std::vector outPattern = ov::get_raw_data_as( @@ -164,7 +164,7 @@ class UnsqueezeShapeInfer : public ShapeInferEmptyPads { const auto& inputShape = input_shapes[UNSQUEEZE_SRC].get(); const size_t inputShapeSize = inputShape.size(); const auto memPtr = data_dependency.at(UNSQUEEZE_PATTERN); - const auto data = memPtr->GetPtr(); + const auto data = memPtr->getData(); const auto& dims = memPtr->getStaticDims(); const auto outputPatternSize = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); std::vector outPattern = ov::get_raw_data_as( @@ -264,7 +264,7 @@ bool Reshape::needShapeInfer() const { if (lastSecondInputValues.empty()) { lastSecondInputValues.resize(mem.getStaticDims()[0], 0); } - const int32_t *sndInput = reinterpret_cast(mem.GetPtr()); + const int32_t *sndInput = 
reinterpret_cast(mem.getData()); for (size_t i = 0; i < lastSecondInputValues.size(); i++) { if (lastSecondInputValues[i] != sndInput[i]) { for (size_t i = 0; i < lastSecondInputValues.size(); i++) { @@ -306,7 +306,7 @@ void Reshape::initSupportedPrimitiveDescriptors() { config.inConfs.resize(getParentEdges().size()); auto& creatorsMap = BlockedDescCreator::getCommonCreators(); for (size_t i = 0; i < getParentEdges().size(); i++) { - config.inConfs[i].inPlace(-1); + config.inConfs[i].inPlace(0 == i && canBeInPlace ? 0 : -1); config.inConfs[i].constant(false); config.inConfs[i].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc((i > 0 ? secondInPrc : inPrec), getInputShapeAtPort(i))); } @@ -322,20 +322,26 @@ void Reshape::executeDynamicImpl(dnnl::stream strm) { } void Reshape::execute(dnnl::stream strm) { - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto srcPtr = static_cast(srcMemPtr->GetPtr()); - auto dstPtr = static_cast(dstMemPtr->GetPtr()); + auto srcPtr = static_cast(srcMemPtr->getData()); + auto dstPtr = static_cast(dstMemPtr->getData()); if (dstPtr != srcPtr) { - cpu_memcpy(dstPtr, srcPtr, dstMemPtr->GetSize()); + cpu_memcpy(dstPtr, srcPtr, dstMemPtr->getSize()); } } bool Reshape::isExecutable() const { - bool inPlaceEnabled = - getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace() >= 0; + bool inPlaceEnabled = false; + if (auto prim_desc = getSelectedPrimitiveDescriptor()) { + auto& config = prim_desc->getConfig(); + if (config.inConfs[0].inPlace() >= 0 || + config.outConfs[0].inPlace() >= 0) { + inPlaceEnabled = true; + } + } return !inPlaceEnabled; } diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp index 6606ab7505872d..a948f940c355e3 100644 --- 
a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp @@ -127,11 +127,11 @@ ReverseSequence::ReverseSequenceExecutor::ReverseSequenceExecutor(const VectorDi } template -void ReverseSequence::ReverseSequenceExecutor::exec(const MemoryPtr& dataMemPtr, const MemoryPtr& seqLengthsMemPtr, MemoryPtr& dstMemPtr) { +void ReverseSequence::ReverseSequenceExecutor::exec(const MemoryPtr& dataMemPtr, const MemoryPtr& seqLengthsMemPtr, const MemoryPtr& dstMemPtr) { const VectorDims& srcDims = dataMemPtr->getStaticDims(); - const auto *srcData = reinterpret_cast(dataMemPtr->GetPtr()); - auto *dstData = reinterpret_cast(dstMemPtr->GetPtr()); - auto *seqLengthsData = reinterpret_cast(seqLengthsMemPtr->GetPtr()); + const auto *srcData = reinterpret_cast(dataMemPtr->getData()); + auto *dstData = reinterpret_cast(dstMemPtr->getData()); + auto *seqLengthsData = reinterpret_cast(seqLengthsMemPtr->getData()); for (size_t i = 0; i < srcDims[batchAxis]; ++i) { if (static_cast(seqLengthsData[i]) > static_cast(srcDims[seqAxis])) { diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.h b/src/plugins/intel_cpu/src/nodes/reverse_sequence.h index f1e7d67a999616..55fd457c0a688e 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.h +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.h @@ -34,7 +34,7 @@ class ReverseSequence : public Node { ~ReverseSequenceExecutor() = default; template - void exec(const MemoryPtr& dataMemPtr, const MemoryPtr& seqLengthsMemPtr, MemoryPtr& dstMemPtr); + void exec(const MemoryPtr& dataMemPtr, const MemoryPtr& seqLengthsMemPtr, const MemoryPtr& dstMemPtr); private: const int batchAxis; diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 43b2e76c9a7ddc..004cccf763e90c 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -715,8 +715,8 @@ void RNN::fillWeights(const int *gate_map, const size_t 
wIdx, const size_t rIdx) auto ie_w_ptr = ie_w_vec.data(); auto ie_r_ptr = ie_r_vec.data(); - cpu_convert(wConstBlob->GetPtr(), ie_w_ptr, weightPrec, targetWeightPrec, ie_w_vec_size); - cpu_convert(rConstBlob->GetPtr(), ie_r_ptr, weightPrec, targetWeightPrec, ie_r_vec_size); + cpu_convert(wConstBlob->getData(), ie_w_ptr, weightPrec, targetWeightPrec, ie_w_vec_size); + cpu_convert(rConstBlob->getData(), ie_r_ptr, weightPrec, targetWeightPrec, ie_r_vec_size); const int step = SC * G; @@ -760,12 +760,12 @@ void RNN::fillBiases(const int *gate_map) { auto *constInputNode = dynamic_cast(getParentEdgesAtPort(bIdx)[0]->getParent().get()); auto constBlob = constInputNode->getMemoryPtr(); - auto const elementsCount = constBlob->GetSize() / constBlob->getDesc().getPrecision().size(); + auto const elementsCount = constBlob->getSize() / constBlob->getDesc().getPrecision().size(); std::vector ie_b_vec(elementsCount); - cpu_convert(constBlob->GetPtr(), + cpu_convert(constBlob->getData(), &ie_b_vec[0], - DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()), + DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->getDataType()), Prec, elementsCount); @@ -1037,8 +1037,8 @@ void RNN::prepareParams() { THROW_ERROR << "has incorrect input size value in the first input."; auto dataMemPtr = getParentEdgesAtPort(0).front()->getMemoryPtr(); - const size_t B = dataMemPtr->GetShape().getStaticDims()[0]; - const size_t SL = is_cell ? 1lu : dataMemPtr->GetShape().getStaticDims()[1]; + const size_t B = dataMemPtr->getShape().getStaticDims()[0]; + const size_t SL = is_cell ? 
1lu : dataMemPtr->getShape().getStaticDims()[1]; const Shape shapeS_4D{L, D, B, SC}; inDataDescs[0] = std::make_shared(Shape{SL, B, DC}, inDataTypes[xIdx], memory::format_tag::tnc); @@ -1104,23 +1104,23 @@ void RNN::prepareParams() { if (!primArgs.count(DNNL_ARG_WEIGHTS_LAYER) || !prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) { prepareMemory(execPtr->getWeightDesc(), 0); - primArgs[DNNL_ARG_WEIGHTS_LAYER] = internalBlobMemory[0]->GetPrimitive(); + primArgs[DNNL_ARG_WEIGHTS_LAYER] = internalBlobMemory[0]->getPrimitive(); } if (!primArgs.count(DNNL_ARG_WEIGHTS_ITER) || !prevExecPtr || !execPtr->getWeightIterDesc()->isCompatible(*(prevExecPtr->getWeightIterDesc()))) { prepareMemory(execPtr->getWeightIterDesc(), 1); - primArgs[DNNL_ARG_WEIGHTS_ITER] = internalBlobMemory[1]->GetPrimitive(); + primArgs[DNNL_ARG_WEIGHTS_ITER] = internalBlobMemory[1]->getPrimitive(); } if (!primArgs.count(DNNL_ARG_BIAS) || !prevExecPtr || !execPtr->getBiasDesc()->isCompatible(*(prevExecPtr->getBiasDesc()))) { prepareMemory(execPtr->getBiasDesc(), 2); - primArgs[DNNL_ARG_BIAS] = internalBlobMemory[2]->GetPrimitive(); + primArgs[DNNL_ARG_BIAS] = internalBlobMemory[2]->getPrimitive(); } auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->getPrimitive(); } std::shared_ptr RNN::getSrcMemDesc(const dnnl::primitive_desc& prim_desc, size_t idx) const { @@ -1142,28 +1142,28 @@ void RNN::execute(dnnl::stream strm) { auto args = primArgs; - args[DNNL_ARG_SRC_LAYER] = src_data_mem->GetPrimitive(); - args[DNNL_ARG_DST_LAYER] = dst_data_mem->GetPrimitive(); + args[DNNL_ARG_SRC_LAYER] = src_data_mem->getPrimitive(); + args[DNNL_ARG_DST_LAYER] = dst_data_mem->getPrimitive(); int state_i_tags[] {DNNL_ARG_SRC_ITER, DNNL_ARG_SRC_ITER_C}; int state_o_tags[] {DNNL_ARG_DST_ITER, DNNL_ARG_DST_ITER_C}; for (size_t s = 0; s < S; s++) { - 
args[state_i_tags[s]] = getParentEdgeAt(s+1)->getMemoryPtr()->GetPrimitive(); + args[state_i_tags[s]] = getParentEdgeAt(s+1)->getMemoryPtr()->getPrimitive(); } if (is_augru) { const auto atten_port = is_cell ? 5 : 6; - args[DNNL_ARG_AUGRU_ATTENTION] = getParentEdgeAt(atten_port)->getMemoryPtr()->GetPrimitive(); + args[DNNL_ARG_AUGRU_ATTENTION] = getParentEdgeAt(atten_port)->getMemoryPtr()->getPrimitive(); } if (is_cell) { for (size_t s = 0; s < S; s++) { - args[state_o_tags[s]] = getChildEdgesAtPort(s)[0]->getMemoryPtr()->GetPrimitive(); + args[state_o_tags[s]] = getChildEdgesAtPort(s)[0]->getMemoryPtr()->getPrimitive(); } } else { size_t n_ports_with_init_states = outputShapes.size() - 1; // first is a sequence data for (size_t s = 0; s < std::min(S, n_ports_with_init_states); s++) { if (s < outputShapes.size()) { - args[state_o_tags[s]] = getChildEdgesAtPort(s+1)[0]->getMemoryPtr()->GetPrimitive(); + args[state_o_tags[s]] = getChildEdgesAtPort(s+1)[0]->getMemoryPtr()->getPrimitive(); } } } diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index 02431e43ec953a..0a53da5f9d011f 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -811,8 +811,8 @@ void ROIAlign::initSupportedPrimitiveDescriptors() { } void ROIAlign::createPrimitive() { - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate input memory"; if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -847,8 +847,8 @@ struct ROIAlign::ROIAlignExecute { } }; void ROIAlign::execute(dnnl::stream strm) { - auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType(); - auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType(); + auto inputPrec = 
getParentEdgeAt(0)->getMemory().getDataType(); + auto outputPrec = getChildEdgeAt(0)->getMemory().getDataType(); if (!((inputPrec == dnnl_bf16 && outputPrec == dnnl_bf16) || (inputPrec == dnnl_f32 && outputPrec == dnnl_f32))) IE_THROW() <<"ROIAlign doesn't support demanded precisions"; @@ -868,15 +868,15 @@ void ROIAlign::executeSpecified() { auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); auto &dstMemory = getChildEdgeAt(0)->getMemory(); - auto srcBlockDesc = srcMemory0.GetDescWithType(); - auto dstBlockDesc = dstMemory.GetDescWithType(); + auto srcBlockDesc = srcMemory0.getDescWithType(); + auto dstBlockDesc = dstMemory.getDescWithType(); auto isPlainFmt = srcBlockDesc->hasLayoutType(LayoutType::ncsp); - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *srcRoi = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - const auto *srcRoiIdx = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + const auto *srcRoi = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); + const auto *srcRoiIdx = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->getData()); + auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); auto nominalRoiCount = static_cast(srcMemory1.getStaticDims()[0]); int realRois = 0; diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index a91123fb2567c6..091cc56f0da46b 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -548,18 +548,18 @@ class ROIPooling::ROIPoolingJitExecutor : public ROIPooling::ROIPoolingExecutor } void exec( - const Memory& srcData, - const Memory& srcRoi, - const Memory& dst) override { + const IMemory& srcData, + const IMemory& 
srcRoi, + const IMemory& dst) override { if (!roi_pooling_kernel) IE_THROW() << "Could not execute. Kernel for RoiPooling node was not compiled."; - auto src_strides = srcData.GetDescWithType()->getStrides(); - auto src_roi_step = srcRoi.GetDescWithType()->getStrides()[0]; - auto dst_strides = dst.GetDescWithType()->getStrides(); - const auto* src_ptr = reinterpret_cast(srcData.GetPtr()); - const auto* roi_ptr = reinterpret_cast(srcRoi.GetPtr()); - auto* dst_ptr = reinterpret_cast(dst.GetPtr()); + auto src_strides = srcData.getDescWithType()->getStrides(); + auto src_roi_step = srcRoi.getDescWithType()->getStrides()[0]; + auto dst_strides = dst.getDescWithType()->getStrides(); + const auto* src_ptr = reinterpret_cast(srcData.getData()); + const auto* roi_ptr = reinterpret_cast(srcRoi.getData()); + auto* dst_ptr = reinterpret_cast(dst.getData()); executeOptimizedGeneric(src_ptr, roi_ptr, dst_ptr, src_strides, dst_strides, src_roi_step); } @@ -671,15 +671,15 @@ class ROIPooling::ROIPoolingRefExecutor : public ROIPooling::ROIPoolingExecutor public: ROIPoolingRefExecutor(const jit_roi_pooling_params &_jpp) : jpp(_jpp) {} void exec( - const Memory& srcData, - const Memory& srcRoi, - const Memory& dst) override { - auto src_strides = srcData.GetDescWithType()->getStrides(); - auto src_roi_step = srcRoi.GetDescWithType()->getStrides()[0]; - auto dst_strides = dst.GetDescWithType()->getStrides(); - const auto* src_ptr = reinterpret_cast(srcData.GetPtr()); - const auto* roi_ptr = reinterpret_cast(srcRoi.GetPtr()); - auto* dst_ptr = reinterpret_cast(dst.GetPtr()); + const IMemory& srcData, + const IMemory& srcRoi, + const IMemory& dst) override { + auto src_strides = srcData.getDescWithType()->getStrides(); + auto src_roi_step = srcRoi.getDescWithType()->getStrides()[0]; + auto dst_strides = dst.getDescWithType()->getStrides(); + const auto* src_ptr = reinterpret_cast(srcData.getData()); + const auto* roi_ptr = reinterpret_cast(srcRoi.getData()); + auto* dst_ptr = 
reinterpret_cast(dst.getData()); executeReference(src_ptr, roi_ptr, dst_ptr, src_strides, dst_strides, src_roi_step); } diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.h b/src/plugins/intel_cpu/src/nodes/roi_pooling.h index 5135bdc3db929f..ee3d3b9852ddde 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.h +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.h @@ -93,9 +93,9 @@ class ROIPooling : public Node { public: ROIPoolingExecutor() = default; virtual void exec( - const ov::intel_cpu::Memory& srcData, - const ov::intel_cpu::Memory& srcRoi, - const ov::intel_cpu::Memory& dst) = 0; + const ov::intel_cpu::IMemory& srcData, + const ov::intel_cpu::IMemory& srcRoi, + const ov::intel_cpu::IMemory& dst) = 0; virtual ~ROIPoolingExecutor() = default; static std::shared_ptr createROIPoolingNewExecutor(const jit_roi_pooling_params& jpp); diff --git a/src/plugins/intel_cpu/src/nodes/roll.cpp b/src/plugins/intel_cpu/src/nodes/roll.cpp index f06e4c84d48cf0..3be87a6cbb23a3 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.cpp +++ b/src/plugins/intel_cpu/src/nodes/roll.cpp @@ -177,11 +177,11 @@ Roll::RollExecutor::RollExecutor(const VectorDims& dataDims, const VectorDims& s template void Roll::RollExecutor::exec(const MemoryPtr& dataMemPtr, const MemoryPtr& shiftMemPtr, const MemoryPtr& axesMemPtr, - MemoryPtr& dstMemPtr) { - const auto *data = reinterpret_cast(dataMemPtr->GetPtr()); - const auto *shift = reinterpret_cast(shiftMemPtr->GetPtr()); - const auto *axes = reinterpret_cast(axesMemPtr->GetPtr()); - auto *dst = reinterpret_cast(dstMemPtr->GetPtr()); + const MemoryPtr& dstMemPtr) { + const auto *data = reinterpret_cast(dataMemPtr->getData()); + const auto *shift = reinterpret_cast(shiftMemPtr->getData()); + const auto *axes = reinterpret_cast(axesMemPtr->getData()); + auto *dst = reinterpret_cast(dstMemPtr->getData()); std::vector shiftsVector(numOfDims, 0ul); const VectorDims& dataDims = dataMemPtr->getStaticDims(); @@ -197,7 +197,7 @@ void 
Roll::RollExecutor::exec(const MemoryPtr& dataMemPtr, const MemoryPtr& shif const size_t rightBlockSize = blockSize - leftBlockSize; const size_t elementSize = sizeof(T); - const auto strides = dataMemPtr->GetDescWithType()->getStrides(); + const auto strides = dataMemPtr->getDescWithType()->getStrides(); const auto calculateShiftOffset = [](size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize){ size_t pos = dataOffset / segmentSize % dimSize; size_t shift = (pos + dimShift) % dimSize - pos; diff --git a/src/plugins/intel_cpu/src/nodes/roll.h b/src/plugins/intel_cpu/src/nodes/roll.h index 9e6fd6d508e426..dcaf32c144eecd 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.h +++ b/src/plugins/intel_cpu/src/nodes/roll.h @@ -34,7 +34,7 @@ class Roll : public Node { template void exec(const MemoryPtr& dataMemPtr, const MemoryPtr& shiftMemPtr, const MemoryPtr& axesMemPtr, - MemoryPtr& dstMemPtr); + const MemoryPtr& dstMemPtr); private: const size_t numOfDims; diff --git a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp index 7340bec357d296..58b21651defc6d 100644 --- a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp +++ b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp @@ -254,23 +254,23 @@ static std::vector getBlockND(const VectorDims& shape) { } void ScatterUpdate::execute(dnnl::stream strm) { - auto &srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &indicesMemPtr = getParentEdgeAt(INDICES_ID)->getMemoryPtr(); - auto &updateMemPtr = getParentEdgeAt(UPDATE_ID)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto indicesMemPtr = getParentEdgeAt(INDICES_ID)->getMemoryPtr(); + auto updateMemPtr = getParentEdgeAt(UPDATE_ID)->getMemoryPtr(); - uint8_t *dstPtr = reinterpret_cast(dstMemPtr->GetPtr()); - uint8_t *srcPtr = 
reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t *indicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); - uint8_t *updatePtr = reinterpret_cast(updateMemPtr->GetPtr()); + uint8_t *dstPtr = reinterpret_cast(dstMemPtr->getData()); + uint8_t *srcPtr = reinterpret_cast(srcMemPtr->getData()); + uint8_t *indicesPtr = reinterpret_cast(indicesMemPtr->getData()); + uint8_t *updatePtr = reinterpret_cast(updateMemPtr->getData()); const auto& srcDataDim = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims(); const auto& indicesDim = getParentEdgeAt(INDICES_ID)->getMemory().getStaticDims(); size_t srcRank = srcDataDim.size(); int axis = 0; if (axisRelaxed) { - auto &axisMemPtr = getParentEdgeAt(AXIS_ID)->getMemoryPtr(); - uint8_t *axisPtr = reinterpret_cast(axisMemPtr->GetPtr()); + auto axisMemPtr = getParentEdgeAt(AXIS_ID)->getMemoryPtr(); + uint8_t *axisPtr = reinterpret_cast(axisMemPtr->getData()); if (axisSize == 4) { auto *axisPtr32 = reinterpret_cast(axisPtr); axis = *axisPtr32; diff --git a/src/plugins/intel_cpu/src/nodes/shapeof.cpp b/src/plugins/intel_cpu/src/nodes/shapeof.cpp index 6c4d76b41c7d74..e3b9a8bcc81640 100644 --- a/src/plugins/intel_cpu/src/nodes/shapeof.cpp +++ b/src/plugins/intel_cpu/src/nodes/shapeof.cpp @@ -100,7 +100,7 @@ void ShapeOf::execute(dnnl::stream strm) { if (outPtr->getStaticDims().size() != 1 || dimsCount != outPtr->getStaticDims()[0]) IE_THROW() << errorPrefix << "has inconsistent input shape and output size"; - auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); for (size_t i = 0; i < dimsCount; i++) { dst[i] = inDims[i]; diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp index ac02f1c8175321..84a74766875f63 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp @@ -125,8 +125,8 @@ void 
ShuffleChannels::initSupportedPrimitiveDescriptors() { } void ShuffleChannels::createPrimitive() { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) THROW_SHCH_ERROR << "has not allocated destination memory"; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -149,12 +149,12 @@ void ShuffleChannels::createPrimitive() { } void ShuffleChannels::prepareParams() { - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); auto builder = [](const ShuffleChannelsAttributes& key) -> std::shared_ptr { return std::make_shared(key); }; attrs.srcDims = srcMemPtr->getStaticDims(); - attrs.srcBlockedDims = srcMemPtr->GetDescWithType()->getBlockDims(); + attrs.srcBlockedDims = srcMemPtr->getDescWithType()->getBlockDims(); auto cache = context->getParamsCache(); auto result = cache->getOrCreate(attrs, builder); @@ -294,8 +294,8 @@ void ShuffleChannels::execute(dnnl::stream strm) { int MB = (attrs.axis != 0) ? 
getParentEdgeAt(0)->getMemoryPtr()->getStaticDims()[0] : -1; - const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); execPtr->exec(srcData, dstData, MB); } diff --git a/src/plugins/intel_cpu/src/nodes/softmax.cpp b/src/plugins/intel_cpu/src/nodes/softmax.cpp index 595b6182752420..6d9768492c0973 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/softmax.cpp @@ -163,7 +163,7 @@ void SoftMax::createDescriptor(const std::vector &inputDesc, } void SoftMax::prepareParams() { - auto inpDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto inpDesc = getParentEdgeAt(0)->getMemory().getDescWithType(); const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) @@ -217,9 +217,9 @@ void SoftMax::prepareParams() { auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); - primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->getPrimitive(); + primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->getPrimitive(); + primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->getPrimitive(); #ifdef CPU_DEBUG_CAPS if (result.second == CacheEntryBase::LookUpStatus::Miss) { auto pd = execPtr->getPrimitiveDesc(); diff --git a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp index 116d7ae61f6742..bd49e49557e6bd 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp @@ -101,24 +101,24 @@ void SpaceToBatch::SpaceToBatchKernel() { const auto& srcMem = getParentEdgesAtPort(0)[0]->getMemoryPtr(); const auto& dstMem = getChildEdgesAtPort(0)[0]->getMemoryPtr(); - const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - size_t dataRank = srcMem->GetShape().getRank(); + const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->getData()); + size_t dataRank = srcMem->getShape().getRank(); blockShapeIn.clear(); for (size_t i = 0; i < dataRank; i++) { blockShapeIn.push_back(*(blockShapesPtr + i)); } - const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); + const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->getData()); padsBeginIn.clear(); for (size_t i = 0; i < dataRank; i++) { padsBeginIn.push_back(*(padsBeginPtr + i)); } - const auto *srcData = reinterpret_cast(srcMem->GetPtr()); - auto *dstData = reinterpret_cast(dstMem->GetPtr()); + const auto *srcData = reinterpret_cast(srcMem->getData()); + auto *dstData = reinterpret_cast(dstMem->getData()); - const int64_t srcLen = srcMem->GetSize() / sizeof(T); - const int64_t dstLen = dstMem->GetSize() / sizeof(T); + const int64_t srcLen = srcMem->getSize() / sizeof(T); + const int64_t dstLen = dstMem->getSize() / sizeof(T); const auto &inDims = srcMem->getStaticDims(); const auto &outDims = dstMem->getStaticDims(); @@ -140,10 +140,10 @@ void SpaceToBatch::SpaceToBatchKernel() { blockShape.erase(blockShape.begin() + 1); } - const auto outBlkDims = dstMem->GetDescWithType()->getBlockDims(); + const auto outBlkDims = dstMem->getDescWithType()->getBlockDims(); const int64_t blockSize = blocked ? 
outBlkDims.back() : 1lu; const int64_t blockCountInput = outBlkDims[1]; - const int64_t blockCountOutput = srcMem->GetDescWithType()->getBlockDims()[1]; + const int64_t blockCountOutput = srcMem->getDescWithType()->getBlockDims()[1]; const int64_t blockRemainder = inShape5D[1] % blockSize; const int64_t lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -153,7 +153,7 @@ void SpaceToBatch::SpaceToBatchKernel() { const int64_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4]; const int64_t outBatchStep = (blocked ? blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep; - memset(dstData, 0, dstMem->GetSize()); + memset(dstData, 0, dstMem->getSize()); int64_t channels = (inShape5D[1] / blockSize); channels = channels == 0 ? 1 : channels; diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp index af22ab4b510e1c..ad6bc79c5f6a25 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp @@ -164,8 +164,8 @@ void SpaceToDepth::initSupportedPrimitiveDescriptors() { } void SpaceToDepth::createPrimitive() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) THROW_ERROR << "has not allocated destination memory"; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -190,9 +190,9 @@ void SpaceToDepth::createPrimitive() { void SpaceToDepth::prepareParams() { attrs.srcBlockedDims = - getParentEdgeAt(0)->getMemoryPtr()->GetDescWithType()->getBlockDims(); + getParentEdgeAt(0)->getMemoryPtr()->getDescWithType()->getBlockDims(); attrs.destBlockedDims = - getChildEdgeAt(0)->getMemoryPtr()->GetDescWithType()->getBlockDims(); + getChildEdgeAt(0)->getMemoryPtr()->getDescWithType()->getBlockDims(); auto builder = [](const 
SpaceToDepthAttrs& key) -> std::shared_ptr { return std::make_shared(key); }; @@ -313,8 +313,8 @@ void SpaceToDepth::execute(dnnl::stream strm) { if (!execPtr) { THROW_ERROR << "doesn't have a compiled executor."; } - const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->getData()); const int MB = getParentEdgeAt(0)->getMemoryPtr()->getStaticDims()[0]; execPtr->exec(srcData, dstData, MB); } diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index fe4cb99b753523..710abbfffba80f 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -12,6 +12,7 @@ #include "utils/general_utils.h" #include #include "utils/ngraph_utils.hpp" +#include #define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' " @@ -99,12 +100,11 @@ void Split::initSupportedPrimitiveDescriptors() { InferenceEngine::Precision inpPrecision = getOriginalInputPrecisionAtPort(0); const auto axisPrecision = Precision::I32; - auto outPrecision = inpPrecision; // the split layer doesn't convert precisions // Set plain and tailC formats std::vector tdCreatorTypes{ LayoutType::ncsp, LayoutType::nspc }; - // Support channel blocked format + // Support channel blocked format only if we manipulate complete blocks if (srcShape.getRank() > 2) { for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) { const auto &blkDims = srcShape.getDims(); @@ -163,43 +163,16 @@ void Split::initSupportedPrimitiveDescriptors() { } } - // Optimized inplace case - // TODO [DS]: inplace - if (!isDynamicNode()) { + // in place only makes sense when we split by dense blocks since strided tensors 
are not supported by most nodes. + const auto& parentdDims = inputShapes[0].getDims(); + if (parentdDims[axis] != Shape::UNDEFINED_DIM && + std::all_of(parentdDims.begin(), parentdDims.begin() + axis, [](size_t dim) { return dim == 1; }) && + std::all_of(outputShapes.begin(), outputShapes.end(), [=](const Shape& shape){ return shape.getDims()[axis] != Shape::UNDEFINED_DIM; })) { for (auto refPdIndex : pdIndexesToReuse) { - const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); - auto config = refConfig; - const auto inBlockingDesc = refConfig.inConfs[0].getMemDesc()->as(); - const auto& order = inBlockingDesc->getOrder(); - const auto& blkDims = inBlockingDesc->getBlockDims(); - auto numOfDim = blkDims.size(); - - SizeVector offsets(numOfDim, 0lu); - SizeVector strides(numOfDim); - strides.back() = 1lu; - size_t offset = Shape::UNDEFINED_DIM; - BlockedMemoryDesc::CmpMask mask = BlockedMemoryDesc::SKIP_OFFSET_MASK; // accepts any offset - - for (size_t i = 2; i <= numOfDim; i++) { - if (numOfDim - i < axis) { - strides[numOfDim - i] = Shape::UNDEFINED_DIM; - mask.reset(numOfDim - i); // accepts any strides on axis - } else { - strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1]; - } - } - - config.inConfs[0].setMemDesc(std::dynamic_pointer_cast(refConfig.inConfs[0].getMemDesc()), mask); - - for (size_t i = 0; i < outputShapes.size(); i++) { - auto outBlockingDesc = refConfig.outConfs[i].getMemDesc()->as(); - const auto& outBlkDims = outBlockingDesc->getBlockDims(); - const auto& shape = outBlockingDesc->getShape(); - const auto& dims = shape.getStaticDims(); + auto config = supportedPrimitiveDescriptors[refPdIndex].getConfig(); + for (size_t i = 0; i < config.outConfs.size(); i++) { config.outConfs[i].inPlace(0); - config.outConfs[i].setMemDesc(std::make_shared(outPrecision, Shape(dims), outBlkDims, order, offset, offsets, - shape.hasZeroDims() ? 
SizeVector(numOfDim, 0) : strides), mask); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -240,7 +213,7 @@ bool Split::needShapeInfer() const { if (curLengthsSize != splitLengths.size()) { return true; } - const int* curLengthsValues = reinterpret_cast(lengthsMemPtr->GetPtr()); + const int* curLengthsValues = reinterpret_cast(lengthsMemPtr->getData()); for (size_t i = 0; i < curLengthsSize; ++i) { if (curLengthsValues[i] != splitLengths[i]) { return true; @@ -251,7 +224,7 @@ bool Split::needShapeInfer() const { } bool Split::needPrepareParams() const { - if (isOptimized()) { + if (isInPlace()) { return false; } return needShapeInfer(); @@ -265,7 +238,7 @@ void Split::prepareParams() { if (!constSplitLengths) { const auto& splitLengthsPtr = getParentEdgeAt(2)->getMemoryPtr(); - const int* curSplitLengths = reinterpret_cast(splitLengthsPtr->GetPtr()); + const int* curSplitLengths = reinterpret_cast(splitLengthsPtr->getData()); const auto curLengthsSize = splitLengthsPtr->getStaticDims()[0]; splitLengths.assign(curSplitLengths, curSplitLengths + curLengthsSize); } @@ -278,29 +251,29 @@ void Split::prepareParams() { THROW_ERROR << "has not allocated destination memory"; } - if (outMemPtr->GetShape().hasZeroDims()) { + if (outMemPtr->getShape().hasZeroDims()) { continue; } dstMemPtrs.emplace_back(port, outMemPtr); if (!canUseOptimizedNspc2Ncsp) { - outDescs.push_back(outMemPtr->GetDescWithType()); + outDescs.push_back(outMemPtr->getDescWithType()); } } if (!canUseOptimizedNspc2Ncsp) { - const auto inDesc = srcMemPtr->GetDescWithType(); + const auto inDesc = srcMemPtr->getDescWithType(); execPtr = std::make_shared(inDesc, outDescs, axis); } } bool Split::isExecutable() const { - return !isInputTensorAtPortEmpty(0) && !isOptimized(); + return !isInPlace() && !isInputTensorAtPortEmpty(0); } void Split::execute(dnnl::stream strm) { - if (isOptimized()) { + if (isInPlace()) { return; } @@ -314,7 +287,7 @@ void Split::execute(dnnl::stream 
strm) { return; } - uint8_t* srcData = reinterpret_cast(srcMem.GetPtr()); + uint8_t* srcData = reinterpret_cast(srcMem.getData()); IE_ASSERT(execPtr != nullptr); execPtr->exec(srcData, getRawDstMemPtrs()); } @@ -323,67 +296,13 @@ bool Split::created() const { return getType() == Type::Split; } -bool Split::isOptimized() const { - return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace() >= 0; -} - void Split::initOptimalPrimitiveDescriptor() { + Node::initOptimalPrimitiveDescriptor(); auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); - - if (!isOptimized()) { - Node::initOptimalPrimitiveDescriptor(); - } else if (!isDynamicNode() && !isConfigDefined(config)) { - for (size_t i = 0; i < config.inConfs.size(); i++) { - int num = getParentEdgeAt(i)->getInputNum(); - if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) { - if (num >= 0) { - const auto& parentConfig = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num]; - if (!parentConfig.getMemDesc()->isDefined() && parentConfig.inPlace() >= 0) - getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor(); - if (parentConfig.getMemDesc()->isDefined() && config.inConfs[i].getPortDesc()->isCompatible(*parentConfig.getPortDesc())) { - config.inConfs[i].setMemDesc(parentConfig.getMemDesc()); - continue; - } - } - } - - // reset mask - config.inConfs[i].setMemDesc(config.inConfs[i].getMemDesc()); - } - if (config.outConfs.size() != outputShapes.size()) - THROW_ERROR << "has invalid config"; - - auto firstInBlockingDesc = config.inConfs[0].getMemDesc()->as(); - size_t offset = 0; - for (size_t i = 0; i < outputShapes.size(); i++) { - auto oldDesc = config.outConfs[i].getMemDesc(); - auto outBlockingDesc = oldDesc->as(); - const auto& shape = outBlockingDesc->getShape(); - 
const auto& blkDims = outBlockingDesc->getBlockDims(); - config.outConfs[i].setMemDesc(std::make_shared( - outBlockingDesc->getPrecision(), - shape, - blkDims, - outBlockingDesc->getOrder(), - firstInBlockingDesc->getOffsetPadding() + offset, - firstInBlockingDesc->getOffsetPaddingToData(), - (shape.hasZeroDims() ? VectorDims(blkDims.size(), 0) : - firstInBlockingDesc->getStrides())), - BlockedMemoryDesc::FULL_MASK); - - size_t axisSize = 1; - for (size_t j = axis; j < outBlockingDesc->getBlockDims().size(); j++) { - axisSize *= outBlockingDesc->getBlockDims()[j]; - } - offset += axisSize; - } - initDescriptor(config); - } - config = selected_pd->getConfig(); + auto config = selected_pd->getConfig(); canUseOptimizedNspc2Ncsp = false; IE_ASSERT(config.inConfs.size() > 0); const auto inConfDesc = config.inConfs[0].getMemDesc(); @@ -494,7 +413,7 @@ void Split::selectOptimalPrimitiveDescriptor() { void Split::optimizedNspc2Ncsp(size_t MB) { auto parentEdge = getParentEdgeAt(0); - const int rank = parentEdge->getMemory().GetShape().getRank(); + const int rank = parentEdge->getMemory().getShape().getRank(); const auto parentDims = parentEdge->getMemory().getStaticDims(); const size_t IC = parentDims[1]; const size_t D = rank == 5 ? 
parentDims[rank - 3] : 1; @@ -502,7 +421,7 @@ void Split::optimizedNspc2Ncsp(size_t MB) { const size_t W = parentDims[rank - 1]; auto& srcMem = parentEdge->getMemory(); - auto srcData = reinterpret_cast(srcMem.GetData()); + auto srcData = reinterpret_cast(srcMem.getData()); const auto dataSize = srcMem.getDesc().getPrecision().size(); const size_t DHW = D*H*W; @@ -511,7 +430,7 @@ void Split::optimizedNspc2Ncsp(size_t MB) { const size_t strideOC = DHW * dataSize; for (size_t i = 0, sIdx = 0; i < dstMemPtrs.size(); i++) { - auto dstData = reinterpret_cast(dstMemPtrs[i].second->GetPtr()); + auto dstData = reinterpret_cast(dstMemPtrs[i].second->getData()); size_t innerSize = 1; auto dims = getChildEdgesAtPort(dstMemPtrs[i].first)[0]->getMemory().getStaticDims(); @@ -541,7 +460,7 @@ void Split::optimizedNspc2Ncsp(size_t MB) { std::vector Split::getRawDstMemPtrs() const { std::vector result(dstMemPtrs.size()); for (size_t i = 0; i < dstMemPtrs.size(); ++i) { - result[i] = reinterpret_cast(dstMemPtrs[i].second->GetPtr()); + result[i] = reinterpret_cast(dstMemPtrs[i].second->getData()); if (!result[i]) { THROW_ERROR << "can't get child edge indx " << dstMemPtrs[i].first << " data."; } @@ -604,6 +523,45 @@ void Split::SplitOptimizedExecutor::exec(const uint8_t* srcData, const std::vect }); } +void Split::resolveInPlaceEdges(Edge::LOOK look) { + if (!(look & Edge::LOOK_UP) || !isInPlace()) { + Node::resolveInPlaceEdges(look); + return; + } + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + auto& config = selected_pd->getConfig(); + size_t numberOfOutputs = config.outConfs.size(); + size_t inplaceInpIndx = selected_pd->getConfig().outConfs[0].inPlace(); + auto baseDim = inputShapes.front().getDims()[axis]; + IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << " Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimension"; + auto baseMemMngr = 
getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); + ptrdiff_t offset = 0; + for (size_t i = 0; i < numberOfOutputs; ++i) { + auto partDim = outputShapes[i].getDims()[axis]; + IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << " Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimension"; + const auto& childEdges = getChildEdgesAtPort(i); + for (auto& childEdge : childEdges) { + IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected edge status in node: " << + getName() << " with type " << getTypeStr(); + + auto memDesc = selected_pd->getConfig().outConfs[i].getMemDesc(); + MemoryPtr newMem; + if (partDim != 0) { + auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); + newMem = std::make_shared(getEngine(), memDesc, memMngr); + } else { + // empty tensor, no need to reference a part, default memory is enough + newMem = std::make_shared(getEngine(), memDesc); + } + + childEdge->reuse(newMem); + } + offset += partDim; + } +} + } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/split.h b/src/plugins/intel_cpu/src/nodes/split.h index 449413439c3f7e..5402d748832d7d 100644 --- a/src/plugins/intel_cpu/src/nodes/split.h +++ b/src/plugins/intel_cpu/src/nodes/split.h @@ -23,7 +23,6 @@ class Split : public Node { void execute(dnnl::stream strm) override; bool created() const override; - bool isOptimized() const; void initOptimalPrimitiveDescriptor() override; bool isExecutable() const override; @@ -32,6 +31,7 @@ class Split : public Node { bool needShapeInfer() const override; void prepareParams() override; void executeDynamicImpl(dnnl::stream strm) override { execute(strm); } + void resolveInPlaceEdges(Edge::LOOK look) override; private: struct SplitExecutor { diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp index 810db4247640a7..aa4dae10df7d86 100644 --- 
a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp +++ b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp @@ -63,9 +63,9 @@ class StridedSliceShapeInfer : public ShapeInferEmptyPads { data_dependency.at(STRIDE_ID)->getDesc().getPrecision() != Precision::I32) { IE_THROW(Unexpected) << "The data type of begin/end/stride is NOT I32, which is unexpected!"; } - auto beginPtr = reinterpret_cast(data_dependency.at(BEGIN_ID)->GetPtr()); - auto endPtr = reinterpret_cast(data_dependency.at(END_ID)->GetPtr()); - auto stridePtr = reinterpret_cast(data_dependency.at(STRIDE_ID)->GetPtr()); + auto beginPtr = reinterpret_cast(data_dependency.at(BEGIN_ID)->getData()); + auto endPtr = reinterpret_cast(data_dependency.at(END_ID)->getData()); + auto stridePtr = reinterpret_cast(data_dependency.at(STRIDE_ID)->getData()); for (size_t i = 0, new_idx = 0; i < shapeIn.size(); ++i) { if (m_new_axis_mask_set.count(i)) { @@ -492,20 +492,20 @@ void StridedSlice::StridedSliceCommonExecutor::orderParametersByLayouts(const Bl void StridedSlice::StridedSliceCommonExecutor::paramsInitialization(const StridedSliceAttributes& attrs, const std::vector& srcMemory, const std::vector& dstMemory) { - const auto srcBlockedMemoryDesc = srcMemory[0]->GetDescWithType(); - const auto dstBlockedMemoryDesc = dstMemory[0]->GetDescWithType(); + const auto srcBlockedMemoryDesc = srcMemory[0]->getDescWithType(); + const auto dstBlockedMemoryDesc = dstMemory[0]->getDescWithType(); params.attrs = attrs; params.srcBlockedDims = srcBlockedMemoryDesc->getBlockDims(); params.srcOrder = srcBlockedMemoryDesc->getOrder(); params.dstBlockedDims = dstBlockedMemoryDesc->getBlockDims(); - const size_t inputRank = srcMemory[0]->GetShape().getRank(); - const size_t outputRank = dstMemory[0]->GetShape().getRank(); + const size_t inputRank = srcMemory[0]->getShape().getRank(); + const size_t outputRank = dstMemory[0]->getShape().getRank(); const size_t nDims = std::max(inputRank, outputRank); auto fillingInParameters = 
[&](std::vector ¶meter, const size_t type, const size_t size, const int value) { - const int *ptr = reinterpret_cast(srcMemory[type]->GetPtr()); + const int *ptr = reinterpret_cast(srcMemory[type]->getData()); parameter.assign(ptr, ptr + size); if (type != AXES_ID && params.attrs.ellipsisMaskCounter == 0 && size < nDims) { @@ -513,8 +513,8 @@ void StridedSlice::StridedSliceCommonExecutor::paramsInitialization(const Stride } }; - params.attrs.beginDims = srcMemory[BEGIN_ID]->GetShape().getStaticDims(); - params.attrs.endDims = srcMemory[END_ID]->GetShape().getStaticDims(); + params.attrs.beginDims = srcMemory[BEGIN_ID]->getShape().getStaticDims(); + params.attrs.endDims = srcMemory[END_ID]->getShape().getStaticDims(); if (params.attrs.beginDims.size() != 1) IE_THROW() << errorPrefix << "should have begin vector with 1 dimension"; if (params.attrs.endDims.size() != 1) @@ -528,7 +528,7 @@ void StridedSlice::StridedSliceCommonExecutor::paramsInitialization(const Stride fillingInParameters(params.attrs.end, END_ID, params.attrs.endDims[0], 0); if (srcMemory.size() > STRIDE_ID) { - params.attrs.strideDims = srcMemory[STRIDE_ID]->GetShape().getStaticDims(); + params.attrs.strideDims = srcMemory[STRIDE_ID]->getShape().getStaticDims(); if (params.attrs.strideDims.size() > 1) IE_THROW() << errorPrefix << "should have stride vector with 1 dimension"; if (params.attrs.beginDims[0] != params.attrs.strideDims[0]) @@ -539,7 +539,7 @@ void StridedSlice::StridedSliceCommonExecutor::paramsInitialization(const Stride } if (srcMemory.size() > AXES_ID) { - params.attrs.axesDims = srcMemory[AXES_ID]->GetShape().getStaticDims(); + params.attrs.axesDims = srcMemory[AXES_ID]->getShape().getStaticDims(); if (params.attrs.axesDims.size() != 1) IE_THROW() << errorPrefix << "should have axes vector with 1 dimension."; if (params.attrs.beginDims[0] != params.attrs.axesDims[0]) @@ -840,8 +840,8 @@ void StridedSlice::StridedSliceCommonExecutor::indicesCalculationForOptimized() } void 
StridedSlice::StridedSliceCommonExecutor::exec(const std::vector& srcMemory, const std::vector& dstMemory) { - const uint8_t* srcData = reinterpret_cast(srcMemory[0]->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemory[0]->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemory[0]->getData()); + uint8_t* dstData = reinterpret_cast(dstMemory[0]->getData()); const uint8_t* srcShiftedData = srcData + srcShift; parallel_nt(nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index 8aebba4d45ea37..c6e8f4c03161d2 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -244,7 +244,7 @@ InferenceEngine::Precision Snippet::getRuntimePrecision() const { for (size_t i = 0; i < getParentEdges().size(); i++) { auto parentEdge = getParentEdgeAt(i); if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated && !parentEdge->getParent()->isConstant()) { - inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->getDataType()))); } } @@ -320,7 +320,7 @@ bool Snippet::optimizeExecDomain(std::vector& inputShapes, std::vect } ov::PartialShape Snippet::canonicalizeBody() { auto edgeToBlockedShape = [](const EdgePtr& edge) { - const auto blockedDesc = edge->getMemory().GetDescWithType(); + const auto blockedDesc = edge->getMemory().getDescWithType(); std::vector dims; // if blockDim == Shape::UNDEFINED_DIM, then it's a dynamic dimension, and we need to recreate a proper dynamic Dim for (const auto& d : blockedDesc->getBlockDims()) @@ -412,7 +412,7 @@ std::vector Snippet::shapeInfer() { return success; }; for (size_t i = 0; i < getParentEdges().size(); i++) { - VectorDims inDims 
{getParentEdgesAtPort(i)[0]->getMemory().GetShape().getDims()}; + VectorDims inDims {getParentEdgesAtPort(i)[0]->getMemory().getShape().getDims()}; if (masterShapeIsBlocked && !inputShapeIsBlocked[i]) inDims.insert(inDims.end(), 1); // todo: this is a simple master_shape inference for shape-agnostic operations, @@ -428,7 +428,7 @@ std::vector Snippet::shapeInfer() { errorMessage << "Can't compute static master shape for Snippet node with name: " << getName(); errorMessage << ". Input shapes = ( "; for (size_t i = 0; i < getParentEdges().size(); i++) { - errorMessage << i << " port = " << getParentEdgesAtPort(i)[0]->getMemory().GetShape().toString() << ", "; + errorMessage << i << " port = " << getParentEdgesAtPort(i)[0]->getMemory().getShape().toString() << ", "; } errorMessage << "). Master shape = ( " << Shape(masterShape).toString() << " )"; IE_THROW() << errorMessage.str(); @@ -476,7 +476,7 @@ void Snippet::prepareParams() { for (size_t i = 0; i < numInputs; i++) { const auto memPtr = getParentEdgeAt(i)->getMemoryPtr(); srcMemPtrs[i] = memPtr; - start_offset_in[i] = memPtr->GetDescWithType()->getOffsetPadding() * dataSize[i]; + start_offset_in[i] = memPtr->getDescWithType()->getOffsetPadding() * dataSize[i]; } const size_t numOutputs = outputShapes.size(); start_offset_out.resize(numOutputs); @@ -484,7 +484,7 @@ void Snippet::prepareParams() { for (size_t i = 0; i < numOutputs; i++) { const auto memPtr = getChildEdgeAt(i)->getMemoryPtr(); dstMemPtrs[i] = memPtr; - start_offset_out[i] = memPtr->GetDescWithType()->getOffsetPadding() * dataSize[i + numInputs]; + start_offset_out[i] = memPtr->getDescWithType()->getOffsetPadding() * dataSize[i + numInputs]; } }; // initialize start offsets to src and dst memory @@ -584,10 +584,10 @@ void Snippet::generate(const jit_snippets_compile_args* jcp) { void Snippet::update_ptrs(jit_snippets_call_args& call_args) { for (size_t i = 0; i < srcMemPtrs.size(); i++) - call_args.src_ptrs[i] = 
reinterpret_cast(srcMemPtrs[i]->GetData()) + start_offset_in[i]; + call_args.src_ptrs[i] = reinterpret_cast(srcMemPtrs[i]->getData()) + start_offset_in[i]; for (size_t i = 0; i < dstMemPtrs.size(); i++) - call_args.dst_ptrs[i] = reinterpret_cast(dstMemPtrs[i]->GetData()) + start_offset_out[i]; + call_args.dst_ptrs[i] = reinterpret_cast(dstMemPtrs[i]->getData()) + start_offset_out[i]; if (buffer_scratchpad_size > 0) { call_args.buffer_scratchpad_ptr = diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index b3af29c3c01255..be139aaf8d0c75 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -89,8 +89,8 @@ class PortIteratorHelper : public PortMapHelper { auto axis = slice_rule.axis; auto stride = slice_rule.stride; - auto full_dims = full_blob->GetShape().getStaticDims(); - auto part_dims = part_blob->GetShape().getStaticDims(); + auto full_dims = full_blob->getShape().getStaticDims(); + auto part_dims = part_blob->getShape().getStaticDims(); auto abs_stride = std::abs(stride); auto sign_of_stride = stride < 0.0f ? 
-1 : 1; @@ -101,11 +101,11 @@ class PortIteratorHelper : public PortMapHelper { IE_ASSERT(full_dims == part_dims) << "Shape mismatch for tensor iterator port"; // make chunk view - auto chunk_desc = full_blob->GetDescWithType()->getDnnlDesc(); + auto chunk_desc = full_blob->getDescWithType()->getDnnlDesc(); chunk_desc.get()->dims[axis] = abs_stride; chunk_desc.get()->padded_dims[axis] = abs_stride; // TODO: asamption that plain tensor - full_mem = full_blob->GetPrimitive(); + full_mem = full_blob->getPrimitive(); const auto full_mem_handler = full_mem.get_data_handle(); dnnl::memory chunk_mem = {chunk_desc, eng, full_mem_handler}; @@ -117,9 +117,9 @@ class PortIteratorHelper : public PortMapHelper { if (sliced_src) { mem_holder_src = chunk_mem; - mem_holder_dst = to->GetPrimitive(); + mem_holder_dst = to->getPrimitive(); } else { - mem_holder_src = from->GetPrimitive(); + mem_holder_src = from->getPrimitive(); mem_holder_dst = chunk_mem; } reorder = getReorderPrim(cache, mem_holder_dst.get_engine(), mem_holder_src.get_desc(), mem_holder_dst.get_desc()); @@ -148,8 +148,8 @@ class PortIteratorHelper : public PortMapHelper { class BackEdgePortHelper : public PortMapHelper { public: BackEdgePortHelper(MultiCachePtr cache, const MemoryPtr &from, const MemoryPtr &to, const dnnl::engine& eng) { - mem_holder_src = from->GetPrimitive(); - mem_holder_dst = to->GetPrimitive(); + mem_holder_src = from->getPrimitive(); + mem_holder_dst = to->getPrimitive(); reorder = getReorderPrim(cache, mem_holder_dst.get_engine(), mem_holder_src.get_desc(), mem_holder_dst.get_desc()); } @@ -164,9 +164,9 @@ class IterCountPortHelper : public PortMapHelper { public: IterCountPortHelper(const MemoryPtr &to, const dnnl::engine& eng) { // Only scalar I32 tensor is supported - IE_ASSERT(to->GetDataType() == memory::data_type::s32); - IE_ASSERT(to->GetShape() == Shape(VectorDims{1})); - mem_holder_dst = to->GetPrimitive(); + IE_ASSERT(to->getDataType() == memory::data_type::s32); + 
IE_ASSERT(to->getShape() == Shape(VectorDims{1})); + mem_holder_dst = to->getPrimitive(); } void execute(dnnl::stream strm, int n_iter) override { @@ -182,9 +182,9 @@ class IterCountPortHelper : public PortMapHelper { class asBoolCheck : public PortChecker { public: asBoolCheck(const MemoryPtr &mem) { - IE_ASSERT(mem->GetDataType() == memory::data_type::u8); - IE_ASSERT(mem->GetShape() == Shape(InferenceEngine::SizeVector{1})); - mem_holder = mem->GetPrimitive(); + IE_ASSERT(mem->getDataType() == memory::data_type::u8); + IE_ASSERT(mem->getShape() == Shape(InferenceEngine::SizeVector{1})); + mem_holder = mem->getPrimitive(); } int getStatus() override { @@ -199,9 +199,9 @@ class asBoolCheck : public PortChecker { class asIntCheck : public PortChecker { public: asIntCheck(const MemoryPtr &mem) { - IE_ASSERT(mem->GetDataType() == memory::data_type::s32); - IE_ASSERT(mem->GetShape() == Shape(InferenceEngine::SizeVector{1})); - mem_holder = mem->GetPrimitive(); + IE_ASSERT(mem->getDataType() == memory::data_type::s32); + IE_ASSERT(mem->getShape() == Shape(InferenceEngine::SizeVector{1})); + mem_holder = mem->getPrimitive(); } int getStatus() override { @@ -226,7 +226,7 @@ class staticValueCheck : public PortChecker { DynamicBuffer::DynamicBuffer(const MemoryPtr &from_, const std::vector &to_, const PortMap &map_rule_) : from(from_), to(to_), map_rule(map_rule_) { - elem_size = DnnlExtensionUtils::sizeOfDataType(from->GetDataType()); + elem_size = DnnlExtensionUtils::sizeOfDataType(from->getDataType()); } void DynamicBuffer::execute(const dnnl::engine& eng, const int iter) { @@ -256,7 +256,7 @@ void DynamicBuffer::init(const dnnl::engine& eng) { const auto abs_stride = std::abs(stride); // We have no idea of "from" node memory dims until the sub_graph has been executed. 
- const auto& src_mem = from->GetPrimitive(); + const auto& src_mem = from->getPrimitive(); const auto& src_desc = src_mem.get_desc(); const auto& dims = src_desc.get_dims(); count = std::accumulate(dims.begin(), dims.begin() + map_rule.axis, size_t(1), std::multiplies()); @@ -269,7 +269,7 @@ void DynamicBuffer::init(const dnnl::engine& eng) { } // reset chunk_offset_in_byte since the first execution - chunk_stride_in_byte = mem_holder_buffer->GetSize() / count; + chunk_stride_in_byte = mem_holder_buffer->getSize() / count; chunk_offset_in_byte = stride > 0 ? 0 : (chunk_stride_in_byte - chunk_unit_in_byte); num_execs = 0; } @@ -297,8 +297,7 @@ MemoryPtr DynamicBuffer::create_buffer(const dnnl::engine& eng) { auto _descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); auto new_buffer_desc = _descCreator->createSharedDesc(from->getDesc().getPrecision(), _shape); - auto _ptr = std::make_shared(eng); - _ptr->Create(*new_buffer_desc); + auto _ptr = std::make_shared(eng, new_buffer_desc); return _ptr; } @@ -313,12 +312,13 @@ void DynamicBuffer::move_buffer(const MemoryPtr& new_buffer) { const auto src_offset_in_byte = stride > 0 ? 0 : (src_stride - valid_size); chunk_offset_in_byte = stride > 0 ? 
0 : (dst_stride - valid_size); // reset chunk_offset_in_byte - copy(reinterpret_cast(mem_holder_buffer->GetPtr()) + src_offset_in_byte, reinterpret_cast(new_buffer->GetPtr()) + chunk_offset_in_byte, + copy(reinterpret_cast(mem_holder_buffer->getData()) + src_offset_in_byte, + reinterpret_cast(new_buffer->getData()) + chunk_offset_in_byte, src_stride, dst_stride, count, valid_size); // assign mem_holder_buffer mem_holder_buffer = new_buffer; - chunk_stride_in_byte = mem_holder_buffer->GetSize() / count; + chunk_stride_in_byte = mem_holder_buffer->getSize() / count; // adjust for next execution if (stride > 0) { @@ -332,7 +332,7 @@ void DynamicBuffer::move_data() { const auto src_stride = abs(map_rule.stride) * len; const auto dst_stride = chunk_stride_in_byte; - copy(reinterpret_cast(from->GetPtr()), reinterpret_cast(mem_holder_buffer->GetPtr()) + chunk_offset_in_byte, + copy(reinterpret_cast(from->getData()), reinterpret_cast(mem_holder_buffer->getData()) + chunk_offset_in_byte, src_stride, dst_stride, count, chunk_unit_in_byte); // adjust for next execution @@ -350,7 +350,7 @@ void DynamicBuffer::transfer(const Node* node) { const auto stride = map_rule.stride; const auto abs_stride = std::abs(stride); - const auto& src_mem = from->GetPrimitive(); + const auto& src_mem = from->getPrimitive(); const auto& src_desc = src_mem.get_desc(); auto dims = src_desc.get_dims(); dims[axis] = abs_stride * num_execs; @@ -363,10 +363,10 @@ void DynamicBuffer::transfer(const Node* node) { const auto dst_stride = to.front()->getStaticDims()[axis] * len; const auto valid_size = chunk_unit_in_byte * num_execs; const auto src_offset_in_byte = stride > 0 ? 
0 : (src_stride - valid_size); - copy(reinterpret_cast(mem_holder_buffer->GetPtr()) + src_offset_in_byte, reinterpret_cast(to.front()->GetPtr()), + copy(reinterpret_cast(mem_holder_buffer->getData()) + src_offset_in_byte, reinterpret_cast(to.front()->getData()), src_stride, dst_stride, count, dst_stride); } else { - VectorDims newDims = to.front()->GetShape().getDims(); + VectorDims newDims = to.front()->getShape().getDims(); nullifyUndefinedDims(newDims); const auto desc = node->getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(newDims); @@ -521,8 +521,8 @@ void TensorIterator::createPrimitive() { bool TensorIterator::needPrepareParams() const { if (getAlgorithm() == Algorithm::TensorIteratorLoop) { - const auto tripCountPtr = reinterpret_cast(getParentEdgesAtPort(loopTripCountIdx).front()->getMemoryPtr()->GetPtr()); - const auto condPtr = reinterpret_cast(getParentEdgesAtPort(loopExecutionConditionIdx).front()->getMemoryPtr()->GetPtr()); + const auto tripCountPtr = reinterpret_cast(getParentEdgesAtPort(loopTripCountIdx).front()->getMemoryPtr()->getData()); + const auto condPtr = reinterpret_cast(getParentEdgesAtPort(loopExecutionConditionIdx).front()->getMemoryPtr()->getData()); if (tripCountPtr[0] != static_cast(lastUsedTripCount) || static_cast(condPtr[0]) != lastUsedCond) return true; } @@ -634,7 +634,7 @@ void TensorIterator::executeDynamicImpl(dnnl::stream strm) { void TensorIterator::prepareInputPorts() { const auto &eng = getEngine(); for (auto map_rule : inputPortMap) { - auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); + auto from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &to_mem = input_mems[map_rule.to].front(); // first memory is enough to access the shared underlying physical memory if (map_rule.axis == -1) @@ -648,7 +648,7 @@ void TensorIterator::prepareInputPorts() { void TensorIterator::prepareOutputPorts() { const auto &eng = getEngine(); for (auto map_rule : outputPortMap) { - auto 
&to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); + auto to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &from_mem = output_mem[map_rule.to]; if (map_rule.axis == -1) @@ -738,7 +738,7 @@ void TensorIterator::reshapeSubgraphInput() { for (auto map_rule : inputPortMap) { auto new_dims = sliced_input_dims(getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(), map_rule.axis, map_rule.stride); auto &to_mems = input_mems[map_rule.to]; - const auto& body_inshape = to_mems.front()->GetShape(); + const auto& body_inshape = to_mems.front()->getShape(); if (body_inshape.isDynamic() || body_inshape.getDims() != new_dims) { const auto desc = std::make_shared(to_mems.front()->getDesc().getPrecision(), Shape(new_dims)); redefineToMemories(to_mems, desc); @@ -754,7 +754,7 @@ void TensorIterator::reshapeAndFillOutput(dnnl::stream strm) { auto &from_mem = output_mem[map_rule.to]; // if Loop or TI isn't executed we should fill dynamic dims by zero - auto newShape = from_mem->GetShape(); + auto newShape = from_mem->getShape(); auto newDims = newShape.getDims(); nullifyUndefinedDims(newDims); @@ -778,7 +778,7 @@ bool TensorIterator::checkForInputAndBodyShapesInequality() const { for (auto map_rule : inputPortMap) { auto original_dims = sliced_input_dims(getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(), map_rule.axis, map_rule.stride); auto &to_mems = input_mems[map_rule.to]; - const auto& body_inshape = to_mems.front()->GetShape(); + const auto& body_inshape = to_mems.front()->getShape(); if (body_inshape.isDynamic() || body_inshape.getDims() != original_dims) { return true; } diff --git a/src/plugins/intel_cpu/src/nodes/tile.cpp b/src/plugins/intel_cpu/src/nodes/tile.cpp index 77be22d68276a3..05392e7f1506fd 100644 --- a/src/plugins/intel_cpu/src/nodes/tile.cpp +++ b/src/plugins/intel_cpu/src/nodes/tile.cpp @@ -100,7 +100,7 @@ void Tile::prepareParams() { if (!constMap[TILE_REPEATS]) { const auto& repeatsMem = 
getParentEdgesAtPort(TILE_REPEATS)[0]->getMemory(); - const int32_t* repeatsData = reinterpret_cast(repeatsMem.GetPtr()); + const int32_t* repeatsData = reinterpret_cast(repeatsMem.getData()); originRepeats.assign(repeatsData, repeatsData + repeatsMem.getStaticDims()[0]); repeats.assign(std::max(originRepeats.size(), getInputShapeAtPort(TILE_INPUT).getRank()), 1lu); @@ -110,8 +110,8 @@ void Tile::prepareParams() { } } - auto srcBlockedDims = getParentEdgeAt(TILE_INPUT)->getMemory().GetDescWithType()->getBlockDims(); - auto dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims(); + auto srcBlockedDims = getParentEdgeAt(TILE_INPUT)->getMemory().getDescWithType()->getBlockDims(); + auto dstBlockedDims = getChildEdgeAt(0)->getMemory().getDescWithType()->getBlockDims(); optimizedCase = prepareOptimizedParams(this, srcBlockedDims, dstBlockedDims); } @@ -124,7 +124,7 @@ bool Tile::needShapeInfer() const { if (!constMap[TILE_REPEATS]) { if (originRepeats.empty()) return true; - const int32_t* repeatsData = reinterpret_cast(getParentEdgesAtPort(TILE_REPEATS)[0]->getMemory().GetPtr()); + const int32_t* repeatsData = reinterpret_cast(getParentEdgesAtPort(TILE_REPEATS)[0]->getMemory().getData()); for (size_t i = 0lu; i < originRepeats.size(); i++) { if (originRepeats[i] != static_cast(repeatsData[i])) return true; @@ -153,8 +153,8 @@ void Tile::plainExecute(dnnl::stream strm) { auto& srcMemory = getParentEdgeAt(TILE_INPUT)->getMemory(); - const uint8_t* src_ptr = reinterpret_cast(srcMemory.GetPtr()); - uint8_t* dst_ptr = reinterpret_cast(getChildEdgeAt(0)->getMemory().GetPtr()); + const uint8_t* src_ptr = reinterpret_cast(srcMemory.getData()); + uint8_t* dst_ptr = reinterpret_cast(getChildEdgeAt(0)->getMemory().getData()); int m_inner_dim = 1; int m_outer_dim = 1; diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp index d3b14e6f458ac0..80d7b42d3a1369 100644 --- a/src/plugins/intel_cpu/src/nodes/topk.cpp 
+++ b/src/plugins/intel_cpu/src/nodes/topk.cpp @@ -1944,12 +1944,12 @@ void TopK::initSupportedPrimitiveDescriptors() { } bool TopK::needShapeInfer() const { - const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; + const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->getData())[0]; return inputShapesModified() || src_k != top_k; } bool TopK::needPrepareParams() const { - const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; + const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->getData())[0]; return inputShapesModified() || top_k != src_k; } @@ -1981,8 +1981,8 @@ void TopK::preset_params() { } void TopK::prepareParams() { - auto &dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " has not allocated destination memory."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -1994,14 +1994,14 @@ void TopK::prepareParams() { dst_dims = dstMemPtr->getDesc().getShape().getDims(); if (isDynamicNode()) { - const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; + const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->getData())[0]; if (static_cast(src_k) > src_dims[axis]) IE_THROW() << errorPrefix << " gets top_k out of range!"; if (top_k != src_k) { top_k = src_k; } } else { - top_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; + top_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->getData())[0]; } if (jit_mode) { @@ -2010,7 +2010,7 @@ void TopK::prepareParams() { preset_params_done = true; } - auto layout_dims = dstMemPtr->GetDescWithType()->getBlockDims(); + auto layout_dims = 
dstMemPtr->getDescWithType()->getBlockDims(); calc_dims_size(layout_dims); axis_dim = src_dims[axis]; @@ -2064,7 +2064,7 @@ void TopK::prepareParams() { } void TopK::createPrimitive() { - auto &srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); if (srcMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { layout = TopKLayoutType::topk_ncsp; } else if (srcMemPtr->getDesc().hasLayoutType(LayoutType::nspc)) { @@ -2108,7 +2108,7 @@ void TopK::createPrimitive() { jcp.bitonic_k_idx_cnt = 0; if (algorithm == TopKAlgorithm::topk_bitonic_sort) { - size_t src_count = srcMemPtr->GetDescWithType()->getPaddedElementsCount(); + size_t src_count = srcMemPtr->getDescWithType()->getPaddedElementsCount(); vec_process_ptr.resize(src_count * data_size); vec_process_idx_ptr.resize(src_count * sizeof(int32_t)); @@ -2137,13 +2137,13 @@ void TopK::executeDynamicImpl(dnnl::stream strm) { } void TopK::execute(dnnl::stream strm) { - auto &srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); - auto &dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); - auto &dstIndexesMemPtr = getChildEdgeAt(TOPK_INDEX)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto dstIndexesMemPtr = getChildEdgeAt(TOPK_INDEX)->getMemoryPtr(); - const uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); - uint8_t *dst_idx = reinterpret_cast(dstIndexesMemPtr->GetPtr()); + const uint8_t *src_data = reinterpret_cast(srcMemPtr->getData()); + uint8_t *dst_data = reinterpret_cast(dstMemPtr->getData()); + uint8_t *dst_idx = reinterpret_cast(dstIndexesMemPtr->getData()); if (jit_mode) { topk_process(src_data, dst_data, dst_idx); diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index 0217a4bb772e78..1998c90023788c 100644 --- 
a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -187,9 +187,9 @@ bool Transpose::needPrepareParams() const { void Transpose::prepareParams() { if (performAsReorder) { // Transpose(order={0,3,1,2}) can be performed as Reorder(acdb=>abcd) - auto& srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto dstDesc = dstMemPtr->GetDescWithType()->getDnnlDesc(); + auto srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto dstDesc = dstMemPtr->getDescWithType()->getDnnlDesc(); auto srcDesc = dnnl::memory::desc(dstDesc.get_dims(), dstDesc.get_data_type(), memory::format_tag::acdb); auto result = getReorderPrim(context->getParamsCache(), getEngine(), srcDesc, dstDesc); if (!result) { @@ -200,7 +200,7 @@ void Transpose::prepareParams() { getSelectedPrimitiveDescriptor()->setImplementationType( parse_impl_name(DnnlExtensionUtils::query_impl_info_str(prim.get_primitive_desc()))); - primArgs = {{DNNL_ARG_SRC, srcMemPtr->GetPrimitive()}, {DNNL_ARG_DST, dstMemPtr->GetPrimitive()}}; + primArgs = {{DNNL_ARG_SRC, srcMemPtr->getPrimitive()}, {DNNL_ARG_DST, dstMemPtr->getPrimitive()}}; #ifdef CPU_DEBUG_CAPS if (prim) { auto pd = prim.get_primitive_desc(); @@ -210,14 +210,14 @@ void Transpose::prepareParams() { return; } - auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getDescWithType(); params.src_block_dims = srcDesc->getBlockDims(); - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().getDescWithType(); params.dst_block_dims = dstDesc->getBlockDims(); if (!isInputOrderConst) { - auto orderPtr = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto orderLen = getParentEdgeAt(0)->getMemoryPtr()->GetSize(); + auto orderPtr = 
reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->getData()); + auto orderLen = getParentEdgeAt(0)->getMemoryPtr()->getSize(); params.order.assign(orderPtr, orderPtr + orderLen); } @@ -237,8 +237,8 @@ void Transpose::prepareParams() { } void Transpose::createPrimitive() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory was not allocated."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -261,9 +261,9 @@ void Transpose::createPrimitive() { params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc()->getPrecision().size(); if (isInputOrderConst) params.order = order; - auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getDescWithType(); params.src_block_order = srcDesc->getOrder(); - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().getDescWithType(); params.dst_block_order = dstDesc->getOrder(); } @@ -275,8 +275,8 @@ void Transpose::createPrimitive() { template static void transpose_to_0312(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { - const auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); - auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + const auto src_data = reinterpret_cast(srcMemPtr->getData()); + auto dst_data = reinterpret_cast(dstMemPtr->getData()); const int DIM1 = srcMemPtr->getStaticDims()[1]; const int DIM2 = srcMemPtr->getStaticDims()[2]; @@ -300,8 +300,8 @@ static void transpose_to_0312(const int MB, const MemoryPtr& srcMemPtr, MemoryPt template static void transpose_to_04123(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { - const auto src_data 
= reinterpret_cast(srcMemPtr->GetPtr()); - auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + const auto src_data = reinterpret_cast(srcMemPtr->getData()); + auto dst_data = reinterpret_cast(dstMemPtr->getData()); const int DIM1 = srcMemPtr->getStaticDims()[1]; const int DIM2 = srcMemPtr->getStaticDims()[2]; @@ -328,8 +328,8 @@ static void transpose_to_04123(const int MB, const MemoryPtr& srcMemPtr, MemoryP template static void transpose_to_051234(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { - const auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); - auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + const auto src_data = reinterpret_cast(srcMemPtr->getData()); + auto dst_data = reinterpret_cast(dstMemPtr->getData()); const int DIM1 = srcMemPtr->getStaticDims()[1]; const int DIM2 = srcMemPtr->getStaticDims()[2]; @@ -378,8 +378,8 @@ void Transpose::execute(dnnl::stream strm) { if (prim) { prim.execute(strm, primArgs); } else if (execPtr) { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); int MB = srcMemPtr->getStaticDims()[0]; @@ -401,8 +401,8 @@ void Transpose::TransposeJitExecutor::exec(Transpose* node, MemoryPtr& srcMemPtr if (!pKernel) IE_THROW() << "Could not execute. 
Kernel for Transpose node was not compiled."; - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemPtr->getData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->getData()); pKernel->execute(srcData, dstData, MB); } diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp index 1c0ff95b21c51a..5fbb3b4cebe2f0 100644 --- a/src/plugins/intel_cpu/src/nodes/unique.cpp +++ b/src/plugins/intel_cpu/src/nodes/unique.cpp @@ -88,13 +88,13 @@ void Unique::createPrimitive() { } void Unique::prepareParams() { - auto& dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr(); + auto dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr(); if (!dataMemPtr || !dataMemPtr->isAllocated()) { THROW_ERROR << " has not allocated input data memory."; } for (int i = 0; i < 4; i++) { if (definedOutputs[i]) { - auto& dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) { THROW_ERROR << " has not allocated output memory at port " << i; } @@ -106,7 +106,7 @@ void Unique::prepareParams() { size_t srcLen = 1; if (flattened) { - srcLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / dataTypeSize; + srcLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getSize() / dataTypeSize; } else { auto dstDataShape = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims(); srcLen = dstDataShape[axis]; @@ -164,8 +164,8 @@ void Unique::executeDynamicImpl(dnnl::stream strm) { template void Unique::flattenTensorExec() { - const T* srcDataPtr = reinterpret_cast(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr()); - const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T); + const T* srcDataPtr = reinterpret_cast(getParentEdgeAt(IN_DATA)->getMemoryPtr()->getData()); + const size_t inputLen = 
getParentEdgeAt(IN_DATA)->getMemoryPtr()->getSize() / sizeof(T); std::vector uniDataTmp(inputLen); auto uniDataTmpPtr = uniDataTmp.data(); int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr; @@ -263,18 +263,18 @@ void Unique::flattenTensorExec() { redefineOutputMemory({ {uniqueLen}, {uniqueLen}, {inputLen}, {uniqueLen}}); - T* uniDataPtr = reinterpret_cast(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr()); + T* uniDataPtr = reinterpret_cast(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->getData()); cpu_parallel_memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T)); if (definedOutputs[FIRST_UNIQUE_IDX]) { - int *firstPtr = reinterpret_cast(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr()); + int *firstPtr = reinterpret_cast(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->getData()); cpu_parallel_memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int)); } if (definedOutputs[INPUT_TO_UNIQ_IDX]) { - auto inToOutPtr = reinterpret_cast(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr()); + auto inToOutPtr = reinterpret_cast(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->getData()); cpu_parallel_memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int)); } if (definedOutputs[OCCURRENCES_NUM]) { - auto occurPtr = reinterpret_cast(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr()); + auto occurPtr = reinterpret_cast(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->getData()); cpu_parallel_memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int)); } } @@ -282,7 +282,7 @@ void Unique::flattenTensorExec() { template void Unique::slicedTensorExec() { auto inDataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr(); - auto srcDataPtr = reinterpret_cast(inDataMemPtr->GetPtr()); + auto srcDataPtr = reinterpret_cast(inDataMemPtr->getData()); int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr; if 
(definedOutputs[FIRST_UNIQUE_IDX]) { firstTmpPtr = firstUniTmp.data(); @@ -367,16 +367,16 @@ void Unique::slicedTensorExec() { int *firstPtr = nullptr, *inToOutPtr = nullptr, *occurNPtr = nullptr; if (definedOutputs[FIRST_UNIQUE_IDX]) { - firstPtr = reinterpret_cast(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr()); + firstPtr = reinterpret_cast(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->getData()); } if (definedOutputs[INPUT_TO_UNIQ_IDX]) { - inToOutPtr = reinterpret_cast(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr()); + inToOutPtr = reinterpret_cast(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->getData()); } if (definedOutputs[OCCURRENCES_NUM]) { - occurNPtr = reinterpret_cast(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr()); + occurNPtr = reinterpret_cast(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->getData()); } - T* dstDataPtr = reinterpret_cast(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr()); + T* dstDataPtr = reinterpret_cast(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->getData()); const auto dstOuterStep = innerLen * uniqueLen; // Filling of the first output if needed. 
if (sorted || definedOutputs[UNIQUE_DATA]) { diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp new file mode 100644 index 00000000000000..a712cbcd2749bd --- /dev/null +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "partitioned_mem_mgr.h" + +using namespace ov::intel_cpu; + +void* PartitionedMemoryMngr::getRawPtr() const noexcept { + return static_cast(m_pMngr->getRawPtr()) + m_offset_blocks * m_size / m_size_blocks; +} + +void PartitionedMemoryMngr::setExtBuff(void* ptr, size_t size) { + m_pMngr->setExtBuff(ptr, size); +} + +bool PartitionedMemoryMngr::resize(size_t size) { + m_size = size; + return m_pMngr->resize(m_size * m_total_blocks / m_size_blocks); +} + +bool PartitionedMemoryMngr::hasExtBuffer() const noexcept { + return m_pMngr->hasExtBuffer(); +} + +void PartitionedMemoryMngr::registerMemory(Memory* memPtr) { + m_pMngr->registerMemory(memPtr); +} + +void PartitionedMemoryMngr::unregisterMemory(Memory* memPtr) { + m_pMngr->unregisterMemory(memPtr); +} + diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h new file mode 100644 index 00000000000000..f2ca5e45a1746f --- /dev/null +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_memory.h" + +namespace ov { +namespace intel_cpu { + +/** + * This is a memory manager that represents a view on a partition inside a continuous memory block controlled by + * another memory manager. 
+ * + */ +class PartitionedMemoryMngr : public IMemoryMngrObserver { +public: + PartitionedMemoryMngr(MemoryMngrPtr pMngr, size_t total_blocks = 1, ptrdiff_t offset_blocks = 0, size_t size_blocks = 1) + : m_pMngr(pMngr), m_total_blocks(total_blocks), m_offset_blocks(offset_blocks), m_size_blocks(size_blocks) { + IE_ASSERT(m_pMngr) << "Memory manager is uninitialized"; + } + + void* getRawPtr() const noexcept override; + void setExtBuff(void* ptr, size_t size) override; + bool resize(size_t size) override; + bool hasExtBuffer() const noexcept override; + void registerMemory(Memory* memPtr) override; + void unregisterMemory(Memory* memPtr) override; + +private: + MemoryMngrPtr m_pMngr; + size_t m_total_blocks = 1; // size of the parent memory in abstract blocks + ptrdiff_t m_offset_blocks = 0; // offset from the beginning of the external memory in abstract blocks + size_t m_size_blocks = 1; // size of the viewed partition in abstract blocks + size_t m_size = 0; // size of the viewed partition in bytes +}; + +} // namespace intel_cpu +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/utils/blob_dump.cpp b/src/plugins/intel_cpu/src/utils/blob_dump.cpp index 2cbc619c7dbb1b..dce76d115d0908 100644 --- a/src/plugins/intel_cpu/src/utils/blob_dump.cpp +++ b/src/plugins/intel_cpu/src/utils/blob_dump.cpp @@ -94,12 +94,12 @@ void BlobDumper::prepare_plain_data(const MemoryPtr &memory, std::vector(memory->GetPtr()), size); + cpu_memcpy(data.data(), reinterpret_cast(memory->getData()), size); return; } // Copy to plain - const void *ptr = memory->GetData(); + const void *ptr = memory->getData(); switch (desc.getPrecision()) { case Precision::FP32: @@ -161,9 +161,9 @@ void BlobDumper::dumpAsTxt(std::ostream &stream) const { << "shape: "; for (size_t d : dims) stream << d << " "; stream << "(" << data_size << ")" << - " by address 0x" << std::hex << reinterpret_cast(memory->GetData()) << std::dec <(memory->getData()) << std::dec <GetData(); + 
const void *ptr = memory->getData(); switch (desc.getPrecision()) { case Precision::FP32 : { diff --git a/src/plugins/intel_cpu/src/utils/blob_dump.h b/src/plugins/intel_cpu/src/utils/blob_dump.h index 6f90160cae0b82..76d662bdbb8cbd 100644 --- a/src/plugins/intel_cpu/src/utils/blob_dump.h +++ b/src/plugins/intel_cpu/src/utils/blob_dump.h @@ -29,8 +29,7 @@ class BlobDumper { BlobDumper() = default; BlobDumper(const DnnlBlockedMemoryDesc &desc) { dnnl::engine eng(dnnl::engine::kind::cpu, 0); - memory = std::make_shared(eng); - memory->Create(desc); + memory = std::make_shared(eng, desc); } BlobDumper(const BlobDumper&) = default; BlobDumper& operator = (BlobDumper&&) = default; @@ -47,7 +46,7 @@ class BlobDumper { void dumpAsTxt(std::ostream &stream) const; void *getDataPtr() const { - return memory->GetPtr(); + return memory->getData(); } }; diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp index 3cbb94e6b89705..300f62e27b19aa 100644 --- a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp @@ -199,7 +199,7 @@ std::ostream & operator<<(std::ostream & os, const Node &c_node) { leftside << comma << desc->getPrecision().name() << "_" << desc->serializeFormat() << "_" << shape_str - << "_" << ptr->GetData(); + << "_" << ptr->getData(); b_ouputed = true; } else { leftside << "(empty)"; @@ -292,7 +292,7 @@ std::ostream & operator<<(std::ostream & os, const Node &c_node) { if (node.getType() == intel_cpu::Type::Input && node.isConstant()) { if (auto input_node = reinterpret_cast(&node)) { auto pmem = input_node->getMemoryPtr(); - void * data = pmem->GetData(); + void * data = pmem->getData(); auto shape = pmem->getDesc().getShape().getDims(); if (shape_size(shape) <= 8) { diff --git a/src/plugins/intel_cpu/src/utils/general_utils.h b/src/plugins/intel_cpu/src/utils/general_utils.h index 000b269e4747fb..3e147fa8e403b3 100644 --- 
a/src/plugins/intel_cpu/src/utils/general_utils.h +++ b/src/plugins/intel_cpu/src/utils/general_utils.h @@ -44,6 +44,15 @@ constexpr inline bool implication(bool cause, bool cond) { return !cause || !!cond; } +#ifdef __cpp_lib_make_unique +using std::make_unique; +#else +template +inline std::unique_ptr make_unique(Args&&... args) { + return std::unique_ptr(new T(std::forward(args)...)); +} +#endif + template std::string vec2str(const std::vector &vec) { if (!vec.empty()) { diff --git a/src/plugins/intel_cpu/src/utils/node_dumper.cpp b/src/plugins/intel_cpu/src/utils/node_dumper.cpp index 3043a3e40edd40..44fdc67e8e46ce 100644 --- a/src/plugins/intel_cpu/src/utils/node_dumper.cpp +++ b/src/plugins/intel_cpu/src/utils/node_dumper.cpp @@ -110,8 +110,7 @@ static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config if (desc.getPrecision() == Precision::BIN) continue; - MemoryPtr memory = std::make_shared(node->getEngine()); - memory->Create(MemoryDescUtils::convertToDnnlBlockedMemoryDesc(desc), blb->buffer()); + MemoryPtr memory = std::make_shared(node->getEngine(), MemoryDescUtils::convertToDnnlBlockedMemoryDesc(desc), blb->buffer()); BlobDumper dumper(memory); dump(dumper, dump_file, config); } diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference_ngraph.cpp b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference_ngraph.cpp index ef325b22a7dc86..beb2504d75142c 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference_ngraph.cpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference_ngraph.cpp @@ -36,7 +36,7 @@ NgraphShapeInfer::infer( input_values[port] = std::make_shared( InferenceEngine::details::convertPrecision(memPtr->getDesc().getPrecision()), shape, - memPtr->GetPtr()); + memPtr->getData()); } } // call shape inference API diff --git a/src/plugins/intel_cpu/src/weights_cache.hpp b/src/plugins/intel_cpu/src/weights_cache.hpp index 3555dee9e0a8b1..dbc674213348ce 100644 --- 
a/src/plugins/intel_cpu/src/weights_cache.hpp +++ b/src/plugins/intel_cpu/src/weights_cache.hpp @@ -63,7 +63,7 @@ class WeightsSharing { {} std::mutex guard; - std::weak_ptr sharedMemory; + std::weak_ptr sharedMemory; std::atomic valid; }; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp index f2bff9f8e97370..1cb242daaf55f1 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp @@ -632,31 +632,84 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat_1D_dynamic, ConcatLayerCPUTest, INSTANTIATE_TEST_SUITE_P(concat_Concat4D_CPU_Block8inPlace, ConcatLayerCPUTest, ::testing::Combine( ::testing::Values(0, 1), - ::testing::Values(static_shapes_to_test_representation({{1, 8, 3, 5}, {1, 8, 3, 5}})), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(planar_4D, planarChannels_4D, blocked8_4D)), + ::testing::Values(std::vector{ + {{}, {{1, 16, 5, 7}}}, + {{}, {{1, 16, 5, 7}}}, + {{}, {{1, 16, 5, 7}}}, + }, + std::vector{ + {{1, 16, -1, -1}, {{1, 16, 5, 7}, {1, 16, 16, 2}, {1, 16, 2, 8}}}, + {{1, 16, -1, -1}, {{1, 16, 5, 7}, {1, 16, 16, 2}, {1, 16, 2, 8}}}, + {{1, 16, -1, -1}, {{1, 16, 5, 7}, {1, 16, 16, 2}, {1, 16, 2, 8}}}, + }), + ::testing::Values(ElementType::f32), + ::testing::Values(planar_4D, blocked8_4D)), ConcatLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block16inPlace, ConcatLayerCPUTest, +INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block16inPlace_0, ConcatLayerCPUTest, ::testing::Combine( - ::testing::Values(0, 1), - ::testing::Values(static_shapes_to_test_representation({{1, 32, 3, 5}, {1, 32, 3, 5}})), - ::testing::ValuesIn(netPrecisions), + ::testing::Values(0), + ::testing::Values(std::vector{ + {{}, {{1, 32, 5, 7}}}, + {{}, {{1, 32, 5, 7}}}, + {{}, {{1, 32, 5, 7}}}, + }, + std::vector{ + {{1, 32, -1, -1}, {{1, 32, 5, 7}, {1, 32, 16, 
2}, {1, 32, 2, 8}}}, + {{1, 32, -1, -1}, {{1, 32, 5, 7}, {1, 32, 16, 2}, {1, 32, 2, 8}}}, + {{1, 32, -1, -1}, {{1, 32, 5, 7}, {1, 32, 16, 2}, {1, 32, 2, 8}}}, + }), + ::testing::Values(ElementType::f32), + ::testing::Values(blocked16_4D)), + ConcatLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block16inPlace_1, ConcatLayerCPUTest, + ::testing::Combine( + ::testing::Values(1), + ::testing::Values(std::vector{ + {{}, {{1, 32, 5, 7}}}, + {{}, {{1, 16, 5, 7}}}, + {{}, {{1, 32, 5, 7}}}, + }, + std::vector{ + {{1, 32, -1, -1}, {{1, 32, 5, 7}, {1, 32, 16, 2}, {1, 32, 2, 8}}}, + {{1, 16, -1, -1}, {{1, 16, 5, 7}, {1, 16, 16, 2}, {1, 16, 2, 8}}}, + {{1, 32, -1, -1}, {{1, 32, 5, 7}, {1, 32, 16, 2}, {1, 32, 2, 8}}}, + }), + ::testing::Values(ElementType::f32), ::testing::Values(blocked16_4D)), ConcatLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(concat_Concat5D_CPU_Block8inPlace, ConcatLayerCPUTest, ::testing::Combine( ::testing::Values(0, 1), - ::testing::Values(static_shapes_to_test_representation({{1, 16, 3, 5, 7}, {1, 16, 3, 5, 7}})), + ::testing::Values(std::vector{ + {{}, {{1, 16, 3, 5, 7}}}, + {{}, {{1, 16, 3, 5, 7}}}, + {{}, {{1, 16, 3, 5, 7}}}, + }, + std::vector{ + {{1, 32, -1, -1, -1}, {{1, 32, 5, 7, 3}, {1, 32, 16, 2, 3}, {1, 32, 2, 8, 3}}}, + {{1, 32, -1, -1, -1}, {{1, 32, 5, 7, 3}, {1, 32, 16, 2, 3}, {1, 32, 2, 8, 3}}}, + {{1, 32, -1, -1, -1}, {{1, 32, 5, 7, 3}, {1, 32, 16, 2, 3}, {1, 32, 2, 8, 3}}}, + }), ::testing::ValuesIn(netPrecisions), - ::testing::Values(planar_5D, planarChannels_5D, blocked8_5D)), + ::testing::Values(planar_5D, blocked8_5D)), ConcatLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block16inPlace, ConcatLayerCPUTest, ::testing::Combine( ::testing::Values(0, 1), - ::testing::Values(static_shapes_to_test_representation({{1, 32, 3, 5, 7}, {1, 32, 3, 5, 7}})), + ::testing::Values(std::vector{ + {{}, {{1, 32, 3, 5, 7}}}, + {{}, {{1, 32, 3, 5, 7}}}, + {{}, {{1, 32, 3, 5, 7}}}, + 
}, + std::vector{ + {{1, 32, -1, -1, -1}, {{1, 32, 5, 7, 3}, {1, 32, 16, 2, 3}, {1, 32, 2, 8, 3}}}, + {{1, 32, -1, -1, -1}, {{1, 32, 5, 7, 3}, {1, 32, 16, 2, 3}, {1, 32, 2, 8, 3}}}, + {{1, 32, -1, -1, -1}, {{1, 32, 5, 7, 3}, {1, 32, 16, 2, 3}, {1, 32, 2, 8, 3}}}, + }), ::testing::ValuesIn(netPrecisions), ::testing::Values(blocked16_5D)), ConcatLayerCPUTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp index 1ec99c7bbf40f3..3e4326321e7732 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp @@ -146,11 +146,84 @@ class GatherLayerTestCPU : public testing::WithParamInterface, // Indices + int, // Axis + ElementType, // Network precision + CPUSpecificParams // CPU specific params +> GatherInPlaceLayerTestCPUParams; + +class GatherInPlaceLayerTestCPU : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest, public CPUTestsBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InputShape inputShapes; + std::vector indices; + int axis; + ElementType netPrecision; + CPUSpecificParams cpuParams; + + std::tie(inputShapes, indices, axis, netPrecision, cpuParams) = obj.param; + + std::ostringstream result; + result << "IS=("; + + result << CommonTestUtils::partialShape2str({inputShapes.first}) << ")_TS="; + + result << "{"; + for (size_t i = 0lu; i < inputShapes.second.size(); i++) { + result << CommonTestUtils::vec2str(inputShapes.second[i]) << (i < inputShapes.second.size() - 1lu ? 
"_" : ""); + } + result << "}_"; + result << "axis=" << axis << "_"; + result << "indices=" << CommonTestUtils::vec2str(indices) << "_"; + result << "netPrc=" << netPrecision << "_"; + result << CPUTestsBase::getTestCaseName(cpuParams); + + return result.str(); + } + +protected: + void SetUp() override { + InputShape inputShapes; + std::vector indices; + int axis; + ElementType netPrecision; + CPUSpecificParams cpuParams; + constexpr ElementType intInputsPrecision = ElementType::i64; + constexpr int batchDims = 0; + + std::tie(inputShapes, indices, axis, netPrecision, cpuParams) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + targetDevice = CommonTestUtils::DEVICE_CPU; + init_input_shapes({ inputShapes }); + + selectedType = makeSelectedTypeStr(selectedType, netPrecision); + + ngraph::ParameterVector params { + std::make_shared(netPrecision, inputDynamicShapes[0]) + }; + params[0]->set_friendly_name("data"); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + std::shared_ptr gatherNode = std::make_shared(paramOuts[0], + ov::op::v0::Constant::create(intInputsPrecision, ov::Shape({indices.size()}), indices), + ov::op::v0::Constant::create(intInputsPrecision, ov::Shape({1}), { axis }), batchDims); + + function = makeNgraphFunction(netPrecision, params, gatherNode, "GatherCPU"); + } +}; + TEST_P(GatherLayerTestCPU, CompareWithRefs) { run(); CheckPluginRelatedResults(compiledModel, "Gather"); } +TEST_P(GatherInPlaceLayerTestCPU, CompareWithRefs) { + run(); + CheckPluginRelatedResults(compiledModel, "Gather"); +} + namespace { const std::vector netPrecisions = { ElementType::f32, @@ -847,5 +920,36 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref8_Bmax, GatherLayerTestCPU, ::testing::Values(cpuParamsRef), ::testing::Values(additionalConfig[0])), GatherLayerTestCPU::getTestCaseName); + +// InPlace + +const std::vector shapesInPlace4D_0 = { + { {}, { {5, 4, 4, 19} } }, // Static shapes + { 
{5, 4, -1, -1}, { {5, 4, 4, 19}, {5, 4, 4, 25}, {5, 4, 2, 19} } }, // Static shapes +}; + +INSTANTIATE_TEST_SUITE_P(smoke_inplace_4D_0, GatherInPlaceLayerTestCPU, + ::testing::Combine( + ::testing::ValuesIn(shapesInPlace4D_0), + ::testing::Values(std::vector{ 2 }), + ::testing::Values(0), + ::testing::Values(ElementType::f32), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})), + GatherInPlaceLayerTestCPU::getTestCaseName); + +const std::vector shapesInPlace4D_1 = { + { {}, { {1, 9, 4, 19} } }, // Static shapes + { {1, 9, -1, -1}, { {1, 9, 4, 19}, {1, 9, 4, 25}, {1, 9, 2, 19} } }, // Static shapes +}; + +INSTANTIATE_TEST_SUITE_P(smoke_inplace_4D_1, GatherInPlaceLayerTestCPU, + ::testing::Combine( + ::testing::ValuesIn(shapesInPlace4D_1), + ::testing::Values(std::vector{ 4 }), + ::testing::Values(1), + ::testing::Values(ElementType::f32), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})), + GatherInPlaceLayerTestCPU::getTestCaseName); + } // namespace } // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp index 1508f73bbc269e..e7e9b86aa68088 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp @@ -92,12 +92,12 @@ class SplitLayerCPUTest : public testing::WithParamInterface TEST_P(SplitLayerCPUTest, CompareWithRefs) { run(); -// CheckPluginRelatedResults(executableNetwork, "Split"); + CheckPluginRelatedResults(compiledModel, "Split"); } namespace { -const auto planar_4D_ref = CPUSpecificParams{{nchw}, {nchw}, {"ref"}, "ref"}; -const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref"}, "ref"}; +const auto planar_4D_ref = CPUSpecificParams{{nchw}, {nchw}, {}, "ref"}; +const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "ref"}; const auto planar_4D = CPUSpecificParams{{nchw}, {nchw}, 
{}, "unknown"}; const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"}; @@ -249,7 +249,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_planar, SplitLayerCPUTest, ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(inputShapes4D_planar), ::testing::ValuesIn(outIndices3), - ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D)), + ::testing::Values(planar_4D_ref, perChannels_4D)), SplitLayerCPUTest::getTestCaseName); const std::vector inputShapes4D_block = { @@ -336,7 +336,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_planar, SplitLayerCPUTest, ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(inputShapes5D_planar), ::testing::ValuesIn(outIndices3), - ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D)), + ::testing::Values(planar_5D_ref, perChannels_5D)), SplitLayerCPUTest::getTestCaseName); const std::vector inputShapes5D_block = { @@ -410,11 +410,11 @@ const std::vector inputShapes3D = { INSTANTIATE_TEST_SUITE_P(smoke_Split3D, SplitLayerCPUTest, ::testing::Combine( ::testing::Values(7), - ::testing::Values(0, 1, 2), + ::testing::Values(1, 2), ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(inputShapes3D), ::testing::Values(std::vector({})), - ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})), SplitLayerCPUTest::getTestCaseName); const std::vector inputShapes2D = { @@ -444,15 +444,24 @@ const std::vector inputShapes2D = { INSTANTIATE_TEST_SUITE_P(smoke_Split2D, SplitLayerCPUTest, ::testing::Combine( ::testing::Values(2), - ::testing::Values(0, 1), + ::testing::Values(1), ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(inputShapes2D), ::testing::Values(std::vector({})), - ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})), SplitLayerCPUTest::getTestCaseName); 
+INSTANTIATE_TEST_SUITE_P(smoke_Split1D_static, SplitLayerCPUTest, + ::testing::Combine( + ::testing::Values(5), + ::testing::Values(0), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InputShape{ {}, {{10}} }), + ::testing::Values(std::vector({})), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})), + SplitLayerCPUTest::getTestCaseName); + const std::vector inputShapes1D = { - { {}, {{10}} }, { // dynamic {-1}, @@ -482,7 +491,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split1D, SplitLayerCPUTest, ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(inputShapes1D), ::testing::Values(std::vector({})), - ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})), SplitLayerCPUTest::getTestCaseName); const std::vector inputShapes4D_dynBatch = { @@ -504,48 +513,83 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_by_batch, SplitLayerCPUTest, ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(inputShapes4D_dynBatch), ::testing::ValuesIn(outIndices3), - ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D)), + ::testing::Values(planar_4D_ref, perChannels_4D)), SplitLayerCPUTest::getTestCaseName); // ============================================== inPlace cases ============================================ -INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest, +const std::vector inputShapes4D_inPlace_0 = { + {{}, {{3, 24, 24, 9}}}, + {{}, {{6, 24, 24}}}, + {{}, {{9, 24}}}, + { + // dynamic + {3, -1, -1, -1, -1}, + // target + { + {3, 24, 6, 9, 4}, + {3, 12, 12, 15, 5}, + } + }, + { + // dynamic + {6, -1, -1, -1}, + // target + { + {6, 24, 6, 9}, + {6, 12, 12, 15}, + } + }, + { + // dynamic + {9, -1, -1}, + // target + { + {9, 24, 6}, + {9, 12, 12}, + } + } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Split_CPU_planar_inPlace_0, SplitLayerCPUTest, ::testing::Combine( ::testing::Values(3), - ::testing::Values(0, 1), - 
::testing::ValuesIn(netPrecisions), - ::testing::Values(InputShape{ {}, {{3, 24, 24, 9}} }), - ::testing::ValuesIn(outIndices3), - ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D)), + ::testing::Values(0), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(inputShapes4D_inPlace_0), + ::testing::Values(std::vector{}), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})), SplitLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest, - ::testing::Combine( - ::testing::Values(4), - ::testing::Values(0, 1), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InputShape{ {}, {{4, 64, 32, 12}} }), - ::testing::ValuesIn(outIndices3), - ::testing::Values(blocked16_4D)), - SplitLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8inPlace_1, SplitLayerCPUTest, + ::testing::Combine( + ::testing::Values(4), + ::testing::Values(1), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InputShape{ {}, {{1, 32, 5, 6}} }, + InputShape{ {1, 32, -1, -1}, + { + {1, 32, 5, 6}, + {1, 32, 5, 2}, + {1, 32, 5, 8} + } }), + ::testing::ValuesIn(outIndices4), + ::testing::Values(planar_4D, blocked8_4D)), + SplitLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest, +INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16inPlace_1, SplitLayerCPUTest, ::testing::Combine( ::testing::Values(3), - ::testing::Values(0, 1), + ::testing::Values(1), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InputShape{ {}, {{3, 24, 24, 9, 15}} }), + ::testing::Values(InputShape{ {}, {{1, 48, 5, 6, 3}} }, + InputShape{ {1, 48, -1, -1, 3}, + { + {1, 48, 5, 6, 3}, + {1, 48, 5, 2, 3}, + {1, 48, 5, 8, 3} + } }), ::testing::ValuesIn(outIndices3), - ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D)), - SplitLayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16inPlace, 
SplitLayerCPUTest, - ::testing::Combine( - ::testing::Values(4), - ::testing::Values(0, 1), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InputShape{ {}, {{4, 64, 32, 12, 20}} }), - ::testing::ValuesIn(outIndices4), - ::testing::Values(blocked16_5D)), + ::testing::Values(planar_5D, blocked16_5D)), SplitLayerCPUTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/variadic_split.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/variadic_split.cpp index 6fadf9b220f81e..936562f6384e28 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/variadic_split.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/variadic_split.cpp @@ -127,8 +127,8 @@ TEST_P(VariadicSplitLayerCPUTest, CompareWithRefs) { } namespace { -const auto planar_4D_ref = CPUSpecificParams{{nchw}, {nchw}, {"ref"}, "ref"}; -const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref"}, "ref"}; +const auto planar_4D_ref = CPUSpecificParams{{nchw}, {nchw}, {}, "ref"}; +const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "ref"}; const auto planar_4D = CPUSpecificParams{{nchw}, {nchw}, {}, "unknown"}; const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"}; @@ -239,7 +239,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_planar_static, VariadicSplitL ::testing::Values(LengthsPerInfer{{1, 3, -1}}), ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), ::testing::ValuesIn(netPrecisions), - ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D)), + ::testing::Values(planar_4D_ref, perChannels_4D)), VariadicSplitLayerCPUTest::getTestCaseName); const std::vector inputShapes4D_planar = { @@ -326,7 +326,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_planar_static, VariadicSplitL ::testing::Values(LengthsPerInfer{{2, 1, -1}}), ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), ::testing::ValuesIn(netPrecisions), - 
::testing::Values(planar_5D, planar_5D_ref, perChannels_5D)), + ::testing::Values(planar_5D_ref, perChannels_5D)), VariadicSplitLayerCPUTest::getTestCaseName); const std::vector inputShapes5D_planar = { @@ -409,11 +409,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_Block16, VariadicSplitLayerCP INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit3D_static, VariadicSplitLayerCPUTest, ::testing::Combine( ::testing::Values(InputShape{ {}, {{14, 7, 21}} }), - ::testing::Values(0, 1, 2), + ::testing::Values(1, 2), ::testing::Values(LengthsPerInfer{{2, 4, -1}}), ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), ::testing::ValuesIn(netPrecisions), - ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})), VariadicSplitLayerCPUTest::getTestCaseName); const std::vector inputShapes3D = { @@ -446,17 +446,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit3D, VariadicSplitLayerCPUTest, ::testing::Values(LengthsPerInfer{{2, 4, -1}}), ::testing::ValuesIn(lengthsTypes), ::testing::ValuesIn(netPrecisions), - ::testing::Values(CPUSpecificParams{{}, {}, {"ref"}, "ref"})), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})), VariadicSplitLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit2D_static, VariadicSplitLayerCPUTest, ::testing::Combine( ::testing::Values(InputShape{ {}, {{6, 12}} }), - ::testing::Values(0, 1), + ::testing::Values(1), ::testing::Values(LengthsPerInfer{{2, -1}}), ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), ::testing::ValuesIn(netPrecisions), - ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})), VariadicSplitLayerCPUTest::getTestCaseName); const std::vector inputShapes2D = { @@ -489,7 +489,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit2D, VariadicSplitLayerCPUTest, 
::testing::Values(LengthsPerInfer{{2, -1}}), ::testing::ValuesIn(lengthsTypes), ::testing::ValuesIn(netPrecisions), - ::testing::Values(CPUSpecificParams{{}, {}, {"ref"}, "ref"})), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})), VariadicSplitLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit1D_static, VariadicSplitLayerCPUTest, @@ -532,7 +532,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit1D, VariadicSplitLayerCPUTest, ::testing::Values(LengthsPerInfer{{2, 1, 1, -1}}), ::testing::ValuesIn(lengthsTypes), ::testing::ValuesIn(netPrecisions), - ::testing::Values(CPUSpecificParams{{}, {}, {"ref"}, "ref"})), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})), VariadicSplitLayerCPUTest::getTestCaseName); const std::vector inputShapes4D_zero_dims = { @@ -611,6 +611,98 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_dynamic_lengths, VariadicSpli ::testing::Values(planar_4D_ref)), VariadicSplitLayerCPUTest::getTestCaseName); +// =========================================== in - place ============================================================// +INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit_CPU_planar_inPlace_0, VariadicSplitLayerCPUTest, + ::testing::Combine( + ::testing::Values(InputShape{ {}, {{5, 6, 5, 6, 7}} }, + InputShape{ {}, {{5, 6, 5, 6}} }, + InputShape{ {}, {{5, 6, 5}} }, + InputShape{ {5, -1, -1, -1, -1}, + { + {5, 6, 5, 6, 7}, + {5, 2, 5, 2, 7}, + {5, 8, 5, 8, 7} + } }, + InputShape{ {5, -1, -1, -1}, + { + {5, 6, 5, 6}, + {5, 2, 5, 2}, + {5, 8, 5, 8} + } }, + InputShape{ {5, -1, -1}, + { + {5, 6, 5}, + {5, 2, 5}, + {5, 8, 5} + } }), + ::testing::Values(0), + ::testing::Values(LengthsPerInfer{{1, 2, -1}}), + ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), + ::testing::Values(ElementType::f32), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})), + VariadicSplitLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit_CPU_planar_inPlace_1, 
VariadicSplitLayerCPUTest, + ::testing::Combine( + ::testing::Values(InputShape{ {}, {{1, 6, 5, 6, 7}} }, + InputShape{ {}, {{1, 6, 5, 6}} }, + InputShape{ {}, {{1, 6, 5}} }, + InputShape{ {1, 6, -1, -1, -1}, + { + {1, 6, 5, 6, 7}, + {1, 6, 5, 2, 7}, + {1, 6, 5, 8, 7} + } }, + InputShape{ {1, 6, -1, -1}, + { + {1, 6, 5, 6}, + {1, 6, 5, 2}, + {1, 6, 5, 8} + } }, + InputShape{ {1, 6, -1}, + { + {1, 6, 5}, + {1, 6, 3}, + {1, 6, 7} + } }), + ::testing::Values(1), + ::testing::Values(LengthsPerInfer{{1, 2, -1}}), + ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), + ::testing::Values(ElementType::f32), + ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})), + VariadicSplitLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_block8_inPlace, VariadicSplitLayerCPUTest, + ::testing::Combine( + ::testing::Values(InputShape{ {}, {{1, 32, 5, 6}} }, + InputShape{ {1, 32, -1, -1}, + { + {1, 32, 5, 6}, + {1, 32, 5, 2}, + {1, 32, 5, 8} + } }), + ::testing::Values(1), + ::testing::Values(LengthsPerInfer{{8, 16, -1}}), + ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), + ::testing::Values(ElementType::f32), + ::testing::Values(blocked8_4D)), + VariadicSplitLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_block16_inPlace, VariadicSplitLayerCPUTest, + ::testing::Combine( + ::testing::Values(InputShape{ {}, {{1, 64, 5, 6}} }, + InputShape{ {1, 64, -1, -1}, + { + {1, 64, 5, 6}, + {1, 64, 5, 2}, + {1, 64, 5, 8} + } }), + ::testing::Values(1), + ::testing::Values(LengthsPerInfer{{16, 32, -1}}), + ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), + ::testing::Values(ElementType::f32), + ::testing::Values(blocked16_4D)), + VariadicSplitLayerCPUTest::getTestCaseName); } // namespace } // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp 
b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp new file mode 100644 index 00000000000000..91fa8ff0fb942b --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp @@ -0,0 +1,147 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + + +/*This test runs the following subgraph: + + param1 param2 param3 param4 + | | | | + | | | | + Softmax Softmax Softmax Softmax + | | | | + | | | | + Reshape Reshape Reshape Reshape + | | | | + | | | | + \ / \ / + \ / \ / + \ / \ / + Concat Concat + | | + | | + Reshape Reshape + | | + \ / + \ / + \ / + Concat + | + Softmax + + Result + + The main purpose of this test is checking the code path when all the nodes except Softmax use "in-place" memory mode. + Softmax is used as a model of an arbitrary subgraph preceding the pattern. 
+*/ + +using namespace InferenceEngine; +using namespace ov::test; + +namespace SubgraphTestsDefinitions { + +using VectorShapes = std::vector; + +class ConcatReshapeConcatSubgraphTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + VectorShapes& inputShapes = obj.param; + + std::ostringstream result; + result << "IS="; + for (const auto& shape : inputShapes) { + result << CommonTestUtils::partialShape2str({shape.first}) << "_"; + } + result << "TS="; + for (const auto& shape : inputShapes) { + result << "("; + if (!shape.second.empty()) { + for (const auto& itr : shape.second) { + result << CommonTestUtils::vec2str(itr); + } + } + result << ")"; + } + return result.str(); + } + + void SetUp() override { + constexpr size_t number_of_params = 4ul; + constexpr size_t softmax_axis = 1ul; + constexpr int concat_axis = 0; + targetDevice = CommonTestUtils::DEVICE_CPU; + auto netPrc = ov::element::f32; + auto& InputShapes = this->GetParam(); + ASSERT_EQ(InputShapes.size(), number_of_params) << "Unexpected number of input shapes"; + init_input_shapes(InputShapes); + auto input_params = ngraph::builder::makeDynamicParams(netPrc, inputDynamicShapes); + + ov::NodeVector first_level_reshapes; + + for (size_t i = 0; i < number_of_params; ++i) { + auto soft_max = std::make_shared(input_params[i], softmax_axis); + auto reshape_param = ngraph::builder::makeConstant(ov::element::i32, {1}, {0}); + auto reshape = std::make_shared(soft_max, reshape_param); + first_level_reshapes.push_back(reshape); + } + + auto concat1 = std::make_shared(ov::NodeVector{first_level_reshapes[0], first_level_reshapes[1]}, concat_axis); + auto concat2 = std::make_shared(ov::NodeVector{first_level_reshapes[2], first_level_reshapes[3]}, concat_axis); + + ov::NodeVector second_level_reshapes; + ov::NodeVector first_level_concats = {concat1, concat2}; + + for (size_t i = 0; i < number_of_params / 2; ++i) { 
+ auto reshape_param = ngraph::builder::makeConstant(ov::element::i32, {1}, {0}); + auto reshape = std::make_shared(first_level_concats[i], reshape_param); + second_level_reshapes.push_back(reshape); + } + + auto concat3 = std::make_shared(second_level_reshapes, concat_axis); + auto soft_max = std::make_shared(concat3, softmax_axis); + + ngraph::ResultVector results; + for (size_t i = 0; i < soft_max->get_output_size(); i++) + results.push_back(std::make_shared(soft_max->output(i))); + + function = std::make_shared(results, input_params, "ConcatReshapeConcatPattern"); + ov::pass::Serialize serializer("ngraph.xml", "ngraph.bin"); + serializer.run_on_model(function); + } +}; + +TEST_P(ConcatReshapeConcatSubgraphTest, CompareWithRefs) { + run(); + ov::pass::Serialize serializer("exec_graph_dyn.xml", "exec_graph_dyn.bin"); + serializer.run_on_model(std::const_pointer_cast(compiledModel.get_runtime_model())); +} + +namespace { + +const std::vector> inputShapes = { + // { + // // {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + // {{2, 64}, {{2, 64}}}, // input 0 + // {{2, 64}, {{2, 64}}}, // input 1 + // {{2, 64}, {{2, 64}}}, // input 2 + // {{2, 64}, {{2, 64}}} // input 3 + // }, + { + // {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{2, -1}, {{2, 64}}}, // input 0 + {{2, -1}, {{2, 64}}}, // input 1 + {{2, -1}, {{2, 64}}}, // input 2 + {{2, -1}, {{2, 64}}} // input 3 + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Concat_Reshape_Concat, ConcatReshapeConcatSubgraphTest, + ::testing::ValuesIn(inputShapes), + ConcatReshapeConcatSubgraphTest::getTestCaseName); +} // namespace +} // namespace SubgraphTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp index 3775131aa55364..b2728ee309ec1c 100644 --- a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp @@ 
-26,26 +26,25 @@ TEST(MemoryTest, ConcurrentGetPrimitive) { dnnl::memory dnnl_mem1; dnnl::memory dnnl_mem2; auto desc = std::make_shared(Precision::FP32, Shape{10, 2}); - Memory cpu_mem1(eng); - cpu_mem1.Create(desc); + Memory cpu_mem1(eng, desc); std::atomic lock{true}; std::thread worker1([&](){ while (lock.load()) {} - dnnl_mem1 = cpu_mem1.GetPrimitive(); + dnnl_mem1 = cpu_mem1.getPrimitive(); }); std::thread worker2([&](){ while (lock.load()) {} - dnnl_mem2 = cpu_mem1.GetPrimitive(); + dnnl_mem2 = cpu_mem1.getPrimitive(); }); lock.store(false); worker1.join(); worker2.join(); - ASSERT_EQ(dnnl_mem1.get_data_handle(), cpu_mem1.GetData()); + ASSERT_EQ(dnnl_mem1.get_data_handle(), cpu_mem1.getData()); ASSERT_EQ(dnnl_mem1, dnnl_mem2); } @@ -55,17 +54,15 @@ TEST(MemoryTest, ConcurrentResizeGetPrimitive) { for (size_t i = 0; i < number_of_attempts; ++i) { dnnl::memory dnnl_mem; auto desc = std::make_shared(Precision::FP32, Shape{10, 2}); - Memory cpu_mem1(eng); - cpu_mem1.Create(desc); - Memory cpu_mem2(eng); - cpu_mem2.Create(desc, cpu_mem1.getDnnlMemoryMngr()); // tie two memory objects (memory reuse) + Memory cpu_mem1(eng, desc); + Memory cpu_mem2(eng, desc, cpu_mem1.getMemoryMngr()); auto desc2 = std::make_shared(Precision::FP32, Shape{10, 20}); std::atomic lock{true}; std::thread worker1([&](){ while (lock.load()) {} - dnnl_mem = cpu_mem1.GetPrimitive(); + dnnl_mem = cpu_mem1.getPrimitive(); }); std::thread worker2([&](){ @@ -77,6 +74,6 @@ TEST(MemoryTest, ConcurrentResizeGetPrimitive) { worker1.join(); worker2.join(); - ASSERT_EQ(dnnl_mem.get_data_handle(), cpu_mem2.GetData()); + ASSERT_EQ(dnnl_mem.get_data_handle(), cpu_mem2.getData()); } } diff --git a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp index 63ad2abf574f0e..57a78302a18770 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp @@ -22,13 
+22,13 @@ using namespace InferenceEngine; using namespace ov::intel_cpu; namespace ReorderCPUTest { -inline void checkReorder(const ov::intel_cpu::Memory& inputMemory, - const ov::intel_cpu::Memory& outputMemory, +inline void checkReorder(const ov::intel_cpu::IMemory& inputMemory, + const ov::intel_cpu::IMemory& outputMemory, const InferenceEngine::Precision& prescision) { - auto srcData = inputMemory.GetData(); - auto dstData = outputMemory.GetData(); - auto mdInput = inputMemory.GetDescWithType()->getDnnlDesc(); - auto mdOutput = outputMemory.GetDescWithType()->getDnnlDesc(); + auto srcData = inputMemory.getData(); + auto dstData = outputMemory.getData(); + auto mdInput = inputMemory.getDescWithType()->getDnnlDesc(); + auto mdOutput = outputMemory.getDescWithType()->getDnnlDesc(); const dnnl::impl::memory_desc_wrapper mdwInput(mdInput.get()); const dnnl::impl::memory_desc_wrapper mdwOutput(mdOutput.get()); @@ -68,11 +68,11 @@ inline std::string layoutName(const LayoutType& layout) { return "Unsupported layout type"; } -inline void fillData(const ov::intel_cpu::Memory& inputMemory, const InferenceEngine::Precision& prec) { - ov::intel_cpu::DnnlMemoryDescPtr dnnlMdInput = inputMemory.GetDescWithType(); +inline void fillData(const ov::intel_cpu::IMemory& inputMemory, const InferenceEngine::Precision& prec) { + ov::intel_cpu::DnnlMemoryDescPtr dnnlMdInput = inputMemory.getDescWithType(); const dnnl::impl::memory_desc_wrapper mdInput{dnnlMdInput->getDnnlDesc().get()}; auto elemNum = mdInput.nelems(); - auto inputReorderData = inputMemory.GetData(); + auto inputReorderData = inputMemory.getData(); switch (prec) { case InferenceEngine::Precision::FP32: for (int64_t i = 0; i < elemNum; ++i) @@ -133,10 +133,8 @@ class ReorderCPUTestGraph { reorderNode->addEdge(parentEdge); reorderNode->addEdge(childEdge); - auto parentMemory = std::make_shared(cpuEngine); - auto childMemory = std::make_shared(cpuEngine); - parentMemory->Create(inputDesc, nullptr); - 
childMemory->Create(outputDesc, nullptr); + auto parentMemory = std::make_shared(cpuEngine, inputDesc); + auto childMemory = std::make_shared(cpuEngine, outputDesc); parentEdge->reuse(parentMemory); childEdge->reuse(childMemory); @@ -274,7 +272,7 @@ class ReorderCustomizedStrideTest : public ::testing::Test, // Fill dstData with zeros void generateInput() { fillData(parentEdge->getMemory(), prec); - memset(childEdge->getMemory().GetData(), 0, childEdge->getMemory().GetSize()); + memset(childEdge->getMemory().getData(), 0, childEdge->getMemory().getSize()); } size_t getNumElems(const std::vector& dims) { From 0148076ed739402676a47c4be5cb2217bb4f8cdb Mon Sep 17 00:00:00 2001 From: Maciej Smyk Date: Tue, 11 Jul 2023 10:43:54 +0200 Subject: [PATCH 16/21] [DOCS] Code block update for master (#18437) * code-block-1 * Update Convert_Model_From_Paddle.md * code-block force * fix * fix-2 * Update troubleshooting-steps.md * code-block-2 * Update README.md --- docs/Extensibility_UG/GPU_Extensibility.md | 3 +- docs/IE_PLUGIN_DG/Building.md | 6 +- .../Deep_Learning_Model_Optimizer_DevGuide.md | 18 +- .../prepare_model/Additional_Optimizations.md | 18 +- docs/MO_DG/prepare_model/FP16_Compression.md | 3 +- .../Getting_performance_numbers.md | 6 +- docs/MO_DG/prepare_model/MO_Python_API.md | 24 +- .../prepare_model/Model_Optimizer_FAQ.md | 9 +- .../Convert_Model_From_Paddle.md | 97 +-- .../Convert_Model_From_PyTorch.md | 7 +- .../Convert_Model_From_TensorFlow.md | 36 +- .../convert_model/Converting_Model.md | 15 +- .../convert_model/Cutting_Model.md | 41 +- .../kaldi_specific/Aspire_Tdnn_Model.md | 3 +- .../mxnet_specific/Convert_GluonCV_Models.md | 3 +- .../pytorch_specific/Convert_Bert_ner.md | 5 +- .../pytorch_specific/Convert_F3Net.md | 3 +- .../pytorch_specific/Convert_QuartzNet.md | 3 +- .../pytorch_specific/Convert_RCAN.md | 3 +- .../pytorch_specific/Convert_RNNT.md | 3 +- .../Convert_BERT_From_Tensorflow.md | 9 +- .../Convert_CRNN_From_Tensorflow.md | 3 +- 
.../Convert_GNMT_From_Tensorflow.md | 2 + .../Convert_NCF_From_Tensorflow.md | 1 + .../Convert_Object_Detection_API_Models.md | 11 +- .../Convert_Slim_Library_Models.md | 3 +- .../Convert_WideAndDeep_Family_Models.md | 3 +- .../Convert_XLNet_From_Tensorflow.md | 6 +- .../Customize_Model_Optimizer.md | 1 + ...odel_Optimizer_with_Caffe_Python_Layers.md | 1 + .../Model_Optimizer_Extractor.md | 2 + .../Model_Optimizer_Ports_Connections.md | 1 + ...del_Optimizer_Transformation_Extensions.md | 4 + .../supported_plugins/config_properties.md | 2 +- .../install_guides/installing-openvino-apt.md | 2 +- docs/install_guides/troubleshooting-steps.md | 3 +- docs/notebooks-installation.md | 4 +- ...diffusion-v2-infinite-zoom-with-output.rst | 10 + ...diffusion-v2-text-to-image-with-output.rst | 6 + docs/ops/activation/Clamp_1.md | 3 +- docs/ops/activation/Elu_1.md | 3 +- docs/ops/activation/Exp_1.md | 3 +- docs/ops/activation/GELU_2.md | 3 +- docs/ops/activation/GELU_7.md | 6 +- docs/ops/activation/HSigmoid_5.md | 3 +- docs/ops/activation/HSwish_4.md | 3 +- docs/ops/activation/HardSigmoid_1.md | 3 +- docs/ops/activation/LogSoftmax_5.md | 3 +- docs/ops/activation/Mish_4.md | 3 +- docs/ops/activation/PReLU_1.md | 9 +- docs/ops/activation/ReLU_1.md | 3 +- docs/ops/activation/Selu_1.md | 3 +- docs/ops/activation/Sigmoid_1.md | 3 +- docs/ops/activation/SoftMax_1.md | 15 +- docs/ops/activation/SoftMax_8.md | 15 +- docs/ops/activation/SoftPlus_4.md | 3 +- docs/ops/activation/SoftSign_9.md | 3 +- docs/ops/activation/Swish_4.md | 6 +- docs/ops/arithmetic/Abs_1.md | 3 +- docs/ops/arithmetic/Acos_1.md | 3 +- docs/ops/arithmetic/Acosh_3.md | 3 +- docs/ops/arithmetic/Add_1.md | 6 +- docs/ops/arithmetic/Asin_1.md | 3 +- docs/ops/arithmetic/Asinh_3.md | 3 +- docs/ops/arithmetic/Atan_1.md | 3 +- docs/ops/arithmetic/Atanh_3.md | 3 +- docs/ops/arithmetic/Ceiling_1.md | 3 +- docs/ops/arithmetic/Cos_1.md | 3 +- docs/ops/arithmetic/Cosh_1.md | 3 +- docs/ops/arithmetic/CumSum_3.md | 12 +- 
docs/ops/arithmetic/Divide_1.md | 6 +- docs/ops/arithmetic/Erf_1.md | 3 +- docs/ops/arithmetic/FloorMod_1.md | 6 +- docs/ops/arithmetic/Floor_1.md | 3 +- docs/ops/arithmetic/Log_1.md | 3 +- docs/ops/arithmetic/Maximum_1.md | 6 +- docs/ops/arithmetic/Minimum_1.md | 6 +- docs/ops/arithmetic/Mod_1.md | 6 +- docs/ops/arithmetic/Multiply_1.md | 6 +- docs/ops/arithmetic/Negative_1.md | 3 +- docs/ops/arithmetic/Power_1.md | 6 +- docs/ops/arithmetic/Round_5.md | 48 +- docs/ops/arithmetic/Sign_1.md | 3 +- docs/ops/arithmetic/Sin_1.md | 3 +- docs/ops/arithmetic/Sinh_1.md | 3 +- docs/ops/arithmetic/Sqrt_1.md | 9 +- docs/ops/arithmetic/SquaredDifference_1.md | 6 +- docs/ops/arithmetic/Subtract_1.md | 6 +- docs/ops/arithmetic/Tan_1.md | 9 +- docs/ops/arithmetic/Tanh_1.md | 3 +- docs/ops/comparison/Equal_1.md | 6 +- docs/ops/comparison/GreaterEqual_1.md | 6 +- docs/ops/comparison/Greater_1.md | 6 +- docs/ops/comparison/IsFinite_10.md | 6 +- docs/ops/comparison/IsInf_10.md | 3 +- docs/ops/comparison/IsNaN_10.md | 3 +- docs/ops/comparison/LessEqual_1.md | 6 +- docs/ops/comparison/Less_1.md | 6 +- docs/ops/condition/Bucketize_3.md | 3 +- docs/ops/condition/If_8.md | 3 +- docs/ops/condition/NonZero_3.md | 3 +- docs/ops/condition/Select_1.md | 3 +- docs/ops/convolution/BinaryConvolution_1.md | 3 +- .../convolution/ConvolutionBackpropData_1.md | 15 +- docs/ops/convolution/Convolution_1.md | 9 +- .../convolution/DeformableConvolution_1.md | 6 +- .../convolution/DeformableConvolution_8.md | 3 +- .../GroupConvolutionBackpropData_1.md | 9 +- docs/ops/convolution/GroupConvolution_1.md | 9 +- .../ops/detection/DeformablePSROIPooling_1.md | 6 +- docs/ops/detection/DetectionOutput_1.md | 3 +- docs/ops/detection/DetectionOutput_8.md | 3 +- .../ExperimentalDetectronDetectionOutput_6.md | 3 +- ...DetectronGenerateProposalsSingleImage_6.md | 3 +- ...perimentalDetectronPriorGridGenerator_6.md | 3 +- ...erimentalDetectronROIFeatureExtractor_6.md | 3 +- docs/ops/detection/GenerateProposals_9.md | 3 
+- docs/ops/detection/PSROIPooling_1.md | 51 +- docs/ops/detection/PriorBoxClustered_1.md | 39 +- docs/ops/detection/PriorBox_1.md | 39 +- docs/ops/detection/PriorBox_8.md | 39 +- docs/ops/detection/Proposal_1.md | 17 +- docs/ops/detection/Proposal_4.md | 63 +- docs/ops/detection/ROIAlign_3.md | 57 +- docs/ops/detection/ROIAlign_9.md | 57 +- docs/ops/detection/ROIPooling_1.md | 15 +- docs/ops/detection/RegionYolo_1.md | 83 +-- docs/ops/detection/ReorgYolo_1.md | 3 +- docs/ops/generation/Eye_9.md | 15 +- docs/ops/generation/RandomUniform_8.md | 30 +- docs/ops/generation/Range_1.md | 6 +- docs/ops/generation/Range_4.md | 9 +- docs/ops/image/GridSample_9.md | 3 +- docs/ops/image/I420toBGR_8.md | 6 +- docs/ops/image/I420toRGB_8.md | 6 +- docs/ops/image/Interpolate_1.md | 3 +- docs/ops/image/Interpolate_11.md | 3 +- docs/ops/image/Interpolate_4.md | 6 +- docs/ops/image/NV12toBGR_8.md | 6 +- docs/ops/image/NV12toRGB_8.md | 6 +- docs/ops/infrastructure/Assign_3.md | 3 +- docs/ops/infrastructure/Constant_1.md | 3 +- docs/ops/infrastructure/Loop_5.md | 3 +- docs/ops/infrastructure/Parameter_1.md | 27 +- docs/ops/infrastructure/ReadValue_3.md | 3 +- docs/ops/infrastructure/Result_1.md | 3 +- docs/ops/infrastructure/TensorIterator_1.md | 6 +- docs/ops/logical/LogicalAnd_1.md | 6 +- docs/ops/logical/LogicalNot_1.md | 3 +- docs/ops/logical/LogicalOr_1.md | 6 +- docs/ops/logical/LogicalXor_1.md | 6 +- docs/ops/matrix/Einsum_7.md | 6 +- docs/ops/matrix/MatMul_1.md | 18 +- docs/ops/movement/BatchToSpace_2.md | 6 +- docs/ops/movement/Broadcast_1.md | 3 +- docs/ops/movement/Broadcast_3.md | 3 +- docs/ops/movement/Concat_1.md | 6 +- docs/ops/movement/DepthToSpace_1.md | 3 +- docs/ops/movement/ExtractImagePatches_3.md | 3 +- docs/ops/movement/GatherElements_6.md | 11 +- docs/ops/movement/GatherND_5.md | 18 +- docs/ops/movement/GatherND_8.md | 23 +- docs/ops/movement/GatherTree_1.md | 6 +- docs/ops/movement/Gather_1.md | 5 +- docs/ops/movement/Gather_7.md | 12 +- 
docs/ops/movement/Gather_8.md | 19 +- docs/ops/movement/Pad_1.md | 6 +- docs/ops/movement/Pad_12.md | 9 +- docs/ops/movement/ReverseSequence_1.md | 3 +- docs/ops/movement/Reverse_1.md | 3 +- docs/ops/movement/Roll_7.md | 6 +- docs/ops/movement/ScatterElementsUpdate_3.md | 3 +- docs/ops/movement/ScatterNDUpdate_3.md | 3 +- docs/ops/movement/ScatterUpdate_3.md | 6 +- docs/ops/movement/ShuffleChannels_1.md | 3 +- docs/ops/movement/Slice_8.md | 654 +++++++++--------- docs/ops/movement/SpaceToBatch_2.md | 3 +- docs/ops/movement/SpaceToDepth_1.md | 3 +- docs/ops/movement/Split_1.md | 3 +- docs/ops/movement/StridedSlice_1.md | 10 +- docs/ops/movement/Tile_1.md | 9 +- docs/ops/movement/Transpose_1.md | 6 +- docs/ops/movement/Unique_10.md | 9 +- docs/ops/movement/VariadicSplit_1.md | 6 +- .../ops/normalization/BatchNormInference_1.md | 6 +- .../ops/normalization/BatchNormInference_5.md | 6 +- docs/ops/normalization/GRN_1.md | 3 +- .../normalization/GroupNormalization_12.md | 3 +- docs/ops/normalization/LRN_1.md | 3 +- docs/ops/normalization/MVN_1.md | 6 +- docs/ops/normalization/MVN_6.md | 3 +- docs/ops/normalization/NormalizeL2_1.md | 6 +- docs/ops/pooling/AdaptiveAvgPool_8.md | 3 +- docs/ops/pooling/AdaptiveMaxPool_8.md | 3 +- docs/ops/pooling/AvgPool_1.md | 3 +- docs/ops/pooling/MaxPool_1.md | 3 +- docs/ops/pooling/MaxPool_8.md | 3 +- docs/ops/quantization/FakeQuantize_1.md | 6 +- docs/ops/reduction/ReduceL1_4.md | 12 +- docs/ops/reduction/ReduceL2_4.md | 12 +- docs/ops/reduction/ReduceLogicalAnd_1.md | 12 +- docs/ops/reduction/ReduceLogicalOr_1.md | 12 +- docs/ops/reduction/ReduceMax_1.md | 12 +- docs/ops/reduction/ReduceMean_1.md | 15 +- docs/ops/reduction/ReduceMin_1.md | 12 +- docs/ops/reduction/ReduceProd_1.md | 12 +- docs/ops/reduction/ReduceSum_1.md | 12 +- docs/ops/sequence/CTCGreedyDecoderSeqLen_6.md | 3 +- docs/ops/sequence/CTCGreedyDecoder_1.md | 3 +- docs/ops/sequence/CTCLoss_4.md | 3 +- docs/ops/sequence/GRUCell_3.md | 5 +- docs/ops/sequence/GRUSequence_5.md 
| 3 +- docs/ops/sequence/LSTMCell_1.md | 3 +- docs/ops/sequence/OneHot_1.md | 6 +- docs/ops/sequence/RNNCell_3.md | 65 +- docs/ops/sequence/RNNSequence_5.md | 95 +-- docs/ops/shape/Reshape_1.md | 15 +- docs/ops/shape/ShapeOf_1.md | 3 +- docs/ops/shape/ShapeOf_3.md | 3 +- docs/ops/shape/Squeeze_1.md | 6 +- docs/ops/shape/Unsqueeze_1.md | 6 +- docs/ops/signals/DFT_7.md | 18 +- docs/ops/signals/IDFT_7.md | 18 +- docs/ops/signals/IRDFT_9.md | 18 +- docs/ops/signals/RDFT_9.md | 18 +- .../sort/ExperimentalDetectronTopKROIs_6.md | 3 +- .../hello_nv12_input_classification/README.md | 2 +- samples/cpp/benchmark_app/README.md | 3 +- .../hello_nv12_input_classification/README.md | 2 +- samples/python/speech_sample/README.md | 2 +- tools/pot/docs/BestPractices.md | 9 +- tools/pot/docs/DefaultQuantizationUsage.md | 6 +- tools/pot/docs/SimplifiedMode.md | 6 +- .../quantization/accuracy_aware/README.md | 2 +- .../algorithms/quantization/default/README.md | 3 +- 235 files changed, 1639 insertions(+), 1176 deletions(-) diff --git a/docs/Extensibility_UG/GPU_Extensibility.md b/docs/Extensibility_UG/GPU_Extensibility.md index 0ef611e3fcf56f..dbe2c865da9f2d 100644 --- a/docs/Extensibility_UG/GPU_Extensibility.md +++ b/docs/Extensibility_UG/GPU_Extensibility.md @@ -242,7 +242,8 @@ Example Configuration File The following code sample provides an example configuration file in XML format. For information on the configuration file structure, see the `Configuration File Format <#config-file-format>`__. -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/IE_PLUGIN_DG/Building.md b/docs/IE_PLUGIN_DG/Building.md index acb8d6e7c56867..d3d49fe06313c0 100644 --- a/docs/IE_PLUGIN_DG/Building.md +++ b/docs/IE_PLUGIN_DG/Building.md @@ -13,7 +13,7 @@ OpenVINO Developer Package To automatically generate the OpenVINO Developer Package, run the ``cmake`` tool during a OpenVINO build: -.. code-block:: bash +.. 
code-block:: sh $ mkdir openvino-release-build $ cd openvino-release-build @@ -52,7 +52,7 @@ Build Plugin using OpenVINO Developer Package To build a plugin source tree using the OpenVINO Developer Package, run the commands below: -.. code-block:: bash +.. code-block:: sh $ mkdir template-plugin-release-build $ cd template-plugin-release-build @@ -76,7 +76,7 @@ To build a plugin and its tests, run the following CMake scripts: The default values of the ``ENABLE_TESTS``, ``ENABLE_FUNCTIONAL_TESTS`` options are shared via the OpenVINO Developer Package and they are the same as for the main OpenVINO build tree. You can override them during plugin build using the command below: -.. code-block:: bash +.. code-block:: sh $ cmake -DENABLE_FUNCTIONAL_TESTS=OFF -DOpenVINODeveloperPackage_DIR=../openvino-release-build ../template-plugin diff --git a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md index 56357ab993382b..7e028cfb3431c6 100644 --- a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md +++ b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md @@ -28,7 +28,8 @@ To convert a model to OpenVINO model format (``ov.Model``), you can use the foll .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model(INPUT_MODEL) @@ -63,7 +64,8 @@ To get the full list of conversion parameters, run the following command: .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model(help=True) @@ -88,7 +90,8 @@ Below is a list of separate examples for different frameworks and model conversi .. tab-item:: Python :sync: py - .. code-block:: python + .. 
code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("MobileNet.pb") @@ -108,7 +111,8 @@ Below is a list of separate examples for different frameworks and model conversi .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("BERT", input_shape=[[2,30],[2,30],[2,30]]) @@ -130,7 +134,8 @@ Below is a list of separate examples for different frameworks and model conversi .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("ocr.onnx", output="probabilities") @@ -156,7 +161,8 @@ Below is a list of separate examples for different frameworks and model conversi .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("unet.pdmodel", mean_values=[123,117,104], scale=255) diff --git a/docs/MO_DG/prepare_model/Additional_Optimizations.md b/docs/MO_DG/prepare_model/Additional_Optimizations.md index 4b30daaa18bff7..5c1c412d7c1898 100644 --- a/docs/MO_DG/prepare_model/Additional_Optimizations.md +++ b/docs/MO_DG/prepare_model/Additional_Optimizations.md @@ -36,7 +36,8 @@ For example, the following command specifies the ``NHWC`` layout for a Tensorflo .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("tf_nasnet_large.onnx", layout="nhwc") @@ -61,7 +62,8 @@ having two dimensions: batch and size of the image expressed as the ``N?`` layou .. tab-item:: Python :sync: py - .. code-block:: python + .. 
code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("yolov3-tiny.onnx", layout={"input_1": "nchw", "image_shape": "n?"}) @@ -89,7 +91,8 @@ the following commands to provide data in the ``NCHW`` layout: .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("tf_nasnet_large.onnx", source_layout="nhwc", target_layout="nchw") @@ -117,7 +120,8 @@ mentioned earlier, use the following commands: .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("yolov3-tiny.onnx", source_layout={"input_1": "nchw", "image_shape": "n?"}, target_layout={"input_1": "nhwc"}) @@ -170,7 +174,8 @@ model and applies mean-scale normalization to the input data: .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("unet.pdmodel", mean_values=[123,117,104], scale=255) @@ -213,7 +218,8 @@ model and embeds the ``reverse_input_channel`` preprocessing block into OpenVINO .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("alexnet.pb", reverse_input_channels=True) diff --git a/docs/MO_DG/prepare_model/FP16_Compression.md b/docs/MO_DG/prepare_model/FP16_Compression.md index 44a829ef50103e..5dbd0cfc9ff6ab 100644 --- a/docs/MO_DG/prepare_model/FP16_Compression.md +++ b/docs/MO_DG/prepare_model/FP16_Compression.md @@ -14,7 +14,8 @@ To compress the model, use the ``compress_to_fp16=True`` option: .. tab-item:: Python :sync: py - .. code-block:: python + .. 
code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model(INPUT_MODEL, compress_to_fp16=False) diff --git a/docs/MO_DG/prepare_model/Getting_performance_numbers.md b/docs/MO_DG/prepare_model/Getting_performance_numbers.md index 0a744ae1670b0c..2627618aa799ec 100644 --- a/docs/MO_DG/prepare_model/Getting_performance_numbers.md +++ b/docs/MO_DG/prepare_model/Getting_performance_numbers.md @@ -21,7 +21,7 @@ To test performance of your model, make sure you :doc:`prepare the model for use For example, if you use :doc:`OpenVINO's automation tools `, these two lines of code will download the resnet-50-tf and convert it to OpenVINO IR. -.. code-block:: bash +.. code-block:: sh omz_downloader --name resnet-50-tf omz_converter --name resnet-50-tf @@ -36,7 +36,7 @@ For a detailed description, see the dedicated articles: The benchmark_app includes a lot of device-specific options, but the primary usage is as simple as: -.. code-block:: bash +.. code-block:: sh benchmark_app -m -d -i @@ -47,7 +47,7 @@ performance settings that contain command-line equivalents in the Benchmark app. While these settings provide really low-level control for the optimal model performance on the *specific* device, it is recommended to always start performance evaluation with the :doc:`OpenVINO High-Level Performance Hints ` first, like so: -.. code-block:: bash +.. code-block:: sh # for throughput prioritization benchmark_app -hint tput -m -d diff --git a/docs/MO_DG/prepare_model/MO_Python_API.md b/docs/MO_DG/prepare_model/MO_Python_API.md index a1e5bd951fc9ee..a02f721e75e843 100644 --- a/docs/MO_DG/prepare_model/MO_Python_API.md +++ b/docs/MO_DG/prepare_model/MO_Python_API.md @@ -15,7 +15,8 @@ Model conversion API is represented by ``convert_model()`` method in openvino.to Example of converting a PyTorch model directly from memory: -.. code-block:: python +.. 
code-block:: py + :force: import torchvision @@ -31,7 +32,8 @@ The following types are supported as an input model for ``convert_model()``: Example of using native Python classes to set ``input_shape``, ``mean_values`` and ``layout``: -.. code-block:: python +.. code-block:: py + :force: from openvino.runtime import PartialShape, Layout @@ -39,7 +41,8 @@ Example of using native Python classes to set ``input_shape``, ``mean_values`` a Example of using strings for setting ``input_shape``, ``mean_values`` and ``layout``: -.. code-block:: python +.. code-block:: py + :force: ov_model = convert_model(model, input_shape="[1,3,100,100]", mean_values="[127,127,127]", layout="NCHW") @@ -49,7 +52,8 @@ The shape can be a ``list`` or ``tuple`` of dimensions (``int`` or ``openvino.ru Example of using a tuple in the ``input`` parameter to cut a model: -.. code-block:: python +.. code-block:: py + :force: ov_model = convert_model(model, input=("input_name", [3], np.float32)) @@ -66,7 +70,8 @@ Supported types for ``InputCutInfo``: Example of using ``InputCutInfo`` to freeze an input with value: -.. code-block:: python +.. code-block:: py + :force: from openvino.tools.mo import convert_model, InputCutInfo @@ -85,7 +90,8 @@ Parameters supporting ``list``: Example of using lists to set shapes, types and layout for multiple inputs: -.. code-block:: python +.. code-block:: py + :force: ov_model = convert_model(model, input=[("input1", [1,3,100,100], np.float32), ("input2", [1,3,100,100], np.float32)], layout=[Layout("NCHW"), LayoutMap("NCHW", "NHWC")]) @@ -93,7 +99,8 @@ Example of using lists to set shapes, types and layout for multiple inputs: Example of using the ``Layout`` class to set the layout of a model input: -.. code-block:: python +.. 
code-block:: py + :force: from openvino.runtime import Layout from openvino.tools.mo import convert_model @@ -106,7 +113,8 @@ To set both source and destination layouts in the ``layout`` parameter, use the Example of using the ``LayoutMap`` class to change the layout of a model input: -.. code-block:: python +.. code-block:: py + :force: from openvino.tools.mo import convert_model, LayoutMap diff --git a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md index 83cb39093b04b4..6b30ddbfdbe796 100644 --- a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md +++ b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md @@ -769,7 +769,8 @@ The name should be the compilation of the layer name with the module name separa For example, your topology contains this layer with type ``Python``: -.. code-block:: +.. code-block:: py + :force: layer { name: 'proposal' @@ -785,7 +786,8 @@ For example, your topology contains this layer with type ``Python``: The first step is to implement an extension for this layer in Model Optimizer as an ancestor of ``Op`` class: -.. code-block:: +.. code-block:: py + :force: class ProposalPythonExampleOp(Op): op = 'Proposal' @@ -796,7 +798,8 @@ The first step is to implement an extension for this layer in Model Optimizer as It is mandatory to call two functions right after the implementation of that class: -.. code-block:: +.. code-block:: py + :force: class ProposalPythonExampleOp(Op): ... diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md index 7221572022ae1c..e03d2ca47161e1 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md @@ -44,24 +44,25 @@ Converting certain PaddlePaddle models may require setting ``example_input`` or * Example of converting ``paddle.hapi.model.Model`` format model: - .. 
code-block:: python - - import paddle - from openvino.tools.mo import convert_model - - # create a paddle.hapi.model.Model format model - resnet50 = paddle.vision.models.resnet50() - x = paddle.static.InputSpec([1,3,224,224], 'float32', 'x') - y = paddle.static.InputSpec([1,1000], 'float32', 'y') - - model = paddle.Model(resnet50, x, y) - - # convert to OpenVINO IR format - ov_model = convert_model(model) - - # optional: serialize OpenVINO IR to *.xml & *.bin - from openvino.runtime import serialize - serialize(ov_model, "ov_model.xml", "ov_model.bin") + .. code-block:: py + :force: + + import paddle + from openvino.tools.mo import convert_model + + # create a paddle.hapi.model.Model format model + resnet50 = paddle.vision.models.resnet50() + x = paddle.static.InputSpec([1,3,224,224], 'float32', 'x') + y = paddle.static.InputSpec([1,1000], 'float32', 'y') + + model = paddle.Model(resnet50, x, y) + + # convert to OpenVINO IR format + ov_model = convert_model(model) + + # optional: serialize OpenVINO IR to *.xml & *.bin + from openvino.runtime import serialize + serialize(ov_model, "ov_model.xml", "ov_model.bin") * Example of converting ``paddle.fluid.dygraph.layers.Layer`` format model: @@ -69,17 +70,18 @@ Converting certain PaddlePaddle models may require setting ``example_input`` or ``list`` with tensor(``paddle.Tensor``) or InputSpec(``paddle.static.input.InputSpec``) - .. code-block:: python - - import paddle - from openvino.tools.mo import convert_model + .. 
code-block:: py + :force: - # create a paddle.fluid.dygraph.layers.Layer format model - model = paddle.vision.models.resnet50() - x = paddle.rand([1,3,224,224]) - - # convert to OpenVINO IR format - ov_model = convert_model(model, example_input=[x]) + import paddle + from openvino.tools.mo import convert_model + + # create a paddle.fluid.dygraph.layers.Layer format model + model = paddle.vision.models.resnet50() + x = paddle.rand([1,3,224,224]) + + # convert to OpenVINO IR format + ov_model = convert_model(model, example_input=[x]) * Example of converting ``paddle.fluid.executor.Executor`` format model: @@ -87,25 +89,26 @@ Converting certain PaddlePaddle models may require setting ``example_input`` or ``list`` or ``tuple`` with variable(``paddle.static.data``) - .. code-block:: python - - import paddle - from openvino.tools.mo import convert_model - - paddle.enable_static() - - # create a paddle.fluid.executor.Executor format model - x = paddle.static.data(name="x", shape=[1,3,224]) - y = paddle.static.data(name="y", shape=[1,3,224]) - relu = paddle.nn.ReLU() - sigmoid = paddle.nn.Sigmoid() - y = sigmoid(relu(x)) - - exe = paddle.static.Executor(paddle.CPUPlace()) - exe.run(paddle.static.default_startup_program()) - - # convert to OpenVINO IR format - ov_model = convert_model(exe, example_input=[x], example_output=[y]) + .. 
code-block:: py + :force: + + import paddle + from openvino.tools.mo import convert_model + + paddle.enable_static() + + # create a paddle.fluid.executor.Executor format model + x = paddle.static.data(name="x", shape=[1,3,224]) + y = paddle.static.data(name="y", shape=[1,3,224]) + relu = paddle.nn.ReLU() + sigmoid = paddle.nn.Sigmoid() + y = sigmoid(relu(x)) + + exe = paddle.static.Executor(paddle.CPUPlace()) + exe.run(paddle.static.default_startup_program()) + + # convert to OpenVINO IR format + ov_model = convert_model(exe, example_input=[x], example_output=[y]) Supported PaddlePaddle Layers ############################# diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md index ad59e8f3cc26fb..37071895bef3c2 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md @@ -15,7 +15,8 @@ Model Optimizer Python API allows the conversion of PyTorch models using the ``c Example of PyTorch model conversion: -.. code-block:: python +.. code-block:: py + :force: import torchvision import torch @@ -37,7 +38,8 @@ Converting certain PyTorch models may require model tracing, which needs ``input Example of using ``example_input``: -.. code-block:: python +.. code-block:: py + :force: import torchvision import torch @@ -70,6 +72,7 @@ For more information, refer to the `Exporting PyTorch models to ONNX format @@ -92,6 +94,7 @@ The ``-b`` option is used here for conversion to override a possible undefined b The last layer in the model is ``InceptionV1/Logits/Predictions/Reshape_1``, which matches an output operation in the TensorFlow graph: .. code-block:: xml + :force: @@ -117,7 +120,8 @@ Due to automatic identification of inputs and outputs, providing the ``input`` a .. tab-item:: Python :sync: py - .. code-block:: python + .. 
code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", batch=1) @@ -156,7 +160,8 @@ If you want to cut your model at the end, you have the following options: .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", batch=1, output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") @@ -172,6 +177,7 @@ If you want to cut your model at the end, you have the following options: The resulting Intermediate Representation has three layers: .. code-block:: xml + :force: @@ -219,7 +225,8 @@ If you want to cut your model at the end, you have the following options: .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", batch=1, output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu:0") @@ -235,6 +242,7 @@ If you want to cut your model at the end, you have the following options: The resulting Intermediate Representation has three layers, which are the same as in the previous case: .. code-block:: xml + :force: @@ -282,7 +290,8 @@ If you want to cut your model at the end, you have the following options: .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", batch=1, output="0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") @@ -298,6 +307,7 @@ If you want to cut your model at the end, you have the following options: The resulting Intermediate Representation has two layers, which are the same as the first two layers in the previous case: .. code-block:: xml + :force: @@ -339,7 +349,8 @@ If you want to go further and cut the beginning of the model, leaving only the ` .. tab-item:: Python :sync: py - .. code-block:: python + .. 
code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", batch=1, output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu", input="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") @@ -355,6 +366,7 @@ If you want to go further and cut the beginning of the model, leaving only the ` The resulting Intermediate Representation looks as follows: .. code-block:: xml + :force: @@ -390,7 +402,8 @@ If you want to go further and cut the beginning of the model, leaving only the ` .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", batch=1, input="0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu", output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") @@ -406,6 +419,7 @@ If you want to go further and cut the beginning of the model, leaving only the ` The resulting Intermediate Representation looks as follows: .. code-block:: xml + :force: @@ -441,7 +455,8 @@ If you want to go further and cut the beginning of the model, leaving only the ` .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", batch=1, input="InceptionV1/InceptionV1/Conv2d_1a_7x7/BatchNorm/batchnorm/add_1:0", output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") @@ -457,6 +472,7 @@ If you want to go further and cut the beginning of the model, leaving only the ` The resulting Intermediate Representation looks as follows: .. code-block:: xml + :force: @@ -496,7 +512,8 @@ Following this behavior, ``convert_model()`` creates an ``Input`` layer for port .. tab-item:: Python :sync: py - .. code-block:: python + .. 
code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", batch=1, input="InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution") @@ -518,7 +535,8 @@ Different behavior occurs when ``input_shape`` is also used as an attempt to ove .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", input="InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution", input_shape=[1,224,224,3]) @@ -548,7 +566,8 @@ The correct command line is: .. tab-item:: Python :sync: py - .. code-block:: python + .. code-block:: py + :force: from openvino.tools.mo import convert_model ov_model = convert_model("inception_v1.pb", input="0:InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution", input_shape=[1,224,224,3]) diff --git a/docs/MO_DG/prepare_model/convert_model/kaldi_specific/Aspire_Tdnn_Model.md b/docs/MO_DG/prepare_model/convert_model/kaldi_specific/Aspire_Tdnn_Model.md index c5002f9a62dfa1..856bd580cd2cc6 100644 --- a/docs/MO_DG/prepare_model/convert_model/kaldi_specific/Aspire_Tdnn_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/kaldi_specific/Aspire_Tdnn_Model.md @@ -107,7 +107,8 @@ Prepare ivectors for the Speech Recognition sample: 5. For the Speech Recognition sample, the ``.ark`` file must contain an ivector for each frame. Copy the ivector ``frame_count`` times by running the below script in the Python command prompt: - .. code-block:: python + .. 
code-block:: py + :force: import subprocess diff --git a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_GluonCV_Models.md b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_GluonCV_Models.md index 124bb40861c2e2..37ca88d9b56cb8 100644 --- a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_GluonCV_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_GluonCV_Models.md @@ -15,7 +15,8 @@ This article provides the instructions and examples on how to convert `GluonCV S 1. Choose the topology available from the `GluonCV Model Zoo `__ and export to the MXNet format using the GluonCV API. For example, for the ``ssd_512_mobilenet1.0`` topology: - .. code-block:: python + .. code-block:: py + :force: from gluoncv import model_zoo, data, utils from gluoncv.utils import export_block diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_Bert_ner.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_Bert_ner.md index 5b49a90def8b08..a48268609933bb 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_Bert_ner.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_Bert_ner.md @@ -22,7 +22,8 @@ directory of the model repository. If you download the pretrained model, you nee to download `bert.py `__ to run the script. The instructions were tested with the commit-SHA: ``e5be564156f194f1becb0d82aeaf6e762d9eb9ed``. -.. code-block:: python +.. code-block:: py + :force: import torch @@ -61,7 +62,7 @@ The script generates ONNX model file ``bert-ner.onnx``. Converting an ONNX BERT-NER model to IR ####################################### -.. code-block:: bash +.. 
code-block:: sh mo --input_model bert-ner.onnx --input "input_mask[1,128],segment_ids[1,128],input_ids[1,128]" diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md index 934eca8652ad0d..04ed98ee60fc6a 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md @@ -25,7 +25,8 @@ Downloading and Converting the Model to ONNX To download the pretrained model or train the model yourself, refer to the `instructions `__ in the F3Net model repository. First, convert the model to ONNX format. Create and run the following Python script in the ``src`` directory of the model repository: -.. code-block:: python +.. code-block:: py + :force: import torch from dataset import Config diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_QuartzNet.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_QuartzNet.md index 9276f3fe8ba7a9..08d12669443388 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_QuartzNet.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_QuartzNet.md @@ -19,7 +19,8 @@ Here are the instructions on how to obtain QuartzNet in ONNX format. 2. Run the following code: - .. code-block:: python + .. 
code-block:: py + :force: import nemo import nemo.collections.asr as nemo_asr diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RCAN.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RCAN.md index f736befd64eed4..154b8d0f037b02 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RCAN.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RCAN.md @@ -15,7 +15,8 @@ Downloading and Converting the Model to ONNX To download the pre-trained model or train the model yourself, refer to the `instruction `__ in the RCAN model repository. First, convert the model to ONNX format. Create and run the script with the following content in the root directory of the model repository: -.. code-block:: python +.. code-block:: py + :force: from argparse import Namespace diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md index a8d11b7338ceb9..08d14dfb1fc7d5 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md @@ -56,7 +56,8 @@ if you were following the `guide `__ file defines the pre-processing function for the Inception models. The ``preprocess_for_eval`` function contains the following code: -.. code-block:: python +.. code-block:: py + :force: ... 
import tensorflow as tf diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md index 017a3019394f3e..425038bca4ca3f 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md @@ -49,7 +49,8 @@ The Wide and Deep model is no longer in the master branch of the repository but As the OpenVINOâ„¢ toolkit does not support the categorical with hash and crossed features, such feature types must be switched off in the model by changing the ``build_model_columns()`` function in `census_dataset.py` as follows: -.. code-block:: python +.. code-block:: py + :force: def build_model_columns(): """Builds a set of wide and deep feature columns.""" diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md index cb0be7d770a99d..b9ede8f74bbd4b 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md @@ -48,7 +48,8 @@ To get pb-file from the archive contents, you need to do the following. .. note:: The original model repository has been tested with TensorFlow 1.13.1 under Python2. - .. code-block:: python + .. code-block:: py + :force: from collections import namedtuple @@ -131,7 +132,8 @@ To get ``pb-file`` from the archive contents, follow the instructions below: 2. Save and run the following Python script in ``~/XLNet-Large/xlnet``: - .. code-block:: python + .. 
code-block:: py + :force: from collections import namedtuple diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md index d2f86b2e6077c1..b96c23beed1271 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md @@ -225,6 +225,7 @@ example of the shape infer function for the :doc:`Reshape `__. .. code-block:: py + :force: import inspect from typing import List, Optional, Union, Dict @@ -942,6 +945,7 @@ Configure Inference Pipeline First, you should create instances of OpenVINO Model. .. code-block:: py + :force: from openvino.runtime import Core core = Core() @@ -954,6 +958,7 @@ Model tokenizer and scheduler are also important parts of the pipeline. Let us define them and put all components together. .. code-block:: py + :force: from transformers import CLIPTokenizer @@ -979,6 +984,7 @@ Consider increasing ``steps`` to get more precise results. A suggested value is ``50``, but it will take longer time to process. .. code-block:: py + :force: import gradio as gr from socket import gethostbyname, gethostname diff --git a/docs/ops/activation/Clamp_1.md b/docs/ops/activation/Clamp_1.md index 371a2619138cac..3135dcc11cf964 100644 --- a/docs/ops/activation/Clamp_1.md +++ b/docs/ops/activation/Clamp_1.md @@ -57,7 +57,8 @@ Let *min_value* and *max_value* be *min* and *max*, respectively. The mathematic **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/Elu_1.md b/docs/ops/activation/Elu_1.md index 7998368ceea7f2..36382d5bd23c1d 100644 --- a/docs/ops/activation/Elu_1.md +++ b/docs/ops/activation/Elu_1.md @@ -52,7 +52,8 @@ where α corresponds to *alpha* attribute. **Example** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/activation/Exp_1.md b/docs/ops/activation/Exp_1.md index fe11978bbbc143..57967b221fedb4 100644 --- a/docs/ops/activation/Exp_1.md +++ b/docs/ops/activation/Exp_1.md @@ -36,7 +36,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/GELU_2.md b/docs/ops/activation/GELU_2.md index b3eb3daa0d81b8..5223d91f9e622c 100644 --- a/docs/ops/activation/GELU_2.md +++ b/docs/ops/activation/GELU_2.md @@ -47,7 +47,8 @@ Additionally, the *Gelu* function may be approximated as follows: **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/GELU_7.md b/docs/ops/activation/GELU_7.md index e69c1de5b0b30d..aedb55fbfba018 100644 --- a/docs/ops/activation/GELU_7.md +++ b/docs/ops/activation/GELU_7.md @@ -69,7 +69,8 @@ For ``tanh`` approximation mode, *Gelu* function is represented as: *Example*: ``tanh`` approximation mode -.. code-block:: cpp +.. code-block:: xml + :force: @@ -90,7 +91,8 @@ For ``tanh`` approximation mode, *Gelu* function is represented as: *Example:* ``erf`` approximation mode -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/activation/HSigmoid_5.md b/docs/ops/activation/HSigmoid_5.md index 2fb93070dfedc0..6e5f2264315eab 100644 --- a/docs/ops/activation/HSigmoid_5.md +++ b/docs/ops/activation/HSigmoid_5.md @@ -38,7 +38,8 @@ The HSigmoid operation is introduced in the following `article diff --git a/docs/ops/activation/HSwish_4.md b/docs/ops/activation/HSwish_4.md index a1bc89897b7c04..74ef71ff18b4e6 100644 --- a/docs/ops/activation/HSwish_4.md +++ b/docs/ops/activation/HSwish_4.md @@ -39,7 +39,8 @@ The HSwish operation is introduced in the following `article diff --git a/docs/ops/activation/HardSigmoid_1.md b/docs/ops/activation/HardSigmoid_1.md index 7f5ef3b8e679b7..d26bba9d2d5e4f 100644 --- a/docs/ops/activation/HardSigmoid_1.md +++ b/docs/ops/activation/HardSigmoid_1.md @@ -44,7 +44,8 @@ where α corresponds to ``alpha`` scalar input and β corresponds to ``beta`` sc **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/LogSoftmax_5.md b/docs/ops/activation/LogSoftmax_5.md index dfaccffed8610e..168de9e5fe46ed 100644 --- a/docs/ops/activation/LogSoftmax_5.md +++ b/docs/ops/activation/LogSoftmax_5.md @@ -56,7 +56,8 @@ where :math:`C` is a size of tensor along *axis* dimension. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/Mish_4.md b/docs/ops/activation/Mish_4.md index 826a21134fa69c..5825b35aa99510 100644 --- a/docs/ops/activation/Mish_4.md +++ b/docs/ops/activation/Mish_4.md @@ -39,7 +39,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/PReLU_1.md b/docs/ops/activation/PReLU_1.md index 4271ac94d78ebd..77535422977be2 100644 --- a/docs/ops/activation/PReLU_1.md +++ b/docs/ops/activation/PReLU_1.md @@ -54,7 +54,8 @@ Another mathematical representation that may be found in other references: Example: 1D input tensor ``data`` -.. code-block:: cpp +.. 
code-block:: xml + :force: @@ -75,7 +76,8 @@ Example: 1D input tensor ``data`` Example: 2D input tensor ``data`` -.. code-block:: cpp +.. code-block:: xml + :force: @@ -97,7 +99,8 @@ Example: 2D input tensor ``data`` Example: 4D input tensor ``data`` -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/ReLU_1.md b/docs/ops/activation/ReLU_1.md index 94e54e9ed9a0f6..c1b4b6707aebc1 100644 --- a/docs/ops/activation/ReLU_1.md +++ b/docs/ops/activation/ReLU_1.md @@ -35,7 +35,8 @@ For each element from the input tensor calculates corresponding element in the o **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/Selu_1.md b/docs/ops/activation/Selu_1.md index 0f5058cad03cc1..18f474a556d041 100644 --- a/docs/ops/activation/Selu_1.md +++ b/docs/ops/activation/Selu_1.md @@ -50,7 +50,8 @@ Another mathematical representation that may be found in other references: **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/Sigmoid_1.md b/docs/ops/activation/Sigmoid_1.md index f8078da39eedae..370c866a735fa4 100644 --- a/docs/ops/activation/Sigmoid_1.md +++ b/docs/ops/activation/Sigmoid_1.md @@ -35,7 +35,8 @@ For each element from the input tensor calculates corresponding element in the o **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/SoftMax_1.md b/docs/ops/activation/SoftMax_1.md index 3864d7cb179ae8..d9cdae2a763ab0 100644 --- a/docs/ops/activation/SoftMax_1.md +++ b/docs/ops/activation/SoftMax_1.md @@ -42,12 +42,13 @@ where :math:`C` is a size of tensor along *axis* dimension. **Example** -.. code-block:: cpp - - - - ... - ... - +.. code-block:: xml + :force: + + + + ... + ... 
+ @endsphinxdirective diff --git a/docs/ops/activation/SoftMax_8.md b/docs/ops/activation/SoftMax_8.md index 519f549b8e6aec..65cb0a69b0d70e 100644 --- a/docs/ops/activation/SoftMax_8.md +++ b/docs/ops/activation/SoftMax_8.md @@ -42,12 +42,13 @@ where :math:`C` is a size of tensor along *axis* dimension. **Example** -.. code-block:: cpp - - - - ... - ... - +.. code-block:: xml + :force: + + + + ... + ... + @endsphinxdirective diff --git a/docs/ops/activation/SoftPlus_4.md b/docs/ops/activation/SoftPlus_4.md index e4897ee6f0e2ef..9ace9640c91bba 100644 --- a/docs/ops/activation/SoftPlus_4.md +++ b/docs/ops/activation/SoftPlus_4.md @@ -53,7 +53,8 @@ For example, if *T* is ``fp32``, ``threshold`` should be ``20`` or if *T* is ``f **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/SoftSign_9.md b/docs/ops/activation/SoftSign_9.md index bf8fbaee1f1861..c3433dbd75b0f4 100644 --- a/docs/ops/activation/SoftSign_9.md +++ b/docs/ops/activation/SoftSign_9.md @@ -37,7 +37,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/activation/Swish_4.md b/docs/ops/activation/Swish_4.md index f460915b21e1bd..96a7632ae62a16 100644 --- a/docs/ops/activation/Swish_4.md +++ b/docs/ops/activation/Swish_4.md @@ -44,7 +44,8 @@ where β corresponds to ``beta`` scalar input. Example: Second input ``beta`` provided -.. code-block:: cpp +.. code-block:: xml + :force: @@ -66,7 +67,8 @@ Example: Second input ``beta`` provided Example: Second input ``beta`` not provided -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Abs_1.md b/docs/ops/arithmetic/Abs_1.md index c49574fca69fae..9527e69200d57a 100644 --- a/docs/ops/arithmetic/Abs_1.md +++ b/docs/ops/arithmetic/Abs_1.md @@ -39,7 +39,8 @@ No attributes available. *Example 1* -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Acos_1.md b/docs/ops/arithmetic/Acos_1.md index 5e9b33c0d7a4ca..b1db2dd758fd5c 100644 --- a/docs/ops/arithmetic/Acos_1.md +++ b/docs/ops/arithmetic/Acos_1.md @@ -38,7 +38,8 @@ No attributes available. *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Acosh_3.md b/docs/ops/arithmetic/Acosh_3.md index c52b86f8551469..0b1363810ca8b2 100644 --- a/docs/ops/arithmetic/Acosh_3.md +++ b/docs/ops/arithmetic/Acosh_3.md @@ -34,7 +34,8 @@ **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Add_1.md b/docs/ops/arithmetic/Add_1.md index 8a57d0bd7ec9bc..02576871b666c1 100644 --- a/docs/ops/arithmetic/Add_1.md +++ b/docs/ops/arithmetic/Add_1.md @@ -53,7 +53,8 @@ After broadcasting *Add* performs addition operation for the input tensors *a* a *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -77,7 +78,8 @@ After broadcasting *Add* performs addition operation for the input tensors *a* a *Example 2: broadcast* -.. code-block:: console +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Asin_1.md b/docs/ops/arithmetic/Asin_1.md index 224645366f9642..f22a3d74172f10 100644 --- a/docs/ops/arithmetic/Asin_1.md +++ b/docs/ops/arithmetic/Asin_1.md @@ -38,7 +38,8 @@ No attributes available. *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Asinh_3.md b/docs/ops/arithmetic/Asinh_3.md index be50df7311312a..588130134bd7ce 100644 --- a/docs/ops/arithmetic/Asinh_3.md +++ b/docs/ops/arithmetic/Asinh_3.md @@ -34,7 +34,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Atan_1.md b/docs/ops/arithmetic/Atan_1.md index df6be29567632f..baf93a85599b57 100644 --- a/docs/ops/arithmetic/Atan_1.md +++ b/docs/ops/arithmetic/Atan_1.md @@ -34,7 +34,8 @@ **Examples** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Atanh_3.md b/docs/ops/arithmetic/Atanh_3.md index e70fd71c2a40f5..39a48e2b8346ac 100644 --- a/docs/ops/arithmetic/Atanh_3.md +++ b/docs/ops/arithmetic/Atanh_3.md @@ -49,7 +49,8 @@ Unsigned Integral type put: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Ceiling_1.md b/docs/ops/arithmetic/Ceiling_1.md index 312d5c114fa9c6..53828ccf26dc1b 100644 --- a/docs/ops/arithmetic/Ceiling_1.md +++ b/docs/ops/arithmetic/Ceiling_1.md @@ -36,7 +36,8 @@ *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Cos_1.md b/docs/ops/arithmetic/Cos_1.md index fb6a99147d2310..d52a8c1545aae9 100644 --- a/docs/ops/arithmetic/Cos_1.md +++ b/docs/ops/arithmetic/Cos_1.md @@ -34,7 +34,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Cosh_1.md b/docs/ops/arithmetic/Cosh_1.md index b2e84a784525a1..027dac127a3dc4 100644 --- a/docs/ops/arithmetic/Cosh_1.md +++ b/docs/ops/arithmetic/Cosh_1.md @@ -34,7 +34,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/CumSum_3.md b/docs/ops/arithmetic/CumSum_3.md index 3c8d73dbc00c24..13468c4a9344d1 100644 --- a/docs/ops/arithmetic/CumSum_3.md +++ b/docs/ops/arithmetic/CumSum_3.md @@ -59,7 +59,8 @@ To perform the summation in the opposite direction of the axis, set reverse attr *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -77,7 +78,8 @@ To perform the summation in the opposite direction of the axis, set reverse attr *Example 2* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -95,7 +97,8 @@ To perform the summation in the opposite direction of the axis, set reverse attr *Example 3* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -113,7 +116,8 @@ To perform the summation in the opposite direction of the axis, set reverse attr *Example 4* -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Divide_1.md b/docs/ops/arithmetic/Divide_1.md index ec0cf6d52d2dc0..4ba69064b5142e 100644 --- a/docs/ops/arithmetic/Divide_1.md +++ b/docs/ops/arithmetic/Divide_1.md @@ -64,7 +64,8 @@ The result of division by zero is undefined. *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -88,7 +89,8 @@ The result of division by zero is undefined. *Example 2: broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Erf_1.md b/docs/ops/arithmetic/Erf_1.md index bbd934813a1771..098d5190288c57 100644 --- a/docs/ops/arithmetic/Erf_1.md +++ b/docs/ops/arithmetic/Erf_1.md @@ -36,7 +36,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/FloorMod_1.md b/docs/ops/arithmetic/FloorMod_1.md index dca84d6a7ac7b8..08aaec96e56876 100644 --- a/docs/ops/arithmetic/FloorMod_1.md +++ b/docs/ops/arithmetic/FloorMod_1.md @@ -54,7 +54,8 @@ Python programming language: :math:`floor(x / y) * y + floor\_mod(x, y) = x`. Th *Example 1 - no broadcasting* -.. code-block:: +.. code-block:: xml + :force: @@ -78,7 +79,8 @@ Python programming language: :math:`floor(x / y) * y + floor\_mod(x, y) = x`. Th *Example 2: numpy broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Floor_1.md b/docs/ops/arithmetic/Floor_1.md index 02b5f6b7c34a04..86a043681a5de1 100644 --- a/docs/ops/arithmetic/Floor_1.md +++ b/docs/ops/arithmetic/Floor_1.md @@ -38,7 +38,8 @@ element in the output tensor with the following formula: *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Log_1.md b/docs/ops/arithmetic/Log_1.md index f505670c6d73c1..6dfafbc3d2b76d 100644 --- a/docs/ops/arithmetic/Log_1.md +++ b/docs/ops/arithmetic/Log_1.md @@ -39,7 +39,8 @@ No attributes available. *Example 1* -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Maximum_1.md b/docs/ops/arithmetic/Maximum_1.md index db85ac1c7cfe6a..905e1e41f3334d 100644 --- a/docs/ops/arithmetic/Maximum_1.md +++ b/docs/ops/arithmetic/Maximum_1.md @@ -53,7 +53,8 @@ After broadcasting *Maximum* does the following with the input tensors *a* and * *Example 1 - no broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -78,7 +79,8 @@ After broadcasting *Maximum* does the following with the input tensors *a* and * *Example 2: numpy broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Minimum_1.md b/docs/ops/arithmetic/Minimum_1.md index a17268837d62ab..906453cd4f9c29 100644 --- a/docs/ops/arithmetic/Minimum_1.md +++ b/docs/ops/arithmetic/Minimum_1.md @@ -51,7 +51,8 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ *Example 1 - no broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -76,7 +77,8 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ *Example 2: numpy broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Mod_1.md b/docs/ops/arithmetic/Mod_1.md index 31ca65b4ecd0ac..937fb0148bee85 100644 --- a/docs/ops/arithmetic/Mod_1.md +++ b/docs/ops/arithmetic/Mod_1.md @@ -54,7 +54,8 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ *Example 1 - no broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -79,7 +80,8 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ *Example 2: numpy broadcasting* -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Multiply_1.md b/docs/ops/arithmetic/Multiply_1.md index b06fd4aa97c8d0..77af023b2f5d65 100644 --- a/docs/ops/arithmetic/Multiply_1.md +++ b/docs/ops/arithmetic/Multiply_1.md @@ -54,7 +54,8 @@ After broadcasting *Multiply* performs multiplication operation for the input te *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -79,7 +80,8 @@ After broadcasting *Multiply* performs multiplication operation for the input te *Example 2: broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Negative_1.md b/docs/ops/arithmetic/Negative_1.md index 61ba7b73c01743..761ae414c90843 100644 --- a/docs/ops/arithmetic/Negative_1.md +++ b/docs/ops/arithmetic/Negative_1.md @@ -36,7 +36,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Power_1.md b/docs/ops/arithmetic/Power_1.md index 8188afd6a640ac..bbe918222dba10 100644 --- a/docs/ops/arithmetic/Power_1.md +++ b/docs/ops/arithmetic/Power_1.md @@ -50,7 +50,8 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ *Example 1 - no broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -75,7 +76,8 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ *Example 2: numpy broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Round_5.md b/docs/ops/arithmetic/Round_5.md index d11cd8f1c50d4d..5b40a070d55612 100644 --- a/docs/ops/arithmetic/Round_5.md +++ b/docs/ops/arithmetic/Round_5.md @@ -14,13 +14,14 @@ **Detailed description**: Operation takes one input tensor and rounds the values, element-wise, meaning it finds the nearest integer for each value. 
In case of halves, the rule is to round them to the nearest even integer if ``mode`` attribute is ``half_to_even`` or rounding in such a way that the result heads away from zero if ``mode`` attribute is ``half_away_from_zero``. -.. code-block:: cpp +.. code-block:: xml + :force: - Input = [-4.5, -1.9, -1.5, 0.5, 0.9, 1.5, 2.3, 2.5] - - round(Input, mode = `half_to_even`) = [-4.0, -2.0, -2.0, 0.0, 1.0, 2.0, 2.0, 2.0] - - round(Input, mode = `half_away_from_zero`) = [-5.0, -2.0, -2.0, 1.0, 1.0, 2.0, 2.0, 3.0] + Input = [-4.5, -1.9, -1.5, 0.5, 0.9, 1.5, 2.3, 2.5] + + round(Input, mode = `half_to_even`) = [-4.0, -2.0, -2.0, 0.0, 1.0, 2.0, 2.0, 2.0] + + round(Input, mode = `half_away_from_zero`) = [-5.0, -2.0, -2.0, 1.0, 1.0, 2.0, 2.0, 3.0] **Attributes**: @@ -46,22 +47,23 @@ **Example** -.. code-block:: cpp - - - - - - 256 - 56 - - - - - 256 - 56 - - - +.. code-block:: xml + :force: + + + + + + 256 + 56 + + + + + 256 + 56 + + + @endsphinxdirective diff --git a/docs/ops/arithmetic/Sign_1.md b/docs/ops/arithmetic/Sign_1.md index 38226a26405e8f..ea361b38f5d2e3 100644 --- a/docs/ops/arithmetic/Sign_1.md +++ b/docs/ops/arithmetic/Sign_1.md @@ -35,7 +35,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Sin_1.md b/docs/ops/arithmetic/Sin_1.md index fe9ae89402d0d8..29649992ca0baf 100644 --- a/docs/ops/arithmetic/Sin_1.md +++ b/docs/ops/arithmetic/Sin_1.md @@ -42,7 +42,8 @@ No attributes available. *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Sinh_1.md b/docs/ops/arithmetic/Sinh_1.md index 07d9db6d914858..6b4f4dc7defdcd 100644 --- a/docs/ops/arithmetic/Sinh_1.md +++ b/docs/ops/arithmetic/Sinh_1.md @@ -34,7 +34,8 @@ **Example** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Sqrt_1.md b/docs/ops/arithmetic/Sqrt_1.md index d2247e3e2aeede..ae436eef7af098 100644 --- a/docs/ops/arithmetic/Sqrt_1.md +++ b/docs/ops/arithmetic/Sqrt_1.md @@ -40,7 +40,8 @@ *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -57,7 +58,8 @@ *Example 2* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -75,7 +77,8 @@ *Example 3* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/SquaredDifference_1.md b/docs/ops/arithmetic/SquaredDifference_1.md index 5744eee835fe26..aed874982efb77 100644 --- a/docs/ops/arithmetic/SquaredDifference_1.md +++ b/docs/ops/arithmetic/SquaredDifference_1.md @@ -50,7 +50,8 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ *Example 1 - no broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -74,7 +75,8 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ *Example 2: numpy broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Subtract_1.md b/docs/ops/arithmetic/Subtract_1.md index 704aa7818adc29..4653f52f83eee6 100644 --- a/docs/ops/arithmetic/Subtract_1.md +++ b/docs/ops/arithmetic/Subtract_1.md @@ -52,7 +52,8 @@ After broadcasting *Subtract* performs subtraction operation for the input tenso *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -76,7 +77,8 @@ After broadcasting *Subtract* performs subtraction operation for the input tenso *Example 2: broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Tan_1.md b/docs/ops/arithmetic/Tan_1.md index b2f0e1aa3b9992..c2958560df9c40 100644 --- a/docs/ops/arithmetic/Tan_1.md +++ b/docs/ops/arithmetic/Tan_1.md @@ -20,14 +20,16 @@ *Example 1* -.. code-block:: cpp +.. 
code-block:: xml + :force: input = [0.0, 0.25, -0.25, 0.5, -0.5] output = [0.0, 0.25534192, -0.25534192, 0.54630249, -0.54630249] *Example 2* -.. code-block:: cpp +.. code-block:: xml + :force: input = [-2, -1, 0, 1, 2] output = [2, -2, 0, 2, -2] @@ -49,7 +51,8 @@ **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/arithmetic/Tanh_1.md b/docs/ops/arithmetic/Tanh_1.md index 8c3cb77b4e8271..27e9af2b7ba34e 100644 --- a/docs/ops/arithmetic/Tanh_1.md +++ b/docs/ops/arithmetic/Tanh_1.md @@ -41,7 +41,8 @@ For each element from the input tensor calculates corresponding element in the o *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/comparison/Equal_1.md b/docs/ops/comparison/Equal_1.md index d827d7b152bd11..2834c437d1a3a1 100644 --- a/docs/ops/comparison/Equal_1.md +++ b/docs/ops/comparison/Equal_1.md @@ -53,7 +53,8 @@ After broadcasting *Equal* does the following with the input tensors *a* and *b* *Example 1: no broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -77,7 +78,8 @@ After broadcasting *Equal* does the following with the input tensors *a* and *b* *Example 2: numpy broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/comparison/GreaterEqual_1.md b/docs/ops/comparison/GreaterEqual_1.md index 250f6e8b36f49f..49c781f97a828b 100644 --- a/docs/ops/comparison/GreaterEqual_1.md +++ b/docs/ops/comparison/GreaterEqual_1.md @@ -58,7 +58,8 @@ After broadcasting, *GreaterEqual* does the following with the input tensors *a* *Example 1: no broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -82,7 +83,8 @@ After broadcasting, *GreaterEqual* does the following with the input tensors *a* *Example 2: numpy broadcast* -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/comparison/Greater_1.md b/docs/ops/comparison/Greater_1.md index 7089540794c17b..0c893f2218ec42 100644 --- a/docs/ops/comparison/Greater_1.md +++ b/docs/ops/comparison/Greater_1.md @@ -59,7 +59,8 @@ After broadcasting, *Greater* does the following with the input tensors *a* and *Example 1: no broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -84,7 +85,8 @@ After broadcasting, *Greater* does the following with the input tensors *a* and *Example 2: numpy broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/comparison/IsFinite_10.md b/docs/ops/comparison/IsFinite_10.md index 5b4eed0ac8a3ce..70839f7a56beb5 100644 --- a/docs/ops/comparison/IsFinite_10.md +++ b/docs/ops/comparison/IsFinite_10.md @@ -37,7 +37,8 @@ **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -55,7 +56,8 @@ -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/comparison/IsInf_10.md b/docs/ops/comparison/IsInf_10.md index 77d148d0255f23..4fc824b96f8670 100644 --- a/docs/ops/comparison/IsInf_10.md +++ b/docs/ops/comparison/IsInf_10.md @@ -50,7 +50,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/comparison/IsNaN_10.md b/docs/ops/comparison/IsNaN_10.md index ee2f81ee187b96..916c0f065408d6 100644 --- a/docs/ops/comparison/IsNaN_10.md +++ b/docs/ops/comparison/IsNaN_10.md @@ -35,7 +35,8 @@ For example, for the given input tensor ``[NaN, 2.1, 3.7, NaN, Inf]`` the output **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/comparison/LessEqual_1.md b/docs/ops/comparison/LessEqual_1.md index 8af844849c33a4..0f82a61f8d23c2 100644 --- a/docs/ops/comparison/LessEqual_1.md +++ b/docs/ops/comparison/LessEqual_1.md @@ -54,7 +54,8 @@ After broadcasting *LessEqual* does the following with the input tensors *a* and *Example 1: no broadcast* -.. code-block:: cpp +.. 
code-block:: xml + :force: @@ -79,7 +80,8 @@ After broadcasting *LessEqual* does the following with the input tensors *a* and *Example 2: numpy broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/comparison/Less_1.md b/docs/ops/comparison/Less_1.md index 183997d6de8b4c..169c589cefb25e 100644 --- a/docs/ops/comparison/Less_1.md +++ b/docs/ops/comparison/Less_1.md @@ -54,7 +54,8 @@ After broadcasting *Less* does the following with the input tensors *a* and *b*: *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -78,7 +79,8 @@ After broadcasting *Less* does the following with the input tensors *a* and *b*: *Example 2: broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/condition/Bucketize_3.md b/docs/ops/condition/Bucketize_3.md index e2015fff391677..26e34a1b35a75c 100644 --- a/docs/ops/condition/Bucketize_3.md +++ b/docs/ops/condition/Bucketize_3.md @@ -56,7 +56,8 @@ For example, if the first input tensor is ``[[3, 50], [10, -1]]`` and the second **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/condition/If_8.md b/docs/ops/condition/If_8.md index 6a12fbdfb2e784..57402cad706f63 100644 --- a/docs/ops/condition/If_8.md +++ b/docs/ops/condition/If_8.md @@ -92,7 +92,8 @@ Also the number of outputs from *If* always must be greater than zero and equal *Example 1: a typical If structure* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/condition/NonZero_3.md b/docs/ops/condition/NonZero_3.md index 086439f842239d..c5285304109671 100644 --- a/docs/ops/condition/NonZero_3.md +++ b/docs/ops/condition/NonZero_3.md @@ -46,7 +46,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/condition/Select_1.md b/docs/ops/condition/Select_1.md index 56297d08ddad2d..8640126c39f2d0 100644 --- a/docs/ops/condition/Select_1.md +++ b/docs/ops/condition/Select_1.md @@ -51,7 +51,8 @@ **Example** -.. 
code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/convolution/BinaryConvolution_1.md b/docs/ops/convolution/BinaryConvolution_1.md index e248f85ce023da..bbff1b2608fd22 100644 --- a/docs/ops/convolution/BinaryConvolution_1.md +++ b/docs/ops/convolution/BinaryConvolution_1.md @@ -103,7 +103,8 @@ Computation algorithm for mode *xnor-popcount*: 2D Convolution -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/convolution/ConvolutionBackpropData_1.md b/docs/ops/convolution/ConvolutionBackpropData_1.md index c641537251177e..b7de787fa747f3 100644 --- a/docs/ops/convolution/ConvolutionBackpropData_1.md +++ b/docs/ops/convolution/ConvolutionBackpropData_1.md @@ -20,7 +20,8 @@ ConvolutionBackpropData accepts the same set of attributes as a regular Convolut When output shape is specified as an input tensor ``output_shape`` then it specifies only spatial dimensions. No batch or channel dimension should be passed along with spatial dimensions. If ``output_shape`` is omitted, then ``pads_begin``, ``pads_end`` or ``auto_pad`` are used to determine output spatial shape ``[O_z, O_y, O_x]`` by input spatial shape ``[I_z, I_y, I_x]`` in the following way: -.. code-block:: cpp +.. code-block:: xml + :force: if auto_pads != None: pads_begin[i] = 0 @@ -32,7 +33,8 @@ where ``K_i`` filter kernel dimension along spatial axis ``i``. If ``output_shape`` is specified, ``pads_begin`` and ``pads_end`` are ignored, and ``auto_pad`` defines how to distribute padding amount around the tensor. In this case pads are determined based on the next formulas to correctly align input and output tensors: -.. code-block:: cpp +.. code-block:: xml + :force: total_padding[i] = stride[i] * (X_i - 1) + ((K_i - 1) * dilations[i] + 1) - output_shape[i] + output_padding[i] if auto_pads != SAME_UPPER: @@ -119,7 +121,8 @@ If ``output_shape`` is specified, ``pads_begin`` and ``pads_end`` are ignored, a *Example 1: 2D ConvolutionBackpropData* -.. code-block:: cpp +.. 
code-block:: xml + :force: @@ -149,7 +152,8 @@ If ``output_shape`` is specified, ``pads_begin`` and ``pads_end`` are ignored, a *Example 2: 2D ConvolutionBackpropData with output_padding* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -179,7 +183,8 @@ If ``output_shape`` is specified, ``pads_begin`` and ``pads_end`` are ignored, a *Example 3: 2D ConvolutionBackpropData with output_shape input* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/convolution/Convolution_1.md b/docs/ops/convolution/Convolution_1.md index 2203f65e5a70b1..75c705998c1db7 100644 --- a/docs/ops/convolution/Convolution_1.md +++ b/docs/ops/convolution/Convolution_1.md @@ -113,7 +113,8 @@ The receptive field in each layer is calculated using the formulas: 1D Convolution -.. code-block:: cpp +.. code-block:: xml + :force: @@ -141,7 +142,8 @@ The receptive field in each layer is calculated using the formulas: 2D Convolution -.. code-block:: cpp +.. code-block:: xml + :force: @@ -171,7 +173,8 @@ The receptive field in each layer is calculated using the formulas: 3D Convolution -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/convolution/DeformableConvolution_1.md b/docs/ops/convolution/DeformableConvolution_1.md index 9ea23519705e14..23b21c065353ae 100644 --- a/docs/ops/convolution/DeformableConvolution_1.md +++ b/docs/ops/convolution/DeformableConvolution_1.md @@ -109,7 +109,8 @@ Where 2D DeformableConvolution (deformable_group=1) -.. code-block:: cpp +.. code-block:: xml + :force: @@ -145,7 +146,8 @@ Where 2D DeformableConvolution (deformable_group=4) -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/convolution/DeformableConvolution_8.md b/docs/ops/convolution/DeformableConvolution_8.md index b582dd02257d04..292561da1cd093 100644 --- a/docs/ops/convolution/DeformableConvolution_8.md +++ b/docs/ops/convolution/DeformableConvolution_8.md @@ -121,7 +121,8 @@ Where 2D DeformableConvolution (deformable_group=1) -.. 
code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/convolution/GroupConvolutionBackpropData_1.md b/docs/ops/convolution/GroupConvolutionBackpropData_1.md index 884ee39cef5fe8..4d2cede8f3eca8 100644 --- a/docs/ops/convolution/GroupConvolutionBackpropData_1.md +++ b/docs/ops/convolution/GroupConvolutionBackpropData_1.md @@ -109,7 +109,8 @@ is derived from the kernel shape. 1D GroupConvolutionBackpropData -.. code-block:: cpp +.. code-block:: xml + :force: @@ -138,7 +139,8 @@ is derived from the kernel shape. 2D GroupConvolutionBackpropData -.. code-block:: cpp +.. code-block:: xml + :force: @@ -170,7 +172,8 @@ is derived from the kernel shape. 3D GroupConvolutionBackpropData -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/convolution/GroupConvolution_1.md b/docs/ops/convolution/GroupConvolution_1.md index 3a2024c96cbe4c..0e48f4a7f01d50 100644 --- a/docs/ops/convolution/GroupConvolution_1.md +++ b/docs/ops/convolution/GroupConvolution_1.md @@ -96,7 +96,8 @@ as in regular convolution and concatenates the results. More thorough explanatio 1D GroupConvolution -.. code-block:: cpp +.. code-block:: xml + :force: @@ -124,7 +125,8 @@ as in regular convolution and concatenates the results. More thorough explanatio 2D GroupConvolution -.. code-block:: cpp +.. code-block:: xml + :force: @@ -155,7 +157,8 @@ as in regular convolution and concatenates the results. More thorough explanatio 3D GroupConvolution -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/detection/DeformablePSROIPooling_1.md b/docs/ops/detection/DeformablePSROIPooling_1.md index e58d4b9b841f23..a62efaa204e367 100644 --- a/docs/ops/detection/DeformablePSROIPooling_1.md +++ b/docs/ops/detection/DeformablePSROIPooling_1.md @@ -106,7 +106,8 @@ This operation is compatible with `Apache MXNet DeformablePSROIPooling @@ -135,7 +136,8 @@ This operation is compatible with `Apache MXNet DeformablePSROIPooling diff --git a/docs/ops/detection/DetectionOutput_1.md b/docs/ops/detection/DetectionOutput_1.md index 9817c6e0633868..253e1e54bc33d2 100644 --- a/docs/ops/detection/DetectionOutput_1.md +++ b/docs/ops/detection/DetectionOutput_1.md @@ -156,7 +156,8 @@ At each feature map cell, *DetectionOutput* predicts the offsets relative to the **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/detection/DetectionOutput_8.md b/docs/ops/detection/DetectionOutput_8.md index 891e9e0d50019c..5825b635fccada 100644 --- a/docs/ops/detection/DetectionOutput_8.md +++ b/docs/ops/detection/DetectionOutput_8.md @@ -154,7 +154,8 @@ At each feature map cell, *DetectionOutput* predicts the offsets relative to the **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/detection/ExperimentalDetectronDetectionOutput_6.md b/docs/ops/detection/ExperimentalDetectronDetectionOutput_6.md index 6e0cf618c03ace..c7f79585860f72 100644 --- a/docs/ops/detection/ExperimentalDetectronDetectionOutput_6.md +++ b/docs/ops/detection/ExperimentalDetectronDetectionOutput_6.md @@ -146,7 +146,8 @@ second input: **Example** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md b/docs/ops/detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md index 6c5364fb7431ff..1805b08263a08f 100644 --- a/docs/ops/detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md +++ b/docs/ops/detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md @@ -76,7 +76,8 @@ Height and width for third and fourth inputs should be equal. **Required.** **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/detection/ExperimentalDetectronPriorGridGenerator_6.md b/docs/ops/detection/ExperimentalDetectronPriorGridGenerator_6.md index 6fd83f7e72f6a2..55d905757e31af 100644 --- a/docs/ops/detection/ExperimentalDetectronPriorGridGenerator_6.md +++ b/docs/ops/detection/ExperimentalDetectronPriorGridGenerator_6.md @@ -80,7 +80,8 @@ Numbers of generated cells is ``featmap_height`` and ``featmap_width`` if *h* an **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/detection/ExperimentalDetectronROIFeatureExtractor_6.md b/docs/ops/detection/ExperimentalDetectronROIFeatureExtractor_6.md index 3a5440e09658e9..17f8cb3201c70c 100644 --- a/docs/ops/detection/ExperimentalDetectronROIFeatureExtractor_6.md +++ b/docs/ops/detection/ExperimentalDetectronROIFeatureExtractor_6.md @@ -82,7 +82,8 @@ For more details please see the following source: `Feature Pyramid Networks for **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/detection/GenerateProposals_9.md b/docs/ops/detection/GenerateProposals_9.md index b2f3b7fc9b8b33..e138227d4a2892 100644 --- a/docs/ops/detection/GenerateProposals_9.md +++ b/docs/ops/detection/GenerateProposals_9.md @@ -114,7 +114,8 @@ The ``height`` and ``width`` from inputs ``anchors``, ``boxesdeltas`` and ``scor **Example** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/detection/PSROIPooling_1.md b/docs/ops/detection/PSROIPooling_1.md index 257316110b6508..c95ae635fbc260 100644 --- a/docs/ops/detection/PSROIPooling_1.md +++ b/docs/ops/detection/PSROIPooling_1.md @@ -86,31 +86,32 @@ Batch indices must be in the range of ``[0, N-1]``. **Example** -.. code-block:: cpp - - - - - - 1 - 3240 - 38 - 38 - - - 100 - 5 - - - - - 100 - 360 - 6 - 6 - - - +.. code-block:: xml + :force: + + + + + + 1 + 3240 + 38 + 38 + + + 100 + 5 + + + + + 100 + 360 + 6 + 6 + + + @endsphinxdirective diff --git a/docs/ops/detection/PriorBoxClustered_1.md b/docs/ops/detection/PriorBoxClustered_1.md index 16ecadafc00077..643cd8f6d4fc4d 100644 --- a/docs/ops/detection/PriorBoxClustered_1.md +++ b/docs/ops/detection/PriorBoxClustered_1.md @@ -130,25 +130,26 @@ If *clip* is defined, the coordinates of prior boxes are recalculated with the f **Example** -.. code-block:: cpp - - - - - - 2 < !-- [10, 19] --> - - - 2 < !-- [180, 320] --> - - - - - 2 - 6840 - - - +.. code-block:: xml + :force: + + + + + + 2 < !-- [10, 19] --> + + + 2 < !-- [180, 320] --> + + + + + 2 + 6840 + + + @endsphinxdirective diff --git a/docs/ops/detection/PriorBox_1.md b/docs/ops/detection/PriorBox_1.md index f554ae24aa83b5..06674ebf64d54e 100644 --- a/docs/ops/detection/PriorBox_1.md +++ b/docs/ops/detection/PriorBox_1.md @@ -177,25 +177,26 @@ **Example** -.. code-block:: cpp - - - - - - 2 < !-- values: [24, 42] --> - - - 2 < !-- values: [384, 672] --> - - - - - 2 - 16128 - - - +.. code-block:: xml + :force: + + + + + + 2 < !-- values: [24, 42] --> + + + 2 < !-- values: [384, 672] --> + + + + + 2 + 16128 + + + @endsphinxdirective diff --git a/docs/ops/detection/PriorBox_8.md b/docs/ops/detection/PriorBox_8.md index c636954fc3315b..b8dbfa64a11aea 100644 --- a/docs/ops/detection/PriorBox_8.md +++ b/docs/ops/detection/PriorBox_8.md @@ -184,25 +184,26 @@ **Example** -.. 
code-block:: cpp - - - - - - 2 < !-- values: [24, 42] --> - - - 2 < !-- values: [384, 672] --> - - - - - 2 - 16128 - - - +.. code-block:: xml + :force: + + + + + + 2 < !-- values: [24, 42] --> + + + 2 < !-- values: [384, 672] --> + + + + + 2 + 16128 + + + @endsphinxdirective diff --git a/docs/ops/detection/Proposal_1.md b/docs/ops/detection/Proposal_1.md index 0c9ed9f5d25d38..2e3a4a5ac25fe1 100644 --- a/docs/ops/detection/Proposal_1.md +++ b/docs/ops/detection/Proposal_1.md @@ -157,13 +157,14 @@ **Example** -.. code-block:: cpp - - - - ... - ... - +.. code-block:: xml + :force: + + + + ... + ... + @endsphinxdirective diff --git a/docs/ops/detection/Proposal_4.md b/docs/ops/detection/Proposal_4.md index 341b6396a8e8cf..110e09788f5c74 100644 --- a/docs/ops/detection/Proposal_4.md +++ b/docs/ops/detection/Proposal_4.md @@ -170,37 +170,38 @@ the second optional tensor of shape ``[batch_size * post_nms_topn]`` with probab **Example** -.. code-block:: cpp - - - - - - 7 - 4 - 28 - 28 - - - 7 - 8 - 28 - 28 - - - 3 - - - - - 7000 - 5 - - - 7000 - - - +.. code-block:: xml + :force: + + + + + + 7 + 4 + 28 + 28 + + + 7 + 8 + 28 + 28 + + + 3 + + + + + 7000 + 5 + + + 7000 + + + @endsphinxdirective diff --git a/docs/ops/detection/ROIAlign_3.md b/docs/ops/detection/ROIAlign_3.md index 4c33a9786a3865..20c13a98aadb50 100644 --- a/docs/ops/detection/ROIAlign_3.md +++ b/docs/ops/detection/ROIAlign_3.md @@ -81,34 +81,35 @@ **Example** -.. code-block:: cpp - - - - - - 7 - 256 - 200 - 200 - - - 1000 - 4 - - - 1000 - - - - - 1000 - 256 - 6 - 6 - - - +.. code-block:: xml + :force: + + + + + + 7 + 256 + 200 + 200 + + + 1000 + 4 + + + 1000 + + + + + 1000 + 256 + 6 + 6 + + + @endsphinxdirective diff --git a/docs/ops/detection/ROIAlign_9.md b/docs/ops/detection/ROIAlign_9.md index 9f0185c2808474..7f94f22df77a0b 100644 --- a/docs/ops/detection/ROIAlign_9.md +++ b/docs/ops/detection/ROIAlign_9.md @@ -98,33 +98,34 @@ **Example** -.. 
code-block:: cpp - - - - - - 7 - 256 - 200 - 200 - - - 1000 - 4 - - - 1000 - - - - - 1000 - 256 - 6 - 6 - - - +.. code-block:: xml + :force: + + + + + + 7 + 256 + 200 + 200 + + + 1000 + 4 + + + 1000 + + + + + 1000 + 256 + 6 + 6 + + + @endsphinxdirective diff --git a/docs/ops/detection/ROIPooling_1.md b/docs/ops/detection/ROIPooling_1.md index e837a6bb1249ad..82e783c855e31e 100644 --- a/docs/ops/detection/ROIPooling_1.md +++ b/docs/ops/detection/ROIPooling_1.md @@ -75,12 +75,13 @@ Batch indices must be in the range of ``[0, N-1]``. **Example** -.. code-block:: cpp - - - - ... - ... - +.. code-block:: xml + :force: + + + + ... + ... + @endsphinxdirective diff --git a/docs/ops/detection/RegionYolo_1.md b/docs/ops/detection/RegionYolo_1.md index 70d8c2ab2054a6..ef3dd75a3fda38 100644 --- a/docs/ops/detection/RegionYolo_1.md +++ b/docs/ops/detection/RegionYolo_1.md @@ -95,47 +95,48 @@ **Example** -.. code-block:: cpp - - < !-- YOLO V3 example --> - - - - - 1 - 255 - 26 - 26 - - - - - 1 - 255 - 26 - 26 - - - - - < !-- YOLO V2 Example --> - - - - - 1 - 125 - 13 - 13 - - - - - 1 - 21125 - - - +.. code-block:: xml + :force: + + < !-- YOLO V3 example --> + + + + + 1 + 255 + 26 + 26 + + + + + 1 + 255 + 26 + 26 + + + + + < !-- YOLO V2 Example --> + + + + + 1 + 125 + 13 + 13 + + + + + 1 + 21125 + + + @endsphinxdirective diff --git a/docs/ops/detection/ReorgYolo_1.md b/docs/ops/detection/ReorgYolo_1.md index 4a33916d2f15a1..3bc092ea9e96ac 100644 --- a/docs/ops/detection/ReorgYolo_1.md +++ b/docs/ops/detection/ReorgYolo_1.md @@ -33,7 +33,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/generation/Eye_9.md b/docs/ops/generation/Eye_9.md index 2e6f43aad551dd..d07994f53c825b 100644 --- a/docs/ops/generation/Eye_9.md +++ b/docs/ops/generation/Eye_9.md @@ -19,7 +19,8 @@ Example 1. *Eye* output with ``output_type`` = ``i32``: -.. code-block:: cpp +.. code-block:: xml + :force: num_rows = 3 @@ -33,7 +34,8 @@ Example 1. 
*Eye* output with ``output_type`` = ``i32``: Example 2. *Eye* output with ``output_type`` = ``i32``: -.. code-block:: cpp +.. code-block:: xml + :force: num_rows = 3 @@ -47,7 +49,8 @@ Example 2. *Eye* output with ``output_type`` = ``i32``: Example 3. *Eye* output with ``output_type`` = ``f16``: -.. code-block:: cpp +.. code-block:: xml + :force: num_rows = 2 @@ -90,7 +93,8 @@ Example 3. *Eye* output with ``output_type`` = ``f16``: *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -109,7 +113,8 @@ Example 3. *Eye* output with ``output_type`` = ``f16``: *Example 2* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/generation/RandomUniform_8.md b/docs/ops/generation/RandomUniform_8.md index 07648fa7d3faec..b4a875781bb56a 100644 --- a/docs/ops/generation/RandomUniform_8.md +++ b/docs/ops/generation/RandomUniform_8.md @@ -76,7 +76,8 @@ using following formula: so to obtain float16 values *sign*, *exponent* and *mantissa* are set as follows: -.. code-block:: cpp +.. code-block:: xml + :force: sign = 0 exponent = 15 - representation of a zero exponent. @@ -85,7 +86,8 @@ so to obtain float16 values *sign*, *exponent* and *mantissa* are set as follows So the resulting float16 value is: -.. code-block:: cpp +.. code-block:: xml + :force: x_uint16 = x // Truncate the upper 16 bits. val = ((exponent << 10) | x_uint16 & 0x3ffu) - 1.0, @@ -101,7 +103,8 @@ Float32 is formatted as follows: *sign* (1 bit) *exponent* (8 bits) *mantissa* ( so to obtain float values *sign*, *exponent* and *mantissa* are set as follows: -.. code-block:: cpp +.. code-block:: xml + :force: sign = 0 exponent = 127 - representation of a zero exponent. @@ -110,7 +113,8 @@ so to obtain float values *sign*, *exponent* and *mantissa* are set as follows: So the resulting float value is: -.. code-block:: cpp +.. 
code-block:: xml + :force: val = ((exponent << 23) | x & 0x7fffffu) - 1.0, @@ -125,7 +129,8 @@ Double is formatted as follows: *sign* (1 bit) *exponent* (11 bits) *mantissa* ( so to obtain double values *sign*, *exponent* and *mantissa* are set as follows: -.. code-block:: cpp +.. code-block:: xml + :force: sign = 0 exponent = 1023 - representation of a zero exponent. @@ -134,7 +139,8 @@ so to obtain double values *sign*, *exponent* and *mantissa* are set as follows: So the resulting double is obtained as follows: -.. code-block:: cpp +.. code-block:: xml + :force: mantissa_h = x0 & 0xfffffu; // upper 20 bits of mantissa mantissa_l = x1; // lower 32 bits of mantissa @@ -164,7 +170,8 @@ where *x* is uint32 random value. Example 1. *RandomUniform* output with ``global_seed`` = 150, ``op_seed`` = 10, ``output_type`` = f32: -.. code-block:: cpp +.. code-block:: xml + :force: input_shape = [ 3, 3 ] output = [[0.7011236 0.30539632 0.93931055] @@ -174,7 +181,8 @@ Example 1. *RandomUniform* output with ``global_seed`` = 150, ``op_seed`` = 10, Example 2. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, ``output_type`` = double: -.. code-block:: cpp +.. code-block:: xml + :force: input_shape = [ 2, 2 ] @@ -188,7 +196,8 @@ Example 2. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, Example 3. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, ``output_type`` = i32: -.. code-block:: cpp +.. code-block:: xml + :force: input_shape = [ 2, 3 ] @@ -244,7 +253,8 @@ Example 3. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, *Example 1: IR example.* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/generation/Range_1.md b/docs/ops/generation/Range_1.md index 12833c994a31b1..20fc02958c1f90 100644 --- a/docs/ops/generation/Range_1.md +++ b/docs/ops/generation/Range_1.md @@ -59,7 +59,8 @@ where *Example 1: positive step* -.. code-block:: cpp +.. 
code-block:: xml + :force: @@ -80,7 +81,8 @@ where *Example 2: negative step* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/generation/Range_4.md b/docs/ops/generation/Range_4.md index 9aedf7af68e675..a98201428d7ef3 100644 --- a/docs/ops/generation/Range_4.md +++ b/docs/ops/generation/Range_4.md @@ -73,7 +73,8 @@ This is aligned with PyTorch's operation ``torch.arange``, to align with tensorf *Example 1: positive step* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -95,7 +96,8 @@ This is aligned with PyTorch's operation ``torch.arange``, to align with tensorf *Example 2: negative step* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -117,7 +119,8 @@ This is aligned with PyTorch's operation ``torch.arange``, to align with tensorf *Example 3: floating-point* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/image/GridSample_9.md b/docs/ops/image/GridSample_9.md index b6664e0b05c1c4..30b17a8ae2c019 100644 --- a/docs/ops/image/GridSample_9.md +++ b/docs/ops/image/GridSample_9.md @@ -68,7 +68,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/image/I420toBGR_8.md b/docs/ops/image/I420toBGR_8.md index c619e0e420a0be..1f19fa57055d76 100644 --- a/docs/ops/image/I420toBGR_8.md +++ b/docs/ops/image/I420toBGR_8.md @@ -38,7 +38,8 @@ Same as specified for :doc:`I420toRGB ` ope *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -62,7 +63,8 @@ Same as specified for :doc:`I420toRGB ` ope *Example 2* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/image/I420toRGB_8.md b/docs/ops/image/I420toRGB_8.md index 8a4d42c9e5c930..bcad5ee5f00456 100644 --- a/docs/ops/image/I420toRGB_8.md +++ b/docs/ops/image/I420toRGB_8.md @@ -81,7 +81,8 @@ Input I420 image tensor shall have ``NHWC (also known as NYXC)`` layout and can *Example 1* -.. code-block:: cpp +.. 
code-block:: xml + :force: @@ -105,7 +106,8 @@ Input I420 image tensor shall have ``NHWC (also known as NYXC)`` layout and can *Example 2* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/image/Interpolate_1.md b/docs/ops/image/Interpolate_1.md index 5977028184674c..4835aba1d16f48 100644 --- a/docs/ops/image/Interpolate_1.md +++ b/docs/ops/image/Interpolate_1.md @@ -76,7 +76,8 @@ This is a scalar that specifies padding for each spatial dimension. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/image/Interpolate_11.md b/docs/ops/image/Interpolate_11.md index 92585a5c5efd9a..23d59fa923be11 100644 --- a/docs/ops/image/Interpolate_11.md +++ b/docs/ops/image/Interpolate_11.md @@ -114,7 +114,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/image/Interpolate_4.md b/docs/ops/image/Interpolate_4.md index 2fb4a37edb0244..82f68ea7a8542e 100644 --- a/docs/ops/image/Interpolate_4.md +++ b/docs/ops/image/Interpolate_4.md @@ -120,7 +120,8 @@ **Detailed description** Calculations are performed according to the following rules. -.. code-block:: python +.. code-block:: py + :force: import math import numpy as np @@ -601,7 +602,8 @@ Calculations are performed according to the following rules. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/image/NV12toBGR_8.md b/docs/ops/image/NV12toBGR_8.md index 0c9cf5f2825cff..d0ed0f9fb40aa9 100644 --- a/docs/ops/image/NV12toBGR_8.md +++ b/docs/ops/image/NV12toBGR_8.md @@ -38,7 +38,8 @@ Same as specified for :doc:`NV12toRGB ` ope *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -62,7 +63,8 @@ Same as specified for :doc:`NV12toRGB ` ope *Example 2* -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/image/NV12toRGB_8.md b/docs/ops/image/NV12toRGB_8.md index 3b5c94e6631c0d..214b62e5a1a4e4 100644 --- a/docs/ops/image/NV12toRGB_8.md +++ b/docs/ops/image/NV12toRGB_8.md @@ -70,7 +70,8 @@ Input NV12 image tensor shall have ``NHWC (also known as NYXC)`` layout and can *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -94,7 +95,8 @@ Input NV12 image tensor shall have ``NHWC (also known as NYXC)`` layout and can *Example 2* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/infrastructure/Assign_3.md b/docs/ops/infrastructure/Assign_3.md index 55b2409494f783..39525e63dadc75 100644 --- a/docs/ops/infrastructure/Assign_3.md +++ b/docs/ops/infrastructure/Assign_3.md @@ -37,7 +37,8 @@ declared in ``variable_id`` and returns an error otherwise. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/infrastructure/Constant_1.md b/docs/ops/infrastructure/Constant_1.md index 85ff739809ed3b..12a6be5887d406 100644 --- a/docs/ops/infrastructure/Constant_1.md +++ b/docs/ops/infrastructure/Constant_1.md @@ -54,7 +54,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/infrastructure/Loop_5.md b/docs/ops/infrastructure/Loop_5.md index 6ac4306bf22b99..a379f88dcd365d 100644 --- a/docs/ops/infrastructure/Loop_5.md +++ b/docs/ops/infrastructure/Loop_5.md @@ -166,7 +166,8 @@ Loop operation description in the IR also has several special sections: ``body`` *Example 1: a typical Loop structure* -.. code-block:: cpp +.. code-block:: xml + :force: ... diff --git a/docs/ops/infrastructure/Parameter_1.md b/docs/ops/infrastructure/Parameter_1.md index 57fa733fbaf642..a6c09653847896 100644 --- a/docs/ops/infrastructure/Parameter_1.md +++ b/docs/ops/infrastructure/Parameter_1.md @@ -39,19 +39,20 @@ **Example** -.. code-block:: cpp - - - element_type="f32" shape="1,3,224,224" - - - 1 - 3 - 224 - 224 - - - +.. 
code-block:: xml + :force: + + + element_type="f32" shape="1,3,224,224" + + + 1 + 3 + 224 + 224 + + + @endsphinxdirective diff --git a/docs/ops/infrastructure/ReadValue_3.md b/docs/ops/infrastructure/ReadValue_3.md index e2dd69113695eb..3faaa44b1ae781 100644 --- a/docs/ops/infrastructure/ReadValue_3.md +++ b/docs/ops/infrastructure/ReadValue_3.md @@ -39,7 +39,8 @@ with the shape and type from the 1 input. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/infrastructure/Result_1.md b/docs/ops/infrastructure/Result_1.md index 71050a310c3498..4fc32760ba25e7 100644 --- a/docs/ops/infrastructure/Result_1.md +++ b/docs/ops/infrastructure/Result_1.md @@ -26,7 +26,8 @@ No attributes available. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/infrastructure/TensorIterator_1.md b/docs/ops/infrastructure/TensorIterator_1.md index 3ff7c03ecc59b6..0ab1ad32675e8e 100644 --- a/docs/ops/infrastructure/TensorIterator_1.md +++ b/docs/ops/infrastructure/TensorIterator_1.md @@ -151,7 +151,8 @@ where ``Si`` is value of ``Result`` operation at i-th iteration in the tensor it *Example 1: a typical TensorIterator structure* -.. code-block:: cpp +.. code-block:: xml + :force: ... @@ -176,7 +177,8 @@ where ``Si`` is value of ``Result`` operation at i-th iteration in the tensor it *Example 2: a full TensorIterator layer* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/logical/LogicalAnd_1.md b/docs/ops/logical/LogicalAnd_1.md index 97ebb1f4d44f44..ea4f80f0462a25 100644 --- a/docs/ops/logical/LogicalAnd_1.md +++ b/docs/ops/logical/LogicalAnd_1.md @@ -53,7 +53,8 @@ After broadcasting *LogicalAnd* does the following with the input tensors *a* an *Example 1: no broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -77,7 +78,8 @@ After broadcasting *LogicalAnd* does the following with the input tensors *a* an *Example 2: numpy broadcast* -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/logical/LogicalNot_1.md b/docs/ops/logical/LogicalNot_1.md index e41a0d5b600098..08fa79bae5e2d3 100644 --- a/docs/ops/logical/LogicalNot_1.md +++ b/docs/ops/logical/LogicalNot_1.md @@ -40,7 +40,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/logical/LogicalOr_1.md b/docs/ops/logical/LogicalOr_1.md index c89985a68dac06..f5d71ea86e4e8d 100644 --- a/docs/ops/logical/LogicalOr_1.md +++ b/docs/ops/logical/LogicalOr_1.md @@ -53,7 +53,8 @@ After broadcasting *LogicalOr* does the following with the input tensors *a* and *Example 1: no broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -76,7 +77,8 @@ After broadcasting *LogicalOr* does the following with the input tensors *a* and *Example 2: numpy broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/logical/LogicalXor_1.md b/docs/ops/logical/LogicalXor_1.md index 139ae64b488194..6ccdbb14c676a7 100644 --- a/docs/ops/logical/LogicalXor_1.md +++ b/docs/ops/logical/LogicalXor_1.md @@ -53,7 +53,8 @@ After broadcasting *LogicalXor* does the following with the input tensors *a* an *Example 1: no broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -77,7 +78,8 @@ After broadcasting *LogicalXor* does the following with the input tensors *a* an *Example 2: numpy broadcast* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/matrix/Einsum_7.md b/docs/ops/matrix/Einsum_7.md index 45938a9352dfc2..b0100f161b18b1 100644 --- a/docs/ops/matrix/Einsum_7.md +++ b/docs/ops/matrix/Einsum_7.md @@ -183,7 +183,8 @@ Example 8 shows how *Einsum* operates with an equation containing both capital a **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -204,7 +205,8 @@ Example 8 shows how *Einsum* operates with an equation containing both capital a -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/matrix/MatMul_1.md b/docs/ops/matrix/MatMul_1.md index 0a96ed48b183df..54e1dcd845667a 100644 --- a/docs/ops/matrix/MatMul_1.md +++ b/docs/ops/matrix/MatMul_1.md @@ -80,7 +80,8 @@ Two attributes, ``transpose_a`` and ``transpose_b`` specify embedded transpositi *Vector-matrix multiplication* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -102,7 +103,8 @@ Two attributes, ``transpose_a`` and ``transpose_b`` specify embedded transpositi *Matrix-vector multiplication* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -124,7 +126,8 @@ Two attributes, ``transpose_a`` and ``transpose_b`` specify embedded transpositi *Matrix-matrix multiplication (like FullyConnected with batch size 1)* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -148,7 +151,8 @@ Two attributes, ``transpose_a`` and ``transpose_b`` specify embedded transpositi *Vector-matrix multiplication with embedded transposition of the second matrix* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -171,7 +175,8 @@ Two attributes, ``transpose_a`` and ``transpose_b`` specify embedded transpositi *Matrix-matrix multiplication (like FullyConnected with batch size 10)* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -195,7 +200,8 @@ Two attributes, ``transpose_a`` and ``transpose_b`` specify embedded transpositi *Multiplication of batch of 5 matrices by a one matrix with broadcasting* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/BatchToSpace_2.md b/docs/ops/movement/BatchToSpace_2.md index e1b7408ddfcb52..b75235693ff73d 100644 --- a/docs/ops/movement/BatchToSpace_2.md +++ b/docs/ops/movement/BatchToSpace_2.md @@ -76,7 +76,8 @@ Where Example: 2D input tensor ``data`` -.. code-block:: cpp +.. code-block:: xml + :force: @@ -104,7 +105,8 @@ Example: 2D input tensor ``data`` Example: 5D input tensor ``data`` -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/movement/Broadcast_1.md b/docs/ops/movement/Broadcast_1.md index e7d0caaececd88..27bc8cdca26bbd 100644 --- a/docs/ops/movement/Broadcast_1.md +++ b/docs/ops/movement/Broadcast_1.md @@ -49,7 +49,8 @@ For example, ``axes_mapping = [1]`` enables broadcasting of a tensor with shape **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Broadcast_3.md b/docs/ops/movement/Broadcast_3.md index ab901ff28a2929..e14175b93f5b0c 100644 --- a/docs/ops/movement/Broadcast_3.md +++ b/docs/ops/movement/Broadcast_3.md @@ -57,7 +57,8 @@ For example, ``axes_mapping = [1]`` enables broadcasting of a tensor with shape **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Concat_1.md b/docs/ops/movement/Concat_1.md index 8029e485c8371d..d0a43c0e47ad9c 100644 --- a/docs/ops/movement/Concat_1.md +++ b/docs/ops/movement/Concat_1.md @@ -35,7 +35,8 @@ **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -70,7 +71,8 @@ -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/DepthToSpace_1.md b/docs/ops/movement/DepthToSpace_1.md index 2d971f0d46a596..8119939effce0e 100644 --- a/docs/ops/movement/DepthToSpace_1.md +++ b/docs/ops/movement/DepthToSpace_1.md @@ -66,7 +66,8 @@ If ``mode = depth_first``: **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/ExtractImagePatches_3.md b/docs/ops/movement/ExtractImagePatches_3.md index 7a8d3412cdddd1..bc8cbfac1e7e63 100644 --- a/docs/ops/movement/ExtractImagePatches_3.md +++ b/docs/ops/movement/ExtractImagePatches_3.md @@ -67,7 +67,8 @@ The "auto_pad" attribute has no effect on the size of each patch, it determines **Example** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/movement/GatherElements_6.md b/docs/ops/movement/GatherElements_6.md index 8957420fe7d982..54cc42bae9f2ed 100644 --- a/docs/ops/movement/GatherElements_6.md +++ b/docs/ops/movement/GatherElements_6.md @@ -22,7 +22,7 @@ and has the same shape as ``indices``. For instance, in the 3D case (``r = 3``), the output is determined by the following equations: -.. code-block:: +.. code-block:: sh out[i][j][k] = data[indices[i][j][k]][j][k] if axis = 0 out[i][j][k] = data[i][indices[i][j][k]][k] if axis = 1 @@ -30,7 +30,7 @@ For instance, in the 3D case (``r = 3``), the output is determined by the follow Example 1 with concrete values: -.. code-block:: +.. code-block:: sh data = [ [1, 2], @@ -48,7 +48,7 @@ Example 1 with concrete values: Example 2 with ``axis`` = 1 and ``indices`` having greater (than ``data``) shape: -.. code-block:: +.. code-block:: sh data = [ [1, 7], @@ -67,7 +67,7 @@ Example 2 with ``axis`` = 1 and ``indices`` having greater (than ``data``) shape Example 3 ``indices`` has lesser (than ``data``) shape: -.. code-block:: +.. code-block:: sh data = [ [1, 2, 3], @@ -111,7 +111,8 @@ Example 3 ``indices`` has lesser (than ``data``) shape: **Example** -.. code-block:: cpp +.. code-block:: xml + :force: <... type="GatherElements" ...> diff --git a/docs/ops/movement/GatherND_5.md b/docs/ops/movement/GatherND_5.md index e6e9e4ba1fb935..76906c7dc2f0ef 100644 --- a/docs/ops/movement/GatherND_5.md +++ b/docs/ops/movement/GatherND_5.md @@ -28,7 +28,7 @@ The shape of the output can be computed as `indices.shape[:-1] + data.shape[indi Example 1 shows how *GatherND* operates with elements from `data` tensor: -.. code-block:: +.. code-block:: sh indices = [[0, 0], [1, 0]] @@ -39,7 +39,7 @@ Example 1 shows how *GatherND* operates with elements from `data` tensor: Example 2 shows how *GatherND* operates with slices from ``data`` tensor: -.. code-block:: +.. 
code-block:: sh indices = [[1], [0]] data = [[1, 2], @@ -50,7 +50,7 @@ Example 2 shows how *GatherND* operates with slices from ``data`` tensor: Example 3 shows how *GatherND* operates when `indices` tensor has leading dimensions: -.. code-block:: +.. code-block:: sh indices = [[[1]], [[0]]] data = [[1, 2], @@ -81,7 +81,7 @@ Example 3 shows how *GatherND* operates when `indices` tensor has leading dimens Example 4 shows how *GatherND* operates gathering elements for non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 1 indices = [[1], <--- this is applied to the first batch @@ -93,7 +93,7 @@ Example 4 shows how *GatherND* operates gathering elements for non-default *batc Example 5 shows how *GatherND* operates gathering slices for non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 1 indices = [[1], <--- this is applied to the first batch @@ -106,7 +106,7 @@ Example 5 shows how *GatherND* operates gathering slices for non-default *batch_ More complex, example 6 shows how *GatherND* operates gathering slices with leading dimensions for non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 2 indices = [[[[1]], <--- this is applied to the first batch @@ -144,7 +144,8 @@ for non-default *batch_dims* value: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -171,7 +172,8 @@ for non-default *batch_dims* value: -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/GatherND_8.md b/docs/ops/movement/GatherND_8.md index e383fa9169a386..cf9e1e0eeb5c65 100644 --- a/docs/ops/movement/GatherND_8.md +++ b/docs/ops/movement/GatherND_8.md @@ -65,7 +65,7 @@ if ``indices.shape[-1] == data.rank - batch_dims``, else Example 1 shows how *GatherND* operates with elements from ``data`` tensor: -.. code-block:: +.. 
code-block:: sh indices = [[0, 0], [1, 0]] @@ -76,7 +76,7 @@ Example 1 shows how *GatherND* operates with elements from ``data`` tensor: Example 2 shows how *GatherND* operates with slices from ``data`` tensor: -.. code-block:: +.. code-block:: sh indices = [[1], [0]] data = [[1, 2], @@ -87,7 +87,7 @@ Example 2 shows how *GatherND* operates with slices from ``data`` tensor: Example 3 shows how *GatherND* operates when ``indices`` tensor has leading dimensions: -.. code-block:: +.. code-block:: sh indices = [[[1]], [[0]]] data = [[1, 2], @@ -98,7 +98,7 @@ Example 3 shows how *GatherND* operates when ``indices`` tensor has leading dime Example 4 shows how *GatherND* operates gathering elements for non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 1 indices = [[1], <--- this is applied to the first batch @@ -110,7 +110,7 @@ Example 4 shows how *GatherND* operates gathering elements for non-default *batc Example 5 shows how *GatherND* operates gathering slices for non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 1 indices = [[1], <--- this is applied to the first batch @@ -123,7 +123,7 @@ Example 5 shows how *GatherND* operates gathering slices for non-default *batch_ More complex examples 6 and 7 show how *GatherND* operates gathering slices with leading dimensions for non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 2 indices = [[[[1]], <--- this is applied to the first batch @@ -144,7 +144,7 @@ for non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 3 indices = [[[[1], @@ -162,7 +162,8 @@ for non-default *batch_dims* value: ], shape = (1, 2, 2) -.. code-block:: cpp +.. code-block:: xml + :force: @@ -189,7 +190,8 @@ for non-default *batch_dims* value: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -218,7 +220,8 @@ for non-default *batch_dims* value: -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/movement/GatherTree_1.md b/docs/ops/movement/GatherTree_1.md index b3660facfdd59e..cc9c65087126bd 100644 --- a/docs/ops/movement/GatherTree_1.md +++ b/docs/ops/movement/GatherTree_1.md @@ -20,7 +20,8 @@ first decoded ``end_token`` all values are filled in with ``end_token``. The algorithm in pseudocode is as follows: -.. code-block:: python +.. code-block:: py + :force: final_ids[ :, :, :] = end_token for batch in range(BATCH_SIZE): @@ -72,7 +73,8 @@ The algorithm in pseudocode is as follows: **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Gather_1.md b/docs/ops/movement/Gather_1.md index 6fb170e9c92a5f..5e72934d91628b 100644 --- a/docs/ops/movement/Gather_1.md +++ b/docs/ops/movement/Gather_1.md @@ -15,7 +15,7 @@ to the indices specified in the second input tensor and axis from the third inpu **Detailed description** -.. code-block:: +.. code-block:: sh output[p_0, p_1, ..., p_{axis-1}, i, ..., j, ...] = input1[p_0, p_1, ..., p_{axis-1}, input2[i, ..., j], ...] @@ -39,7 +39,8 @@ Where ``axis`` is the value from the third input. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Gather_7.md b/docs/ops/movement/Gather_7.md index 411079cfb2b1ff..e22192ad4513b5 100644 --- a/docs/ops/movement/Gather_7.md +++ b/docs/ops/movement/Gather_7.md @@ -16,7 +16,7 @@ TensorFlow `Gather `__ ope **Detailed description** -.. code-block:: +.. code-block:: sh output[p_0, p_1, ..., p_{axis-1}, i_b, ..., i_{M-1}, p_{axis+1}, ..., p_{N-1}] = data[p_0, p_1, ..., p_{axis-1}, indices[p_0, p_1, ..., p_{b-1}, i_b, ..., i_{M-1}], p_{axis+1}, ..., p_{N-1}] @@ -40,7 +40,7 @@ the number of batch dimensions. ``N`` and ``M`` are numbers of dimensions of ``d Example 1 with default *batch_dims* value: -.. code-block:: +.. 
code-block:: sh batch_dims = 0 axis = 0 @@ -52,7 +52,7 @@ Example 1 with default *batch_dims* value: Example 2 with non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 1 axis = 1 @@ -72,7 +72,7 @@ Example 2 with non-default *batch_dims* value: Example 3 with non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 2 axis = 2 @@ -100,7 +100,7 @@ Example 3 with non-default *batch_dims* value: Example 4 with *axis* > *batch_dims*: -.. code-block:: +.. code-block:: sh batch_dims = 1 axis = 2 @@ -134,7 +134,7 @@ Example 4 with *axis* > *batch_dims*: Example 5 with negative *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = -1 <-- normalized value will be indices.rank + batch_dims = 2 - 1 = 1 axis = 1 diff --git a/docs/ops/movement/Gather_8.md b/docs/ops/movement/Gather_8.md index 346f5c3635db86..ef68b143293af5 100644 --- a/docs/ops/movement/Gather_8.md +++ b/docs/ops/movement/Gather_8.md @@ -18,7 +18,7 @@ support of negative indices. **Detailed description** -.. code-block:: +.. code-block:: sh output[p_0, p_1, ..., p_{axis-1}, i_b, ..., i_{M-1}, p_{axis+1}, ..., p_{N-1}] = data[p_0, p_1, ..., p_{axis-1}, indices[p_0, p_1, ..., p_{b-1}, i_b, ..., i_{M-1}], p_{axis+1}, ..., p_{N-1}] @@ -44,7 +44,7 @@ range output data for corresponding index will be filled with zeros (Example 7). Example 1 with default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 0 axis = 0 @@ -55,7 +55,7 @@ Example 1 with default *batch_dims* value: Example 2 with non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 1 axis = 1 @@ -75,7 +75,7 @@ Example 2 with non-default *batch_dims* value: Example 3 with non-default *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = 2 axis = 2 @@ -103,7 +103,7 @@ Example 3 with non-default *batch_dims* value: Example 4 with *axis* > *batch_dims*: -.. code-block:: +.. 
code-block:: sh batch_dims = 1 axis = 2 @@ -137,7 +137,7 @@ Example 4 with *axis* > *batch_dims*: Example 5 with negative *batch_dims* value: -.. code-block:: +.. code-block:: sh batch_dims = -1 <-- normalized value will be indices.rank + batch_dims = 2 - 1 = 1 axis = 1 @@ -157,7 +157,7 @@ Example 5 with negative *batch_dims* value: Example 6 with negative indices: -.. code-block:: +.. code-block:: sh batch_dims = 0 axis = 0 @@ -169,7 +169,7 @@ Example 6 with negative indices: Example 7 with indices out of the range: -.. code-block:: +.. code-block:: sh batch_dims = 0 axis = 0 @@ -203,7 +203,8 @@ of the output tensor is ``data.shape[:axis] + indices.shape[batch_dims:] + data. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Pad_1.md b/docs/ops/movement/Pad_1.md index ba7a89dfd40c6e..d16df20cb3540c 100644 --- a/docs/ops/movement/Pad_1.md +++ b/docs/ops/movement/Pad_1.md @@ -118,7 +118,8 @@ depending on the *pad_mode*. **Example**: constant mode -.. code-block:: cpp +.. code-block:: xml + :force: @@ -153,7 +154,8 @@ depending on the *pad_mode*. **Example**: edge mode -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Pad_12.md b/docs/ops/movement/Pad_12.md index 647e799b82696a..6e3e629833a065 100644 --- a/docs/ops/movement/Pad_12.md +++ b/docs/ops/movement/Pad_12.md @@ -208,7 +208,8 @@ Mixed pads example: **Example**: constant mode (positive pads) -.. code-block:: cpp +.. code-block:: xml + :force: @@ -243,7 +244,8 @@ Mixed pads example: **Example**: constant mode (positive and negative pads) -.. code-block:: cpp +.. code-block:: xml + :force: @@ -278,7 +280,8 @@ Mixed pads example: **Example**: edge mode -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/movement/ReverseSequence_1.md b/docs/ops/movement/ReverseSequence_1.md index 03389f373d6f49..c1ed72974db67c 100644 --- a/docs/ops/movement/ReverseSequence_1.md +++ b/docs/ops/movement/ReverseSequence_1.md @@ -50,7 +50,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Reverse_1.md b/docs/ops/movement/Reverse_1.md index 442bc8cbc965b7..9a59e9c901f11d 100644 --- a/docs/ops/movement/Reverse_1.md +++ b/docs/ops/movement/Reverse_1.md @@ -47,7 +47,8 @@ If no axis specified, that means either the second input is empty if ``index`` m **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Roll_7.md b/docs/ops/movement/Roll_7.md index 57dc015573c5ca..5c04764c56efba 100644 --- a/docs/ops/movement/Roll_7.md +++ b/docs/ops/movement/Roll_7.md @@ -83,7 +83,8 @@ No attributes available. *Example 1: "shift" and "axes" are 1D tensors.* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -113,7 +114,8 @@ No attributes available. *Example 2: "shift" value is a scalar and multiple axes are specified.* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/ScatterElementsUpdate_3.md b/docs/ops/movement/ScatterElementsUpdate_3.md index cc93ab4faccc66..cd5bccb99304d8 100644 --- a/docs/ops/movement/ScatterElementsUpdate_3.md +++ b/docs/ops/movement/ScatterElementsUpdate_3.md @@ -56,7 +56,8 @@ The value can be in range ``[-r, r - 1]`` where ``r`` is the rank of ``data``. * **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/ScatterNDUpdate_3.md b/docs/ops/movement/ScatterNDUpdate_3.md index 48a2eb9b1ec3d4..ffcaf7f6fa5fd4 100644 --- a/docs/ops/movement/ScatterNDUpdate_3.md +++ b/docs/ops/movement/ScatterNDUpdate_3.md @@ -70,7 +70,8 @@ Example 2 that shows update of two slices of ``4x4`` shape in ``data``: **Example** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/movement/ScatterUpdate_3.md b/docs/ops/movement/ScatterUpdate_3.md index 1b4e57700e3f13..47c673724eb503 100644 --- a/docs/ops/movement/ScatterUpdate_3.md +++ b/docs/ops/movement/ScatterUpdate_3.md @@ -58,7 +58,8 @@ The value can be in the range ``[ -r, r - 1]``, where ``r`` is the rank of ``dat *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -95,7 +96,8 @@ The value can be in the range ``[ -r, r - 1]``, where ``r`` is the rank of ``dat *Example 2* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/ShuffleChannels_1.md b/docs/ops/movement/ShuffleChannels_1.md index 23c5fa7bd0ca6b..37eea17f7c6a56 100644 --- a/docs/ops/movement/ShuffleChannels_1.md +++ b/docs/ops/movement/ShuffleChannels_1.md @@ -74,7 +74,8 @@ where ``group`` is the layer attribute described below. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Slice_8.md b/docs/ops/movement/Slice_8.md index 8b093babb3390e..dc14df733fe2ba 100644 --- a/docs/ops/movement/Slice_8.md +++ b/docs/ops/movement/Slice_8.md @@ -75,366 +75,378 @@ Number of elements in ``start``, ``stop``, ``step``, and ``axes`` inputs are req Example 1: basic slicing -.. code-block:: cpp - - - - < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - < !-- start: [1] --> - 1 - - < !-- stop: [8] --> - 1 - - < !-- step: [1] --> - 1 - - < !-- axes: [0] --> - 1 - - - - < !-- output: [1, 2, 3, 4, 5, 6, 7] --> - 7 - - - +.. code-block:: xml + :force: + + + + < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + < !-- start: [1] --> + 1 + + < !-- stop: [8] --> + 1 + + < !-- step: [1] --> + 1 + + < !-- axes: [0] --> + 1 + + + + < !-- output: [1, 2, 3, 4, 5, 6, 7] --> + 7 + + + Example 2: basic slicing, ``axes`` default -.. 
code-block:: cpp - - - - < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - < !-- start: [1] --> - 1 - - < !-- stop: [8] --> - 1 - - < !-- step: [1] --> - 1 - - - - < !-- output: [1, 2, 3, 4, 5, 6, 7] --> - 7 - - - +.. code-block:: xml + :force: + + + + < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + < !-- start: [1] --> + 1 + + < !-- stop: [8] --> + 1 + + < !-- step: [1] --> + 1 + + + + < !-- output: [1, 2, 3, 4, 5, 6, 7] --> + 7 + + + Example 3: basic slicing, ``step: [2]`` -.. code-block:: cpp - - - - < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - < !-- start: [1] --> - 1 - - < !-- stop: [8] --> - 1 - - < !-- step: [2] --> - 1 - - < !-- axes: [0] --> - 1 - - - - < !-- output: [1, 3, 5, 7] --> - 4 - - - +.. code-block:: xml + :force: + + + + < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + < !-- start: [1] --> + 1 + + < !-- stop: [8] --> + 1 + + < !-- step: [2] --> + 1 + + < !-- axes: [0] --> + 1 + + + + < !-- output: [1, 3, 5, 7] --> + 4 + + + Example 4: ``start`` and ``stop`` out of the dimension size, ``step: [1]`` -.. code-block:: cpp - - - - < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - < !-- start: [-100] --> - 1 - - < !-- stop: [100] --> - 1 - - < !-- step: [1] --> - 1 - - < !-- axes: [0] --> - 1 - - - - < !-- output: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - - +.. code-block:: xml + :force: + + + + < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + < !-- start: [-100] --> + 1 + + < !-- stop: [100] --> + 1 + + < !-- step: [1] --> + 1 + + < !-- axes: [0] --> + 1 + + + + < !-- output: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + + Example 5: slicing backward all elements, ``step: [-1]``, ``stop: [-11]`` -.. code-block:: cpp - - - - < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - < !-- start: [9] --> - 1 - - < !-- stop: [-11] --> - 1 - - < !-- step: [-1] --> - 1 - - < !-- axes: [0] --> - 1 - - - - < !-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] --> - 10 - - - +.. 
code-block:: xml + :force: + + + + < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + < !-- start: [9] --> + 1 + + < !-- stop: [-11] --> + 1 + + < !-- step: [-1] --> + 1 + + < !-- axes: [0] --> + 1 + + + + < !-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] --> + 10 + + + Example 6: slicing backward, ``step: [-1]``, ``stop: [0]`` -.. code-block:: cpp - - - - < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - < !-- start: [9] --> - 1 - - < !-- stop: [0] --> - 1 - - < !-- step: [-1] --> - 1 - - < !-- axes: [0] --> - 1 - - - - < !-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1] --> - 9 - - - - +.. code-block:: xml + :force: + + + + < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + < !-- start: [9] --> + 1 + + < !-- stop: [0] --> + 1 + + < !-- step: [-1] --> + 1 + + < !-- axes: [0] --> + 1 + + + + < !-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1] --> + 9 + + + + Example 7: slicing backward, ``step: [-1]``, ``stop: [-10]`` -.. code-block:: cpp - - - - < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - < !-- start: [9] --> - 1 - - < !-- stop: [-10] --> - 1 - - < !-- step: [-1] --> - 1 - - < !-- axes: [0] --> - 1 - - - - < !-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1] --> - 9 - - - +.. code-block:: xml + :force: + + + + < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + < !-- start: [9] --> + 1 + + < !-- stop: [-10] --> + 1 + + < !-- step: [-1] --> + 1 + + < !-- axes: [0] --> + 1 + + + + < !-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1] --> + 9 + + + Example 8: slicing backward, ``step: [-2]`` -.. code-block:: cpp - - - - < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - < !-- start: [9] --> - 1 - - < !-- stop: [-11] --> - 1 - - < !-- step: [-2] --> - 1 - - < !-- axes: [0] --> - 1 - - - - < !-- output: [9, 7, 5, 3, 1] --> - 5 - - - +.. 
code-block:: xml + :force: + + + + < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + < !-- start: [9] --> + 1 + + < !-- stop: [-11] --> + 1 + + < !-- step: [-2] --> + 1 + + < !-- axes: [0] --> + 1 + + + + < !-- output: [9, 7, 5, 3, 1] --> + 5 + + + Example 9: ``start`` and ``stop`` out of the dimension size, slicing backward -.. code-block:: cpp - - - - < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> - 10 - - < !-- start: [100] --> - 1 - - < !-- stop: [-100] --> - 1 - - < !-- step: [-1] --> - 1 - - < !-- axes: [0] --> - 1 - - - - < !-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] --> - 10 - - - +.. code-block:: xml + :force: + + + + < !-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] --> + 10 + + < !-- start: [100] --> + 1 + + < !-- stop: [-100] --> + 1 + + < !-- step: [-1] --> + 1 + + < !-- axes: [0] --> + 1 + + + + < !-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] --> + 10 + + + Example 10: slicing 2D tensor, all axes specified -.. code-block:: cpp - - - - < !-- data: data: [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] --> - 2 - 5 - - < !-- start: [0, 1] --> - 2 - - < !-- stop: [2, 4] --> - 2 - - < !-- step: [1, 2] --> - 2 - - < !-- axes: [0, 1] --> - 2 - - - - < !-- output: [1, 3, 6, 8] --> - 2 - 2 - - - - +.. code-block:: xml + :force: + + + + < !-- data: data: [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] --> + 2 + 5 + + < !-- start: [0, 1] --> + 2 + + < !-- stop: [2, 4] --> + 2 + + < !-- step: [1, 2] --> + 2 + + < !-- axes: [0, 1] --> + 2 + + + + < !-- output: [1, 3, 6, 8] --> + 2 + 2 + + + + Example 11: slicing 3D tensor, all axes specified -.. code-block:: cpp - - - - < !-- data --> - 20 - 10 - 5 - - < !-- start: [0, 0, 0] --> - 2 - - < !-- stop: [4, 10, 5] --> - 2 - - < !-- step: [1, 1, 1] --> - 2 - - < !-- axes: [0, 1, 2] --> - 2 - - - - < !-- output --> - 4 - 10 - 5 - - - +.. 
code-block:: xml + :force: + + + + < !-- data --> + 20 + 10 + 5 + + < !-- start: [0, 0, 0] --> + 2 + + < !-- stop: [4, 10, 5] --> + 2 + + < !-- step: [1, 1, 1] --> + 2 + + < !-- axes: [0, 1, 2] --> + 2 + + + + < !-- output --> + 4 + 10 + 5 + + + Example 12: slicing 3D tensor, last axes default -.. code-block:: cpp - - - - < !-- data --> - 20 - 10 - 5 - - < !-- start: [0, 0] --> - 2 - - < !-- stop: [4, 10] --> - 2 - - < !-- step: [1, 1] --> - 2 - - < !-- axes: [0, 1] --> - 2 - - - - < !-- output --> - 4 - 10 - 5 - - - +.. code-block:: xml + :force: + + + + < !-- data --> + 20 + 10 + 5 + + < !-- start: [0, 0] --> + 2 + + < !-- stop: [4, 10] --> + 2 + + < !-- step: [1, 1] --> + 2 + + < !-- axes: [0, 1] --> + 2 + + + + < !-- output --> + 4 + 10 + 5 + + + @endsphinxdirective diff --git a/docs/ops/movement/SpaceToBatch_2.md b/docs/ops/movement/SpaceToBatch_2.md index 1abdbecd45a379..4b99912ca14f9c 100644 --- a/docs/ops/movement/SpaceToBatch_2.md +++ b/docs/ops/movement/SpaceToBatch_2.md @@ -72,7 +72,8 @@ No attributes available. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/SpaceToDepth_1.md b/docs/ops/movement/SpaceToDepth_1.md index e0c36f0eb1983e..79e7a0d3c00f80 100644 --- a/docs/ops/movement/SpaceToDepth_1.md +++ b/docs/ops/movement/SpaceToDepth_1.md @@ -71,7 +71,8 @@ If ``mode = depth_first``: **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Split_1.md b/docs/ops/movement/Split_1.md index 9821155fb3a50d..18800fe9915621 100644 --- a/docs/ops/movement/Split_1.md +++ b/docs/ops/movement/Split_1.md @@ -50,7 +50,8 @@ Where D is the rank of input tensor ``data``. The axis being split must be evenl **Example** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/movement/StridedSlice_1.md b/docs/ops/movement/StridedSlice_1.md index fc1cec4f25e036..c447979308e98a 100644 --- a/docs/ops/movement/StridedSlice_1.md +++ b/docs/ops/movement/StridedSlice_1.md @@ -74,7 +74,8 @@ **Example** Example of ``begin_mask`` & ``end_mask`` usage. -.. code-block:: cpp +.. code-block:: xml + :force: @@ -106,8 +107,8 @@ Example of ``begin_mask`` & ``end_mask`` usage. Example of ``new_axis_mask`` usage. -.. code-block:: cpp - +.. code-block:: xml + :force: @@ -139,7 +140,8 @@ Example of ``new_axis_mask`` usage. Example of ``shrink_axis_mask`` usage. -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Tile_1.md b/docs/ops/movement/Tile_1.md index fd661e5425173b..671fc6e81cb16c 100644 --- a/docs/ops/movement/Tile_1.md +++ b/docs/ops/movement/Tile_1.md @@ -49,7 +49,8 @@ No attributes available. *Example 1: number elements in "repeats" is equal to shape of data* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -73,7 +74,8 @@ No attributes available. *Example 2: number of elements in "repeats" is more than shape of "data"* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -98,7 +100,8 @@ No attributes available. *Example 3: number of elements in "repeats" is less than shape of "data"* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/Transpose_1.md b/docs/ops/movement/Transpose_1.md index 1af2e84ac9593f..41eda58c351d91 100644 --- a/docs/ops/movement/Transpose_1.md +++ b/docs/ops/movement/Transpose_1.md @@ -40,7 +40,8 @@ *Example 1* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -65,7 +66,8 @@ *Example 2: input_order = empty 1D tensor of Shape[0]* -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/movement/Unique_10.md b/docs/ops/movement/Unique_10.md index 249daac8fcfbb5..ec5b3f0597f59e 100644 --- a/docs/ops/movement/Unique_10.md +++ b/docs/ops/movement/Unique_10.md @@ -66,7 +66,8 @@ The operator can either work in elementwise mode searching for unique values in *Example 1: axis input connected to a constant containing a 'zero'* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -101,7 +102,8 @@ The operator can either work in elementwise mode searching for unique values in *Example 2: no axis provided* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -128,7 +130,8 @@ The operator can either work in elementwise mode searching for unique values in *Example 3: no axis provided, non-default outputs precision* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/movement/VariadicSplit_1.md b/docs/ops/movement/VariadicSplit_1.md index 0479c0b2b52f90..c3ec920a67cf72 100644 --- a/docs/ops/movement/VariadicSplit_1.md +++ b/docs/ops/movement/VariadicSplit_1.md @@ -42,7 +42,8 @@ Where D is the rank of input tensor `data`. The sum of elements in ``split_lengt **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -81,7 +82,8 @@ Where D is the rank of input tensor `data`. The sum of elements in ``split_lengt -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/normalization/BatchNormInference_1.md b/docs/ops/normalization/BatchNormInference_1.md index 928d238984220b..cfd16a71b8b94b 100644 --- a/docs/ops/normalization/BatchNormInference_1.md +++ b/docs/ops/normalization/BatchNormInference_1.md @@ -104,7 +104,8 @@ For a particular activation, consider a mini-batch :math:`\mathcal{B}` of m valu Example: 2D input tensor ``data`` -.. code-block:: cpp +.. code-block:: xml + :force: @@ -136,7 +137,8 @@ Example: 2D input tensor ``data`` Example: 4D input tensor ``data`` -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/normalization/BatchNormInference_5.md b/docs/ops/normalization/BatchNormInference_5.md index aa090de94dac72..7f80a3bad0edc0 100644 --- a/docs/ops/normalization/BatchNormInference_5.md +++ b/docs/ops/normalization/BatchNormInference_5.md @@ -105,7 +105,8 @@ For a particular activation, consider a mini-batch :math:`\mathcal{B}` of m valu Example: 2D input tensor ``data`` -.. code-block:: cpp +.. code-block:: xml + :force: @@ -137,7 +138,8 @@ Example: 2D input tensor ``data`` Example: 4D input tensor ``data`` -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/normalization/GRN_1.md b/docs/ops/normalization/GRN_1.md index ce6a7724646e63..b0761457f0765d 100644 --- a/docs/ops/normalization/GRN_1.md +++ b/docs/ops/normalization/GRN_1.md @@ -44,7 +44,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/normalization/GroupNormalization_12.md b/docs/ops/normalization/GroupNormalization_12.md index f8937ed2c6e93b..725261d52a43ae 100644 --- a/docs/ops/normalization/GroupNormalization_12.md +++ b/docs/ops/normalization/GroupNormalization_12.md @@ -56,7 +56,8 @@ The operation is applied per batch, per group of channels. This means that the e **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/normalization/LRN_1.md b/docs/ops/normalization/LRN_1.md index 7d7e34a0a8fa97..0cf04f9d593f10 100644 --- a/docs/ops/normalization/LRN_1.md +++ b/docs/ops/normalization/LRN_1.md @@ -90,7 +90,8 @@ Example for 4D ``data`` input tensor and ``axes = [2, 3]``: **Example** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/normalization/MVN_1.md b/docs/ops/normalization/MVN_1.md index 6d10bd05e7ee4b..2f58f70775d7dc 100644 --- a/docs/ops/normalization/MVN_1.md +++ b/docs/ops/normalization/MVN_1.md @@ -111,7 +111,8 @@ where :math:`\sigma_{k}^2` is the variance calculated based on mean value, :math *Example: with* ``across_channels`` *attribute* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -136,7 +137,8 @@ where :math:`\sigma_{k}^2` is the variance calculated based on mean value, :math *Example: with* ``reduction_axes`` *attribute* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/normalization/MVN_6.md b/docs/ops/normalization/MVN_6.md index aad5414527e985..4b2a3fcaea61a7 100644 --- a/docs/ops/normalization/MVN_6.md +++ b/docs/ops/normalization/MVN_6.md @@ -85,7 +85,8 @@ If *normalize_variance* is set to ``true``, the output blob is divided by varian **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/normalization/NormalizeL2_1.md b/docs/ops/normalization/NormalizeL2_1.md index 03a14c204da8f6..463eff1f4c9de9 100644 --- a/docs/ops/normalization/NormalizeL2_1.md +++ b/docs/ops/normalization/NormalizeL2_1.md @@ -64,7 +64,8 @@ Particular cases: Example: Normalization over channel dimension for ``NCHW`` layout -.. code-block:: cpp +.. code-block:: xml + :force: @@ -92,7 +93,8 @@ Example: Normalization over channel dimension for ``NCHW`` layout Example: Normalization over channel and spatial dimensions for ``NCHW`` layout -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/pooling/AdaptiveAvgPool_8.md b/docs/ops/pooling/AdaptiveAvgPool_8.md index c4eb41dce65beb..ffde1548f07836 100644 --- a/docs/ops/pooling/AdaptiveAvgPool_8.md +++ b/docs/ops/pooling/AdaptiveAvgPool_8.md @@ -48,7 +48,8 @@ The output is calculated with the following formula: **Examples** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/pooling/AdaptiveMaxPool_8.md b/docs/ops/pooling/AdaptiveMaxPool_8.md index 87ea96d505dc83..f53d0002aef61a 100644 --- a/docs/ops/pooling/AdaptiveMaxPool_8.md +++ b/docs/ops/pooling/AdaptiveMaxPool_8.md @@ -59,7 +59,8 @@ The output is calculated following this formula: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/pooling/AvgPool_1.md b/docs/ops/pooling/AvgPool_1.md index 07fa577e530411..31041331a07aa6 100644 --- a/docs/ops/pooling/AvgPool_1.md +++ b/docs/ops/pooling/AvgPool_1.md @@ -98,7 +98,8 @@ **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/pooling/MaxPool_1.md b/docs/ops/pooling/MaxPool_1.md index c1b2dd05ed3213..b3b2115ef7544f 100644 --- a/docs/ops/pooling/MaxPool_1.md +++ b/docs/ops/pooling/MaxPool_1.md @@ -205,7 +205,8 @@ If ``H + pads_begin[i] + pads_end[i] - kernel[i]`` is not divided by ``strides[i **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/pooling/MaxPool_8.md b/docs/ops/pooling/MaxPool_8.md index c99b371c0bdb9e..283b04b27392ec 100644 --- a/docs/ops/pooling/MaxPool_8.md +++ b/docs/ops/pooling/MaxPool_8.md @@ -303,7 +303,8 @@ If ``H + pads_begin[i] + pads_end[i] - kernel[i]`` is not divisible by ``strides **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/quantization/FakeQuantize_1.md b/docs/ops/quantization/FakeQuantize_1.md index e1031b90bc483d..3d6a26a1ca0767 100644 --- a/docs/ops/quantization/FakeQuantize_1.md +++ b/docs/ops/quantization/FakeQuantize_1.md @@ -26,7 +26,8 @@ specify minimum and maximum quantized values at the output. Each element of the output is defined as the result of the following expression: -.. code-block:: python +.. code-block:: py + :force: if x <= min(input_low, input_high): output = output_low @@ -76,7 +77,8 @@ Each element of the output is defined as the result of the following expression: **Example** -.. 
code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/reduction/ReduceL1_4.md b/docs/ops/reduction/ReduceL1_4.md index bfa3a4c24be0b9..f0024e92bc8f03 100644 --- a/docs/ops/reduction/ReduceL1_4.md +++ b/docs/ops/reduction/ReduceL1_4.md @@ -53,7 +53,8 @@ Particular cases: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -80,7 +81,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -105,7 +107,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -131,7 +134,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/reduction/ReduceL2_4.md b/docs/ops/reduction/ReduceL2_4.md index ef15e847df7020..84a96d5863acda 100644 --- a/docs/ops/reduction/ReduceL2_4.md +++ b/docs/ops/reduction/ReduceL2_4.md @@ -53,7 +53,8 @@ Particular cases: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -79,7 +80,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -103,7 +105,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -128,7 +131,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/reduction/ReduceLogicalAnd_1.md b/docs/ops/reduction/ReduceLogicalAnd_1.md index f586a5f80a7ffe..1570061ab8efe3 100644 --- a/docs/ops/reduction/ReduceLogicalAnd_1.md +++ b/docs/ops/reduction/ReduceLogicalAnd_1.md @@ -55,7 +55,8 @@ Particular cases: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -81,7 +82,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -105,7 +107,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -130,7 +133,8 @@ Particular cases: -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/reduction/ReduceLogicalOr_1.md b/docs/ops/reduction/ReduceLogicalOr_1.md index 553741ead411cd..6db017f7661e97 100644 --- a/docs/ops/reduction/ReduceLogicalOr_1.md +++ b/docs/ops/reduction/ReduceLogicalOr_1.md @@ -55,7 +55,8 @@ Particular cases: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -81,7 +82,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -104,7 +106,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -129,7 +132,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/reduction/ReduceMax_1.md b/docs/ops/reduction/ReduceMax_1.md index 922bd13b22604b..e6262e1327ca23 100644 --- a/docs/ops/reduction/ReduceMax_1.md +++ b/docs/ops/reduction/ReduceMax_1.md @@ -55,7 +55,8 @@ Particular cases: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -81,7 +82,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -105,7 +107,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -130,7 +133,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/reduction/ReduceMean_1.md b/docs/ops/reduction/ReduceMean_1.md index 57497239958bff..91b6db0b7a76aa 100644 --- a/docs/ops/reduction/ReduceMean_1.md +++ b/docs/ops/reduction/ReduceMean_1.md @@ -55,7 +55,8 @@ Particular cases: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -81,7 +82,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -103,10 +105,10 @@ Particular cases: += - - -.. code-block:: cpp +.. code-block:: xml + :force: @@ -130,7 +132,8 @@ Particular cases: -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/reduction/ReduceMin_1.md b/docs/ops/reduction/ReduceMin_1.md index 27347cebdbcc9c..b38776a18a0871 100644 --- a/docs/ops/reduction/ReduceMin_1.md +++ b/docs/ops/reduction/ReduceMin_1.md @@ -55,7 +55,8 @@ Particular cases: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -81,7 +82,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -105,7 +107,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -130,7 +133,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/reduction/ReduceProd_1.md b/docs/ops/reduction/ReduceProd_1.md index e06392c12a9708..05fc65d705dabc 100644 --- a/docs/ops/reduction/ReduceProd_1.md +++ b/docs/ops/reduction/ReduceProd_1.md @@ -55,7 +55,8 @@ Particular cases: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -81,7 +82,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -105,7 +107,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -130,7 +133,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/reduction/ReduceSum_1.md b/docs/ops/reduction/ReduceSum_1.md index 37d94466982eb7..575dfcedd6a619 100644 --- a/docs/ops/reduction/ReduceSum_1.md +++ b/docs/ops/reduction/ReduceSum_1.md @@ -55,7 +55,8 @@ Particular cases: **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -81,7 +82,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -105,7 +107,8 @@ Particular cases: -.. code-block:: cpp +.. code-block:: xml + :force: @@ -130,7 +133,8 @@ Particular cases: -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/sequence/CTCGreedyDecoderSeqLen_6.md b/docs/ops/sequence/CTCGreedyDecoderSeqLen_6.md index 455d1989b4b5ab..dbeaed01d5bf54 100644 --- a/docs/ops/sequence/CTCGreedyDecoderSeqLen_6.md +++ b/docs/ops/sequence/CTCGreedyDecoderSeqLen_6.md @@ -73,7 +73,8 @@ The main difference between :doc:`CTCGreedyDecoder diff --git a/docs/ops/sequence/CTCGreedyDecoder_1.md b/docs/ops/sequence/CTCGreedyDecoder_1.md index cf5b60d800294d..15da3e0716b24c 100644 --- a/docs/ops/sequence/CTCGreedyDecoder_1.md +++ b/docs/ops/sequence/CTCGreedyDecoder_1.md @@ -47,7 +47,8 @@ Sequences in the batch can have different length. The lengths of sequences are c **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/sequence/CTCLoss_4.md b/docs/ops/sequence/CTCLoss_4.md index 0e90bb2ca7b01b..a486b0aeb441bb 100644 --- a/docs/ops/sequence/CTCLoss_4.md +++ b/docs/ops/sequence/CTCLoss_4.md @@ -96,7 +96,8 @@ Having log-probabilities for aligned paths, log of summed up probabilities for t **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/sequence/GRUCell_3.md b/docs/ops/sequence/GRUCell_3.md index 2d6747c9b57bb5..1d5cfe8bc1206c 100644 --- a/docs/ops/sequence/GRUCell_3.md +++ b/docs/ops/sequence/GRUCell_3.md @@ -14,7 +14,7 @@ **Detailed description**: *GRUCell* computes the output *Ht* for the current time step based on the followint formula: -.. code-block:: +.. code-block:: sh Formula: * - matrix multiplication @@ -90,7 +90,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/sequence/GRUSequence_5.md b/docs/ops/sequence/GRUSequence_5.md index d50028359c689f..39c8a67a2f63be 100644 --- a/docs/ops/sequence/GRUSequence_5.md +++ b/docs/ops/sequence/GRUSequence_5.md @@ -104,7 +104,8 @@ are in sync with the specification of ONNX GRU operator defined **Example** -.. code-block:: cpp +.. 
code-block:: xml + :force: diff --git a/docs/ops/sequence/LSTMCell_1.md b/docs/ops/sequence/LSTMCell_1.md index 948bab39122578..b2089451ea4aeb 100644 --- a/docs/ops/sequence/LSTMCell_1.md +++ b/docs/ops/sequence/LSTMCell_1.md @@ -89,7 +89,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/sequence/OneHot_1.md b/docs/ops/sequence/OneHot_1.md index d0224f04190eb0..f67dbd7e0fa65c 100644 --- a/docs/ops/sequence/OneHot_1.md +++ b/docs/ops/sequence/OneHot_1.md @@ -56,7 +56,8 @@ The types of input scalars ``on_value`` and ``off_value`` should match and be eq **Examples** -.. code-block:: cpp +.. code-block:: xml + :force: @@ -81,7 +82,8 @@ The types of input scalars ``on_value`` and ``off_value`` should match and be eq -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/sequence/RNNCell_3.md b/docs/ops/sequence/RNNCell_3.md index bf8553df88aef2..bcbc220917d75b 100644 --- a/docs/ops/sequence/RNNCell_3.md +++ b/docs/ops/sequence/RNNCell_3.md @@ -80,37 +80,38 @@ **Example** -.. code-block:: cpp - - - - - - 1 - 16 - - - 1 - 128 - - - 128 - 16 - - - 128 - 128 - - - 128 - - - - - 1 - 128 - - - +.. code-block:: xml + :force: + + + + + + 1 + 16 + + + 1 + 128 + + + 128 + 16 + + + 128 + 128 + + + 128 + + + + + 1 + 128 + + + @endsphinxdirective diff --git a/docs/ops/sequence/RNNSequence_5.md b/docs/ops/sequence/RNNSequence_5.md index 959170d803ad0a..66746edcec9792 100644 --- a/docs/ops/sequence/RNNSequence_5.md +++ b/docs/ops/sequence/RNNSequence_5.md @@ -84,53 +84,54 @@ A single cell in the sequence is implemented in the same way as in `RNNCell <#RN **Example** -.. code-block:: cpp - - - - - - 1 - 4 - 16 - - - 1 - 1 - 128 - - - 1 - - - 1 - 128 - 16 - - - 1 - 128 - 128 - - - 1 - 128 - - - - - 1 - 1 - 4 - 128 - - - 1 - 1 - 128 - - - +.. 
code-block:: xml + :force: + + + + + + 1 + 4 + 16 + + + 1 + 1 + 128 + + + 1 + + + 1 + 128 + 16 + + + 1 + 128 + 128 + + + 1 + 128 + + + + + 1 + 1 + 4 + 128 + + + 1 + 1 + 128 + + + @endsphinxdirective diff --git a/docs/ops/shape/Reshape_1.md b/docs/ops/shape/Reshape_1.md index dfa5f70764ea81..8f8671cd64cd85 100644 --- a/docs/ops/shape/Reshape_1.md +++ b/docs/ops/shape/Reshape_1.md @@ -50,7 +50,8 @@ If ``special_zero`` is set to ``true`` index of ``0`` cannot be larger than the *Example 1: reshape empty tensor* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -76,7 +77,8 @@ If ``special_zero`` is set to ``true`` index of ``0`` cannot be larger than the *Example 2: reshape tensor - preserve first dim, calculate second and fix value for third dim* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -103,7 +105,8 @@ If ``special_zero`` is set to ``true`` index of ``0`` cannot be larger than the *Example 3: reshape tensor - preserve first two dims, fix value for third dim and calculate fourth* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -130,7 +133,8 @@ If ``special_zero`` is set to ``true`` index of ``0`` cannot be larger than the *Example 4: reshape tensor - calculate first dim and preserve second dim* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -155,7 +159,8 @@ If ``special_zero`` is set to ``true`` index of ``0`` cannot be larger than the *Example 5: reshape tensor - preserve first dim and calculate second dim* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/shape/ShapeOf_1.md b/docs/ops/shape/ShapeOf_1.md index 894f173730de91..e3889db9d1f724 100644 --- a/docs/ops/shape/ShapeOf_1.md +++ b/docs/ops/shape/ShapeOf_1.md @@ -24,7 +24,8 @@ **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/shape/ShapeOf_3.md b/docs/ops/shape/ShapeOf_3.md index 824cc1d66f3db8..19407e12fbbe87 100644 --- a/docs/ops/shape/ShapeOf_3.md +++ b/docs/ops/shape/ShapeOf_3.md @@ -38,7 +38,8 @@ **Example** -.. 
code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/shape/Squeeze_1.md b/docs/ops/shape/Squeeze_1.md index 2374e6f2ad2781..80a820c878abb1 100644 --- a/docs/ops/shape/Squeeze_1.md +++ b/docs/ops/shape/Squeeze_1.md @@ -39,7 +39,8 @@ *Example 1: squeeze 4D tensor to a 2D tensor* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -65,7 +66,8 @@ *Example 2: squeeze 1D tensor with 1 element to a 0D tensor (constant)* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/shape/Unsqueeze_1.md b/docs/ops/shape/Unsqueeze_1.md index 3142466c50b900..1687fa24980d72 100644 --- a/docs/ops/shape/Unsqueeze_1.md +++ b/docs/ops/shape/Unsqueeze_1.md @@ -34,7 +34,8 @@ *Example 1: unsqueeze 2D tensor to a 4D tensor* -.. code-block:: cpp +.. code-block:: xml + :force: @@ -61,7 +62,8 @@ *Example 2: unsqueeze 0D tensor (constant) to 1D tensor* -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/signals/DFT_7.md b/docs/ops/signals/DFT_7.md index 6d81260d01ce15..f8f64fabaf4a35 100644 --- a/docs/ops/signals/DFT_7.md +++ b/docs/ops/signals/DFT_7.md @@ -77,7 +77,8 @@ Calculations for the generic case of axes and signal sizes are similar. There is no ``signal_size`` input (4D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -102,7 +103,8 @@ There is no ``signal_size`` input (4D input tensor): There is no ``signal_size`` input (3D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -125,7 +127,8 @@ There is no ``signal_size`` input (3D input tensor): There is ``signal_size`` input (4D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -153,7 +156,8 @@ There is ``signal_size`` input (4D input tensor): There is ``signal_size`` input (3D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -179,7 +183,8 @@ There is ``signal_size`` input (3D input tensor): There is ``signal_size`` input (5D input tensor, ``-1`` in ``signal_size``, unsorted axes): -.. 
code-block:: cpp +.. code-block:: xml + :force: @@ -209,7 +214,8 @@ There is ``signal_size`` input (5D input tensor, ``-1`` in ``signal_size``, unso There is ``signal_size`` input (5D input tensor, ``-1`` in ``signal_size``, unsorted axes, the second example): -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/signals/IDFT_7.md b/docs/ops/signals/IDFT_7.md index e4b4c39ad75596..830e80c1a0e12e 100644 --- a/docs/ops/signals/IDFT_7.md +++ b/docs/ops/signals/IDFT_7.md @@ -80,7 +80,8 @@ Calculations for the generic case of axes and signal sizes are similar. There is no ``signal_size`` input (4D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -106,7 +107,8 @@ There is no ``signal_size`` input (4D input tensor): There is no ``signal_size`` input (3D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -131,7 +133,8 @@ There is no ``signal_size`` input (3D input tensor): There is ``signal_size`` input (4D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -161,7 +164,8 @@ There is ``signal_size`` input (4D input tensor): There is ``signal_size`` input (3D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -189,7 +193,8 @@ There is ``signal_size`` input (3D input tensor): There is ``signal_size`` input (5D input tensor, ``-1`` in ``signal_size``, unsorted axes): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -221,7 +226,8 @@ There is ``signal_size`` input (5D input tensor, ``-1`` in ``signal_size``, unso There is ``signal_size`` input (5D input tensor, ``-1`` in ``signal_size``, unsorted axes, the second example): -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/signals/IRDFT_9.md b/docs/ops/signals/IRDFT_9.md index d6136f8471592c..e7305cd3a3c9ef 100644 --- a/docs/ops/signals/IRDFT_9.md +++ b/docs/ops/signals/IRDFT_9.md @@ -96,7 +96,8 @@ Calculations for the generic case of axes and signal sizes are similar. 
There is no ``signal_size`` input (4D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -121,7 +122,8 @@ There is no ``signal_size`` input (4D input tensor): There is no ``signal_size`` input (3D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -144,7 +146,8 @@ There is no ``signal_size`` input (3D input tensor): There is ``signal_size`` input (4D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -173,7 +176,8 @@ There is ``signal_size`` input (4D input tensor): There is ``signal_size`` input (3D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -200,7 +204,8 @@ There is ``signal_size`` input (3D input tensor): There is ``signal_size`` input (5D input tensor, ``-1`` in ``signal_size``, unsorted axes): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -230,7 +235,8 @@ There is ``signal_size`` input (5D input tensor, ``-1`` in ``signal_size``, unso There is ``signal_size`` input (5D input tensor, ``-1`` in ``signal_size``, unsorted axes, the second example): -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/signals/RDFT_9.md b/docs/ops/signals/RDFT_9.md index be87f5f555e1ab..2e8d57c56febf1 100644 --- a/docs/ops/signals/RDFT_9.md +++ b/docs/ops/signals/RDFT_9.md @@ -72,7 +72,8 @@ Calculations for the generic case of axes and signal sizes are similar. There is no ``signal_size`` input (3D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -97,7 +98,8 @@ There is no ``signal_size`` input (3D input tensor): There is no ``signal_size`` input (2D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -121,7 +123,8 @@ There is no ``signal_size`` input (2D input tensor): There is ``signal_size`` input (3D input tensor): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -148,7 +151,8 @@ There is ``signal_size`` input (3D input tensor): There is ``signal_size`` input (2D input tensor): -.. code-block:: cpp +.. 
code-block:: xml + :force: @@ -174,7 +178,8 @@ There is ``signal_size`` input (2D input tensor): There is ``signal_size`` input (4D input tensor, ``-1`` in ``signal_size``, unsorted axes): -.. code-block:: cpp +.. code-block:: xml + :force: @@ -203,7 +208,8 @@ There is ``signal_size`` input (4D input tensor, ``-1`` in ``signal_size``, unso There is ``signal_size`` input (4D input tensor, ``-1`` in ``signal_size``, unsorted axes, the second example): -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/docs/ops/sort/ExperimentalDetectronTopKROIs_6.md b/docs/ops/sort/ExperimentalDetectronTopKROIs_6.md index 5c0f0e40999333..8079b3457271bc 100644 --- a/docs/ops/sort/ExperimentalDetectronTopKROIs_6.md +++ b/docs/ops/sort/ExperimentalDetectronTopKROIs_6.md @@ -44,7 +44,8 @@ output tensor elements. **Example** -.. code-block:: cpp +.. code-block:: xml + :force: diff --git a/samples/c/hello_nv12_input_classification/README.md b/samples/c/hello_nv12_input_classification/README.md index c27625115e66b2..cdad2c595593fe 100644 --- a/samples/c/hello_nv12_input_classification/README.md +++ b/samples/c/hello_nv12_input_classification/README.md @@ -64,7 +64,7 @@ To run the sample, you need specify a model and image: The sample accepts an uncompressed image in the NV12 color format. To run the sample, you need to convert your BGR/RGB image to NV12. To do this, you can use one of the widely available tools such as FFmpeg\* or GStreamer\*. The following command shows how to convert an ordinary image into an uncompressed NV12 image using FFmpeg: -.. code-block:: bash +.. code-block:: sh ffmpeg -i cat.jpg -pix_fmt nv12 cat.yuv diff --git a/samples/cpp/benchmark_app/README.md b/samples/cpp/benchmark_app/README.md index 4a2a877de4508a..76a1697aec447f 100644 --- a/samples/cpp/benchmark_app/README.md +++ b/samples/cpp/benchmark_app/README.md @@ -153,7 +153,8 @@ Running the application with the ``-h`` or ``--help`` option yields the followin .. scrollbox:: - .. code-block:: + .. 
code-block:: sh + :force: [Step 1/11] Parsing and validating input arguments [ INFO ] Parsing input parameters diff --git a/samples/cpp/hello_nv12_input_classification/README.md b/samples/cpp/hello_nv12_input_classification/README.md index 31fb2820650a49..66640d58411e94 100644 --- a/samples/cpp/hello_nv12_input_classification/README.md +++ b/samples/cpp/hello_nv12_input_classification/README.md @@ -68,7 +68,7 @@ To run the sample, you need to specify a model and image: The sample accepts an uncompressed image in the NV12 color format. To run the sample, you need to convert your BGR/RGB image to NV12. To do this, you can use one of the widely available tools such as FFmpeg\* or GStreamer\*. The following command shows how to convert an ordinary image into an uncompressed NV12 image using FFmpeg: -.. code-block:: bash +.. code-block:: sh ffmpeg -i cat.jpg -pix_fmt nv12 car.yuv diff --git a/samples/python/speech_sample/README.md b/samples/python/speech_sample/README.md index b530e299b8d27b..1fa293bed19546 100644 --- a/samples/python/speech_sample/README.md +++ b/samples/python/speech_sample/README.md @@ -108,7 +108,7 @@ Running Run the application with the ``-h`` option to see the usage message: -.. code-block:: bash +.. code-block:: sh python speech_sample.py -h diff --git a/tools/pot/docs/BestPractices.md b/tools/pot/docs/BestPractices.md index 79f176ae7d3d4e..ab15576a06d8b8 100644 --- a/tools/pot/docs/BestPractices.md +++ b/tools/pot/docs/BestPractices.md @@ -22,7 +22,8 @@ Improving accuracy after the Default Quantization Parameters of the Default Quantization algorithm with basic settings are presented below: -.. code-block:: python +.. code-block:: py + :force: { "name": "DefaultQuantization", # Optimization algorithm name @@ -57,7 +58,8 @@ Below is a list of best practices that can be applied to improve accuracy withou 3. Some model architectures require a special approach when being quantized. 
For example, Transformer-based models need to keep some operations in the original precision to preserve accuracy. That is why POT provides a ``model_type`` option to specify the model architecture. Now, only ``"transformer"`` type is available. Use it to quantize Transformer-based models, e.g. BERT. 4. Another important option is a `range_estimator`. It defines how to calculate the minimum and maximum of quantization range for weights and activations. For example, the following ``range_estimator`` for activations can improve the accuracy for Faster R-CNN-based networks: - .. code-block:: python + .. code-block:: py + :force: { "name": "DefaultQuantization", @@ -86,7 +88,8 @@ Accuracy-aware Quantization When the steps above do not lead to the accurate quantized model, you may use the :doc:`Accuracy-aware Quantization ` algorithm which leads to mixed-precision models. A fragment of Accuracy-aware Quantization configuration with default settings is shown below: -.. code-block:: python +.. code-block:: py + :force: { "name": "AccuracyAwareQuantization", diff --git a/tools/pot/docs/DefaultQuantizationUsage.md b/tools/pot/docs/DefaultQuantizationUsage.md index 8be5242b2c7eed..674d08ba815fc3 100644 --- a/tools/pot/docs/DefaultQuantizationUsage.md +++ b/tools/pot/docs/DefaultQuantizationUsage.md @@ -75,7 +75,8 @@ Select quantization parameters Default Quantization algorithm has mandatory and optional parameters which are defined as a dictionary: -.. code-block:: python +.. code-block:: py + :force: { "name": "DefaultQuantization", @@ -105,7 +106,8 @@ POT API provides methods to load and save model objects from OpenVINO Intermedia An example code below shows a basic quantization workflow: -.. code-block:: python +.. 
code-block:: py + :force: from openvino.tools.pot import IEEngine from openvino.tools.pot import load_model, save_model diff --git a/tools/pot/docs/SimplifiedMode.md b/tools/pot/docs/SimplifiedMode.md index 56f63bc971c175..b7f7f944267832 100644 --- a/tools/pot/docs/SimplifiedMode.md +++ b/tools/pot/docs/SimplifiedMode.md @@ -20,7 +20,7 @@ To apply optimization when there is only a model and no data is available. It is Install Datumaro: -.. code-block:: bash +.. code-block:: sh pip install datumaro @@ -29,7 +29,7 @@ Create a synthetic dataset with elements of the specified type and shape, and sa Usage: -.. code-block:: bash +.. code-block:: sh datum generate [-h] -o OUTPUT_DIR -k COUNT --shape SHAPE [SHAPE ...] [-t {image}] [--overwrite] [--model-dir MODEL_PATH] @@ -37,7 +37,7 @@ Usage: Example of generating 300 images with height = 224 and width = 256 and saving them in the ``./dataset`` directory. -.. code-block:: bash +.. code-block:: sh datum generate -o ./dataset -k 300 --shape 224 256 diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md b/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md index 3d7cc0f9310800..e0d733dfd8e4a4 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md @@ -17,7 +17,7 @@ Since the :doc:`Default Quantization ` document. Below is an example of the definition of Default Quantization method and its parameters: -.. code-block:: python +.. 
code-block:: py + :force: { "name": "DefaultQuantization", # the name of optimization algorithm From 82c65c25da6e7bb63342b91cc37c40e4c29af1a0 Mon Sep 17 00:00:00 2001 From: Mateusz Mikolajczyk Date: Tue, 11 Jul 2023 11:00:50 +0200 Subject: [PATCH 17/21] [PT FE] Add aten::scatter and inplace for aten::sub translation (#18341) * Add sub inplace * Add scatter implementation * Remove debug var * Add tests for empty index * Add reduce support --------- Co-authored-by: Michal Lukaszewski --- src/frontends/pytorch/src/op/scatter.cpp | 77 +++++++++++++ src/frontends/pytorch/src/op_table.cpp | 4 + .../layer_tests/pytorch_tests/test_scatter.py | 108 ++++++++++++++++++ tests/layer_tests/pytorch_tests/test_sub.py | 25 +++- 4 files changed, 209 insertions(+), 5 deletions(-) create mode 100644 src/frontends/pytorch/src/op/scatter.cpp create mode 100644 tests/layer_tests/pytorch_tests/test_scatter.py diff --git a/src/frontends/pytorch/src/op/scatter.cpp b/src/frontends/pytorch/src/op/scatter.cpp new file mode 100644 index 00000000000000..87611d93fd7ba6 --- /dev/null +++ b/src/frontends/pytorch/src/op/scatter.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/scatter_elements_update.hpp" +#include "openvino/op/slice.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_scatter(const NodeContext& context) { + // Out-of-place schema + // aten::scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor: + // aten::scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor: + // aten::scatter.reduce(Tensor self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor: + // 
aten::scatter.value_reduce(Tensor self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor: + + // Inplace schema + // aten::scatter_.value(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!): + // aten::scatter_.src(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!): + // aten::scatter_.reduce(Tensor(a!) self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor(a!): + // aten::scatter_.value_reduce(Tensor(a!) self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor(a!): + num_inputs_check(context, 4, 5); + auto input = context.get_input(0); + auto dim = context.get_input(1); + auto index = context.mark_node(std::make_shared(context.get_input(2), element::i32)); + auto src = context.get_input(3); + + auto reduction = v12::ScatterElementsUpdate::Reduction::NONE; + auto input_num = context.get_input_size(); + if (input_num > 4 && !context.input_is_none(input_num - 1)) { + auto reduce_mode = context.const_input(input_num - 1); + if (reduce_mode == "add") { + reduction = v12::ScatterElementsUpdate::Reduction::SUM; + } else if (reduce_mode == "multiply") { + reduction = v12::ScatterElementsUpdate::Reduction::PROD; + } + } + auto src_partial_shape = src.get_partial_shape(); + auto index_shape_rank = get_shape_rank(context, index); + auto index_shape = std::get<0>(index_shape_rank); + auto index_rank = std::get<1>(index_shape_rank); + + // Source input can be either Tensor which should be passed in original shape or Scalar that should be broadcasted + // into shape of indices. + // TODO: Figure out way to dynamically broadcast scalar src only, without affecting Tensor src. Current + // implementation will fail if Scalar source would have dynamic rank. 
+ if (src_partial_shape.rank().is_static() && src_partial_shape.rank().get_length() == 0) { + src = context.mark_node(std::make_shared(src, index_shape)); + } + + auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); + auto zeros = context.mark_node(std::make_shared(const_0, index_rank)); + auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1})); + auto ones = context.mark_node(std::make_shared(const_1, index_rank)); + // In torch indices can be of different shape than source tensor. Create slice to trim source tensor to shape of + // indices. + auto src_pruned = context.mark_node(std::make_shared(src, zeros, index_shape, ones)); + + auto src_input_dtype = context.mark_node(std::make_shared(src_pruned, input)); + return { + context.mark_node(std::make_shared(input, index, src_input_dtype, dim, reduction))}; +}; + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 22320e930420b6..a1a526972d7cc0 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -119,6 +119,7 @@ OP_CONVERTER(translate_roll); OP_CONVERTER(translate_rsqrt); OP_CONVERTER(translate_rsub); OP_CONVERTER(translate_scaled_dot_product_attention); +OP_CONVERTER(translate_scatter); OP_CONVERTER(translate_select); OP_CONVERTER(translate_set_item); OP_CONVERTER(translate_selu); @@ -332,6 +333,8 @@ const std::map get_supported_ops() { {"aten::rsub", op::translate_rsub}, {"aten::ScalarImplicit", op::skip_node}, {"aten::scaled_dot_product_attention", op::translate_scaled_dot_product_attention}, + {"aten::scatter", op::translate_scatter}, + {"aten::scatter_", op::inplace_op}, {"aten::select", op::translate_select}, {"aten::selu", op::translate_selu}, {"aten::selu_", op::inplace_op}, @@ -352,6 +355,7 @@ const std::map get_supported_ops() { {"aten::square", op::translate_square}, 
{"aten::squeeze", op::translate_squeeze}, {"aten::sub", op::translate_sub}, + {"aten::sub_", op::inplace_op}, {"aten::sum", op::translate_sum}, {"aten::t", op::translate_t}, {"aten::t_", op::inplace_op}, diff --git a/tests/layer_tests/pytorch_tests/test_scatter.py b/tests/layer_tests/pytorch_tests/test_scatter.py new file mode 100644 index 00000000000000..4f4972b6e89a1a --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_scatter.py @@ -0,0 +1,108 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch +from pytorch_layer_test_class import PytorchLayerTest + + +class TestScatter(PytorchLayerTest): + def _prepare_input(self, dtype): + inp = np.random.randn(6, 6).astype(getattr(np, dtype)) + return (inp,) + + def create_model(self, dim, index, src, inplace, reduce): + class aten_scatter(torch.nn.Module): + def __init__(self, dim, index, src, inplace, reduce): + super(aten_scatter, self).__init__() + self.dim = dim + self.use_empty_index = False + if index is None: + self.use_empty_index = True + # Placeholder + self.index = torch.empty([1]) + else: + self.index = index + self.src = src + str_forward = "_forward" + if inplace: + str_forward += "_inplace" + else: + str_forward += "_out_of_place" + + if reduce: + self.reduce = reduce + str_forward += "_reduce" + self.forward = getattr(self, str_forward) + + def _forward_out_of_place(self, x: torch.Tensor): + if self.use_empty_index: + index = torch.empty([0, 0]) + else: + index = self.index + return torch.scatter(x, self.dim, index, self.src) + + def _forward_inplace(self, x: torch.Tensor): + if self.use_empty_index: + index = torch.empty([0, 0]) + else: + index = self.index + return x.scatter_(self.dim, index, self.src) + + def _forward_out_of_place_reduce(self, x: torch.Tensor): + if self.use_empty_index: + index = torch.empty([0, 0]) + else: + index = self.index + return torch.scatter(x, self.dim, index, self.src, 
reduce=self.reduce) + + def _forward_inplace_reduce(self, x: torch.Tensor): + if self.use_empty_index: + index = torch.empty([0, 0]) + else: + index = self.index + return x.scatter_(self.dim, index, self.src, reduce=self.reduce) + + ref_net = None + if inplace: + op_name = "aten::scatter_" + else: + op_name = "aten::scatter" + + return aten_scatter(dim, index, src, inplace, reduce), ref_net, op_name + + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("dim", [1, -1, 0]) + @pytest.mark.parametrize( + "index", + [ + None, # Empty tensor scenario. + torch.tensor([[0, 1, 2, 3]]), + torch.tensor([[0, 5], [4, 1], [2, 3]]), + ], + ) + @pytest.mark.parametrize("src", [torch.arange(1, 26).reshape(5, 5), 1]) + @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("reduce", [None, "add", "multiply"]) + def test_scatter(self, dim, index, src, dtype, inplace, reduce, ie_device, precision, ir_version): + if isinstance(src, torch.Tensor): + src = src.to(getattr(torch, dtype)) + freeze = True + if index is None: + # Freeze creates empty constant tensor which isn't supported by OV. + freeze = False + if (not freeze) and reduce: + pytest.skip( + "Cannot test reduce parameters with empty indexes due to issues with empty constant tensor or issues with prim::GetAttr str inputs." 
+ ) + self._test( + *self.create_model(dim, index, src, inplace, reduce), + ie_device, + precision, + ir_version, + kwargs_to_prepare_input={"dtype": dtype}, + freeze_model=freeze + ) diff --git a/tests/layer_tests/pytorch_tests/test_sub.py b/tests/layer_tests/pytorch_tests/test_sub.py index 84244accc3b546..aa97b0f23653fb 100644 --- a/tests/layer_tests/pytorch_tests/test_sub.py +++ b/tests/layer_tests/pytorch_tests/test_sub.py @@ -12,16 +12,30 @@ class TestSub(PytorchLayerTest): def _prepare_input(self): return self.input_data - def create_model(self): + def create_model(self, inplace): class aten_sub(torch.nn.Module): + def __init__(self, inplace) -> None: + super().__init__() + if inplace: + self.forward = self._forward_inplace + else: + self.forward = self._forward_out_of_place - def forward(self, x, y, alpha: float): + def _forward_out_of_place(self, x, y, alpha: float): return torch.sub(x, y, alpha=alpha) + def _forward_inplace(self, x, y, alpha: float): + return x.sub_(y, alpha=alpha) + ref_net = None - return aten_sub(), ref_net, "aten::sub" + if inplace: + op_name = "aten::sub_" + else: + op_name = "aten::sub" + + return aten_sub(inplace), ref_net, op_name @pytest.mark.parametrize('input_data', [(np.random.randn(2, 3, 4).astype(np.float32), np.random.randn( @@ -31,11 +45,12 @@ def forward(self, x, y, alpha: float): np.random.randn( 1, 2, 3).astype(np.float32), np.random.randn(1)), ]) + @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.nightly @pytest.mark.precommit - def test_sub(self, ie_device, precision, ir_version, input_data): + def test_sub(self, ie_device, precision, ir_version, input_data, inplace): self.input_data = input_data - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(inplace), ie_device, precision, ir_version) class TestSubTypes(PytorchLayerTest): From 0296008c7ef58914e0786fb75b8e0a7ef8440ea0 Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Tue, 11 Jul 2023 11:02:28 +0200 
Subject: [PATCH 18/21] =?UTF-8?q?Add=20explicit=20converts=20for=20Paramet?= =?UTF-8?q?er=20and=20Result=20in=20ConvertPrecision=20tr=E2=80=A6=20(#181?= =?UTF-8?q?83)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add explicit converts for Parameter and Result in ConvertPrecision transformation * set friendly name for convert on output * tests --- .../transformations/convert_precision.hpp | 13 +- .../src/transformations/convert_precision.cpp | 93 +++++- .../tests/utils/convert_precision.cpp | 295 ++++++++++++++++++ 3 files changed, 385 insertions(+), 16 deletions(-) diff --git a/src/common/transformations/include/transformations/convert_precision.hpp b/src/common/transformations/include/transformations/convert_precision.hpp index 910fceed47f00c..81b218e3172ae7 100644 --- a/src/common/transformations/include/transformations/convert_precision.hpp +++ b/src/common/transformations/include/transformations/convert_precision.hpp @@ -81,17 +81,21 @@ class ov::pass::ConvertPrecision : public ov::pass::ModelPass { ConvertPrecision(ov::element::Type_t from, ov::element::Type_t to, type_to_fuse_map additional_type_to_fuse_map = {}, - bool keep_precision_sensitive_in_fp32 = false) + bool keep_precision_sensitive_in_fp32 = false, + bool convert_input_output_precision = true) : m_precisions(precisions_map{{from, to}}), m_additional_type_to_fuse_map(additional_type_to_fuse_map), - m_keep_precision_sensitive_in_fp32(keep_precision_sensitive_in_fp32) {} + m_keep_precision_sensitive_in_fp32(keep_precision_sensitive_in_fp32), + m_convert_input_output_precision(convert_input_output_precision) {} ConvertPrecision(const precisions_map& precisions, const type_to_fuse_map& additional_type_to_fuse_map = {}, - bool keep_precision_sensitive_in_fp32 = false) + bool keep_precision_sensitive_in_fp32 = false, + bool convert_input_output_precision = true) : m_precisions(precisions), m_additional_type_to_fuse_map(additional_type_to_fuse_map), - 
m_keep_precision_sensitive_in_fp32(keep_precision_sensitive_in_fp32) {} + m_keep_precision_sensitive_in_fp32(keep_precision_sensitive_in_fp32), + m_convert_input_output_precision(convert_input_output_precision) {} bool run_on_model(const std::shared_ptr& m) override; @@ -99,4 +103,5 @@ class ov::pass::ConvertPrecision : public ov::pass::ModelPass { precisions_map m_precisions; type_to_fuse_map m_additional_type_to_fuse_map; bool m_keep_precision_sensitive_in_fp32; + bool m_convert_input_output_precision; }; diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 9b785432e77cd0..b3b474c3b989ed 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -27,9 +27,14 @@ #include "transformations/rt_info/decompression.hpp" #include "transformations/rt_info/disable_fp16_compression.hpp" #include "transformations/rt_info/keep_fp16_const.hpp" +#include "transformations/utils/utils.hpp" using namespace ov; +bool fuse_type_to_parameter(const std::shared_ptr& node, + const precisions_map& precisions, + bool convert_input_precision); + bool fuse_type_to_constant(const std::shared_ptr& node, const precisions_map& precisions, const std::vector>& consumers); @@ -39,7 +44,6 @@ bool fuse_type_to_random_uniform_v8(const std::shared_ptr& node, c bool fuse_type_to_unique_v10(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_range_v4(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_eye_v9(const std::shared_ptr& node, const precisions_map& precisions); -bool fuse_type_to_parameter(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_nms3(const std::shared_ptr& node, const precisions_map& precisions); bool 
fuse_type_to_nms4(const std::shared_ptr& node, const precisions_map& precisions); @@ -147,9 +151,11 @@ bool convert_node_output_precision( if (t2f_it != type_to_fuse.end()) { node_changed = t2f_it->second(node, precisions); } + if ((function_changed || node_changed) && !node_is_replaced(node)) { node->revalidate_and_infer_types(); } + return node_changed; } @@ -173,20 +179,33 @@ bool convert_function_precision( bool has_fp16_compression, bool skip_precision_sensitive, bool is_changed, - bool is_subgraph) { + bool is_subgraph, + bool convert_input_output_precision) { bool is_output_precision_changed = false; - auto ops = f->get_ordered_ops(); + ov::element::TypeVector orig_result_types; + if (!convert_input_output_precision) { + const auto& results = f->get_results(); + orig_result_types.reserve(results.size()); + for (const auto& result : results) { + orig_result_types.push_back(result->get_input_element_type(0)); + } + } // Iterate over all nodes in topological order and then iterate over node outputs. // If output type mismatch given type we try to fuse type into this operation // otherwise we insert Convert operation. 
+ auto ops = f->get_ordered_ops(); for (auto& node : ops) { if (skip_precision_sensitive && fp16_compression_is_disabled(node) && has_fp16_compression) continue; is_changed |= convert_node_input_precision(node, precisions, type_to_extend); } + for (const auto& param : f->get_parameters()) { + is_changed |= fuse_type_to_parameter(param, precisions, convert_input_output_precision); + } + if (is_changed) ops = f->get_ordered_ops(); @@ -221,6 +240,7 @@ bool convert_function_precision( has_fp16_compression, skip_precision_sensitive, is_changed || is_output_precision_changed, + true, true); } } @@ -252,6 +272,37 @@ bool convert_function_precision( } } + if (is_changed && !convert_input_output_precision) { + auto& results = f->get_results(); + for (size_t i = 0; i < results.size(); i++) { + auto& result = results[i]; + if (result->get_input_element_type(0) != orig_result_types[i]) { + auto result_input = result->input_value(0); + const auto convert = std::make_shared(result_input, orig_result_types[i]); + if (result_input.get_node()->get_output_size() > 1) { + convert->set_friendly_name(result_input.get_node()->get_friendly_name() + "." 
+ + std::to_string(result_input.get_index())); + } else { + convert->set_friendly_name(result_input.get_node()->get_friendly_name()); + result_input.get_node()->set_friendly_name(""); + } + + auto& convert_output_tensor = convert->get_output_tensor(0); + convert_output_tensor.set_names(result_input.get_names()); + OPENVINO_SUPPRESS_DEPRECATED_START + const auto& legacy_name = ov::descriptor::get_ov_tensor_legacy_name(result_input.get_tensor()); + if (!legacy_name.empty()) { + ov::descriptor::set_ov_tensor_legacy_name(convert_output_tensor, legacy_name); + } + OPENVINO_SUPPRESS_DEPRECATED_END + + result_input.set_names({}); + result->input(0).replace_source_output(convert->output(0)); + result->revalidate_and_infer_types(); + } + } + } + return is_changed; } @@ -261,7 +312,8 @@ bool convert_precision(ov::pass::PassBase& pass, const type_to_fuse_map& type_to_extend, const precisions_map& precisions, bool has_fp16_compression, - bool skip_precision_sensitive = false) { + bool skip_precision_sensitive, + bool convert_input_output_precision) { // As Constant operations can be shared between multiple nGraph Functions so before // changing precision we need to understand which Constant consumers belongs // to the current nGraph Function @@ -274,7 +326,8 @@ bool convert_precision(ov::pass::PassBase& pass, has_fp16_compression, skip_precision_sensitive, false, - false); + false, + convert_input_output_precision); } using precisions_set_t = std::unordered_set; @@ -324,7 +377,6 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& node, const precis return false; } -bool fuse_type_to_parameter(const std::shared_ptr& node, const precisions_map& precisions) { +bool fuse_type_to_parameter(const std::shared_ptr& node, + const precisions_map& precisions, + bool convert_input_precision) { auto it = precisions.find(node->get_output_element_type(0)); if (it == precisions.end()) return false; + bool changed = false; const auto& to = it->second; if (auto param = 
ov::as_type_ptr(node)) { - param->set_element_type(to); - param->validate_and_infer_types(); - return true; + if (convert_input_precision) { + param->set_element_type(to); + param->validate_and_infer_types(); + changed = true; + } else { + auto param_consumers = param->output(0).get_target_inputs(); + auto convert = std::make_shared(param, to); + for (auto& input : param_consumers) { + const auto consumer = input.get_node(); + if (ov::is_type(consumer) || ov::is_type(consumer)) { + continue; + } + input.replace_source_output(convert); + changed = true; + } + } } - return false; + return changed; } bool fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions) { diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 8f1c3d01d14899..472eab49ce3c46 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -1840,3 +1840,298 @@ TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_2) { FunctionsComparator::Result result = func_comparator(model_ref, model); ASSERT_TRUE(result.valid) << result.message; } + +TEST(TransformationTests, ConvertPrecisionExplicitConvertsForParameterAndResult) { + shared_ptr model, model_ref; + pass::Manager manager; + { + auto param_1 = make_shared(element::f64, Shape{3}); + auto sin = make_shared(param_1); + sin->set_friendly_name("sine"); + sin->get_output_tensor(0).add_names({"sine:0"}); + auto result_sin = make_shared(sin); + model = make_shared(result_sin, ParameterVector{param_1}); + + type_to_fuse_map empty_type_to_fuse_map = {}; + bool keep_precision_sensitive_in_fp32 = false; + bool convert_input_output_precision = false; + manager.register_pass(precisions_map{{element::f64, element::f32}}, + empty_type_to_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision); + manager.run_passes(model); + } + + 
{ + auto param_1 = make_shared(element::f64, Shape{3}); + auto converted_param = make_shared(param_1, element::f32); + auto sin = make_shared(converted_param); + auto converted_sin = make_shared(sin, element::f64); + converted_sin->get_output_tensor(0).add_names({"sine:0"}); + auto result_sin = make_shared(converted_sin); + model_ref = make_shared(result_sin, ParameterVector{param_1}); + } + + const FunctionsComparator func_comparator = FunctionsComparator::with_default(); + FunctionsComparator::Result result = func_comparator(model_ref, model); + ASSERT_TRUE(result.valid) << result.message; + + const auto& results = model->get_results(); + ASSERT_EQ("sine", results[0]->get_input_node_ptr(0)->get_friendly_name()); +} + +TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiParam) { + shared_ptr model, model_ref; + pass::Manager manager; + { + auto param_1 = make_shared(element::f64, Shape{3}); + auto convert_1 = make_shared(param_1, element::f32); + + auto param_2 = make_shared(element::f64, Shape{3}); + auto convert_2 = make_shared(param_2, element::i64); + + auto param_3 = make_shared(element::f64, Shape{3}); + auto param_4 = make_shared(element::i64, Shape{3}); + + auto add = make_shared(convert_2, param_4); + auto mul = make_shared(param_1, param_3); + auto sin = make_shared(convert_1); + + add->set_friendly_name("add"); + add->get_output_tensor(0).add_names({"add:0"}); + mul->set_friendly_name("mul"); + mul->get_output_tensor(0).add_names({"mul:0"}); + sin->set_friendly_name("sine"); + sin->get_output_tensor(0).add_names({"sine:0"}); + + auto result_add = make_shared(add); + auto result_mul = make_shared(mul); + auto result_sin = make_shared(sin); + + model = make_shared(ResultVector{result_add, result_mul, result_sin}, + ParameterVector{param_1, param_2, param_3, param_4}); + + type_to_fuse_map empty_type_to_fuse_map = {}; + bool keep_precision_sensitive_in_fp32 = false; + bool convert_input_output_precision = false; + manager.register_pass( + 
precisions_map{{element::f64, element::f32}, {element::i64, element::i32}}, + empty_type_to_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision); + manager.run_passes(model); + } + + { + auto param_1 = make_shared(element::f64, Shape{3}); + auto convert_1 = make_shared(param_1, element::f32); + + auto param_2 = make_shared(element::f64, Shape{3}); + auto convert_2 = make_shared(param_2, element::i32); + + auto param_3 = make_shared(element::f64, Shape{3}); + auto convert_3 = make_shared(param_3, element::f32); + auto param_4 = make_shared(element::i64, Shape{3}); + auto convert_4 = make_shared(param_4, element::i32); + + auto add = make_shared(convert_2, convert_4); + auto converted_add = make_shared(add, element::i64); + auto convert_1_2 = make_shared(param_1, element::f32); + auto mul = make_shared(convert_1_2, convert_3); + auto converted_mul = make_shared(mul, element::f64); + auto sin = make_shared(convert_1); + + converted_add->get_output_tensor(0).add_names({"add:0"}); + converted_mul->get_output_tensor(0).add_names({"mul:0"}); + sin->get_output_tensor(0).add_names({"sine:0"}); + + auto result_add = make_shared(converted_add); + auto result_mul = make_shared(converted_mul); + auto result_sin = make_shared(sin); + + model_ref = make_shared(ResultVector{result_add, result_mul, result_sin}, + ParameterVector{param_1, param_2, param_3, param_4}); + } + + const FunctionsComparator func_comparator = FunctionsComparator::with_default(); + FunctionsComparator::Result result = func_comparator(model_ref, model); + ASSERT_TRUE(result.valid) << result.message; + + const auto& results = model->get_results(); + ASSERT_EQ("add", results[0]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("mul", results[1]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("sine", results[2]->get_input_node_ptr(0)->get_friendly_name()); +} + +TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutputs) { + shared_ptr model, 
model_ref; + pass::Manager manager; + { + auto param_1 = make_shared(element::f64, Shape{3}); + auto axis = opset10::Constant::create(element::i32, Shape{}, {0}); + auto split = make_shared(param_1, axis, 3); + split->set_friendly_name("split"); + split->get_output_tensor(0).add_names({"split:0"}); + split->get_output_tensor(1).add_names({"split:1"}); + split->get_output_tensor(2).add_names({"split:2"}); + OPENVINO_SUPPRESS_DEPRECATED_START + ov::descriptor::set_ov_tensor_legacy_name(split->get_output_tensor(0), "legacy_split:0"); + ov::descriptor::set_ov_tensor_legacy_name(split->get_output_tensor(1), "legacy_split:1"); + ov::descriptor::set_ov_tensor_legacy_name(split->get_output_tensor(2), "legacy_split:2"); + OPENVINO_SUPPRESS_DEPRECATED_END + model = make_shared(split->outputs(), ParameterVector{param_1}); + + type_to_fuse_map empty_type_to_fuse_map = {}; + bool keep_precision_sensitive_in_fp32 = false; + bool convert_input_output_precision = false; + manager.register_pass(precisions_map{{element::f64, element::f32}}, + empty_type_to_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision); + manager.run_passes(model); + } + + { + auto param_1 = make_shared(element::f64, Shape{3}); + auto convert_1 = make_shared(param_1, element::f32); + auto axis = opset10::Constant::create(element::i32, Shape{}, {0}); + auto split = make_shared(convert_1, axis, 3); + + auto convert_split_0 = make_shared(split->output(0), element::f64); + auto convert_split_1 = make_shared(split->output(1), element::f64); + auto convert_split_2 = make_shared(split->output(2), element::f64); + convert_split_0->get_output_tensor(0).add_names({"split:0"}); + convert_split_1->get_output_tensor(0).add_names({"split:1"}); + convert_split_2->get_output_tensor(0).add_names({"split:2"}); + model_ref = + make_shared(NodeVector{convert_split_0, convert_split_1, convert_split_2}, ParameterVector{param_1}); + } + + const FunctionsComparator func_comparator = 
FunctionsComparator::with_default(); + FunctionsComparator::Result result = func_comparator(model_ref, model); + ASSERT_TRUE(result.valid) << result.message; + + const auto& results = model->get_results(); + ASSERT_EQ("split.0", results[0]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("split.1", results[1]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("split.2", results[2]->get_input_node_ptr(0)->get_friendly_name()); + OPENVINO_SUPPRESS_DEPRECATED_START + ASSERT_EQ("legacy_split:0", ov::descriptor::get_ov_tensor_legacy_name(results[0]->get_input_tensor(0))); + ASSERT_EQ("legacy_split:1", ov::descriptor::get_ov_tensor_legacy_name(results[1]->get_input_tensor(0))); + ASSERT_EQ("legacy_split:2", ov::descriptor::get_ov_tensor_legacy_name(results[2]->get_input_tensor(0))); + OPENVINO_SUPPRESS_DEPRECATED_END +} + +TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiSubgraphs) { + shared_ptr model, model_ref; + pass::Manager manager; + { + auto cond = make_shared(element::boolean, Shape{}); + auto param_1 = make_shared(element::f64, Shape{3}); + auto param_2 = make_shared(element::f64, Shape{3}); + + auto if_op = make_shared(cond); + + auto param_1_then = make_shared(element::f64, Shape{3}); + auto param_2_then = make_shared(element::f64, Shape{3}); + auto add = make_shared(param_1_then, param_2_then); + auto result_then = make_shared(add); + auto then_body = make_shared(result_then, ParameterVector{param_1_then, param_2_then}); + + auto param_1_else = make_shared(element::f64, Shape{3}); + auto param_2_else = make_shared(element::f64, Shape{3}); + + auto trip_count = op::v0::Constant::create(element::i32, Shape{}, {2}); + auto term_cond = op::v0::Constant::create(element::boolean, Shape{}, {true}); + auto loop = make_shared(trip_count, term_cond); + + auto param_1_loop = make_shared(element::f64, Shape{3}); + auto param_2_loop = make_shared(element::f64, Shape{3}); + auto mul = make_shared(param_1_loop, param_2_loop); + auto result_mul 
= make_shared(mul); + auto result_cond = make_shared(term_cond); + auto loop_body = + make_shared(ResultVector{result_cond, result_mul}, ParameterVector{param_1_loop, param_2_loop}); + + loop->set_function(loop_body); + loop->set_special_body_ports({-1, 0}); + loop->set_merged_input(param_1_loop, param_1_else, result_mul); + + auto result_else = make_shared(loop->get_iter_value(result_mul)); + auto else_body = make_shared(result_else, ParameterVector{param_1_else, param_2_else}); + + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(param_1, param_1_then, param_1_else); + if_op->set_input(param_2, param_2_then, param_2_else); + auto result = if_op->set_output(result_then, result_else); + + result.get_node()->set_friendly_name("if_result"); + result.add_names({"if_result:0"}); + model = make_shared(OutputVector{result}, ParameterVector{cond, param_1, param_2}); + + type_to_fuse_map empty_type_to_fuse_map = {}; + bool keep_precision_sensitive_in_fp32 = false; + bool convert_input_output_precision = false; + manager.register_pass(precisions_map{{element::f64, element::f32}}, + empty_type_to_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision); + manager.run_passes(model); + } + + { + auto cond = make_shared(element::boolean, Shape{}); + auto param_1 = make_shared(element::f64, Shape{3}); + auto param_2 = make_shared(element::f64, Shape{3}); + + auto if_op = make_shared(cond); + + auto param_1_then = make_shared(element::f32, Shape{3}); + auto param_2_then = make_shared(element::f32, Shape{3}); + auto add = make_shared(param_1_then, param_2_then); + auto result_then = make_shared(add); + auto then_body = make_shared(result_then, ParameterVector{param_1_then, param_2_then}); + + auto param_1_else = make_shared(element::f32, Shape{3}); + auto param_2_else = make_shared(element::f32, Shape{3}); + + auto trip_count = op::v0::Constant::create(element::i32, Shape{}, {2}); + auto term_cond = 
op::v0::Constant::create(element::boolean, Shape{}, {true}); + auto loop = make_shared(trip_count, term_cond); + + auto param_1_loop = make_shared(element::f32, Shape{3}); + auto param_2_loop = make_shared(element::f32, Shape{3}); + auto mul = make_shared(param_1_loop, param_2_loop); + auto result_mul = make_shared(mul); + auto result_cond = make_shared(term_cond); + auto loop_body = + make_shared(ResultVector{result_cond, result_mul}, ParameterVector{param_1_loop, param_2_loop}); + + loop->set_function(loop_body); + loop->set_special_body_ports({-1, 0}); + loop->set_merged_input(param_1_loop, param_1_else, result_mul); + + auto result_else = make_shared(loop->get_iter_value(result_mul)); + auto else_body = make_shared(result_else, ParameterVector{param_1_else, param_2_else}); + + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + auto convert_1 = make_shared(param_1, element::f32); + auto convert_2 = make_shared(param_2, element::f32); + if_op->set_input(convert_1, param_1_then, param_1_else); + if_op->set_input(convert_2, param_2_then, param_2_else); + auto result = if_op->set_output(result_then, result_else); + auto converted_result = make_shared(result, element::f64); + converted_result->get_output_tensor(0).add_names({"if_result:0"}); + + model_ref = make_shared(converted_result, ParameterVector{cond, param_1, param_2}); + } + + const FunctionsComparator func_comparator = FunctionsComparator::with_default(); + FunctionsComparator::Result result = func_comparator(model_ref, model); + ASSERT_TRUE(result.valid) << result.message; + + const auto& results = model->get_results(); + ASSERT_EQ("if_result", results[0]->get_input_node_ptr(0)->get_friendly_name()); +} From 521c846b72e4966c977b486734c088da763422eb Mon Sep 17 00:00:00 2001 From: Marcin Kacprzak Date: Tue, 11 Jul 2023 11:32:49 +0200 Subject: [PATCH 19/21] [GNA] Fix for concat requantization problem (#18008) * [GNA] Fix for concat requantization problem * [GNA] Review fixes * [GNA] 
Reverted `using namespace std` * [GNA] Updated transformation description --- .../intel_gna/src/common/graph_utils.hpp | 15 +- .../src/gna_transformations_pipeline.cpp | 1 + .../src/transformations/insert_copy_layer.cpp | 5 +- .../transformations/insert_identity_layer.cpp | 123 +++- .../transformations/insert_identity_layer.hpp | 75 +++ .../gna_insert_identity_layer.cpp | 537 ++++++++++-------- 6 files changed, 493 insertions(+), 263 deletions(-) diff --git a/src/plugins/intel_gna/src/common/graph_utils.hpp b/src/plugins/intel_gna/src/common/graph_utils.hpp index 194c313e361d2e..f7949c70ead5e2 100644 --- a/src/plugins/intel_gna/src/common/graph_utils.hpp +++ b/src/plugins/intel_gna/src/common/graph_utils.hpp @@ -202,6 +202,18 @@ inline bool is_pooling(const std::shared_ptr& node) { std::dynamic_pointer_cast(node) != nullptr); } +inline bool is_concat(const std::shared_ptr& node) { + return (std::dynamic_pointer_cast(node) != nullptr); +} + +inline bool is_fake_quantize(const std::shared_ptr& node) { + return (std::dynamic_pointer_cast(node) != nullptr); +} + +inline bool is_read_value(const std::shared_ptr& node) { + return (std::dynamic_pointer_cast(node) != nullptr); +} + template inline bool is_Tbit_fq(const std::shared_ptr& node) { auto fq_node = std::dynamic_pointer_cast(node); @@ -275,8 +287,7 @@ inline bool is_interleaved(const std::shared_ptr& node) { inline bool is_gna_precision_agnostic(std::shared_ptr node) { return ((std::dynamic_pointer_cast(node) != nullptr) || (std::dynamic_pointer_cast(node) != nullptr) || - (std::dynamic_pointer_cast(node) != nullptr) || - (std::dynamic_pointer_cast(node) != nullptr) || + (std::dynamic_pointer_cast(node) != nullptr) || is_concat(node) || (std::dynamic_pointer_cast(node) != nullptr) || (std::dynamic_pointer_cast(node) != nullptr) || (std::dynamic_pointer_cast(node) != nullptr) || diff --git a/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp b/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp index 
a643a80feb4ca5..8f382d4c333bfd 100644 --- a/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp +++ b/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp @@ -176,6 +176,7 @@ void TransformationsPipeline::apply(const std::shared_ptr& model, manager.register_pass(config.gnaFlags.input_low_precision); manager.register_pass(); manager.register_pass(); + manager.register_pass(); // Breaks fusing of layers before result manager.register_pass(); if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) { diff --git a/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp b/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp index db2802f53106d4..285d58e8ef7a9d 100644 --- a/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp +++ b/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp @@ -157,8 +157,7 @@ InsertCopyBeforeAssignLayer::InsertCopyBeforeAssignLayer() { // Crop -> Memory, Input -> Split -> Memory, Concat -> Memory if ((std::dynamic_pointer_cast(current_node) && !is_crop_affined(current_node)) || - std::dynamic_pointer_cast(current_node) || - std::dynamic_pointer_cast(current_node) || + is_concat(current_node) || std::dynamic_pointer_cast(current_node) || std::dynamic_pointer_cast(current_node)) { insert_copy_layer_between(matched_node_input, node, i); } @@ -281,7 +280,7 @@ bool HandleMultiConnectedLayerToConcatAndMemory::run_on_model(const std::shared_ for (const auto& child_info : children_info) { auto child = std::get<1>(child_info); - if (std::dynamic_pointer_cast(child)) { + if (is_concat(child)) { concat_nodes.push_back(child_info); } else if (std::dynamic_pointer_cast(child) || std::dynamic_pointer_cast(child)) { diff --git a/src/plugins/intel_gna/src/transformations/insert_identity_layer.cpp b/src/plugins/intel_gna/src/transformations/insert_identity_layer.cpp index 5ed4c6acee3fb2..631e8821054eca 100644 --- a/src/plugins/intel_gna/src/transformations/insert_identity_layer.cpp +++ 
b/src/plugins/intel_gna/src/transformations/insert_identity_layer.cpp @@ -4,6 +4,7 @@ #include "transformations/insert_identity_layer.hpp" #include +#include #include #include #include @@ -20,16 +21,18 @@ using namespace ov::intel_gna; using namespace ov::intel_gna::pass; using namespace ov::intel_gna::rt_info; using namespace ov::intel_gna::graph_utils; +using namespace ov::pass::pattern; +using namespace ov::op::util; namespace { -void mark_for_identity_insertion(std::shared_ptr node, size_t input_index) { +void mark_for_identity_insertion(std::shared_ptr node, size_t input_index) { log::debug() << "Mark input as candidate for identity insertion " << input_index << ":" << node->get_friendly_name() << std::endl; auto input = node->input(input_index); add_precision_change_flag(input, ov::element::i32, ov::element::i16); } -std::shared_ptr create_indentity(std::shared_ptr& input_op) { +std::shared_ptr create_indentity(std::shared_ptr& input_op) { auto identity_op = std::make_shared(input_op); // Keep name of previous operation identity_op->set_friendly_name(input_op->get_friendly_name()); @@ -38,7 +41,7 @@ std::shared_ptr create_indentity(std::shared_ptr& input_op, size_t index) { +void insert_identity_layer_after(std::shared_ptr& input_op, size_t index) { NGRAPH_CHECK(input_op); log::debug() << "Insert identity layer after " << input_op->get_friendly_name() << " (" << input_op->get_type_name() @@ -51,8 +54,8 @@ void insert_identity_layer_after(std::shared_ptr& input_op, size_t } } -void insert_identity_layer_between(std::shared_ptr& input_op, - std::shared_ptr& output_op, +void insert_identity_layer_between(std::shared_ptr& input_op, + std::shared_ptr& output_op, size_t index) { NGRAPH_CHECK(input_op); NGRAPH_CHECK(output_op); @@ -76,9 +79,7 @@ bool process_next_node(std::shared_ptr& prev_node, const std::shared_ptr& node, const size_t input_index) { // Check whether node is going to be skipped - bool to_be_skipped = - (is_gna_precision_agnostic(node) && 
!std::dynamic_pointer_cast(node)) || - is_pooling(node); + bool to_be_skipped = (is_gna_precision_agnostic(node) && !is_concat(node)) || is_pooling(node); if (to_be_skipped) { // if it is pooling, update previous node, since activation // should be inserted after the pooling @@ -135,7 +136,7 @@ bool walk_through_the_outputs(std::shared_ptr& prev_node, bool MarkIdentityCandidates::run_on_model(const std::shared_ptr& m) { RUN_ON_FUNCTION_SCOPE(MarkIdentityCandidates); for (auto& node : m->get_ordered_ops()) { - auto check_previos_node_and_mark = [&node]() { + auto check_previous_node_and_mark = [&node]() { for (size_t i = 0; i < node->get_input_size(); i++) { auto prev_node = node->get_input_node_shared_ptr(i); prev_node = get_prev_node_skipping_certain(prev_node, is_gna_precision_agnostic); @@ -166,14 +167,14 @@ bool MarkIdentityCandidates::run_on_model(const std::shared_ptr& m) { mark_for_identity_insertion(node, 1); } } - } else if (std::dynamic_pointer_cast(node) != nullptr) { - check_previos_node_and_mark(); + } else if (is_concat(node)) { + check_previous_node_and_mark(); } else { if (is_gna_precision_agnostic(node) || has_32bit_input(node) || ngraph::op::is_parameter(node) || ngraph::op::is_constant(node) || ngraph::op::is_output(node) || ngraph::op::is_sink(node)) { continue; } - check_previos_node_and_mark(); + check_previous_node_and_mark(); } } return false; @@ -184,7 +185,7 @@ BreakFusingOfOutputLayers::BreakFusingOfOutputLayers() { auto result_op = ngraph::pattern::wrap_type({ngraph::pattern::any_input()}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + ngraph::matcher_pass_callback callback = [=](Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); auto result_node = pattern_map.at(result_op).get_node_shared_ptr(); auto input_node = result_node->get_input_node_shared_ptr(0); @@ -201,7 +202,7 @@ BreakFusingOfOutputLayers::BreakFusingOfOutputLayers() { return false; }; - auto m = std::make_shared(result_op, 
matcher_name); + auto m = std::make_shared(result_op, matcher_name); this->register_matcher(m, callback); } @@ -231,3 +232,97 @@ bool IdentityCandidatesCleanup::run_on_model(const std::shared_ptr& f } return false; } + +bool InsertIdentityForPrecAgnosticConcatInput::are_all_inputs_pointing_the_same_node( + const std::shared_ptr& node) { + const auto& inputs = node->inputs(); + const auto& input0 = inputs[0].get_tensor_ptr(); + return all_of(inputs.begin(), inputs.end(), [&](const ov::Input& in) { + return in.get_tensor_ptr() == input0; + }); +} + +size_t InsertIdentityForPrecAgnosticConcatInput::find_prev_layer_output_index(const std::shared_ptr& prev, + const std::shared_ptr& next) { + for (const auto& output : prev->outputs()) { + for (const auto& input : output.get_target_inputs()) { + if (input.get_node() == next.get()) { + return output.get_index(); + } + } + } + THROW_GNA_EXCEPTION << "Output not found"; +} + +bool InsertIdentityForPrecAgnosticConcatInput::insert_identity_after_nodes( + const std::vector>& nodes, + const std::shared_ptr& next) { + for (auto node : nodes) { + size_t index = find_prev_layer_output_index(node, next); + insert_identity_layer_after(node, index); + } + return nodes.size() > 0; +} + +bool InsertIdentityForPrecAgnosticConcatInput::has_fq_on_any_input(const std::shared_ptr concat_node) { + auto is_not_fq = [](std::shared_ptr node) -> bool { + return !is_parameter(node) && !is_constant(node) && !is_fake_quantize(node); + }; + for (size_t i = 0; i < concat_node->get_input_size(); i++) { + auto concat_input_node = concat_node->get_input_node_shared_ptr(i); + auto prev_node = get_prev_node_skipping_certain(concat_input_node, is_not_fq); + if (!is_parameter(prev_node) && !is_constant(prev_node)) { + return true; + } + } + return false; +} + +std::vector> InsertIdentityForPrecAgnosticConcatInput::get_nodes_for_identity_insertion( + const std::shared_ptr& concat_node) { + auto not_able_to_hold_sf = [](std::shared_ptr node) -> bool { + return 
is_gna_precision_agnostic(node) || is_fake_quantize(node) || is_read_value(node); + }; + + std::vector> nodes; + for (size_t i = 0; i < concat_node->get_input_size(); i++) { + auto concat_input_node = concat_node->get_input_node_shared_ptr(i); + auto prev_node = get_prev_node_skipping_certain(concat_input_node, not_able_to_hold_sf); + if ((is_parameter(prev_node) || is_constant(prev_node)) && (!is_parameter(concat_input_node))) { + nodes.emplace_back(concat_input_node); + } + } + return nodes; +} + +bool InsertIdentityForPrecAgnosticConcatInput::insert_identity_for_prec_agnostic_concat_inputs( + const std::shared_ptr& concat_node) { + if (are_all_inputs_pointing_the_same_node(concat_node)) { + return false; + } + + if (!has_fq_on_any_input(concat_node)) { + return false; + } + + const auto& nodes = get_nodes_for_identity_insertion(concat_node); + + // Skip adding Identity if none of inputs can hold scale factors + bool no_input_can_hold_sf = concat_node->get_input_size() == nodes.size(); + if (no_input_can_hold_sf) { + return false; + } + + return insert_identity_after_nodes(nodes, concat_node); +} + +bool InsertIdentityForPrecAgnosticConcatInput::run_on_model(const std::shared_ptr& m) { + RUN_ON_FUNCTION_SCOPE(InsertIdentityForPrecAgnosticConcatInput); + bool is_graph_modified = false; + for (auto& node : m->get_ordered_ops()) { + if (is_concat(node)) { + is_graph_modified |= insert_identity_for_prec_agnostic_concat_inputs(node); + } + } + return is_graph_modified; +} diff --git a/src/plugins/intel_gna/src/transformations/insert_identity_layer.hpp b/src/plugins/intel_gna/src/transformations/insert_identity_layer.hpp index d448d5c977d1ed..091c5ecc12acbd 100644 --- a/src/plugins/intel_gna/src/transformations/insert_identity_layer.hpp +++ b/src/plugins/intel_gna/src/transformations/insert_identity_layer.hpp @@ -68,6 +68,81 @@ class IdentityCandidatesCleanup : public ov::pass::ModelPass { bool run_on_model(const std::shared_ptr& m) override; }; +/** + * @brief Inserts 
identity for precision agnostic (or FQ) concat inputs + * Scale factor propagation requires unified scale factors for each Concat input. + * If some input does not contain any layer, which is capable of storing scale factors, + * additional layer must be introduced. + * InsertIdentityForPrecAgnosticConcatInput pass adds Identity layer, which + * is capable of storing scale factors, so the scale factors propagation can proceed. + * Note: Identity will be added to all affected inputs. + * Note: Algorithm does not depend on inputs order. + * + * Example model: + * + * Parameter + * | + * Functional ... Prec-Agnostic or FQ + * \ | / + * Concat + * | + * Result + * + * After execution: + * + * Parameter + * | + * Prec-Agnostic or FQ + * | + * Functional ... Identity + * \ | / + * Concat + * | + * Result + * + */ +class InsertIdentityForPrecAgnosticConcatInput : public ov::pass::ModelPass { +public: + OPENVINO_RTTI("InsertIdentityForPrecAgnosticConcatInput", "0"); + bool run_on_model(const std::shared_ptr& m) override; + +private: + /** + * @brief Check if FakeQuantize exists on any input + */ + bool has_fq_on_any_input(const std::shared_ptr concat_node); + + /** + * @brief Check if at least two inputs are not identical + */ + bool are_all_inputs_pointing_the_same_node(const std::shared_ptr& node); + + /** + * @brief Return vector of nodes for Identity insertion + */ + std::vector> get_nodes_for_identity_insertion( + const std::shared_ptr& concat_node); + + /** + * @brief Invokes Identity layer insertion after each node in vector + * returns true if any Identity layer was inserted + */ + bool insert_identity_after_nodes(const std::vector>& nodes, + const std::shared_ptr& next); + + /** + * @brief Invoke Identity layer insertion in case the Concat input is unable + * to store scale factors + * returns true if any Identity layer was inserted + */ + bool insert_identity_for_prec_agnostic_concat_inputs(const std::shared_ptr& node); + + /** + * @brief Find the output index of 
'prev' layer, on which it is connected to 'next' layer + */ + size_t find_prev_layer_output_index(const std::shared_ptr& prev, const std::shared_ptr& next); +}; + } // namespace pass } // namespace intel_gna } // namespace ov diff --git a/src/plugins/intel_gna/tests/unit/transformations/gna_insert_identity_layer.cpp b/src/plugins/intel_gna/tests/unit/transformations/gna_insert_identity_layer.cpp index 4f7808cbf0e0f9..a4dacffc854b16 100644 --- a/src/plugins/intel_gna/tests/unit/transformations/gna_insert_identity_layer.cpp +++ b/src/plugins/intel_gna/tests/unit/transformations/gna_insert_identity_layer.cpp @@ -7,36 +7,47 @@ #include #include #include -#include -#include #include #include #include #include +#include "openvino/opsets/opset12.hpp" +#include "openvino/opsets/opset7.hpp" #include "ops/identity.hpp" #include "transformations/insert_identity_layer.hpp" #include "transformations/rt_info/gna_precision_change_flag.hpp" namespace testing { +using namespace ngraph::builder; +using namespace ngraph::op; +using namespace ov; +using namespace ov::opset12; +using namespace ov::pass; +using namespace ov::intel_gna; +using namespace ov::intel_gna::pass; +using namespace ov::intel_gna::op; +using namespace ov::element; +using namespace std; + class InsertIdentityLayerTest : public CommonTestUtils::TestsCommon { public: virtual void Validate(); virtual void Run(); public: - std::shared_ptr m_func, m_ref_func; - ngraph::Shape m_input_shape{10}; + shared_ptr m_func, m_ref_func; + Shape m_input_shape{10}; bool m_low_precision = false; }; void InsertIdentityLayerTest::Validate() { - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(m_low_precision); - m.register_pass(); - m.register_pass(); + Manager m; + m.register_pass(); + m.register_pass(m_low_precision); + m.register_pass(); + m.register_pass(); m.run_passes(m_func); ASSERT_NO_THROW(check_rt_info(m_func)); @@ -44,12 +55,12 @@ void InsertIdentityLayerTest::Validate() { ASSERT_TRUE(result.first); // 
Cleanup rt info and check - m.register_pass(); + m.register_pass(); m.run_passes(m_func); for (auto& node : m_func->get_ordered_ops()) { for (auto& input : node->inputs()) { - const ov::RTMap& rt_info = input.get_rt_info(); - ASSERT_EQ(rt_info.count(ov::intel_gna::rt_info::GNAPrecisionChangeFlag::get_type_info_static()), 0); + const RTMap& rt_info = input.get_rt_info(); + ASSERT_EQ(rt_info.count(rt_info::GNAPrecisionChangeFlag::get_type_info_static()), 0); } } } @@ -62,19 +73,19 @@ void InsertIdentityLayerTest::Run() { /******************************************************* Concat layer tests * *******************************************************/ -typedef std::tuple +typedef tuple InsertIdentityConcatTestParams; class InsertIdentityLayerConcatTest : public InsertIdentityLayerTest, public ::testing::WithParamInterface { public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { + static string getTestCaseName(const testing::TestParamInfo& obj) { size_t axis, inputs_num; - std::tie(axis, inputs_num) = obj.param; + tie(axis, inputs_num) = obj.param; - std::ostringstream result; + ostringstream result; result << "inputsNum=" << inputs_num << "_"; result << "axis=" << axis; @@ -82,46 +93,45 @@ class InsertIdentityLayerConcatTest : public InsertIdentityLayerTest, } void SetUp() override { size_t axis, inputs_num; - std::tie(axis, inputs_num) = this->GetParam(); + tie(axis, inputs_num) = this->GetParam(); InsertIdentityLayerTest::SetUp(); { - auto params = std::make_shared(ngraph::element::f32, m_input_shape); - auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto add = std::make_shared(params, const_add); - ngraph::OutputVector concat_inputs = {add}; + auto params = make_shared(f32, m_input_shape); + auto const_add = Constant::create(f32, m_input_shape, {1}); + auto add = make_shared(params, const_add); + OutputVector concat_inputs = {add}; for (size_t i = 1; i < inputs_num; ++i) { - auto 
const_mul = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {i}); - auto mul = std::make_shared(add, const_mul); + auto const_mul = Constant::create(f32, m_input_shape, {i}); + auto mul = make_shared(add, const_mul); concat_inputs.push_back(mul); } - auto concat = std::make_shared(concat_inputs, axis); - auto result = std::make_shared(concat); - m_func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}); + auto concat = make_shared(concat_inputs, axis); + auto result = make_shared(concat); + m_func = make_shared(ResultVector{result}, ParameterVector{params}); } { - auto params = std::make_shared(ngraph::element::f32, m_input_shape); - auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto add = std::make_shared(params, const_add); - auto identity = std::make_shared(add); - ngraph::OutputVector concat_inputs = {identity}; + auto params = make_shared(f32, m_input_shape); + auto const_add = Constant::create(f32, m_input_shape, {1}); + auto add = make_shared(params, const_add); + auto identity = make_shared(add); + OutputVector concat_inputs = {identity}; for (size_t i = 1; i < inputs_num; ++i) { - auto const_mul = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {i}); - auto mul = std::make_shared(identity, const_mul); - auto identity_mul = std::make_shared(mul); + auto const_mul = Constant::create(f32, m_input_shape, {i}); + auto mul = make_shared(identity, const_mul); + auto identity_mul = make_shared(mul); concat_inputs.push_back(identity_mul); } - auto concat = std::make_shared(concat_inputs, axis); - auto result = std::make_shared(concat); - m_ref_func = - std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}); + auto concat = make_shared(concat_inputs, axis); + auto result = make_shared(concat); + m_ref_func = make_shared(ResultVector{result}, ParameterVector{params}); } } }; const size_t axis = 0; -const std::vector 
inputCounts = {1, 8}; +const vector inputCounts = {1, 8}; TEST_P(InsertIdentityLayerConcatTest, CompareWithRefs) { Run(); @@ -140,36 +150,34 @@ class InsertIdentityLayerSplitTest : public InsertIdentityLayerTest { void SetUp() override { InsertIdentityLayerTest::SetUp(); { - auto params = std::make_shared(ngraph::element::f32, m_input_shape); - auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto add = std::make_shared(params, const_add); - auto axis_const = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); - auto split = std::make_shared(add, axis_const, 2); - auto result1 = std::make_shared(split->output(0)); - auto const_reshape = ngraph::opset9::Constant::create(ngraph::element::i64, {2}, {1, 5}); - auto reshape = std::make_shared(split->output(1), const_reshape, false); - auto const_mul = ngraph::opset9::Constant::create(ngraph::element::f32, {1, 5}, {1}); - auto mul = std::make_shared(reshape, const_mul); - auto result2 = std::make_shared(mul); - m_func = std::make_shared(ngraph::ResultVector{result1, result2}, - ngraph::ParameterVector{params}); + auto params = make_shared(f32, m_input_shape); + auto const_add = Constant::create(f32, m_input_shape, {1}); + auto add = make_shared(params, const_add); + auto axis_const = Constant::create(i64, Shape{}, {0}); + auto split = make_shared(add, axis_const, 2); + auto result1 = make_shared(split->output(0)); + auto const_reshape = Constant::create(i64, {2}, {1, 5}); + auto reshape = make_shared(split->output(1), const_reshape, false); + auto const_mul = Constant::create(f32, {1, 5}, {1}); + auto mul = make_shared(reshape, const_mul); + auto result2 = make_shared(mul); + m_func = make_shared(ResultVector{result1, result2}, ParameterVector{params}); } { - auto params = std::make_shared(ngraph::element::f32, m_input_shape); - auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto add = 
std::make_shared(params, const_add); - auto identity = std::make_shared(add); - auto axis_const = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); - auto split = std::make_shared(identity, axis_const, 2); - auto result1 = std::make_shared(split->output(0)); - auto const_reshape = ngraph::opset9::Constant::create(ngraph::element::i64, {2}, {1, 5}); - auto reshape = std::make_shared(split->output(1), const_reshape, false); - auto const_mul = ngraph::opset9::Constant::create(ngraph::element::f32, {1, 5}, {1}); - auto mul = std::make_shared(reshape, const_mul); - auto result2 = std::make_shared(mul); - m_ref_func = std::make_shared(ngraph::ResultVector{result1, result2}, - ngraph::ParameterVector{params}); + auto params = make_shared(f32, m_input_shape); + auto const_add = Constant::create(f32, m_input_shape, {1}); + auto add = make_shared(params, const_add); + auto identity = make_shared(add); + auto axis_const = Constant::create(i64, Shape{}, {0}); + auto split = make_shared(identity, axis_const, 2); + auto result1 = make_shared(split->output(0)); + auto const_reshape = Constant::create(i64, {2}, {1, 5}); + auto reshape = make_shared(split->output(1), const_reshape, false); + auto const_mul = Constant::create(f32, {1, 5}, {1}); + auto mul = make_shared(reshape, const_mul); + auto result2 = make_shared(mul); + m_ref_func = make_shared(ResultVector{result1, result2}, ParameterVector{params}); } } }; @@ -181,21 +189,21 @@ TEST_F(InsertIdentityLayerSplitTest, CompareWithRefs) { /******************************************************* Eltwise layer tests * *******************************************************/ -typedef std::tuple +typedef tuple InsertIdentityEltwiseTestParams; class InsertIdentityLayerEltwiseTest : public InsertIdentityLayerTest, public ::testing::WithParamInterface { public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { + static string getTestCaseName(const testing::TestParamInfo& obj) { 
ELTWISE_TYPE type; bool low_precision, both_inputs_32bits; - std::tie(type, low_precision, both_inputs_32bits) = obj.param; + tie(type, low_precision, both_inputs_32bits) = obj.param; - std::ostringstream result; + ostringstream result; result << "type="; switch (type) { case ELTWISE_TYPE::Sum: @@ -215,71 +223,70 @@ class InsertIdentityLayerEltwiseTest : public InsertIdentityLayerTest, void SetUp() override { ELTWISE_TYPE type; bool both_inputs_32bits; - std::tie(type, m_low_precision, both_inputs_32bits) = this->GetParam(); + tie(type, m_low_precision, both_inputs_32bits) = this->GetParam(); InsertIdentityLayerTest::SetUp(); { - ngraph::ParameterVector params; - auto input1 = std::make_shared(ngraph::element::f32, m_input_shape); + ParameterVector params; + auto input1 = make_shared(f32, m_input_shape); params.push_back(input1); - auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto eltwise1 = std::make_shared(input1, const_input1, type); - std::shared_ptr second_input; + auto const_input1 = Constant::create(f32, m_input_shape, {1}); + auto eltwise1 = make_shared(input1, const_input1, type); + shared_ptr second_input; if (both_inputs_32bits) { - auto input2 = std::make_shared(ngraph::element::f32, m_input_shape); + auto input2 = make_shared(f32, m_input_shape); params.push_back(input2); - auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto eltwise2 = std::make_shared(input2, const_input2, type); + auto const_input2 = Constant::create(f32, m_input_shape, {1}); + auto eltwise2 = make_shared(input2, const_input2, type); second_input = eltwise2; } else { - auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); + auto const_input2 = Constant::create(f32, m_input_shape, {1}); second_input = const_input2; } - auto eltwise3 = std::make_shared(eltwise1, second_input, type); + auto eltwise3 = make_shared(eltwise1, second_input, type); 
- auto result = std::make_shared(eltwise3); - m_func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}); + auto result = make_shared(eltwise3); + m_func = make_shared(ResultVector{result}, ParameterVector{params}); } { - ngraph::ParameterVector params; - auto input1 = std::make_shared(ngraph::element::f32, m_input_shape); + ParameterVector params; + auto input1 = make_shared(f32, m_input_shape); params.push_back(input1); - auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto eltwise1 = std::make_shared(input1, const_input1, type); - std::shared_ptr first_input, second_input; + auto const_input1 = Constant::create(f32, m_input_shape, {1}); + auto eltwise1 = make_shared(input1, const_input1, type); + shared_ptr first_input, second_input; first_input = eltwise1; if (both_inputs_32bits) { - auto input2 = std::make_shared(ngraph::element::f32, m_input_shape); + auto input2 = make_shared(f32, m_input_shape); params.push_back(input2); - auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto eltwise2 = std::make_shared(input2, const_input2, type); + auto const_input2 = Constant::create(f32, m_input_shape, {1}); + auto eltwise2 = make_shared(input2, const_input2, type); second_input = eltwise2; } else { - auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); + auto const_input2 = Constant::create(f32, m_input_shape, {1}); second_input = const_input2; } if (type == ELTWISE_TYPE::Sum && !m_low_precision && both_inputs_32bits) { - auto identity = std::make_shared(eltwise1); + auto identity = make_shared(eltwise1); first_input = identity; } else if (type == ELTWISE_TYPE::Prod || m_low_precision) { - auto identity = std::make_shared(eltwise1); + auto identity = make_shared(eltwise1); first_input = identity; if (both_inputs_32bits) { - auto identity = std::make_shared(eltwise1); + auto identity = 
make_shared(eltwise1); second_input = identity; } } - auto eltwise3 = std::make_shared(first_input, second_input, type); + auto eltwise3 = make_shared(first_input, second_input, type); - auto result = std::make_shared(eltwise3); - m_ref_func = - std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}); + auto result = make_shared(eltwise3); + m_ref_func = make_shared(ResultVector{result}, ParameterVector{params}); } } }; @@ -303,74 +310,72 @@ class InsertIdentityLayerEltwiseMultipleOutputTest : public InsertIdentityLayerE void SetUp() override { ELTWISE_TYPE type; bool both_inputs_32bits; - std::tie(type, m_low_precision, both_inputs_32bits) = this->GetParam(); + tie(type, m_low_precision, both_inputs_32bits) = this->GetParam(); InsertIdentityLayerTest::SetUp(); { - ngraph::ParameterVector params; - auto input1 = std::make_shared(ngraph::element::f32, m_input_shape); + ParameterVector params; + auto input1 = make_shared(f32, m_input_shape); params.push_back(input1); - auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto eltwise1 = std::make_shared(input1, const_input1, type); - std::shared_ptr second_input; + auto const_input1 = Constant::create(f32, m_input_shape, {1}); + auto eltwise1 = make_shared(input1, const_input1, type); + shared_ptr second_input; if (both_inputs_32bits) { - auto input2 = std::make_shared(ngraph::element::f32, m_input_shape); + auto input2 = make_shared(f32, m_input_shape); params.push_back(input2); - auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto eltwise2 = std::make_shared(input2, const_input2, type); + auto const_input2 = Constant::create(f32, m_input_shape, {1}); + auto eltwise2 = make_shared(input2, const_input2, type); second_input = eltwise2; } else { - auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); + auto const_input2 = Constant::create(f32, m_input_shape, 
{1}); second_input = const_input2; } - auto relu = std::make_shared(eltwise1); - auto eltwise3 = std::make_shared(eltwise1, second_input, type); + auto relu = make_shared(eltwise1); + auto eltwise3 = make_shared(eltwise1, second_input, type); - auto result1 = std::make_shared(relu); - auto result2 = std::make_shared(eltwise3); - m_func = std::make_shared(ngraph::ResultVector{result1, result2}, - ngraph::ParameterVector{params}); + auto result1 = make_shared(relu); + auto result2 = make_shared(eltwise3); + m_func = make_shared(ResultVector{result1, result2}, ParameterVector{params}); } { - ngraph::ParameterVector params; - auto input1 = std::make_shared(ngraph::element::f32, m_input_shape); + ParameterVector params; + auto input1 = make_shared(f32, m_input_shape); params.push_back(input1); - auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto eltwise1 = std::make_shared(input1, const_input1, type); - std::shared_ptr first_input, second_input; + auto const_input1 = Constant::create(f32, m_input_shape, {1}); + auto eltwise1 = make_shared(input1, const_input1, type); + shared_ptr first_input, second_input; first_input = eltwise1; if (both_inputs_32bits) { - auto input2 = std::make_shared(ngraph::element::f32, m_input_shape); + auto input2 = make_shared(f32, m_input_shape); params.push_back(input2); - auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto eltwise2 = std::make_shared(input2, const_input2, type); + auto const_input2 = Constant::create(f32, m_input_shape, {1}); + auto eltwise2 = make_shared(input2, const_input2, type); second_input = eltwise2; } else { - auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); + auto const_input2 = Constant::create(f32, m_input_shape, {1}); second_input = const_input2; } if (type == ELTWISE_TYPE::Sum && !m_low_precision && both_inputs_32bits) { - auto identity = 
std::make_shared(eltwise1); + auto identity = make_shared(eltwise1); first_input = identity; } else if (type == ELTWISE_TYPE::Prod || m_low_precision) { - auto identity = std::make_shared(eltwise1); + auto identity = make_shared(eltwise1); first_input = identity; if (both_inputs_32bits) { - auto identity = std::make_shared(eltwise1); + auto identity = make_shared(eltwise1); second_input = identity; } } - auto relu = std::make_shared(first_input); - auto eltwise3 = std::make_shared(first_input, second_input, type); + auto relu = make_shared(first_input); + auto eltwise3 = make_shared(first_input, second_input, type); - auto result1 = std::make_shared(relu); - auto result2 = std::make_shared(eltwise3); - m_ref_func = std::make_shared(ngraph::ResultVector{result1, result2}, - ngraph::ParameterVector{params}); + auto result1 = make_shared(relu); + auto result2 = make_shared(eltwise3); + m_ref_func = make_shared(ResultVector{result1, result2}, ParameterVector{params}); } } }; @@ -394,55 +399,50 @@ class InsertIdentityLayerEltwiseFQTest : public InsertIdentityLayerEltwiseTest { void SetUp() override { ELTWISE_TYPE type; bool both_inputs_32bits; - std::tie(type, m_low_precision, both_inputs_32bits) = this->GetParam(); + tie(type, m_low_precision, both_inputs_32bits) = this->GetParam(); InsertIdentityLayerTest::SetUp(); - auto add_fake_quantize = [&](const std::shared_ptr& node) { - auto levels = (m_low_precision) ? 
std::numeric_limits::max() : std::numeric_limits::max(); - auto input_low = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1}); - auto input_high = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {5}); - auto output_low = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0}); - auto output_high = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {10}); - return std::make_shared(node, - input_low, - input_high, - output_low, - output_high, - levels); + auto add_fake_quantize = [&](const shared_ptr& node) { + auto levels = (m_low_precision) ? numeric_limits::max() : numeric_limits::max(); + auto input_low = Constant::create(i64, Shape{1}, {1}); + auto input_high = Constant::create(i64, Shape{1}, {5}); + auto output_low = Constant::create(i64, Shape{1}, {0}); + auto output_high = Constant::create(i64, Shape{1}, {10}); + return make_shared(node, input_low, input_high, output_low, output_high, levels); }; { - ngraph::ParameterVector params; - auto input1 = std::make_shared(ngraph::element::f32, m_input_shape); + ParameterVector params; + auto input1 = make_shared(f32, m_input_shape); params.push_back(input1); auto input1_fq = add_fake_quantize(input1); - auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); + auto const_input1 = Constant::create(f32, m_input_shape, {1}); auto const_input1_fq = add_fake_quantize(const_input1); - auto eltwise1 = std::make_shared(input1_fq, const_input1_fq, type); + auto eltwise1 = make_shared(input1_fq, const_input1_fq, type); auto eltwise1_fq = add_fake_quantize(eltwise1); - std::shared_ptr second_input; + shared_ptr second_input; if (both_inputs_32bits) { - auto input2 = std::make_shared(ngraph::element::f32, m_input_shape); + auto input2 = make_shared(f32, m_input_shape); params.push_back(input2); auto input2_fq = add_fake_quantize(input2); - auto const_input2 = 
ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); + auto const_input2 = Constant::create(f32, m_input_shape, {1}); auto const_input2_fq = add_fake_quantize(const_input2); - auto eltwise2 = std::make_shared(input2_fq, const_input2_fq, type); + auto eltwise2 = make_shared(input2_fq, const_input2_fq, type); auto eltwise2_fq = add_fake_quantize(eltwise2); second_input = eltwise2_fq; } else { - auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); + auto const_input2 = Constant::create(f32, m_input_shape, {1}); auto const_input2_fq = add_fake_quantize(const_input2); second_input = const_input2_fq; } - auto eltwise3 = std::make_shared(eltwise1_fq, second_input, type); + auto eltwise3 = make_shared(eltwise1_fq, second_input, type); auto eltwise3_fq = add_fake_quantize(eltwise3); - auto result = std::make_shared(eltwise3_fq); - m_func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}); + auto result = make_shared(eltwise3_fq); + m_func = make_shared(ResultVector{result}, ParameterVector{params}); } { m_ref_func = m_func->clone(); } @@ -463,20 +463,20 @@ INSTANTIATE_TEST_SUITE_P(TransformationTests, /***************************************************** Convolution layer tests * *****************************************************/ -typedef std::tuple +typedef tuple InsertIdentityConvTestParams; class InsertIdentityLayerConvMatMulTest : public InsertIdentityLayerTest, public ::testing::WithParamInterface { public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { + static string getTestCaseName(const testing::TestParamInfo& obj) { bool with_pool, with_act, swap_matmul; - std::tie(with_pool, with_act, swap_matmul) = obj.param; + tie(with_pool, with_act, swap_matmul) = obj.param; - std::ostringstream result; + ostringstream result; result << "with_pool=" << with_pool; result << "_with_act=" << with_act; result << "_swap_matmul=" << swap_matmul; @@ 
-485,85 +485,74 @@ class InsertIdentityLayerConvMatMulTest : public InsertIdentityLayerTest, } void SetUp() override { bool with_pool, with_act, swap_matmul; - std::tie(with_pool, with_act, swap_matmul) = this->GetParam(); + tie(with_pool, with_act, swap_matmul) = this->GetParam(); InsertIdentityLayerTest::SetUp(); m_input_shape = {1, 3, 1, 64}; - auto reshape_shape = ngraph::Shape{3, 64}; + auto reshape_shape = Shape{3, 64}; { - std::shared_ptr last_node; - auto input = std::make_shared(ngraph::element::f32, m_input_shape); - auto weights = ngraph::opset9::Constant::create(ngraph::element::f32, ngraph::Shape{3, 3, 1, 2}, {1}); - auto conv = std::make_shared(input, - weights, - ngraph::Strides{1, 1}, - ngraph::CoordinateDiff{0, 0}, - ngraph::CoordinateDiff{0, 1}, - ngraph::Strides{1, 1}); + shared_ptr last_node; + auto input = make_shared(f32, m_input_shape); + auto weights = Constant::create(f32, Shape{3, 3, 1, 2}, {1}); + auto conv = make_shared(input, + weights, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 1}, + Strides{1, 1}); last_node = conv; if (with_pool) { - auto max_pool = std::make_shared(last_node, - ngraph::Strides{1, 1}, - ngraph::Shape{0, 0}, - ngraph::Shape{0, 1}, - ngraph::Shape{1, 2}); + auto max_pool = + make_shared(last_node, Strides{1, 1}, Shape{0, 0}, Shape{0, 1}, Shape{1, 2}); last_node = max_pool; } if (with_act) { - auto relu = std::make_shared(last_node); + auto relu = make_shared(last_node); last_node = relu; } - auto reshape_const = ngraph::opset9::Constant::create(ngraph::element::i64, - ngraph::Shape{reshape_shape.size()}, - reshape_shape); - auto reshape = std::make_shared(last_node, reshape_const, false); - auto matmul_const = ngraph::opset9::Constant::create(ngraph::element::f32, {64, 3}, {1.2}); - auto matmul = swap_matmul ? 
std::make_shared(matmul_const, reshape) - : std::make_shared(reshape, matmul_const); - - auto result = std::make_shared(matmul); - m_func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input}); + auto reshape_const = Constant::create(i64, Shape{reshape_shape.size()}, reshape_shape); + auto reshape = make_shared(last_node, reshape_const, false); + auto matmul_const = Constant::create(f32, {64, 3}, {1.2}); + auto matmul = + swap_matmul ? make_shared(matmul_const, reshape) : make_shared(reshape, matmul_const); + + auto result = make_shared(matmul); + m_func = make_shared(ResultVector{result}, ParameterVector{input}); } { - std::shared_ptr last_node; - auto input = std::make_shared(ngraph::element::f32, m_input_shape); - auto weights = ngraph::opset9::Constant::create(ngraph::element::f32, ngraph::Shape{3, 3, 1, 2}, {1}); - auto conv = std::make_shared(input, - weights, - ngraph::Strides{1, 1}, - ngraph::CoordinateDiff{0, 0}, - ngraph::CoordinateDiff{0, 1}, - ngraph::Strides{1, 1}); + shared_ptr last_node; + auto input = make_shared(f32, m_input_shape); + auto weights = Constant::create(f32, Shape{3, 3, 1, 2}, {1}); + auto conv = make_shared(input, + weights, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 1}, + Strides{1, 1}); last_node = conv; if (with_pool) { - auto max_pool = std::make_shared(last_node, - ngraph::Strides{1, 1}, - ngraph::Shape{0, 0}, - ngraph::Shape{0, 1}, - ngraph::Shape{1, 2}); + auto max_pool = + make_shared(last_node, Strides{1, 1}, Shape{0, 0}, Shape{0, 1}, Shape{1, 2}); last_node = max_pool; } if (with_act) { - auto relu = std::make_shared(last_node); + auto relu = make_shared(last_node); last_node = relu; } else { - auto identity = std::make_shared(last_node); + auto identity = make_shared(last_node); last_node = identity; } - auto reshape_const = ngraph::opset9::Constant::create(ngraph::element::i64, - ngraph::Shape{reshape_shape.size()}, - reshape_shape); - auto reshape = std::make_shared(last_node, 
reshape_const, false); - auto matmul_const = ngraph::opset9::Constant::create(ngraph::element::f32, {64, 3}, {1.2}); - auto matmul = swap_matmul ? std::make_shared(matmul_const, reshape) - : std::make_shared(reshape, matmul_const); - - auto result = std::make_shared(matmul); - m_ref_func = - std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input}); + auto reshape_const = Constant::create(i64, Shape{reshape_shape.size()}, reshape_shape); + auto reshape = make_shared(last_node, reshape_const, false); + auto matmul_const = Constant::create(f32, {64, 3}, {1.2}); + auto matmul = + swap_matmul ? make_shared(matmul_const, reshape) : make_shared(reshape, matmul_const); + + auto result = make_shared(matmul); + m_ref_func = make_shared(ResultVector{result}, ParameterVector{input}); } } }; @@ -587,32 +576,30 @@ class InsertIdentityLayerResultTest : public InsertIdentityLayerTest { void SetUp() override { InsertIdentityLayerTest::SetUp(); { - auto params = std::make_shared(ngraph::element::f32, m_input_shape); - auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto add = std::make_shared(params, const_add); - auto relu = std::make_shared(add); - auto result1 = std::make_shared(add); - auto result2 = std::make_shared(relu); - m_func = std::make_shared(ngraph::ResultVector{result1, result2}, - ngraph::ParameterVector{params}); + auto params = make_shared(f32, m_input_shape); + auto const_add = Constant::create(f32, m_input_shape, {1}); + auto add = make_shared(params, const_add); + auto relu = make_shared(add); + auto result1 = make_shared(add); + auto result2 = make_shared(relu); + m_func = make_shared(ResultVector{result1, result2}, ParameterVector{params}); } { - auto params = std::make_shared(ngraph::element::f32, m_input_shape); - auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1}); - auto add = std::make_shared(params, const_add); - auto identity = 
std::make_shared(add); - auto relu = std::make_shared(add); - auto result1 = std::make_shared(identity); - auto result2 = std::make_shared(relu); - m_ref_func = std::make_shared(ngraph::ResultVector{result1, result2}, - ngraph::ParameterVector{params}); + auto params = make_shared(f32, m_input_shape); + auto const_add = Constant::create(f32, m_input_shape, {1}); + auto add = make_shared(params, const_add); + auto identity = make_shared(add); + auto relu = make_shared(add); + auto result1 = make_shared(identity); + auto result2 = make_shared(relu); + m_ref_func = make_shared(ResultVector{result1, result2}, ParameterVector{params}); } } void Validate() override { - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); + Manager m; + m.register_pass(); + m.register_pass(); m.run_passes(m_func); ASSERT_NO_THROW(check_rt_info(m_func)); @@ -624,4 +611,66 @@ class InsertIdentityLayerResultTest : public InsertIdentityLayerTest { TEST_F(InsertIdentityLayerResultTest, CompareWithRefs) { Run(); } + +class InsertIdentityForNonQuantizableConcatInputTest : public InsertIdentityLayerTest { + string getName() { + return "InsertIdentityForPrecAgnosticConcatInput"; + } + + shared_ptr create_fq(const Type& type, + const shared_ptr& node, + float fq_min, + float fq_max, + std::size_t levels) { + // + auto fq_inp_min = makeConstant(type, {1}, {fq_min}); + auto fq_inp_max = makeConstant(type, {1}, {fq_max}); + auto fq_out_min = makeConstant(type, {1}, {fq_min}); + auto fq_out_max = makeConstant(type, {1}, {fq_max}); + return make_shared(node, fq_inp_min, fq_inp_max, fq_out_min, fq_out_max, levels); + } + +public: + void SetUp() override { + InsertIdentityLayerTest::SetUp(); + { + auto inputs = makeParams(f32, {{m_input_shape}, {m_input_shape}}); + auto fq = create_fq(f32, inputs[0], -1, 1, 256); + auto relu = make_shared(fq); + auto reshape_const = make_shared(i64, Shape{1}, m_input_shape); + auto reshape = make_shared(inputs[1], reshape_const, false); + auto concat = 
makeConcat({relu, reshape}, 0); + auto result = make_shared(concat); + m_func = make_shared(result, inputs, getName()); + } + + { + auto inputs = makeParams(f32, {{m_input_shape}, {m_input_shape}}); + auto fq = create_fq(f32, inputs[0], -1, 1, 256); + auto relu = make_shared(fq); + auto reshape_const = make_shared(i64, Shape{1}, m_input_shape); + auto reshape = make_shared(inputs[1], reshape_const, false); + // We expect the following Identity layer to be inserted + auto identity = make_shared(reshape); + auto concat = makeConcat({relu, identity}, 0); + auto result = make_shared(concat); + m_ref_func = make_shared(result, inputs, getName()); + } + } + + void Validate() override { + Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(m_func); + ASSERT_NO_THROW(check_rt_info(m_func)); + + auto result = compare_functions(m_func, m_ref_func); + ASSERT_TRUE(result.first); + } +}; + +TEST_F(InsertIdentityForNonQuantizableConcatInputTest, CompareWithRefs) { + Run(); +} } // namespace testing From 88703905f4ec0cd6a346bd7383a3c9b23fcc04bf Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Tue, 11 Jul 2023 13:42:53 +0400 Subject: [PATCH 20/21] [CPU] Leftovers for Extension convert I64/U64 to I32. (#16941) * [CPU] Leftovers for Extension convert I64/U64 to I32. * Fix as per comments * Fixes as per commits 2 * Fixes as per commits 3 * Revert "Fixes as per commits 3" This reverts commit 84022be66f64faccd96a32da5611db0e5137d049. 
* Some fixes * Test fix --- src/plugins/intel_cpu/src/graph.cpp | 48 +++--- ...cpp => insert_convert_after_extension.cpp} | 17 +- .../pass/insert_convert_after_extension.hpp | 22 +++ .../common/pass/ref_convert_i64_i32.hpp | 21 --- .../transformation_pipeline.cpp | 8 +- .../src/custom_op_insert_convert_i64.cpp | 150 ++++++++++++++++++ 6 files changed, 211 insertions(+), 55 deletions(-) rename src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/{ref_convert_i64_i32.cpp => insert_convert_after_extension.cpp} (73%) create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/insert_convert_after_extension.hpp delete mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ref_convert_i64_i32.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_insert_convert_i64.cpp diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 185f3384c9c758..b65b582e4384b2 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -44,9 +44,9 @@ #include "utils/verbose.h" #include "memory_desc/cpu_memory_desc_utils.h" -#include -#include -#include +#include +#include +#include #include #include #include "memory_desc/dnnl_blocked_memory_desc.h" @@ -114,7 +114,7 @@ void Graph::CreateGraph(const std::vector &graphNodes, CPU_DEBUG_CAP_ENABLE(serialize(*this)); } -template void Graph::CreateGraph(const std::shared_ptr&, const GraphContext::CPtr); +template void Graph::CreateGraph(const std::shared_ptr&, const GraphContext::CPtr); template void Graph::CreateGraph(const CNNNetwork&, const GraphContext::CPtr); void Graph::Replicate(const std::shared_ptr &subgraph) { @@ -126,9 +126,9 @@ void Graph::Replicate(const std::shared_ptr &subgraph) { // nodes which has no consumers (output or just unused). But doesn't marked as graph output. // Will be stored as fake output separately. 
- std::deque> unusedOutputs; + std::deque> unusedOutputs; - auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, + auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, const size_t childInputPort) -> int { for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) { if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) { @@ -144,13 +144,13 @@ void Graph::Replicate(const std::shared_ptr &subgraph) { graphNodes.push_back(node); - if (op->get_type_info() == ngraph::op::v0::Parameter::get_type_info_static()) { + if (op->get_type_info() == op::v0::Parameter::get_type_info_static()) { inputNodesMap[node->getName()] = node; } - if (op->get_type_info() == ngraph::op::v0::Result::get_type_info_static()) { + if (op->get_type_info() == op::v0::Result::get_type_info_static()) { const auto prev = op->input_value(0); - const std::string inputID = ov::op::util::get_ie_output_name(prev); + const std::string inputID = op::util::get_ie_output_name(prev); outputNodesMap[inputID] = node; } @@ -167,9 +167,9 @@ void Graph::Replicate(const std::shared_ptr &subgraph) { } if (!one_of(op->get_type_info(), - ngraph::op::v0::Result::get_type_info_static(), - ngraph::op::v3::Assign::get_type_info_static(), - ngraph::op::v6::Assign::get_type_info_static())) { + op::v0::Result::get_type_info_static(), + op::v3::Assign::get_type_info_static(), + op::v6::Assign::get_type_info_static())) { for (size_t oi = 0; oi < op->get_output_size(); oi++) { if (op->get_output_target_inputs(oi).empty()) { unusedOutputs.push_back(op->output(oi)); @@ -211,11 +211,11 @@ void Graph::Replicate(const CNNNetwork &network) { auto orderedOps = func->get_ordered_ops(); - // TODO [NM]: unordered_map is preferred from performance perspective. 
Needs hash for ngraph::Node - std::map, NodePtr> op2node; - std::deque> unusedOutputs; // nodes which has no consumers (output or just unused) + // TODO [NM]: unordered_map is preferred from performance perspective. Needs hash for ov::Node + std::map, NodePtr> op2node; + std::deque> unusedOutputs; // nodes which has no consumers (output or just unused) - auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, + auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, const size_t childInputPort) -> int { for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) { if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) { @@ -234,7 +234,7 @@ void Graph::Replicate(const CNNNetwork &network) { graphNodes.push_back(node); - if (op->get_type_info() == ngraph::op::v0::Parameter::get_type_info_static()) { + if (op->get_type_info() == op::v0::Parameter::get_type_info_static()) { const auto inInfo = inputsInfo.find(node->getName()); if (inInfo != inputsInfo.end()) { inputNodesMap[node->getName()] = node; @@ -244,9 +244,9 @@ void Graph::Replicate(const CNNNetwork &network) { } } - if (op->get_type_info() == ngraph::op::v0::Result::get_type_info_static()) { + if (op->get_type_info() == op::v0::Result::get_type_info_static()) { const auto &input = op->input_value(0); - const auto name = ov::op::util::get_ie_output_name(input); + const auto name = op::util::get_ie_output_name(input); if (outputsInfo.count(name) != 0) { outputNodesMap[name] = node; @@ -265,9 +265,9 @@ void Graph::Replicate(const CNNNetwork &network) { } if (!one_of(op->get_type_info(), - ngraph::op::v0::Result::get_type_info_static(), - ngraph::op::v3::Assign::get_type_info_static(), - ngraph::op::v6::Assign::get_type_info_static())) { + op::v0::Result::get_type_info_static(), + op::v3::Assign::get_type_info_static(), + op::v6::Assign::get_type_info_static())) { for 
(size_t oi = 0; oi < op->get_output_size(); oi++) { if (op->get_output_target_inputs(oi).empty()) { unusedOutputs.push_back(op->output(oi)); @@ -1708,7 +1708,7 @@ void Graph::EnforceInferencePrecision() { // starting from output nodes for (const auto& entry : outputNodesMap) { const auto& node = entry.second; - if (node->getOriginalInputPrecisionAtPort(0) == inferPrec) + if (node->getOriginalInputPrecisionAtPort(0) == Precision::BF16) continue; searchForNodesToSkip(node, nodesToSkip); } @@ -1749,7 +1749,7 @@ void Graph::EnforceInferencePrecision() { } } -std::shared_ptr Graph::dump() const { +std::shared_ptr Graph::dump() const { return dump_graph_as_ie_ngraph_net(*this); } diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ref_convert_i64_i32.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/insert_convert_after_extension.cpp similarity index 73% rename from src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ref_convert_i64_i32.cpp rename to src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/insert_convert_after_extension.cpp index 66295921faf475..c388da638ede5e 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ref_convert_i64_i32.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/insert_convert_after_extension.cpp @@ -2,16 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "insert_convert_after_extension.hpp" -#include "ref_convert_i64_i32.hpp" -#include +#include #include "cpu_types.h" -#include - #include "itt.hpp" +#include -ov::pass::RefConvertI64ToI32::RefConvertI64ToI32() { - MATCHER_SCOPE(RefConvertI64ToI32); +ov::pass::InsertConvertAfterExtension::InsertConvertAfterExtension() { + MATCHER_SCOPE(InsertConvertAfterExtension); auto i64_extension = [](const ov::Output& output) -> bool { auto node = output.get_node_shared_ptr(); @@ -27,13 +26,17 @@ ov::pass::RefConvertI64ToI32::RefConvertI64ToI32() { for (auto& output : 
ref->outputs()) { if (output.get_element_type() == ov::element::i64 || output.get_element_type() == ov::element::u64) { auto targetInputs = output.get_target_inputs(); - auto convert = std::make_shared(output, ov::element::i32); + auto convert = std::make_shared(output, ov::element::i32); for (const auto& targetInput : targetInputs) { targetInput.replace_source_output(convert); } auto& convertTensor = convert->output(0).get_tensor(); + + auto legacy_name = op::util::create_ie_output_name(output); + descriptor::set_ov_tensor_legacy_name(convertTensor, legacy_name); + if (!output.get_names().empty()) { convertTensor.set_names(output.get_names()); } diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/insert_convert_after_extension.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/insert_convert_after_extension.hpp new file mode 100644 index 00000000000000..14c5cb72aff1e3 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/insert_convert_after_extension.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ov { +namespace pass { + +// This pass inserts explicit Convert on Extension operation outputs for hard-coded list of precisions. +// Supported cases: I64/U64 -> I32. 
+ +class InsertConvertAfterExtension: public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("InsertConvertAfterExtension", "0"); + InsertConvertAfterExtension(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ref_convert_i64_i32.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ref_convert_i64_i32.hpp deleted file mode 100644 index 271c4a0a42d409..00000000000000 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ref_convert_i64_i32.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -namespace ov { -namespace pass { - -// This pass inserts Convert node from i64 to i32 for Reference nodes. - -class RefConvertI64ToI32: public ov::pass::MatcherPass { -public: - OPENVINO_RTTI("RefConvertI64ToI32", "0"); - RefConvertI64ToI32(); -}; - -} // namespace pass -} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 51a078e18feb9c..209c3ca316725b 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -104,8 +104,8 @@ #include "transformations/cpu_opset/arm/pass/mish_decomposition.hpp" #include "transformations/cpu_opset/common/pass/decompose_integer_divide.hpp" #include "transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.hpp" +#include "transformations/cpu_opset/common/pass/insert_convert_after_extension.hpp" #include "transformations/cpu_opset/common/pass/move_eltwise_up_data_movement.hpp" -#include "transformations/cpu_opset/common/pass/ref_convert_i64_i32.hpp" #include "transformations/cpu_opset/common/pass/swap_convert_transpose.hpp" // Snippets @@ -260,8 +260,10 @@ void Transformations::PreLpt(const std::vector& 
defaultPrecis CPU_REGISTER_PASS_COMMON(manager, ngraph::pass::low_precision::ConvertSubtractConstant, defaultPrecisions); } CPU_REGISTER_PASS_COMMON(manager, ov::pass::Validate); - CPU_REGISTER_PASS_COMMON(manager, ov::pass::RefConvertI64ToI32); - + // Common ConvertPrecision pass handles only a limited set of opevino operations to match the list of precisions supported by the plugin. + // However, if the extension operation produces an output precision that is not natively supported, this may lead to inconsistency during + // element type propagation. This transformation is called before the ConvertPrecision pass to align the actual precisions with the list of supported ones. + CPU_REGISTER_PASS_COMMON(manager, ov::pass::InsertConvertAfterExtension); CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, precisions, type_to_fuse); CPU_REGISTER_PASS_COMMON(manager, ov::pass::EliminateConvert); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_insert_convert_i64.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_insert_convert_i64.cpp new file mode 100644 index 00000000000000..551081d47f38ef --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_insert_convert_i64.cpp @@ -0,0 +1,150 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include "test_utils/cpu_test_utils.hpp" + +using namespace ov::test; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { +using CustomOpI64CPUTestParams = std::tuple; + +class CustomOpI64 : public ov::op::Op { +public: + OPENVINO_OP("CustomOpI64"); + + CustomOpI64() = default; + CustomOpI64(const ov::OutputVector& args) : Op(args) { + constructor_validate_and_infer_types(); + } + + void validate_and_infer_types() override { + const auto& inputs_count = input_values().size(); + OPENVINO_ASSERT(inputs_count == 1, + "Input count must 
be 1, Got: ", + inputs_count); + OPENVINO_ASSERT(get_input_element_type(0) == ov::element::Type_t::i32, + "The input must be i32."); + set_output_size(2); + + auto inShape = get_input_partial_shape(0); + + set_output_type(0, ov::element::Type_t::i64, inShape); + set_output_type(1, ov::element::Type_t::i32, inShape); + } + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override { + OPENVINO_ASSERT(new_args.size() == 1, "Incorrect number of new arguments"); + + return std::make_shared(new_args); + } + + bool visit_attributes(ov::AttributeVisitor& visitor) override { + return true; + } + + bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override { + const auto& in = inputs[0]; + auto& out0 = outputs[0]; + auto& out1 = outputs[1]; + + auto inData = in.data(); + + auto outData0 = out0.data(); + for (size_t i = 0lu; i < in.get_size(); i++) { + outData0[i] = static_cast(inData[i]); + } + + memcpy(out1.data(), inData, out1.get_byte_size()); + + return true; + } + + bool evaluate(ov::TensorVector& output_values, + const ov::TensorVector& input_values, + const ov::EvaluationContext& evaluationContext) const override { + return evaluate(output_values, input_values); + } + + bool has_evaluate() const override { + return true; + } +}; + +class CustomOpConvertI64CPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + ElementType inType; + InputShape inputShape; + std::tie(inType, inputShape) = obj.param; + + std::ostringstream result; + result << "IS=" << inputShape << "_"; + result << "Prc=" << inType; + return result.str(); + } + +protected: + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + + ElementType inType; + InputShape inputShape; + std::tie(inType, inputShape) = this->GetParam(); + + init_input_shapes({inputShape}); + auto inputParams = 
ngraph::builder::makeDynamicParams(inType, inputDynamicShapes); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); + auto customOp = std::make_shared(paramOuts); + + ov::ResultVector results{std::make_shared(customOp)}; + function = std::make_shared(results, inputParams, "customOpTest"); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + auto tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } + + void compare(const std::vector& expected, const std::vector& actual) override { + ASSERT_EQ(expected.size(), actual.size()); + ASSERT_EQ(expected.size(), function->get_results().size()); + const auto& results = function->get_results(); + for (size_t j = 0; j < results.size(); j++) { + const auto result = results[j]; + for (size_t i = 0; i < result->get_input_size(); ++i) { + ov::test::utils::compare(expected[j], actual[j], abs_threshold, rel_threshold); + } + } + } +}; + +TEST_P(CustomOpConvertI64CPUTest, CompareWithRefs) { + run(); + // TODO: Graph could not be dumped with int64 for now. Swith on this in scope of int64 enabling. 
+ // CPUTestUtils::CheckNumberOfNodesWithType(compiledModel, "Convert", 1); +} + +const InputShape inputShapes = { + {}, {{2, 3, 64}} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CustomOp, + CustomOpConvertI64CPUTest, + ::testing::Combine(::testing::Values(ElementType::i32), ::testing::Values(inputShapes)), + CustomOpConvertI64CPUTest::getTestCaseName); + +} // namespace CPULayerTestsDefinitions From 56f51135d45cf3e087227ac21261948f558ff5cb Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 11 Jul 2023 12:41:24 +0200 Subject: [PATCH 21/21] Grey preprocessing yuv nv12 i420 (#18239) * Add two plane YOV to Grey conversion * Add i420 to grey conversion * Add yuv to grey conversion for GPU * Fix cmakes * Remove static from local function * Remove opencv dependency from tests * Put grey_from_yuv_single_plane into namespace --- .../src/preprocess/preprocess_steps_impl.cpp | 29 ++ .../preprocessing/yuv_to_grey_tests.cpp | 14 + .../preprocess/yuv_to_grey_tests.cpp | 14 + .../preprocessing/yuv_to_grey_tests.cpp | 14 + .../preprocessing/yuv_to_grey_tests.hpp | 36 +++ .../src/preprocessing/yuv_to_grey_tests.cpp | 263 ++++++++++++++++++ 6 files changed, 370 insertions(+) create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/preprocessing/yuv_to_grey_tests.cpp create mode 100644 src/plugins/intel_gpu/tests/functional/shared_tests_instances/preprocess/yuv_to_grey_tests.cpp create mode 100644 src/plugins/template/tests/functional/preprocessing/yuv_to_grey_tests.cpp create mode 100644 src/tests/functional/plugin/shared/include/preprocessing/yuv_to_grey_tests.hpp create mode 100644 src/tests/functional/plugin/shared/src/preprocessing/yuv_to_grey_tests.cpp diff --git a/src/core/src/preprocess/preprocess_steps_impl.cpp b/src/core/src/preprocess/preprocess_steps_impl.cpp index d6ca402bc14b5f..e1ab465863b238 100644 --- a/src/core/src/preprocess/preprocess_steps_impl.cpp +++ b/src/core/src/preprocess/preprocess_steps_impl.cpp @@ -51,6 +51,23 @@ static std::string 
vector_to_string(const std::vector& values) { s << ")"; return s.str(); } +namespace { +std::shared_ptr grey_from_yuv_single_plane(const std::vector>& nodes) { + using namespace ov::opset8; + const auto axis = Constant::create(element::i32, {1}, {1}); + const auto yuv_shape_of = std::make_shared(nodes[0]); + const auto get_height = std::make_shared(yuv_shape_of, axis, Constant::create(element::i32, {}, {0})); + + const auto start = Constant::create(element::i32, {1}, {0}); + // slice stop is input height * (2/3) + auto mul_height = + std::make_shared(get_height, Constant::create(get_height->get_element_type(), {1}, {2})); + auto stop = std::make_shared(mul_height, Constant::create(get_height->get_element_type(), {1}, {3})); + const auto step = Constant::create(element::i32, {1}, {1}); + // + return std::make_shared(nodes[0], start, stop, step, axis); +} +} // namespace void PreStepsList::add_scale_impl(const std::vector& values) { m_actions.emplace_back( @@ -361,6 +378,9 @@ void PreStepsList::add_convert_color_impl(const ColorFormat& dst_format) { case ColorFormat::BGR: convert = std::make_shared(nodes[0]); break; + case ColorFormat::GRAY: + convert = grey_from_yuv_single_plane(nodes); + break; default: OPENVINO_ASSERT(false, "Unsupported conversion from NV12 to '", @@ -379,6 +399,9 @@ void PreStepsList::add_convert_color_impl(const ColorFormat& dst_format) { case ColorFormat::BGR: convert = std::make_shared(nodes[0], nodes[1]); break; + case ColorFormat::GRAY: + convert = nodes[0].get_node_shared_ptr(); + break; default: OPENVINO_ASSERT(false, "Unsupported conversion from NV12 to '", @@ -398,6 +421,9 @@ void PreStepsList::add_convert_color_impl(const ColorFormat& dst_format) { case ColorFormat::BGR: convert = std::make_shared(nodes[0]); break; + case ColorFormat::GRAY: + convert = grey_from_yuv_single_plane(nodes); + break; default: OPENVINO_ASSERT(false, "Unsupported conversion from I420 to '", @@ -417,6 +443,9 @@ void PreStepsList::add_convert_color_impl(const 
ColorFormat& dst_format) { case ColorFormat::BGR: convert = std::make_shared(nodes[0], nodes[1], nodes[2]); break; + case ColorFormat::GRAY: + convert = nodes[0].get_node_shared_ptr(); + break; default: OPENVINO_ASSERT(false, "Unsupported conversion from I420 to '", diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/preprocessing/yuv_to_grey_tests.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/preprocessing/yuv_to_grey_tests.cpp new file mode 100644 index 00000000000000..ed2359d52a187d --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/preprocessing/yuv_to_grey_tests.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "preprocessing/yuv_to_grey_tests.hpp" + +using namespace ov::preprocess; + +INSTANTIATE_TEST_SUITE_P(smoke_Preprocessing, + PreprocessingYUV2GreyTest, + testing::Values(CommonTestUtils::DEVICE_CPU), + PreprocessingYUV2GreyTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/preprocess/yuv_to_grey_tests.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/preprocess/yuv_to_grey_tests.cpp new file mode 100644 index 00000000000000..21bacdb2724b56 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/preprocess/yuv_to_grey_tests.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "preprocessing/yuv_to_grey_tests.hpp" + +using namespace ov::preprocess; + +INSTANTIATE_TEST_SUITE_P(smoke_Preprocessing, + PreprocessingYUV2GreyTest, + testing::Values(CommonTestUtils::DEVICE_GPU), + PreprocessingYUV2GreyTest::getTestCaseName); diff --git a/src/plugins/template/tests/functional/preprocessing/yuv_to_grey_tests.cpp b/src/plugins/template/tests/functional/preprocessing/yuv_to_grey_tests.cpp new file mode 100644 index 
00000000000000..1a554b6c71dd26 --- /dev/null +++ b/src/plugins/template/tests/functional/preprocessing/yuv_to_grey_tests.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "preprocessing/yuv_to_grey_tests.hpp" + +using namespace ov::preprocess; + +INSTANTIATE_TEST_SUITE_P(smoke_Preprocessing, + PreprocessingYUV2GreyTest, + testing::Values(CommonTestUtils::DEVICE_TEMPLATE), + PreprocessingYUV2GreyTest::getTestCaseName); diff --git a/src/tests/functional/plugin/shared/include/preprocessing/yuv_to_grey_tests.hpp b/src/tests/functional/plugin/shared/include/preprocessing/yuv_to_grey_tests.hpp new file mode 100644 index 00000000000000..e3a99fcee0ad79 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/preprocessing/yuv_to_grey_tests.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +using TParams = std::tuple; + +namespace ov { +namespace preprocess { +class PreprocessingYUV2GreyTest : public testing::WithParamInterface, public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; + void run() override; + ov::TensorVector calculate_refs() override; + + size_t get_full_height(); + std::shared_ptr build_test_model(const element::Type_t et, const Shape& shape); + void set_test_model_color_conversion(ColorFormat from, ColorFormat to); + + ov::TensorVector ref_out_data; + size_t width, height; + int b_step; +}; + +} // namespace preprocess +} // namespace ov diff --git a/src/tests/functional/plugin/shared/src/preprocessing/yuv_to_grey_tests.cpp b/src/tests/functional/plugin/shared/src/preprocessing/yuv_to_grey_tests.cpp new file mode 100644 index 00000000000000..2e1dea33834fd8 --- /dev/null +++ 
b/src/tests/functional/plugin/shared/src/preprocessing/yuv_to_grey_tests.cpp @@ -0,0 +1,263 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "preprocessing/yuv_to_grey_tests.hpp" +#include "shared_test_classes/single_layer/convert_color_i420.hpp" +#include "shared_test_classes/single_layer/convert_color_nv12.hpp" + +namespace ov { +namespace preprocess { +std::string PreprocessingYUV2GreyTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::ostringstream result; + result << "device=" << std::get<0>(obj.param); + return result.str(); +} + +void PreprocessingYUV2GreyTest::SetUp() { + const auto& test_params = GetParam(); + targetDevice = std::get<0>(test_params); + + height = width = 64; + b_step = 5; + outType = inType = element::u8; + + inputs.clear(); +} + +void PreprocessingYUV2GreyTest::run() { + compile_model(); + infer(); + validate(); +} + +ov::TensorVector PreprocessingYUV2GreyTest::calculate_refs() { + return ref_out_data; +} + +size_t PreprocessingYUV2GreyTest::get_full_height() { + return height * (255 / b_step + 1); +} + +std::shared_ptr PreprocessingYUV2GreyTest::build_test_model(const element::Type_t et, const Shape& shape) { + const auto input = std::make_shared(et, shape); + const auto zero = op::v0::Constant::create(et, Shape{}, {0.0f}); + const auto op = std::make_shared(input, zero); + const auto res = std::make_shared(op); + return std::make_shared(res, ParameterVector{input}); +} + +void PreprocessingYUV2GreyTest::set_test_model_color_conversion(ColorFormat from, ColorFormat to) { + auto ppp = PrePostProcessor(function); + ppp.input().tensor().set_color_format(from); + ppp.input().preprocess().convert_color(to); + function = ppp.build(); +} + +TEST_P(PreprocessingYUV2GreyTest, convert_single_plane_i420_hardcoded_ref) { + // clang-format off + auto input = std::vector {0x51, 0xeb, 0x51, 0xeb, + 0x51, 0xeb, 0x51, 0xeb, + 0x6d, 0x6d, 0xb8, 0xb8}; + + auto exp_out = std::vector 
{0x51, 0xeb, 0x51, 0xeb, + 0x51, 0xeb, 0x51, 0xeb}; + // clang-format on + + const auto yuv_input_shape = ov::Shape{1, 6, 2, 1}; + const auto output_shape = ov::Shape{1, 4, 2, 1}; + const auto input_model_shape = output_shape; + + ref_out_data.emplace_back(outType, output_shape, exp_out.data()); + + // Build model and set inputs + function = build_test_model(inType, input_model_shape); + set_test_model_color_conversion(ColorFormat::I420_SINGLE_PLANE, ColorFormat::GRAY); + + const auto& params = function->get_parameters(); + inputs.emplace(params.at(0), ov::Tensor{params.at(0)->get_element_type(), yuv_input_shape, input.data()}); + + run(); +} + +TEST_P(PreprocessingYUV2GreyTest, convert_three_plane_i420_hardcoded_ref) { + // clang-format off + auto input_y = std::vector {0x51, 0xeb, 0x51, 0xeb, + 0x52, 0xeb, 0x51, 0xeb}; + auto input_u = std::vector {0x10, 0x12}; + auto input_v = std::vector {0x21, 0x22}; + + auto exp_out = std::vector {0x51, 0xeb, 0x51, 0xeb, + 0x52, 0xeb, 0x51, 0xeb}; + // clang-format on + const auto input_y_shape = ov::Shape{1, 4, 2, 1}; + const auto input_u_shape = ov::Shape{1, 2, 1, 1}; + const auto input_v_shape = ov::Shape{1, 2, 1, 1}; + const auto output_shape = ov::Shape{1, 4, 2, 1}; + const auto input_model_shape = output_shape; + + ref_out_data.emplace_back(outType, output_shape, exp_out.data()); + + // Build model and set inputs + function = build_test_model(inType, input_model_shape); + set_test_model_color_conversion(ColorFormat::I420_THREE_PLANES, ColorFormat::GRAY); + + const auto& params = function->get_parameters(); + inputs.emplace(params.at(0), ov::Tensor{params.at(0)->get_element_type(), input_y_shape, input_y.data()}); + inputs.emplace(params.at(1), ov::Tensor{params.at(1)->get_element_type(), input_u_shape, input_u.data()}); + inputs.emplace(params.at(2), ov::Tensor{params.at(2)->get_element_type(), input_v_shape, input_v.data()}); + + run(); +} + +TEST_P(PreprocessingYUV2GreyTest, convert_single_nv12_plane_hardcoded_ref) { 
+ // clang-format off + auto input = std::vector {0x51, 0xeb, 0x51, 0xeb, + 0x51, 0xeb, 0x51, 0xeb, + 0x6d, 0xb8, 0x6d, 0xb8}; + + auto exp_out = std::vector {0x51, 0xeb, 0x51, 0xeb, + 0x51, 0xeb, 0x51, 0xeb}; + // clang-format on + + const auto yuv_input_shape = ov::Shape{1, 6, 2, 1}; + const auto output_shape = ov::Shape{1, 4, 2, 1}; + const auto input_model_shape = output_shape; + + ref_out_data.emplace_back(outType, output_shape, exp_out.data()); + + // Build model and set inputs + function = build_test_model(inType, input_model_shape); + set_test_model_color_conversion(ColorFormat::NV12_SINGLE_PLANE, ColorFormat::GRAY); + + const auto& params = function->get_parameters(); + inputs.emplace(params.at(0), ov::Tensor{params.at(0)->get_element_type(), yuv_input_shape, input.data()}); + + run(); +} + +TEST_P(PreprocessingYUV2GreyTest, convert_two_plane_nv12_hardcoded_ref) { + // clang-format off + auto input_y = std::vector {0x51, 0xeb, 0x51, 0xeb, + 0x52, 0xeb, 0x51, 0xeb}; + auto input_uv = std::vector {0x10, 0x12, 0x21, 0x22}; + + auto exp_out = std::vector {0x51, 0xeb, 0x51, 0xeb, + 0x52, 0xeb, 0x51, 0xeb}; + // clang-format on + const auto input_y_shape = ov::Shape{1, 4, 2, 1}; + const auto input_uv_shape = ov::Shape{1, 2, 1, 2}; + const auto output_shape = ov::Shape{1, 4, 2, 1}; + const auto input_model_shape = output_shape; + + ref_out_data.emplace_back(outType, output_shape, exp_out.data()); + + // Build model and set inputs + function = build_test_model(inType, input_model_shape); + set_test_model_color_conversion(ColorFormat::NV12_TWO_PLANES, ColorFormat::GRAY); + + const auto& params = function->get_parameters(); + inputs.emplace(params.at(0), ov::Tensor{params.at(0)->get_element_type(), input_y_shape, input_y.data()}); + inputs.emplace(params.at(1), ov::Tensor{params.at(1)->get_element_type(), input_uv_shape, input_uv.data()}); + + run(); +} + +TEST_P(PreprocessingYUV2GreyTest, convert_single_plane_i420_use_opencv) { + // Test various possible r/g/b 
values within dimensions + const auto input_yuv_shape = Shape{1, get_full_height() * 3 / 2, width, 1}; + const auto input_y_shape = Shape{1, get_full_height(), width, 1}; + auto ov20_input_yuv = LayerTestsDefinitions::I420TestUtils::color_test_image(height, width, b_step); + auto ov20_input_y = + std::vector(ov20_input_yuv.begin(), ov20_input_yuv.begin() + shape_size(input_y_shape)); + + ref_out_data.emplace_back(outType, input_y_shape, ov20_input_y.data()); + + // Build model and set inputs + function = build_test_model(inType, input_y_shape); + set_test_model_color_conversion(ColorFormat::I420_SINGLE_PLANE, ColorFormat::GRAY); + + const auto& params = function->get_parameters(); + inputs.emplace(params.at(0), ov::Tensor{params.at(0)->get_element_type(), input_yuv_shape, ov20_input_yuv.data()}); + + run(); +} + +TEST_P(PreprocessingYUV2GreyTest, convert_three_plane_i420_use_opencv) { + // Test various possible r/g/b values within dimensions + const auto input_y_shape = Shape{1, get_full_height(), width, 1}; + const auto input_u_shape = Shape{1, get_full_height() / 2, width / 2, 1}; + const auto input_v_shape = Shape{1, get_full_height() / 2, width / 2, 1}; + // const auto input_uv_shape = Shape{1, get_full_height() / 2, width / 2, 2}; + auto ov20_input_yuv = LayerTestsDefinitions::I420TestUtils::color_test_image(height, width, b_step); + + auto input_yuv_iter = ov20_input_yuv.begin(); + auto ov20_input_y = std::vector(input_yuv_iter, input_yuv_iter + shape_size(input_y_shape)); + + input_yuv_iter += shape_size(input_y_shape); + auto ov20_input_u = std::vector(input_yuv_iter, input_yuv_iter + shape_size(input_u_shape)); + + input_yuv_iter += shape_size(input_u_shape); + auto ov20_input_v = std::vector(input_yuv_iter, input_yuv_iter + shape_size(input_v_shape)); + + ref_out_data.emplace_back(outType, input_y_shape, ov20_input_y.data()); + + // Build model and set inputs + function = build_test_model(inType, input_y_shape); + 
set_test_model_color_conversion(ColorFormat::I420_THREE_PLANES, ColorFormat::GRAY); + + const auto& params = function->get_parameters(); + inputs.emplace(params.at(0), ov::Tensor{params.at(0)->get_element_type(), input_y_shape, ov20_input_y.data()}); + inputs.emplace(params.at(1), ov::Tensor{params.at(1)->get_element_type(), input_u_shape, ov20_input_u.data()}); + inputs.emplace(params.at(2), ov::Tensor{params.at(2)->get_element_type(), input_v_shape, ov20_input_v.data()}); + + run(); +} + +TEST_P(PreprocessingYUV2GreyTest, convert_single_plane_nv12_use_opencv) { + // Test various possible r/g/b values within dimensions + const auto input_yuv_shape = Shape{1, get_full_height() * 3 / 2, width, 1}; + const auto input_y_shape = Shape{1, get_full_height(), width, 1}; + auto ov20_input_yuv = LayerTestsDefinitions::NV12TestUtils::color_test_image(height, width, b_step); + auto ov20_input_y = + std::vector(ov20_input_yuv.begin(), ov20_input_yuv.begin() + shape_size(input_y_shape)); + + ref_out_data.emplace_back(outType, input_y_shape, ov20_input_y.data()); + + // Build model and set inputs + function = build_test_model(inType, input_y_shape); + set_test_model_color_conversion(ColorFormat::NV12_SINGLE_PLANE, ColorFormat::GRAY); + + const auto& params = function->get_parameters(); + inputs.emplace(params.at(0), ov::Tensor{params.at(0)->get_element_type(), input_yuv_shape, ov20_input_yuv.data()}); + + run(); +} + +TEST_P(PreprocessingYUV2GreyTest, convert_two_plane_nv12_use_opencv) { + // Test various possible r/g/b values within dimensions + const auto input_y_shape = Shape{1, get_full_height(), width, 1}; + const auto input_uv_shape = Shape{1, get_full_height() / 2, width / 2, 2}; + auto ov20_input_yuv = LayerTestsDefinitions::NV12TestUtils::color_test_image(height, width, b_step); + + auto input_yuv_iter = ov20_input_yuv.begin(); + auto ov20_input_y = std::vector(input_yuv_iter, input_yuv_iter + shape_size(input_y_shape)); + input_yuv_iter += shape_size(input_y_shape); + 
+ auto ov20_input_uv = std::vector(input_yuv_iter, input_yuv_iter + shape_size(input_uv_shape)); + input_yuv_iter += shape_size(input_uv_shape); + + ref_out_data.emplace_back(outType, input_y_shape, ov20_input_y.data()); + + // Build model and set inputs + function = build_test_model(inType, input_y_shape); + set_test_model_color_conversion(ColorFormat::NV12_TWO_PLANES, ColorFormat::GRAY); + + const auto& params = function->get_parameters(); + inputs.emplace(params.at(0), ov::Tensor{params.at(0)->get_element_type(), input_y_shape, ov20_input_y.data()}); + inputs.emplace(params.at(1), ov::Tensor{params.at(1)->get_element_type(), input_uv_shape, ov20_input_uv.data()}); + + run(); +} +} // namespace preprocess +} // namespace ov