diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml
index 2c20f894ba8ca4..7d7454f8794849 100644
--- a/.ci/azure/linux.yml
+++ b/.ci/azure/linux.yml
@@ -64,7 +64,7 @@ jobs:
     Static:
       CMAKE_BUILD_SHARED_LIBS: 'OFF'
       PYTHON_STATIC_ARGS: -m "not dynamic_library"
-      CMAKE_CPACK_GENERATOR:
+      CMAKE_CPACK_GENERATOR: "TGZ"
       SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/samples
       PYTHON_SAMPLES_INSTALL_DIR: $(SAMPLES_INSTALL_DIR)/python
       RUN_PREFIX: . $(SETUPVARS) &&
diff --git a/.github/ISSUE_TEMPLATE/build.yml b/.github/ISSUE_TEMPLATE/build.yml
new file mode 100644
index 00000000000000..d65a546dac2027
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/build.yml
@@ -0,0 +1,95 @@
+name: Build Issue Report
+description: This report is for build/installation issues
+title: "[Build]: "
+labels: ["build", "support_request"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Please provide all the necessary information to expedite the response.
+  - type: input
+    id: ov_version
+    attributes:
+      label: OpenVINO Version
+      description: OpenVINO version, branch, or tag in OpenVINO GitHub
+      placeholder: 2021.4.0 LTS / Master Branch / tag 2022.3.0
+    validations:
+      required: true
+  - type: dropdown
+    id: os
+    attributes:
+      label: Operating System
+      description: What OS are you using?
+      options:
+        - Ubuntu 18.04 (LTS)
+        - Ubuntu 20.04 (LTS)
+        - Ubuntu 22.04 (LTS)
+        - Windows System
+        - Red Hat Enterprise Linux 8
+        - OpenSUSE
+        - Android System
+        - Raspbian Stretch OS
+        - macOS Systems for Intel CPU
+        - macOS Systems for Apple Silicon
+        - WebAssembly
+        - WSL2 for Windows
+        - Other (Please specify in description)
+    validations:
+      required: true
+  - type: dropdown
+    id: architecture
+    attributes:
+      label: Hardware Architecture
+      description: What hardware architecture are you using in this test?
+      options:
+        - x86 (64 bits)
+        - x86 (32 bits)
+        - ARM (64 bits)
+        - ARM (32 bits)
+        - RISC-V
+        - Other (please specify in the description)
+    validations:
+      required: true
+  - type: textarea
+    id: target_platform
+    attributes:
+      label: Target Platform
+      description: |
+        You can also provide us the full system log with the following command
+        Windows cmd - "systeminfo"
+        Linux terminal - "lscpu" and "lscpu -e"
+      placeholder: Paste your full platform/system information here
+    validations:
+      required: false
+  - type: textarea
+    id: build_description
+    attributes:
+      label: Build issue description
+      description: What issue are you facing during the build/installation?
+      placeholder: Please provide a detailed description of what happened
+    validations:
+      required: true
+  - type: textarea
+    id: build_script
+    attributes:
+      label: Build script or step-by-step to reproduce
+      description: How can we reproduce your issue?
+      placeholder: Please provide detailed instructions on how to reproduce the issue
+    validations:
+      required: false
+  - type: textarea
+    id: build_logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so there is no need for backticks.
+      render: shell
+  - type: checkboxes
+    id: terms
+    attributes:
+      label: Issue submission checklist
+      description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/intel/intel-one-mono/blob/main/CODE_OF_CONDUCT.md)
+      options:
+        - label: I report the issue.
It's not a question + required: true + - label: I checked the problem with the documentation, FAQ, open issues, Stack Overflow, etc., and have not found the solution + required: true diff --git a/CMakeLists.txt b/CMakeLists.txt index fad19139a1de77..461f1a209cb1c0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ message (STATUS "CMAKE_VERSION ......................... " ${CMAKE_VERSION}) message (STATUS "OpenVINO_SOURCE_DIR ................... " ${OpenVINO_SOURCE_DIR}) message (STATUS "OpenVINO_BINARY_DIR ................... " ${OpenVINO_BINARY_DIR}) message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR}) +message (STATUS "CPACK_GENERATOR ....................... " ${CPACK_GENERATOR}) message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID}) message (STATUS "CMAKE_CXX_COMPILER_ID ................. " ${CMAKE_CXX_COMPILER_ID}) if(OV_GENERATOR_MULTI_CONFIG) diff --git a/cmake/developer_package/packaging/archive.cmake b/cmake/developer_package/packaging/archive.cmake new file mode 100644 index 00000000000000..7513a72f880f96 --- /dev/null +++ b/cmake/developer_package/packaging/archive.cmake @@ -0,0 +1,90 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +include(GNUInstallDirs) + +# +# ov_archive_cpack_set_dirs() +# +# Set directories for ARCHIVE cpack +# +macro(ov_archive_cpack_set_dirs) + # common "archive" package locations + # TODO: move current variables to OpenVINO specific locations + set(OV_CPACK_INCLUDEDIR runtime/include) + set(OV_CPACK_IE_CMAKEDIR runtime/cmake) + set(OV_CPACK_NGRAPH_CMAKEDIR runtime/cmake) + set(OV_CPACK_OPENVINO_CMAKEDIR runtime/cmake) + set(OV_CPACK_DOCDIR docs) + set(OV_CPACK_LICENSESDIR licenses) + set(OV_CPACK_SAMPLESDIR samples) + set(OV_CPACK_WHEELSDIR tools) + set(OV_CPACK_TOOLSDIR tools) + set(OV_CPACK_DEVREQDIR tools) + set(OV_CPACK_PYTHONDIR python) + + if(WIN32) + set(OV_CPACK_LIBRARYDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_CPACK_RUNTIMEDIR runtime/bin/${ARCH_FOLDER}/$) + set(OV_CPACK_ARCHIVEDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_WHEEL_RUNTIMEDIR runtime/bin/${ARCH_FOLDER}/Release) + elseif(APPLE) + set(OV_CPACK_LIBRARYDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_CPACK_RUNTIMEDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_CPACK_ARCHIVEDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_WHEEL_RUNTIMEDIR runtime/lib/${ARCH_FOLDER}/Release) + else() + set(OV_CPACK_LIBRARYDIR runtime/lib/${ARCH_FOLDER}) + set(OV_CPACK_RUNTIMEDIR runtime/lib/${ARCH_FOLDER}) + set(OV_CPACK_ARCHIVEDIR runtime/lib/${ARCH_FOLDER}) + set(OV_WHEEL_RUNTIMEDIR ${OV_CPACK_RUNTIMEDIR}) + endif() + set(OV_CPACK_PLUGINSDIR ${OV_CPACK_RUNTIMEDIR}) + + # for BW compatibility + set(IE_CPACK_LIBRARY_PATH ${OV_CPACK_LIBRARYDIR}) + set(IE_CPACK_RUNTIME_PATH ${OV_CPACK_RUNTIMEDIR}) + set(IE_CPACK_ARCHIVE_PATH ${OV_CPACK_ARCHIVEDIR}) +endmacro() + +ov_archive_cpack_set_dirs() + +# +# Override include / exclude rules for components +# This is required to exclude some files from installation +# (e.g. 
archive packages don't require python_package component) +# + +macro(ov_define_component_include_rules) + # core components + unset(OV_CPACK_COMP_CORE_EXCLUDE_ALL) + unset(OV_CPACK_COMP_CORE_C_EXCLUDE_ALL) + unset(OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL) + unset(OV_CPACK_COMP_CORE_C_DEV_EXCLUDE_ALL) + # licensing + unset(OV_CPACK_COMP_LICENSING_EXCLUDE_ALL) + # samples + unset(OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL) + unset(OV_CPACK_COMP_C_SAMPLES_EXCLUDE_ALL) + unset(OV_CPACK_COMP_PYTHON_SAMPLES_EXCLUDE_ALL) + # python + unset(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL) + unset(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL) + unset(OV_CPACK_COMP_OVC_EXCLUDE_ALL) + set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL) + unset(OV_CPACK_COMP_PYTHON_WHEELS_EXCLUDE_ALL) + # tools + set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) + unset(OV_CPACK_COMP_DEPLOYMENT_MANAGER_EXCLUDE_ALL) + # scripts + unset(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL) + unset(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL) +endmacro() + +ov_define_component_include_rules() + +# New in version 3.18 +set(CPACK_ARCHIVE_THREADS 8) +# multiple packages are generated +set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) diff --git a/cmake/developer_package/packaging/nsis.cmake b/cmake/developer_package/packaging/nsis.cmake index 1a89f39344016c..f5393357d0ab51 100644 --- a/cmake/developer_package/packaging/nsis.cmake +++ b/cmake/developer_package/packaging/nsis.cmake @@ -43,6 +43,52 @@ endmacro() ov_nsis_specific_settings() +# +# ov_nsis_cpack_set_dirs() +# +# Set directories for ARCHIVE cpack +# +macro(ov_archive_cpack_set_dirs) + # common "archive" package locations + # TODO: move current variables to OpenVINO specific locations + set(OV_CPACK_INCLUDEDIR runtime/include) + set(OV_CPACK_IE_CMAKEDIR runtime/cmake) + set(OV_CPACK_NGRAPH_CMAKEDIR runtime/cmake) + set(OV_CPACK_OPENVINO_CMAKEDIR runtime/cmake) + set(OV_CPACK_DOCDIR docs) + set(OV_CPACK_LICENSESDIR licenses) + set(OV_CPACK_SAMPLESDIR samples) + set(OV_CPACK_WHEELSDIR tools) + set(OV_CPACK_TOOLSDIR tools) + set(OV_CPACK_DEVREQDIR tools) + set(OV_CPACK_PYTHONDIR python) + + if(WIN32) + set(OV_CPACK_LIBRARYDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_CPACK_RUNTIMEDIR runtime/bin/${ARCH_FOLDER}/$) + set(OV_CPACK_ARCHIVEDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_WHEEL_RUNTIMEDIR runtime/bin/${ARCH_FOLDER}/Release) + elseif(APPLE) + set(OV_CPACK_LIBRARYDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_CPACK_RUNTIMEDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_CPACK_ARCHIVEDIR runtime/lib/${ARCH_FOLDER}/$) + set(OV_WHEEL_RUNTIMEDIR runtime/lib/${ARCH_FOLDER}/Release) + else() + set(OV_CPACK_LIBRARYDIR runtime/lib/${ARCH_FOLDER}) + set(OV_CPACK_RUNTIMEDIR runtime/lib/${ARCH_FOLDER}) + set(OV_CPACK_ARCHIVEDIR runtime/lib/${ARCH_FOLDER}) + set(OV_WHEEL_RUNTIMEDIR ${OV_CPACK_RUNTIMEDIR}) + endif() + set(OV_CPACK_PLUGINSDIR ${OV_CPACK_RUNTIMEDIR}) + + # for BW compatibility + set(IE_CPACK_LIBRARY_PATH ${OV_CPACK_LIBRARYDIR}) + set(IE_CPACK_RUNTIME_PATH ${OV_CPACK_RUNTIMEDIR}) + set(IE_CPACK_ARCHIVE_PATH ${OV_CPACK_ARCHIVEDIR}) +endmacro() + +ov_nsis_cpack_set_dirs() + # # Override include / exclude rules for components # This is required to exclude some files from installation diff --git a/cmake/developer_package/packaging/packaging.cmake b/cmake/developer_package/packaging/packaging.cmake index e1d9b60f6079de..50a9d14e2e5d40 100644 --- a/cmake/developer_package/packaging/packaging.cmake +++ b/cmake/developer_package/packaging/packaging.cmake @@ -39,52 +39,6 @@ 
function(ov_get_pyversion pyversion) endif() endfunction() -# -# ov_cpack_set_dirs() -# -# Set directories for cpack -# -macro(ov_cpack_set_dirs) - # common IRC package locations - # TODO: move current variables to OpenVINO specific locations - set(OV_CPACK_INCLUDEDIR runtime/include) - set(OV_CPACK_IE_CMAKEDIR runtime/cmake) - set(OV_CPACK_NGRAPH_CMAKEDIR runtime/cmake) - set(OV_CPACK_OPENVINO_CMAKEDIR runtime/cmake) - set(OV_CPACK_DOCDIR docs) - set(OV_CPACK_LICENSESDIR licenses) - set(OV_CPACK_SAMPLESDIR samples) - set(OV_CPACK_WHEELSDIR tools) - set(OV_CPACK_TOOLSDIR tools) - set(OV_CPACK_DEVREQDIR tools) - set(OV_CPACK_PYTHONDIR python) - - if(WIN32) - set(OV_CPACK_LIBRARYDIR runtime/lib/${ARCH_FOLDER}/$) - set(OV_CPACK_RUNTIMEDIR runtime/bin/${ARCH_FOLDER}/$) - set(OV_CPACK_ARCHIVEDIR runtime/lib/${ARCH_FOLDER}/$) - set(OV_WHEEL_RUNTIMEDIR runtime/bin/${ARCH_FOLDER}/Release) - elseif(APPLE) - set(OV_CPACK_LIBRARYDIR runtime/lib/${ARCH_FOLDER}/$) - set(OV_CPACK_RUNTIMEDIR runtime/lib/${ARCH_FOLDER}/$) - set(OV_CPACK_ARCHIVEDIR runtime/lib/${ARCH_FOLDER}/$) - set(OV_WHEEL_RUNTIMEDIR runtime/lib/${ARCH_FOLDER}/Release) - else() - set(OV_CPACK_LIBRARYDIR runtime/lib/${ARCH_FOLDER}) - set(OV_CPACK_RUNTIMEDIR runtime/lib/${ARCH_FOLDER}) - set(OV_CPACK_ARCHIVEDIR runtime/lib/${ARCH_FOLDER}) - set(OV_WHEEL_RUNTIMEDIR ${OV_CPACK_RUNTIMEDIR}) - endif() - set(OV_CPACK_PLUGINSDIR ${OV_CPACK_RUNTIMEDIR}) - - # for BW compatibility - set(IE_CPACK_LIBRARY_PATH ${OV_CPACK_LIBRARYDIR}) - set(IE_CPACK_RUNTIME_PATH ${OV_CPACK_RUNTIMEDIR}) - set(IE_CPACK_ARCHIVE_PATH ${OV_CPACK_ARCHIVEDIR}) -endmacro() - -ov_cpack_set_dirs() - # # ov_cpack_add_component(NAME ...) # @@ -169,38 +123,15 @@ endmacro() ov_define_component_names() -# default components for case when CPACK_GENERATOR is not set (i.e. default open source user) -macro(ov_define_component_include_rules) - # core components - unset(OV_CPACK_COMP_CORE_EXCLUDE_ALL) - unset(OV_CPACK_COMP_CORE_C_EXCLUDE_ALL) - unset(OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL) - unset(OV_CPACK_COMP_CORE_C_DEV_EXCLUDE_ALL) - # licensing - unset(OV_CPACK_COMP_LICENSING_EXCLUDE_ALL) - # samples - unset(OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL) - unset(OV_CPACK_COMP_C_SAMPLES_EXCLUDE_ALL) - unset(OV_CPACK_COMP_PYTHON_SAMPLES_EXCLUDE_ALL) - # python - unset(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL) - unset(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL) - unset(OV_CPACK_COMP_OVC_EXCLUDE_ALL) - set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL) - unset(OV_CPACK_COMP_PYTHON_WHEELS_EXCLUDE_ALL) - # tools - set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) - unset(OV_CPACK_COMP_DEPLOYMENT_MANAGER_EXCLUDE_ALL) - # scripts - unset(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL) - unset(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL) -endmacro() - -ov_define_component_include_rules() +if(NOT DEFINED CPACK_GENERATOR) + set(CPACK_GENERATOR "TGZ") +elseif(NOT CPACK_GENERATOR) + message(FATAL_ERROR "CPACK_GENERATOR cannot contain an empty value") +endif() # # Include generator specific configuration file: -# 1. Overrides directories set by ov__cpack_set_dirs() +# 1. Overrides directories set by ov__cpack_set_dirs() # This is requried, because different generator use different locations for installed files # 2. 
Merges some components using ov_override_component_names() # This is required, because different generators have different set of components @@ -230,12 +161,11 @@ elseif(CPACK_GENERATOR STREQUAL "NSIS") include(packaging/nsis) elseif(CPACK_GENERATOR MATCHES "^(CONDA-FORGE|BREW|CONAN|VCPKG)$") include(packaging/common-libraries) +elseif(CPACK_GENERATOR MATCHES "^(7Z|TBZ2|TGZ|TXZ|TZ|TZST|ZIP)$") + include(packaging/archive) endif() macro(ie_cpack) - if(NOT DEFINED CPACK_GENERATOR) - set(CPACK_GENERATOR "TGZ") - endif() set(CPACK_SOURCE_GENERATOR "") # not used set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "OpenVINO™ Toolkit") set(CPACK_COMPONENT_UNSPECIFIED_REQUIRED OFF) @@ -287,18 +217,10 @@ macro(ie_cpack) # include GENERATOR dedicated per-component configuration file # NOTE: private modules need to define ov_cpack_settings macro - # for custom packages configuration + # for custom packages configuration if(COMMAND ov_cpack_settings) ov_cpack_settings() endif() - # generator specific variables - if(CPACK_GENERATOR MATCHES "^(7Z|TBZ2|TGZ|TXZ|TZ|ZIP)$") - # New in version 3.18 - set(CPACK_ARCHIVE_THREADS 8) - # multiple packages are generated - set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) - endif() - include(CPack) endmacro() diff --git a/cmake/packaging/archive.cmake b/cmake/packaging/archive.cmake new file mode 100644 index 00000000000000..59bdc01eb0427b --- /dev/null +++ b/cmake/packaging/archive.cmake @@ -0,0 +1,23 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# +# OpenVINO Core components including frontends, plugins, etc +# +macro(ov_cpack_settings) + # fill a list of components which are part of conda + set(cpack_components_all ${CPACK_COMPONENTS_ALL}) + unset(CPACK_COMPONENTS_ALL) + foreach(item IN LISTS cpack_components_all) + string(TOUPPER ${item} UPPER_COMP) + # filter out some components, which are not needed to be wrapped to conda-forge | brew | conan | vcpkg + if(NOT OV_CPACK_COMP_${UPPER_COMP}_EXCLUDE_ALL AND + # python_package is not needed in case of archives, because components like pyopenvino are used, as well as wheels + NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE}_python.*") + list(APPEND CPACK_COMPONENTS_ALL ${item}) + endif() + endforeach() + unset(cpack_components_all) + list(REMOVE_DUPLICATES CPACK_COMPONENTS_ALL) +endmacro() diff --git a/cmake/packaging/common-libraries.cmake b/cmake/packaging/common-libraries.cmake index 5d2f7bd79e9e89..9b96ed528609df 100644 --- a/cmake/packaging/common-libraries.cmake +++ b/cmake/packaging/common-libraries.cmake @@ -13,6 +13,8 @@ macro(ov_cpack_settings) string(TOUPPER ${item} UPPER_COMP) # filter out some components, which are not needed to be wrapped to conda-forge | brew | conan | vcpkg if(NOT OV_CPACK_COMP_${UPPER_COMP}_EXCLUDE_ALL AND + # because in case of VCPKG | CONAN | BREW | CONDA-FORGE distributions, python is either not needed or installed separately + (NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE}_python.*" OR ENABLE_PYTHON_PACKAGING) AND # even for case of system TBB we have installation rules for wheels packages # so, need to skip this explicitly since they are installed in `host` section NOT item MATCHES "^tbb(_dev)?$" AND @@ -21,6 +23,7 @@ macro(ov_cpack_settings) list(APPEND CPACK_COMPONENTS_ALL ${item}) endif() endforeach() + unset(cpack_components_all) list(REMOVE_DUPLICATES CPACK_COMPONENTS_ALL) # override generator diff --git a/cmake/packaging/debian.cmake b/cmake/packaging/debian.cmake index 49a1ad9fe08d36..766209977340bb 100644 --- 
a/cmake/packaging/debian.cmake +++ b/cmake/packaging/debian.cmake @@ -47,7 +47,7 @@ macro(ov_cpack_settings) string(TOUPPER ${item} UPPER_COMP) # filter out some components, which are not needed to be wrapped to .deb package if(NOT OV_CPACK_COMP_${UPPER_COMP}_EXCLUDE_ALL AND - # skip OpenVINO Python API (pattern in form of "_python${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}") + # skip OpenVINO Python API (pattern in form of "pyopenvino_python${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}") NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO}_python.*" AND # because in case of .deb package, pyopenvino_package_python${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR} is installed (NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE}_python.*" OR ENABLE_PYTHON_PACKAGING) AND @@ -65,6 +65,7 @@ macro(ov_cpack_settings) list(APPEND CPACK_COMPONENTS_ALL ${item}) endif() endforeach() + unset(cpack_components_all) list(REMOVE_DUPLICATES CPACK_COMPONENTS_ALL) # version with 3 components diff --git a/cmake/packaging/nsis.cmake b/cmake/packaging/nsis.cmake index b6e809514ca713..e5467ee5afedec 100644 --- a/cmake/packaging/nsis.cmake +++ b/cmake/packaging/nsis.cmake @@ -15,6 +15,7 @@ macro(ov_cpack_settings) list(APPEND CPACK_COMPONENTS_ALL ${item}) endif() endforeach() + unset(cpack_components_all) # restore the components settings diff --git a/cmake/packaging/packaging.cmake b/cmake/packaging/packaging.cmake index a4e165d615d310..7d685f43143de0 100644 --- a/cmake/packaging/packaging.cmake +++ b/cmake/packaging/packaging.cmake @@ -3,11 +3,13 @@ # if(CPACK_GENERATOR STREQUAL "DEB") - include(cmake/packaging/debian.cmake) + include("${OpenVINO_SOURCE_DIR}/cmake/packaging/debian.cmake") elseif(CPACK_GENERATOR STREQUAL "RPM") - include(cmake/packaging/rpm.cmake) + include("${OpenVINO_SOURCE_DIR}/cmake/packaging/rpm.cmake") elseif(CPACK_GENERATOR MATCHES "^(CONDA-FORGE|BREW|CONAN|VCPKG)$") - include(cmake/packaging/common-libraries.cmake) + include("${OpenVINO_SOURCE_DIR}/cmake/packaging/common-libraries.cmake") +elseif(CPACK_GENERATOR MATCHES "^(7Z|TBZ2|TGZ|TXZ|TZ|TZST|ZIP)$") + include("${OpenVINO_SOURCE_DIR}/cmake/packaging/archive.cmake") elseif(CPACK_GENERATOR STREQUAL "NSIS") - include(cmake/packaging/nsis.cmake) + include("${OpenVINO_SOURCE_DIR}/cmake/packaging/nsis.cmake") endif() diff --git a/cmake/packaging/rpm.cmake b/cmake/packaging/rpm.cmake index 24ce1b2cb0696b..d3dd22bcdbd195 100644 --- a/cmake/packaging/rpm.cmake +++ b/cmake/packaging/rpm.cmake @@ -33,7 +33,7 @@ macro(ov_cpack_settings) string(TOUPPER ${item} UPPER_COMP) # filter out some components, which are not needed to be wrapped to .rpm package if(NOT OV_CPACK_COMP_${UPPER_COMP}_EXCLUDE_ALL AND - # skip OpenVINO Python API (pattern in form of "_python${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}") + # skip OpenVINO Python API (pattern in form of "pyopenvino_python${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}") NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO}_python.*" AND # because in case of .rpm package, pyopenvino_package_python${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR} is installed (NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE}_python.*" OR ENABLE_PYTHON_PACKAGING) AND @@ -51,6 +51,7 @@ macro(ov_cpack_settings) list(APPEND CPACK_COMPONENTS_ALL ${item}) endif() endforeach() + unset(cpack_components_all) list(REMOVE_DUPLICATES CPACK_COMPONENTS_ALL) # version with 3 components diff --git a/docs/OV_Runtime_UG/automatic_batching.md b/docs/OV_Runtime_UG/automatic_batching.md index 5b558502234f55..7fc4732a8e8db6 
100644 --- a/docs/OV_Runtime_UG/automatic_batching.md +++ b/docs/OV_Runtime_UG/automatic_batching.md @@ -11,7 +11,7 @@ The Automatic Batching Execution mode (or Auto-batching for short) performs automatic batching on-the-fly to improve device utilization by grouping inference requests together, without programming effort from the user. With Automatic Batching, gathering the input and scattering the output from the individual inference requests required for the batch happen transparently, without affecting the application code. -Auto Batching can be used :ref:`directly as a virtual device ` or as an :ref:`option for inference on CPU/GPU/VPU ` (by means of configuration/hint). These 2 ways are provided for the user to enable the BATCH devices **explicitly** or **implicitly**, with the underlying logic remaining the same. An example of the difference is that the CPU device doesn’t support implicitly to enable BATCH device, commands such as ``./benchmark_app -m -d CPU -hint tput`` will not apply BATCH device **implicitly**, but ``./benchmark_app -m -d "BATCH:CPU(16)`` can **explicitly** load BATCH device. +Auto Batching can be used :ref:`directly as a virtual device ` or as an :ref:`option for inference on CPU/GPU/NPU ` (by means of configuration/hint). These 2 ways are provided for the user to enable the BATCH devices **explicitly** or **implicitly**, with the underlying logic remaining the same. An example of the difference is that the CPU device doesn’t support implicitly to enable BATCH device, commands such as ``./benchmark_app -m -d CPU -hint tput`` will not apply BATCH device **implicitly**, but ``./benchmark_app -m -d "BATCH:CPU(16)`` can **explicitly** load BATCH device. Auto-batching primarily targets the existing code written for inferencing many requests, each instance with the batch size 1. To get corresponding performance improvements, the application **must be running multiple inference requests simultaneously**. Auto-batching can also be used via a particular *virtual* device. diff --git a/docs/OV_Runtime_UG/integrate_with_your_application.md b/docs/OV_Runtime_UG/integrate_with_your_application.md index 9e006270b8b0ee..08ff6c8f3f4d45 100644 --- a/docs/OV_Runtime_UG/integrate_with_your_application.md +++ b/docs/OV_Runtime_UG/integrate_with_your_application.md @@ -20,7 +20,7 @@ Following these steps, you can implement a typical OpenVINO™ Runtime inference pipeline in your application. Before proceeding, make sure you have -:doc:`installed OpenVINO Runtime ` and set environment variables (run ``/setupvars.sh`` for Linux or ``setupvars.bat`` for Windows, otherwise, the ``OpenVINO_DIR`` variable won't be configured properly to pass ``find_package`` calls). +:doc:`installed OpenVINO Runtime ` and set environment variables (run ``/setupvars.sh`` for Linux or ``setupvars.bat`` for Windows, otherwise, the ``OpenVINO_DIR`` variable won't be configured properly to pass ``find_package`` calls). .. 
image:: _static/images/IMPLEMENT_PIPELINE_with_API_C.svg diff --git a/docs/OV_Runtime_UG/supported_plugins/GPU.md b/docs/OV_Runtime_UG/supported_plugins/GPU.md index eafe3aad406a43..0e6d3579e4b08d 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/GPU.md @@ -418,6 +418,7 @@ All parameters must be set before calling ``ov::Core::compile_model()`` in order - ``ov::intel_gpu::hint::queue_priority`` - ``ov::intel_gpu::hint::queue_throttle`` - ``ov::intel_gpu::enable_loop_unrolling`` +- ``ov::intel_gpu::disable_winograd_convolution`` Read-only Properties +++++++++++++++++++++++++++++++++++++++ diff --git a/docs/_static/download/operation_conformance_table_files/opset_report_omz_static.html b/docs/_static/download/operation_conformance_table_files/opset_report_omz_static.html index dea9f8ca231cb6..4f95c22d048fae 100644 --- a/docs/_static/download/operation_conformance_table_files/opset_report_omz_static.html +++ b/docs/_static/download/operation_conformance_table_files/opset_report_omz_static.html @@ -105,7 +105,7 @@
[The remainder of this diff to the generated HTML conformance report renames the "VPUX" device column to "NPU" and updates the per-operation coverage percentages reported for that column; the surrounding HTML table markup did not survive extraction and is omitted here.]
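For context on the packaging changes earlier in this diff (the new cmake/developer_package/packaging/archive.cmake and cmake/packaging/archive.cmake, plus the CPACK_GENERATOR default in packaging.cmake): plain archive generators now go through a dedicated configuration path. A minimal sketch of exercising that path locally; the source and build directory names and the generator choice are assumptions, not part of this patch:

.. code-block:: sh

   # configure a build; per this patch CPACK_GENERATOR falls back to TGZ when not set
   cmake -S openvino -B build -DCPACK_GENERATOR=TGZ
   cmake --build build --parallel
   # produce the archives; one archive per component, since CPACK_ARCHIVE_COMPONENT_INSTALL is ON
   cd build && cpack -G TGZ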
diff --git a/docs/benchmarks/performance_ov_vs_tf.md b/docs/benchmarks/performance_ov_vs_tf.md
deleted file mode 100644
index 55ba34455c592f..00000000000000
--- a/docs/benchmarks/performance_ov_vs_tf.md
+++ /dev/null
@@ -1,102 +0,0 @@
-# OpenVINO™ and TensorFlow Comparison on Select Networks and Platforms
-
-This page presents the results of comparing OpenVINO™ and TensorFlow executing benchmarking on the same hardware platforms, and using neural network models based on the same original source models. All models were converted using the processes and conversion tools native to each framework. The hardware platforms represent a broad performance range, covering Intel® Celeron®, Intel® Core™, and Intel® Xeon® Scalable based platforms. (Refer to [System Description](https://docs.openvino.ai/resources/benchmark_files/system_configurations_2022.1.html) for further details).
-
-## deeplabv3
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## densenet-121
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## facenet-20180408-102900
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## faster_rcnn_resnet50_coco
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## inception-v3
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## inception-v4
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## resnet-50
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## ssd_mobilenet_v1_coco
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## ssd_resnet34_1200x1200
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## yolo-v3-tiny
-
-@sphinxdirective
-.. raw:: html
-
-
-@endsphinxdirective
-
-## YOLOv4
-
-@sphinxdirective
-.. raw:: html
-
- -@endsphinxdirective \ No newline at end of file diff --git a/docs/get_started.md b/docs/get_started.md index ed93772cc90153..88d37186e1f4b8 100644 --- a/docs/get_started.md +++ b/docs/get_started.md @@ -11,9 +11,8 @@ :maxdepth: 1 :hidden: - Installing OpenVINO - Additional Configurations - Uninstalling + Install OpenVINO + Additional Hardware setup Troubleshooting diff --git a/docs/install_guides/installing-openvino-macos.md b/docs/install_guides/--installing-openvino-macos.md similarity index 100% rename from docs/install_guides/installing-openvino-macos.md rename to docs/install_guides/--installing-openvino-macos.md diff --git a/docs/install_guides/installing-openvino-raspbian-removedfromtoc.md b/docs/install_guides/--installing-openvino-raspbian-removedfromtoc.md similarity index 98% rename from docs/install_guides/installing-openvino-raspbian-removedfromtoc.md rename to docs/install_guides/--installing-openvino-raspbian-removedfromtoc.md index 9bdffe93189226..6803e3d5a9e7c8 100644 --- a/docs/install_guides/installing-openvino-raspbian-removedfromtoc.md +++ b/docs/install_guides/--installing-openvino-raspbian-removedfromtoc.md @@ -187,7 +187,6 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ -To uninstall the toolkit, follow the steps on the :doc:`Uninstalling page `. Additional Resources #################### diff --git a/docs/install_guides/installing-openvino-runtime.md b/docs/install_guides/--installing-openvino-runtime.md similarity index 92% rename from docs/install_guides/installing-openvino-runtime.md rename to docs/install_guides/--installing-openvino-runtime.md index 027126f6248bca..4af90ab562698a 100644 --- a/docs/install_guides/installing-openvino-runtime.md +++ b/docs/install_guides/--installing-openvino-runtime.md @@ -1,4 +1,4 @@ -# Install OpenVINO™ Runtime {#openvino_docs_install_guides_install_runtime} +# Install OpenVINO™ Runtime @sphinxdirective @@ -26,4 +26,3 @@ Enjoy your journey with OpenVINO. @endsphinxdirective - diff --git a/docs/install_guides/installing-openvino-windows.md b/docs/install_guides/--installing-openvino-windows.md similarity index 100% rename from docs/install_guides/installing-openvino-windows.md rename to docs/install_guides/--installing-openvino-windows.md diff --git a/docs/install_guides/uninstalling-openvino.md b/docs/install_guides/--uninstalling-openvino.md similarity index 91% rename from docs/install_guides/uninstalling-openvino.md rename to docs/install_guides/--uninstalling-openvino.md index f2560048ad1e92..ae06acd75f483d 100644 --- a/docs/install_guides/uninstalling-openvino.md +++ b/docs/install_guides/--uninstalling-openvino.md @@ -1,4 +1,4 @@ -# Uninstalling the Intel® Distribution of OpenVINO™ Toolkit {#openvino_docs_install_guides_uninstalling_openvino} +# Uninstalling the Intel® Distribution of OpenVINO™ Toolkit @sphinxdirective diff --git a/docs/install_guides/installing-model-dev-tools.md b/docs/install_guides/installing-model-dev-tools.md index a0be0427286c94..593edbe90d06d6 100644 --- a/docs/install_guides/installing-model-dev-tools.md +++ b/docs/install_guides/installing-model-dev-tools.md @@ -19,6 +19,7 @@ The instructions on this page show how to install OpenVINO Development Tools. If In both cases, Python 3.7 - 3.11 needs to be installed on your machine before starting. .. note:: + From the 2022.1 release, the OpenVINO™ Development Tools can only be installed via PyPI. .. 
_python_developers: @@ -33,7 +34,7 @@ If you are a Python developer, follow the steps in the :ref:`Installing OpenVINO For C++ Developers ################## -If you are a C++ developer, you must first install OpenVINO Runtime separately to set up the C++ libraries, sample code, and dependencies for building applications with OpenVINO. These files are not included with the PyPI distribution. See the :doc:`Install OpenVINO Runtime ` page to install OpenVINO Runtime from an archive file for your operating system. +If you are a C++ developer, you must first install OpenVINO Runtime separately to set up the C++ libraries, sample code, and dependencies for building applications with OpenVINO. These files are not included with the PyPI distribution. See the :doc:`Selector Tool ` page to install OpenVINO Runtime from an archive file for your operating system. Once OpenVINO Runtime is installed, you may install OpenVINO Development Tools for access to tools like ``mo``, Model Downloader, Benchmark Tool, and other utilities that will help you optimize your model and develop your application. Follow the steps in the :ref:`Installing OpenVINO Development Tools ` section on this page to install it. diff --git a/docs/install_guides/installing-openvino-from-archive-linux.md b/docs/install_guides/installing-openvino-from-archive-linux.md index 878b30a57132c9..41ea83e8c90242 100644 --- a/docs/install_guides/installing-openvino-from-archive-linux.md +++ b/docs/install_guides/installing-openvino-from-archive-linux.md @@ -304,7 +304,24 @@ Learn more about how to integrate a model in OpenVINO applications by trying out Uninstalling the Intel® Distribution of OpenVINO™ Toolkit ########################################################### -To uninstall the toolkit, follow the steps on the :doc:`Uninstalling page `. +If you have installed OpenVINO Runtime from archive files, you can uninstall it by deleting the archive files and the extracted folders. +Uninstallation removes all Intel® Distribution of OpenVINO™ Toolkit component files but does not affect user files in the installation directory. + +If you have created the symbolic link, remove the link first: + +.. code-block:: sh + + sudo rm /opt/intel/openvino_2023 + +To delete the files: + +.. code-block:: sh + + rm -r && rm + + + + Additional Resources diff --git a/docs/install_guides/installing-openvino-from-archive-macos.md b/docs/install_guides/installing-openvino-from-archive-macos.md index 48cb406db52e5c..3d5d037639fc03 100644 --- a/docs/install_guides/installing-openvino-from-archive-macos.md +++ b/docs/install_guides/installing-openvino-from-archive-macos.md @@ -163,7 +163,21 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine Uninstalling Intel® Distribution of OpenVINO™ Toolkit ##################################################### -To uninstall the toolkit, follow the steps on the :doc:`Uninstalling page `. +If you have installed OpenVINO Runtime from archive files, you can uninstall it by deleting the archive files and the extracted folders. +Uninstallation removes all Intel® Distribution of OpenVINO™ Toolkit component files but does not affect user files in the installation directory. + +If you have created the symbolic link, remove the link first: + +.. code-block:: sh + + sudo rm /opt/intel/openvino_2023 + +To delete the files: + +.. 
code-block:: sh + + rm -r && rm + Additional Resources #################### diff --git a/docs/install_guides/installing-openvino-from-archive-windows.md b/docs/install_guides/installing-openvino-from-archive-windows.md index b50b8b655814d2..ce9bd1636dce2f 100644 --- a/docs/install_guides/installing-openvino-from-archive-windows.md +++ b/docs/install_guides/installing-openvino-from-archive-windows.md @@ -219,7 +219,25 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine Uninstalling OpenVINO Runtime ############################# -To uninstall OpenVINO, follow the steps on the :doc:`Uninstalling page ` +If you have installed OpenVINO Runtime from archive files, you can uninstall it by deleting the archive files and the extracted folders. +Uninstallation removes all Intel® Distribution of OpenVINO™ Toolkit component files but does not affect user files in the installation directory. + +If you have created the symbolic link, remove the link first. + +Use either of the following methods to delete the files: + +* Use Windows Explorer to remove the files. +* Open a Command Prompt and run: + +.. code-block:: sh + + rmdir /s + del + + + + + Additional Resources #################### diff --git a/docs/install_guides/installing-openvino-overview.md b/docs/install_guides/installing-openvino-overview.md index c617371ce5af54..d60558f6e180b0 100644 --- a/docs/install_guides/installing-openvino-overview.md +++ b/docs/install_guides/installing-openvino-overview.md @@ -12,10 +12,11 @@ :maxdepth: 3 :hidden: - OpenVINO Runtime + OpenVINO Runtime on Linux + OpenVINO Runtime on Windows + OpenVINO Runtime on macOS OpenVINO Development Tools - Build from Source - Creating a Yocto Image + Create a Yocto Image .. raw:: html @@ -25,61 +26,32 @@ -Distribution channels of OpenVINO may differ slightly, with regard to supported hardware or available APIs (read installation guides for particular distributions for more details). -Moreover, OpenVINO Runtime and OpenVINO Development Tools offer different sets of tools, as follows: +Different OpenVINO distributions may differ with regard to supported hardware or available APIs. +Read installation guides for particular distributions for more details. -* **OpenVINO Runtime** contains the core set of libraries for running machine learning model inference on processor devices. -* **OpenVINO Development Tools** is a set of utilities for working with OpenVINO and OpenVINO models. It includes the following tools: - - Model Optimizer - - Post-Training Optimization Tool - - Benchmark Tool - - Accuracy Checker and Annotation Converter - - Model Downloader and other Open Model Zoo tools +| **OpenVINO Runtime:** +| contains the core set of libraries for running inference on various processing units. It is recommended for users who already have an optimized model + and want to deploy it in an application using OpenVINO for inference on their devices. +| **OpenVINO Development Tools:** +| includes the OpenVINO Runtime for Python, as well as a set of utilities for optimizing models and validating performance. + It is recommended for users who want to optimize and verify their models before applying them in their applications. + For Python developers it is ready out-of-the-box, while for C++ development you need to install OpenVINO Runtime libraries separately. +| See the :ref:`For C++ Developers ` section of the install guide for detailed instructions. 
+| Development Tools provides: + * Model conversion API + * Benchmark Tool + * Accuracy Checker and Annotation Converter + * Post-Training Optimization Tool + * Model Downloader and other Open Model Zoo tools -Install OpenVINO Development Tools (recommended) -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -The best way to get started with OpenVINO is to install OpenVINO Development Tools, which will also install the OpenVINO Runtime Python package as a dependency. Follow the instructions on the :doc:`Install OpenVINO Development Tools ` page to install it. +| **Build OpenVINO from source** +| OpenVINO Toolkit source files are available on GitHub as open source. If you want to build your own version of OpenVINO for your platform, + follow the `OpenVINO Build Instructions `__ . -**Python** -For developers working in Python, OpenVINO Development Tools can easily be installed using PyPI. See the :ref:`For Python Developers ` section of the Install OpenVINO Development Tools page for instructions. -**C++** - -For developers working in C++, the core OpenVINO Runtime libraries must be installed separately. Then, OpenVINO Development Tools can be installed using requirements files or PyPI. See the :ref:`For C++ Developers ` section of the Install OpenVINO Development Tools page for instructions. - -Install OpenVINO Runtime only -+++++++++++++++++++++++++++++++++++++++ - -OpenVINO Runtime may also be installed on its own without OpenVINO Development Tools. This is recommended for users who already have an optimized model and want to deploy it in an application that uses OpenVINO for inference on their device. To install OpenVINO Runtime only, follow the instructions on the :doc:`Install OpenVINO Runtime ` page. - -The following methods are available to install OpenVINO Runtime: - -* Linux: You can install OpenVINO Runtime using APT, YUM, archive files or Docker. See :doc:`Install OpenVINO on Linux `. -* Windows: You can install OpenVINO Runtime using archive files or Docker. See :doc:`Install OpenVINO on Windows `. -* macOS: You can install OpenVINO Runtime using archive files or Docker. See :doc:`Install OpenVINO on macOS `. - -Build OpenVINO from source -++++++++++++++++++++++++++++++++++++ - -Source files are also available in the OpenVINO Toolkit GitHub repository. If you want to build OpenVINO from source for your platform, follow the `OpenVINO Build Instructions `__ . - -Next Steps -########## - -Still unsure if you want to install OpenVINO toolkit? Check out the :doc:`OpenVINO tutorials ` to run example applications directly in your web browser without installing it locally. Here are some exciting demos you can explore: - -- `Monodepth Estimation with OpenVINO `__ -- `Live Style Transfer with OpenVINO `__ -- `OpenVINO API Tutorial `__ - -Follow these links to install OpenVINO: - -- :doc:`Install OpenVINO Development Tools ` -- :doc:`Install OpenVINO Runtime ` -- `Build from Source `__ @endsphinxdirective diff --git a/docs/install_guides/troubleshooting-issues.md b/docs/install_guides/troubleshooting-issues.md deleted file mode 100644 index 4f5b3020a85ab3..00000000000000 --- a/docs/install_guides/troubleshooting-issues.md +++ /dev/null @@ -1,75 +0,0 @@ -# Issues & Solutions for OpenVINO™ Installation & Configuration {#openvino_docs_get_started_guide_troubleshooting_issues} - -@sphinxdirective - -.. meta:: - :description: Get to know solutions for possible problems that may occur during - the installation and configuration of OpenVINO™ on your system. 
- -This page lists issues that you may encounter during the installation and configuration of OpenVINO™, as well as their possible solutions. - -.. _install_for_prc: - -Errors with Installing via PIP for Users in China -################################################# - -Users in China might encounter errors while downloading sources via PIP during OpenVINO™ installation. To resolve the issues, try one of the following options: - -* Add the download source using the ``-i`` parameter with the Python ``pip`` command. For example: - - .. code-block:: sh - - pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/ - - Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``. - You can also run the following command to install specific framework. For example: - - .. code-block:: sh - - pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/ - - -* For C++ developers, if you have installed OpenVINO Runtime via APT, YUM, or the archive file, and then installed OpenVINO Development Tools via PyPI, you may run into issues. To resolve that, install the components in ``requirements.txt`` by using the following command: - - .. code-block:: sh - - pip install -r /tools/requirements.txt - - For APT and YUM users, replace the ``INSTALL_DIR`` with ``/usr/share/openvino``. - - - -Issues with Installing OpenVINO on Linux from Docker -#################################################### - -.. _proxy-issues: - -Proxy Issues -++++++++++++ - -If you met proxy issues during the installation with Docker, you need set up proxy settings for Docker. See the `Docker guide `__ for more details. - -.. _yocto_install_issues: - -Issues with Creating a Yocto Image for OpenVINO -############################################### - -Error while adding "meta-intel" layer -+++++++++++++++++++++++++++++++++++++ - -When using the ``bitbake-layers add-layer meta-intel`` command, the following error might occur: - -.. code-block:: sh - - NOTE: Starting bitbake server... - ERROR: The following required tools (as specified by HOSTTOOLS) appear to be unavailable in PATH, please install them in order to proceed: chrpath diffstat pzstd zstd - - -To resolve the issue, install the ``chrpath diffstat zstd`` tools: - -.. code-block:: sh - - sudo apt-get install chrpath diffstat zstd - -@endsphinxdirective - diff --git a/docs/install_guides/troubleshooting-steps.md b/docs/install_guides/troubleshooting-steps.md deleted file mode 100644 index d51af50dd32cbd..00000000000000 --- a/docs/install_guides/troubleshooting-steps.md +++ /dev/null @@ -1,125 +0,0 @@ -# Troubleshooting Steps for OpenVINO™ Installation and Configurations {#openvino_docs_get_started_guide_troubleshooting_steps} - -@sphinxdirective - -.. meta:: - :description: Learn what checks you may perform after encountering problems during - the installation and configuration of OpenVINO™ on your system. - - -If you run into issues while installing or configuring OpenVINO™, you can try the following methods to do some quick checks first. - -Check the versions of OpenVINO Runtime and Development Tools -############################################################# - -* To check the version of OpenVINO Development Tools, use the following command: - - - .. tab-set:: - - .. tab-item:: Python - :sync: py - - .. code-block:: py - :force: - - from openvino.tools.mo import convert_model - ov_model = convert_model(version=True) - - .. tab-item:: CLI - :sync: cli - - .. 
code-block:: sh - - mo --version - - -* To check the version of OpenVINO Runtime, use the following code: - - .. code-block:: sh - - from openvino.runtime import get_version - get_version() - - - -Check the versions of Python and PIP -#################################### - -To check your Python version, run ``python -VV`` or ``python --version``. The supported Python versions should be 64-bit and between 3.7 and 3.11. If you are using Python 3.6, you are recommended to upgrade the version to 3.7 or higher. - -If your Python version does not meet the requirements, update Python: - -* For Windows, **do not install Python from a Windows Store** as it can cause issues. You are highly recommended to install Python from `official website `__ . -* For Linux and macOS systems, download and install a proper Python version from `official website `__ . See the `Python Beginners' Guide `__ for more information on selecting a version. Note that macOS 10.x comes with python 2.7 installed, which is not supported, so you must install Python from the official website. - -For PIP, make sure that you have installed the latest version. To check and upgrade your PIP version, run the following command: - -.. code-block:: sh - - python -m pip install --upgrade pip - - - - - -Check if required external dependencies are installed (for pre-2022.2 releases) -############################################################################### - -For OpenVINO releases prior to 2022.2: - -- If you are using Ubuntu or RHEL 8 systems, and installed OpenVINO Runtime via the archive file, APT, or YUM repository, and then decided to :doc:`install OpenVINO Development Tools `, make sure that you **Install External Software Dependencies** first by following the steps in the corresponding installation pages. -- For C++ developers with Windows systems, make sure that Microsoft Visual Studio 2019 with MSBuild and CMake 3.14 or higher (64-bit) are installed. While installing Microsoft Visual Studio 2019, make sure that you have selected **Desktop development with C++** in the **Workloads** tab. If not, launch the installer again to select that option. For more information on modifying the installation options for Microsoft Visual Studio, see its `official support page `__ . - -Check if environment variables are set correctly -################################################ - -- For Python developers, if you previously installed OpenVINO using the archive file, and are now installing OpenVINO using PIP, remove all the PATH settings and the lines with ``setupvars`` from ``.bashrc``. Note that if you installed OpenVINO with PIP in a virtual environment, you don't need to set any environment variables. -- If you have installed OpenVINO before, you probably have added ``setupvars`` to your ``PATH /.bashrc`` or Windows environment variables. After restarting your environment, you should see similar information as below: - - .. code-block:: sh - - [setupvars.sh] OpenVINO™ environment initialized - - - - If you don't see the information above, your PATH variables may be configured incorrectly. Check if you have typed the correct or you are trying to activate in the correct directory. - - If you added it to a ``.bashrc`` file, make sure that the command is correctly written and the file is found in the ``~/.bashrc`` folder. - -Verify that OpenVINO is correctly installed -########################################### - -* For Python developers, to verify if OpenVINO is correctly installed, use the following command: - - .. 
code-block:: sh - - python -c "from openvino.runtime import Core; print(Core().available_devices)" - - If OpenVINO was successfully installed, you will see a list of available devices. - -* If you installed OpenVINO Runtime using the archive file, you can search "openvino" in Apps & Features on a Windows system, or check your installation directory on Linux to see if OpenVINO is there. - -* If you installed OpenVINO Runtime from APT, use the ``apt list --installed | grep openvino`` command to list the installed OpenVINO packages. - -* If you installed OpenVINO Runtime from YUM, use the ``yum list installed 'openvino*'`` command to list the installed OpenVINO packages. - -Check if GPU driver is installed -################################ - -:doc:`Additional configurations ` may be required in order to use OpenVINO with different hardware such as Intel® GPUs. - -To run inference on an Intel® GPU, make sure that you have installed the correct GPU driver. To check that, see :doc:`additional configurations for GPU `. - -Check firewall and network settings -################################### - -Make sure that your firewall and network settings are configured correctly. For example, consider configuring system-wide proxy settings and specifying options for using PIP behind the proxy: - -.. code-block:: sh - - pip install --proxy http://address:port --trusted-host pypi.org openvino - - -For specific issues, see :ref:`Errors with Installing via PIP for Users in China ` and :ref:`proxy issues with installing OpenVINO on Linux from Docker `. - -@endsphinxdirective - diff --git a/docs/install_guides/troubleshooting.md b/docs/install_guides/troubleshooting.md index 986430ee13dd09..1343ea1b1ecc18 100644 --- a/docs/install_guides/troubleshooting.md +++ b/docs/install_guides/troubleshooting.md @@ -8,21 +8,174 @@ of OpenVINO™ on your system. -.. toctree:: - :maxdepth: 2 - :hidden: +.. _troubleshooting guide for install: - Issues & Solutions - Troubleshooting Steps +This guide provides general troubleshooting steps and solutions to possible issues that can be encountered while installing and configuring OpenVINO™. +.. _install_for_prc: -.. _troubleshooting guide for install: +.. dropdown:: Errors with Installing via PIP for Users in China -This guide provides general troubleshooting steps and solutions to possible issues that can be encountered while installing and configuring OpenVINO™. + Users in China might encounter errors while downloading sources via PIP during OpenVINO™ installation. To resolve the issues, try one of the following options: + + * Add the download source using the ``-i`` parameter with the Python ``pip`` command. For example: + + .. code-block:: sh + + pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/ + + Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``. + You can also run the following command to install specific framework. For example: + + .. code-block:: sh + + pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/ + + + * For C++ developers, if you have installed OpenVINO Runtime via APT, YUM, or the archive file, and then installed OpenVINO Development Tools via PyPI, you may run into issues. To resolve that, install the components in ``requirements.txt`` by using the following command: + + .. code-block:: sh + + pip install -r /tools/requirements.txt + + For APT and YUM users, replace the ``INSTALL_DIR`` with ``/usr/share/openvino``. + + + +.. 
dropdown:: Issues with Installing OpenVINO on Linux from Docker + + .. _proxy-issues: + + Proxy Issues + ++++++++++++ + + If you met proxy issues during the installation with Docker, you need set up proxy settings for Docker. See the `Docker guide `__ for more details. + + .. _yocto_install_issues: + +.. dropdown:: Issues with Creating a Yocto Image for OpenVINO + + Error while adding "meta-intel" layer + +++++++++++++++++++++++++++++++++++++ + + When using the ``bitbake-layers add-layer meta-intel`` command, the following error might occur: + + .. code-block:: sh + + NOTE: Starting bitbake server... + ERROR: The following required tools (as specified by HOSTTOOLS) appear to be unavailable in PATH, please install them in order to proceed: chrpath diffstat pzstd zstd + + + To resolve the issue, install the ``chrpath diffstat zstd`` tools: + + .. code-block:: sh + + sudo apt-get install chrpath diffstat zstd + + 3. If you run into issues while installing or configuring OpenVINO™, you can try the following methods to do some quick checks first. + +.. dropdown:: Check the versions of OpenVINO Runtime and Development Tools + + + * To check the version of OpenVINO Development Tools, use the following command: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model(version=True) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --version + + + * To check the version of OpenVINO Runtime, use the following code: + + .. code-block:: sh + + from openvino.runtime import get_version + get_version() + + +.. dropdown:: Check the versions of Python and PIP + + To check your Python version, run ``python -VV`` or ``python --version``. The supported Python versions should be 64-bit and between 3.7 and 3.11. If you are using Python 3.6, you are recommended to upgrade the version to 3.7 or higher. + + If your Python version does not meet the requirements, update Python: + + * For Windows, **do not install Python from a Windows Store** as it can cause issues. You are highly recommended to install Python from `official website `__ . + * For Linux and macOS systems, download and install a proper Python version from `official website `__ . See the `Python Beginners' Guide `__ for more information on selecting a version. Note that macOS 10.x comes with python 2.7 installed, which is not supported, so you must install Python from the official website. + + For PIP, make sure that you have installed the latest version. To check and upgrade your PIP version, run the following command: + + .. code-block:: sh + + python -m pip install --upgrade pip + + + + + +.. dropdown:: Check if environment variables are set correctly + + - For Python developers, if you previously installed OpenVINO using the archive file, and are now installing OpenVINO using PIP, remove all the PATH settings and the lines with ``setupvars`` from ``.bashrc``. Note that if you installed OpenVINO with PIP in a virtual environment, you don't need to set any environment variables. + - If you have installed OpenVINO before, you probably have added ``setupvars`` to your ``PATH /.bashrc`` or Windows environment variables. After restarting your environment, you should see similar information as below: + + .. code-block:: sh + + [setupvars.sh] OpenVINO™ environment initialized + + + - If you don't see the information above, your PATH variables may be configured incorrectly. 
Check if you have typed the correct or you are trying to activate in the correct directory. + - If you added it to a ``.bashrc`` file, make sure that the command is correctly written and the file is found in the ``~/.bashrc`` folder. + +.. dropdown:: Verify that OpenVINO is correctly installed + + * For Python developers, to verify if OpenVINO is correctly installed, use the following command: + + .. code-block:: sh + + python -c "from openvino.runtime import Core; print(Core().available_devices)" + + If OpenVINO was successfully installed, you will see a list of available devices. + + * If you installed OpenVINO Runtime using the archive file, you can search "openvino" in Apps & Features on a Windows system, or check your installation directory on Linux to see if OpenVINO is there. + + * If you installed OpenVINO Runtime from APT, use the ``apt list --installed | grep openvino`` command to list the installed OpenVINO packages. + + * If you installed OpenVINO Runtime from YUM, use the ``yum list installed 'openvino*'`` command to list the installed OpenVINO packages. + +.. dropdown:: Check if GPU driver is installed + + :doc:`Additional configurations ` may be required in order to use OpenVINO with different hardware such as Intel® GPUs. + + To run inference on an Intel® GPU, make sure that you have installed the correct GPU driver. To check that, see :doc:`additional configurations for GPU `. + +.. dropdown:: Check firewall and network settings + + Make sure that your firewall and network settings are configured correctly. For example, consider configuring system-wide proxy settings and specifying options for using PIP behind the proxy: + + .. code-block:: sh + + pip install --proxy http://address:port --trusted-host pypi.org openvino + + For specific issues, see :ref:`Errors with Installing via PIP for Users in China ` and :ref:`proxy issues with installing OpenVINO on Linux from Docker `. + +.. dropdown:: Check if required external dependencies are installed (for pre-2022.2 releases) -The :doc:`Issues & Solutions ` page lists common installation and configuration errors, and their possible solutions. If you encountered a specific error while installing or configuring OpenVINO, check this page to see if there is a solution. + For OpenVINO releases prior to 2022.2: -The :doc:`Troubleshooting Steps ` page provides a set of instructions for diagnosing and resolving installation and configuration issues. If you had problems during installation and configuration, walk through these steps to try and resolve your issue. + - If you are using Ubuntu or RHEL 8 systems, and installed OpenVINO Runtime via the archive file, APT, or YUM repository, and then decided to :doc:`install OpenVINO Development Tools `, make sure that you **Install External Software Dependencies** first by following the steps in the corresponding installation pages. -@endsphinxdirective + - For C++ developers with Windows systems, make sure that Microsoft Visual Studio 2019 with MSBuild and CMake 3.14 or higher (64-bit) are installed. While installing Microsoft Visual Studio 2019, make sure that you have selected **Desktop development with C++** in the **Workloads** tab. If not, launch the installer again to select that option. For more information on modifying the installation options for Microsoft Visual Studio, see its `official support page `__ . 
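Editor's note, a hedged complement to the "Verify that OpenVINO is correctly installed" and "Check if GPU driver is installed" dropdowns above (not part of the patch): the same ``Core().available_devices`` call already shown there can be used from a short script to confirm whether the GPU plugin actually sees a device. The device names in the sketch are illustrative and depend on your hardware and driver.

.. code-block:: py

   # Minimal sketch: list the devices OpenVINO can see and flag a missing GPU.
   # A "GPU" entry appears only when a supported Intel GPU and its driver are
   # correctly installed; otherwise only CPU (and any other plugins) show up.
   from openvino.runtime import Core

   devices = Core().available_devices  # e.g. ['CPU'] or ['CPU', 'GPU']
   if any(d.startswith("GPU") for d in devices):
       print("GPU plugin found:", devices)
   else:
       print("No GPU device detected; check the GPU driver installation.", devices)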
+@endsphinxdirective \ No newline at end of file diff --git a/docs/optimization_guide/nncf/ptq/basic_quantization_flow.md b/docs/optimization_guide/nncf/ptq/basic_quantization_flow.md index 5fc93dcc34405e..2b31176f1ef102 100644 --- a/docs/optimization_guide/nncf/ptq/basic_quantization_flow.md +++ b/docs/optimization_guide/nncf/ptq/basic_quantization_flow.md @@ -191,7 +191,7 @@ Tune quantization parameters regex = '.*layer_.*' nncf.quantize(model, dataset, ignored_scope=nncf.IgnoredScope(patterns=regex)) -* ``target_device`` - defines the target device, the specificity of which will be taken into account during optimization. The following values are supported: ``ANY`` (default), ``CPU``, ``CPU_SPR``, ``GPU``, and ``VPU``. +* ``target_device`` - defines the target device, the specificity of which will be taken into account during optimization. The following values are supported: ``ANY`` (default), ``CPU``, ``CPU_SPR``, ``GPU``, and ``NPU``. .. code-block:: sh diff --git a/docs/ovsa/ovsa_get_started.md b/docs/ovsa/ovsa_get_started.md index e7e47da386a16f..dd80c2c1b8a372 100644 --- a/docs/ovsa/ovsa_get_started.md +++ b/docs/ovsa/ovsa_get_started.md @@ -22,55 +22,26 @@ The OpenVINO™ Security Add-on works with the :doc:`OpenVINO™ Model Server - - -- The Model Developer generates a access controlled model from the OpenVINO™ toolkit output. The access controlled model uses the model's Intermediate Representation (IR) files to create a access controlled output file archive that are distributed to Model Users. The Developer can also put the archive file in long-term storage or back it up without additional security. - -- The Model Developer uses the OpenVINO™ Security Add-on Tool (ovsatool) to generate and manage cryptographic keys and related collateral for the access controlled models. Cryptographic material is only available in a virtual machine (VM) environment. The OpenVINO™ Security Add-on key management system lets the Model Developer to get external Certificate Authorities to generate certificates to add to a key-store. - -- The Model Developer generates user-specific licenses in a JSON format file for the access controlled model. The Model Developer can define global or user-specific licenses and attach licensing policies to the licenses. For example, the Model Developer can add a time limit for a model or limit the number of times a user can run a model. - - -.. raw:: html - - - - -.. raw:: html - -
- - -- The Independent Software Vendor hosts the OpenVINO™ Security Add-on License Service, which responds to license validation requests when a user attempts to load a access controlled model in a model server. The licenses are registered with the OpenVINO™ Security Add-on License Service. - -- When a user loads the model, the OpenVINO™ Security Add-on Runtime contacts the License Service to make sure the license is valid and within the parameters that the Model Developer defined with the OpenVINO™ Security Add-on Tool (ovsatool). The user must be able to reach the Independent Software Vendor's License Service over the Internet. - -.. raw:: html - -
- - -.. raw:: html - -
+.. dropdown:: OpenVINO™ Security Add-on Tool: As a Model Developer or Independent Software Vendor, you use the OpenVINO™ Security Add-on Tool (``ovsatool``) to generate a access controlled model and master license. + + - The Model Developer generates a access controlled model from the OpenVINO™ toolkit output. The access controlled model uses the model's Intermediate Representation (IR) files to create a access controlled output file archive that are distributed to Model Users. The Developer can also put the archive file in long-term storage or back it up without additional security. + - The Model Developer uses the OpenVINO™ Security Add-on Tool (ovsatool) to generate and manage cryptographic keys and related collateral for the access controlled models. Cryptographic material is only available in a virtual machine (VM) environment. The OpenVINO™ Security Add-on key management system lets the Model Developer to get external Certificate Authorities to generate certificates to add to a key-store. + - The Model Developer generates user-specific licenses in a JSON format file for the access controlled model. The Model Developer can define global or user-specific licenses and attach licensing policies to the licenses. For example, the Model Developer can add a time limit for a model or limit the number of times a user can run a model. +.. dropdown:: OpenVINO™ Security Add-on License Service: Use the OpenVINO™ Security Add-on License Service to verify user parameters. -Users host the OpenVINO™ Security Add-on Runtime component in a virtual machine. + - The Independent Software Vendor hosts the OpenVINO™ Security Add-on License Service, which responds to license validation requests when a user attempts to load a access controlled model in a model server. The licenses are registered with the OpenVINO™ Security Add-on License Service. + - When a user loads the model, the OpenVINO™ Security Add-on Runtime contacts the License Service to make sure the license is valid and within the parameters that the Model Developer defined with the OpenVINO™ Security Add-on Tool (``ovsatool``). The user must be able to reach the Independent Software Vendor's License Service over the Internet. -Externally from the OpenVINO™ Security Add-on, the User adds the access controlled model to the OpenVINO™ Model Server config file. The OpenVINO™ Model Server attempts to load the model in memory. At this time, the OpenVINO™ Security Add-on Runtime component validates the user's license for the access controlled model against information stored in the License Service provided by the Independent Software Vendor. +.. dropdown:: OpenVINO™ Security Add-on Runtime: Users install and use the OpenVINO™ Security Add-on Runtime on a virtual machine. -After the license is successfully validated, the OpenVINO™ Model Server loads the model and services the inference requests. + Users host the OpenVINO™ Security Add-on Runtime component in a virtual machine. + Externally from the OpenVINO™ Security Add-on, the User adds the access controlled model to the OpenVINO™ Model Server config file. The OpenVINO™ Model Server attempts to load the model in memory. At this time, the OpenVINO™ Security Add-on Runtime component validates the user's license for the access controlled model against information stored in the License Service provided by the Independent Software Vendor. -.. raw:: html - -
+ After the license is successfully validated, the OpenVINO™ Model Server loads the model and services the inference requests. -
**Where the OpenVINO™ Security Add-on Fits into Model Development and Deployment** diff --git a/docs/resources/prerelease_information.md b/docs/resources/prerelease_information.md index 3595bf61403d0b..162d4e16c85383 100644 --- a/docs/resources/prerelease_information.md +++ b/docs/resources/prerelease_information.md @@ -53,8 +53,6 @@ Please file a github Issue on these with the label “pre-release” so we can g - Various performance improvements for StableDiffusion, SegmentAnything, U-Net, and Large Language models. - Optimized dGPU performance through the integration of oneDNN 3.2 and fusion optimizations for MVN, Crop+Concat, permute, etc. - * GNA Runtime: Addressed stability issues on the Gemini Lake platform related to GNA device plugin creation (ref: #115949). - * Frameworks: - PyTorch Updates: OpenVINO now supports originally quantized PyTorch models, including models produced with the Neural Network Compression Framework (NNCF). diff --git a/samples/cpp/benchmark_app/utils.cpp b/samples/cpp/benchmark_app/utils.cpp index 10cc2c3163ca59..af0691693545d9 100644 --- a/samples/cpp/benchmark_app/utils.cpp +++ b/samples/cpp/benchmark_app/utils.cpp @@ -61,7 +61,7 @@ size_t InputInfo::depth() const { uint32_t device_default_device_duration_in_seconds(const std::string& device) { static const std::map deviceDefaultDurationInSeconds{{"CPU", 60}, {"GPU", 60}, - {"VPU", 60}, + {"NPU", 60}, {"UNKNOWN", 120}}; uint32_t duration = 0; for (const auto& deviceDurationInSeconds : deviceDefaultDurationInSeconds) { diff --git a/samples/cpp/speech_sample/README.md b/samples/cpp/speech_sample/README.md index 2771f141923454..188a5b43cbc2c6 100644 --- a/samples/cpp/speech_sample/README.md +++ b/samples/cpp/speech_sample/README.md @@ -96,7 +96,7 @@ Several execution modes are supported via the ``-d`` flag: - ``CPU`` - All calculations are performed on CPU device using CPU Plugin. - ``GPU`` - All calculations are performed on GPU device using GPU Plugin. -- ``VPU`` - All calculations are performed on VPU device using VPU Plugin. +- ``NPU`` - All calculations are performed on NPU device using NPU Plugin. - ``GNA_AUTO`` - GNA hardware is used if available and the driver is installed. Otherwise, the GNA device is emulated in fast-but-not-bit-exact mode. - ``GNA_HW`` - GNA hardware is used if available and the driver is installed. Otherwise, an error will occur. - ``GNA_SW`` - Deprecated. The GNA device is emulated in fast-but-not-bit-exact mode. @@ -144,7 +144,7 @@ Usage message: -i "" Required. Path(s) to input file(s). Usage for a single file/layer: or . Example of usage for several files/layers: :=,:=. -m "" Required. Path to an .xml file with a trained model (required if -rg is missing). -o "" Optional. Output file name(s) to save scores (inference results). Example of usage for a single file/layer: or . Example of usage for several files/layers: :=,:=. - -d "" Optional. Specify a target device to infer on. CPU, GPU, VPU, GNA_AUTO, GNA_HW, GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified. + -d "" Optional. Specify a target device to infer on. CPU, GPU, NPU, GNA_AUTO, GNA_HW, GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified. -pc Optional. 
Enables per-layer performance report. -q "" Optional. Input quantization mode for GNA: static (default) or user defined (use with -sf). -qb "" Optional. Weight resolution in bits for GNA quantization: 8 or 16 (default) @@ -162,7 +162,7 @@ Usage message: -compile_target "" Optional. Specify GNA compile target generation. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. By default, generation corresponds to the GNA HW available in the system or the latest fully supported generation by the software. See the GNA Plugin's GNA_COMPILE_TARGET config option description. -memory_reuse_off Optional. Disables memory optimizations for compiled model. - Available target devices: CPU GNA GPU VPU + Available target devices: CPU GNA GPU NPU .. _model-preparation-speech: diff --git a/samples/cpp/speech_sample/speech_sample.hpp b/samples/cpp/speech_sample/speech_sample.hpp index 9ba602a55e3d39..0bc3a24c950d51 100644 --- a/samples/cpp/speech_sample/speech_sample.hpp +++ b/samples/cpp/speech_sample/speech_sample.hpp @@ -24,7 +24,7 @@ static const char model_message[] = "Required. Path to an .xml file with a train /// @brief message for assigning calculation to device static const char target_device_message[] = - "Optional. Specify a target device to infer on. CPU, GPU, VPU, GNA_AUTO, GNA_HW, " + "Optional. Specify a target device to infer on. CPU, GPU, NPU, GNA_AUTO, GNA_HW, " "GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, " "GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU" " as a secondary (e.g. HETERO:GNA,CPU) are supported. " @@ -274,7 +274,7 @@ bool parse_and_check_command_line(int argc, char* argv[]) { "HETERO:GNA_HW,CPU", "HETERO:GNA_SW_EXACT,CPU", "HETERO:GNA_SW_FP32,CPU", - "VPU"}; + "NPU"}; if (std::find(supportedDevices.begin(), supportedDevices.end(), FLAGS_d) == supportedDevices.end()) { throw std::logic_error("Specified device is not supported."); diff --git a/samples/python/speech_sample/README.md b/samples/python/speech_sample/README.md index ba765f1e127f8a..a4120b10ebfd7d 100644 --- a/samples/python/speech_sample/README.md +++ b/samples/python/speech_sample/README.md @@ -95,7 +95,7 @@ Several execution modes are supported via the ``-d`` flag: - ``CPU`` - All calculations are performed on CPU device using CPU Plugin. - ``GPU`` - All calculations are performed on GPU device using GPU Plugin. -- ``VPU`` - All calculations are performed on VPU device using VPU Plugin. +- ``NPU`` - All calculations are performed on NPU device using NPU Plugin. - ``GNA_AUTO`` - GNA hardware is used if available and the driver is installed. Otherwise, the GNA device is emulated in fast-but-not-bit-exact mode. - ``GNA_HW`` - GNA hardware is used if available and the driver is installed. Otherwise, an error will occur. - ``GNA_SW`` - Deprecated. The GNA device is emulated in fast-but-not-bit-exact mode. @@ -155,7 +155,7 @@ Usage message: Usage for a single file/layer: or . Example of usage for several files/layers: :=,:=. -d DEVICE, --device DEVICE - Optional. Specify a target device to infer on. CPU, GPU, VPU, GNA_AUTO, GNA_HW, GNA_SW_FP32, + Optional. Specify a target device to infer on. CPU, GPU, NPU, GNA_AUTO, GNA_HW, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified. Default value is CPU. 
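Editor's note, an illustrative sketch only (not part of the patch): the next hunk updates ``arg_parser.py`` for the VPU-to-NPU rename. A stripped-down version of that argument definition looks like the following, with the help text mirroring the README usage message above; the real parser defines many more options.

.. code-block:: py

   # Minimal sketch of the sample's -d/--device option after the VPU -> NPU rename.
   import argparse

   parser = argparse.ArgumentParser(description='speech sample (illustrative fragment)')
   parser.add_argument('-d', '--device', default='CPU', type=str,
                       help='Optional. Specify a target device to infer on. '
                            'CPU, GPU, NPU, GNA_AUTO, GNA_HW, GNA_SW_FP32, GNA_SW_EXACT and '
                            'HETERO with combination of GNA as the primary device and CPU as a '
                            'secondary (e.g. HETERO:GNA,CPU) are supported. Default value is CPU.')

   args = parser.parse_args(['-d', 'NPU'])
   print(args.device)  # NPU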
diff --git a/samples/python/speech_sample/arg_parser.py b/samples/python/speech_sample/arg_parser.py index d6e8c41d83404a..c498eb286df2dd 100644 --- a/samples/python/speech_sample/arg_parser.py +++ b/samples/python/speech_sample/arg_parser.py @@ -32,7 +32,7 @@ def build_arg_parser() -> argparse.ArgumentParser: 'Example of usage for several files/layers: :=,:=.') args.add_argument('-d', '--device', default='CPU', type=str, help='Optional. Specify a target device to infer on. ' - 'CPU, GPU, VPU, GNA_AUTO, GNA_HW, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA' + 'CPU, GPU, NPU, GNA_AUTO, GNA_HW, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA' ' as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. ' 'The sample will look for a suitable plugin for device specified. Default value is CPU.') args.add_argument('-bs', '--batch_size', type=int, choices=range(1, 9), metavar='[1-8]', diff --git a/src/bindings/python/CMakeLists.txt b/src/bindings/python/CMakeLists.txt index facfb2218312e6..2e41c7e7802fbd 100644 --- a/src/bindings/python/CMakeLists.txt +++ b/src/bindings/python/CMakeLists.txt @@ -364,5 +364,8 @@ if(ENABLE_TESTS) endif() if(OpenVINODeveloperPackage_FOUND) + # provides a callback function to describe each component in repo + include("${OpenVINO_SOURCE_DIR}/cmake/packaging/packaging.cmake") + ie_cpack(${IE_CPACK_COMPONENTS_ALL}) endif() diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index cabd454463101f..d4c8d587644996 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -2,7 +2,7 @@ numpy>=1.16.6,<1.26 # Python bindings, frontends # pytest -pytest>=5.0,<7.4 +pytest>=5.0,<7.5 pytest-dependency==0.5.1 pytest-html==3.2.0 pytest-timeout==2.1.0 diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index 4c410bb42ce161..1090dce0163ced 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -58,8 +58,15 @@ def __init__(self, pt_module, graph_element=None, example_input=None, alias_db=N self.pt_module = pt_module self.raw_inputs = list(self.graph_element.inputs()) self.raw_outputs = list(self.graph_element.outputs()) - if self._input_signature is not None and "self" in self.raw_inputs[0].debugName(): - self._input_signature.insert(0, "self") + if self._input_signature is not None: + if "self" in self.raw_inputs[0].debugName(): + self._input_signature.insert(0, "self") + if 0 < len(self._input_signature) < len(self.raw_inputs): + # last input is args input, we need to multiply that name by number of extra inputs + self._input_signature = self._input_signature[:-1] + n = len(self._input_signature) + for i in range(len(self.raw_inputs) - n): + self._input_signature.append(self.raw_inputs[i + n].debugName()) if isinstance(self.graph_element, torch.Graph): self._transform_tensor_list_constants_to_listconstruct(self.graph_element) diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 1ff7ce8a41be11..0953f75c06e629 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -103,6 +103,7 @@ void regmodule_properties(py::module m) { wrap_property_RO(m_intel_gpu, ov::intel_gpu::memory_statistics, 
"memory_statistics"); wrap_property_RW(m_intel_gpu, ov::intel_gpu::enable_loop_unrolling, "enable_loop_unrolling"); + wrap_property_RW(m_intel_gpu, ov::intel_gpu::disable_winograd_convolution, "disable_winograd_convolution"); // Submodule hint (intel_gpu) py::module m_intel_gpu_hint = m_intel_gpu.def_submodule( diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index a89f1578550cfc..f9ecbf9e8ee52c 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -328,6 +328,11 @@ def test_properties_ro(ov_property_ro, expected_value): "GPU_ENABLE_LOOP_UNROLLING", ((True, True),), ), + ( + properties.intel_gpu.disable_winograd_convolution, + "GPU_DISABLE_WINOGRAD_CONVOLUTION", + ((True, True),), + ), ( properties.intel_gpu.hint.queue_throttle, "GPU_QUEUE_THROTTLE", @@ -501,6 +506,7 @@ def test_single_property_setting(device): properties.hint.scheduling_core_type(): properties.hint.SchedulingCoreType.PCORE_ONLY, properties.hint.num_requests(): 12, "NUM_STREAMS": properties.streams.Num(5), + "ENABLE_MMAP": "NO", }, ], ) diff --git a/src/cmake/ie_parallel.cmake b/src/cmake/ie_parallel.cmake index 259e4152c21946..eb0b37762089a9 100644 --- a/src/cmake/ie_parallel.cmake +++ b/src/cmake/ie_parallel.cmake @@ -69,14 +69,7 @@ endfunction() macro(ov_find_package_tbb) if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND) - # conan generates TBBConfig.cmake files, which follows cmake's - # SameMajorVersion scheme, while TBB itself follows AnyNewerVersion one - # see https://cmake.org/cmake/help/latest/module/CMakePackageConfigHelpers.html#generating-a-package-version-file - if(CMAKE_TOOLCHAIN_FILE MATCHES "conan_toolchain.cmake" OR CONAN_EXPORTED) - set(_ov_minimal_tbb_version 2021.0) - else() - set(_ov_minimal_tbb_version 2017.0) - endif() + set(_ov_minimal_tbb_version 2017.0) if(NOT ENABLE_SYSTEM_TBB) if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.24) diff --git a/src/common/offline_transformations/src/compress_quantize_weigths.cpp b/src/common/offline_transformations/src/compress_quantize_weigths.cpp index 294178c979cedf..8c0faa85a84950 100644 --- a/src/common/offline_transformations/src/compress_quantize_weigths.cpp +++ b/src/common/offline_transformations/src/compress_quantize_weigths.cpp @@ -9,6 +9,7 @@ #include #include #include +#include static bool has_dequantization_subgraph(const std::shared_ptr& first_convert) { auto first_convert_users = first_convert->get_users(); @@ -65,6 +66,11 @@ ngraph::pass::CompressQuantizeWeights::CompressQuantizeWeights() { const auto& pattern_value_map = m.get_pattern_value_map(); const auto& input_type = fq->get_element_type(); + const auto& fq_data_input = fq->get_input_node_shared_ptr(0); + bool are_weights_decompressed = is_decompression(fq_data_input); + if (are_weights_decompressed) { + unmark_as_decompression(fq_data_input); + } // skip dequantize part if there is already dequantization subgraph after FakeQuantize auto fq_users = fq->get_users(); @@ -83,6 +89,9 @@ ngraph::pass::CompressQuantizeWeights::CompressQuantizeWeights() { } return true; } else { + if (are_weights_decompressed) { + mark_as_decompression(fq_data_input); + } return false; } } else { @@ -102,9 +111,6 @@ ngraph::pass::CompressQuantizeWeights::CompressQuantizeWeights() { const auto& weights_const = pattern_value_map.at(weights_const_pattern); Output input_low = pattern_value_map.at(input_low_pattern); Output input_high 
= pattern_value_map.at(input_high_pattern); - const auto& fq_data_input = pattern_value_map.count(weigths_convert_pattern) - ? pattern_value_map.at(weigths_convert_pattern) - : weights_const; auto quantize = fq->clone_with_new_inputs({fq_data_input, input_low, input_high, new_output_low, new_output_high}); // Convert quantized weights to low precision type @@ -115,6 +121,9 @@ ngraph::pass::CompressQuantizeWeights::CompressQuantizeWeights() { OPENVINO_SUPPRESS_DEPRECATED_END new_weights = constant; } else { + if (are_weights_decompressed) { + mark_as_decompression(fq_data_input); + } return false; } new_weights->set_friendly_name(weights_const.get_node()->get_friendly_name()); diff --git a/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp b/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp index 203f493b3fd66c..3b19a4b83ebe11 100644 --- a/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp @@ -17,7 +17,7 @@ namespace pass { class TRANSFORMATIONS_API AlignEltwiseInputRanks : public MatcherPass { public: - OPENVINO_RTTI("TRANSFORMATIONS_API", "0"); + OPENVINO_RTTI("AlignEltwiseInputRanks", "0"); AlignEltwiseInputRanks(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp b/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp index f396fff6661739..cc02e475a60e53 100644 --- a/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp @@ -31,8 +31,10 @@ class ov::pass::EliminateUnsqueezeGather : public ov::pass::MatcherPass { /** * @ingroup ie_transformation_common_api - * @brief Remove Gather -> Unsqueeze pair, if Gather takes a scalar and - * Unsqueeze makes it a 1D tensor + * @brief Matches Gather ->[Binary Operation]-> Unsqueeze + * If axis for Gather and Unsqueeze is the same and Gather indices are scalar Unsqueeze is being removed and indices + * become 1D. 
Must be executed after SharedOpOptimization -- It is possible to have multiple similar Unsqueeze + * operations after Gather, so they must be optimized beforehand */ class ov::pass::EliminateGatherUnsqueeze : public ov::pass::MatcherPass { diff --git a/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp b/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp index 2edfad58df5d71..07acd276d5222a 100644 --- a/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp @@ -18,7 +18,7 @@ namespace pass { class TRANSFORMATIONS_API MatMulConstTransposesExtraction : public MatcherPass { public: - OPENVINO_RTTI("TRANSFORMATIONS_API", "0"); + OPENVINO_RTTI("MatMulConstTransposesExtraction", "0"); MatMulConstTransposesExtraction(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp b/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp index 3cca0736f5bd54..6187d8ea4b6e7c 100644 --- a/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp @@ -23,6 +23,7 @@ class TRANSFORMATIONS_API EliminateTranspose; class TRANSFORMATIONS_API EliminateNopBroadcast; class TRANSFORMATIONS_API NopSliceBeforeGatherElements; class TRANSFORMATIONS_API NopElimination; +class TRANSFORMATIONS_API PrepareShapeOpsForEliminationAroundBE; } // namespace pass } // namespace ov @@ -124,7 +125,7 @@ class ov::pass::NopElimination : public GraphRewrite { }; /** - * @ingroup ie_transformation_comm on_api + * @ingroup ie_transformation_common_api * @brief EliminateSplit eliminates split+concat pairs which do nothing */ class ov::pass::EliminateSplitConcat : public ov::pass::MatcherPass { @@ -134,7 +135,7 @@ class ov::pass::EliminateSplitConcat : public ov::pass::MatcherPass { }; /** - * @ingroup ie_transformation_comm on_api + * @ingroup ie_transformation_common_api * @brief EliminateNopBroadcast eliminates broadcast or tile with all ones on the second input */ class ov::pass::EliminateNopBroadcast : public ov::pass::MatcherPass { @@ -144,7 +145,7 @@ class ov::pass::EliminateNopBroadcast : public ov::pass::MatcherPass { }; /** - * @ingroup ie_transformation_comm on_api + * @ingroup ie_transformation_common_api * @brief NopSliceBeforeGatherElements eliminates slice before GElements if slicing from 0 * It is valid since GatherElements doesn't support negative indices and Slice won't affect * indexing of elements in the original tensor that GatherElements would like to take @@ -154,3 +155,15 @@ class ov::pass::NopSliceBeforeGatherElements : public ov::pass::MatcherPass { OPENVINO_RTTI("NopSliceBeforeGatherElements", "0"); NopSliceBeforeGatherElements(); }; + +/** + * @ingroup ie_transformation_common_api + * @brief PrepareShapeOpsForEliminationAroundBE works on the subgraph like + * Reshape/Squeeze/Unsqueeze -> BinaryElementwiseOperation -> Reshape/Squeeze/Unsqueeze + * and prepares it for the following optimizations by moving bottom op up through Binary op + */ +class ov::pass::PrepareShapeOpsForEliminationAroundBE : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("PrepareShapeOpsForEliminationAroundBE", "0"); + 
PrepareShapeOpsForEliminationAroundBE(); +}; diff --git a/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp b/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp index 20cf7a0ca84884..b7f1a18b730d38 100644 --- a/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp @@ -16,6 +16,7 @@ class TRANSFORMATIONS_API StridedSliceOptimization; class TRANSFORMATIONS_API UselessStridedSliceEraser; class TRANSFORMATIONS_API SharedStridedSliceEraser; class TRANSFORMATIONS_API GroupedStridedSliceOptimizer; +class TRANSFORMATIONS_API GroupedSliceToVSplitOptimization; } // namespace pass } // namespace ov @@ -55,6 +56,18 @@ class ov::pass::GroupedStridedSliceOptimizer : public ov::pass::ModelPass { bool run_on_model(const std::shared_ptr& m) override; }; +/** + * @ingroup ie_transformation_common_api + * @brief GroupedSliceToVSplitOptimization transformation replaces group of Slice + * operations with VariadicSplit. All Slice operations must slice data + * with the same axis and step = 1. + */ +class ov::pass::GroupedSliceToVSplitOptimization : public ov::pass::ModelPass { +public: + OPENVINO_RTTI("GroupedSliceToVSplitOptimization", "0"); + bool run_on_model(const std::shared_ptr& m) override; +}; + /** * @ingroup ie_transformation_common_api * @brief StridedSliceOptimization transformation executes all transformations diff --git a/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp b/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp index 079b2a62764256..a14547c280f184 100644 --- a/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp @@ -13,7 +13,6 @@ namespace ov { namespace pass { class TRANSFORMATIONS_API SimplifyShapeOfSubGraph; -class TRANSFORMATIONS_API SharedShapeOf; class TRANSFORMATIONS_API GroupedGatherElimination; class TRANSFORMATIONS_API GatherNopElimination; class TRANSFORMATIONS_API SimplifyGatherShapeOf; @@ -22,18 +21,6 @@ class TRANSFORMATIONS_API SimplifySecondInputOfReshape; } // namespace pass } // namespace ov -/** - * @ingroup ie_transformation_common_api - * @brief SharedShapeOf transformation replaces group of ShapeOf - * operations with the first ShapeOf in this group. All ShapeOfs in this group - * must be equal and consume the same output port. 
- */ -class ov::pass::SharedShapeOf : public ov::pass::ModelPass { -public: - OPENVINO_RTTI("SharedShapeOf", "0"); - bool run_on_model(const std::shared_ptr& m) override; -}; - /** * @ingroup ie_transformation_common_api * @brief GroupedGatherElimination transformation replaces group of Gather diff --git a/src/common/transformations/src/transformations/common_optimizations/eliminate_unsqueeze_gather.cpp b/src/common/transformations/src/transformations/common_optimizations/eliminate_unsqueeze_gather.cpp index 9342c9ba276b25..4fb8659184878f 100644 --- a/src/common/transformations/src/transformations/common_optimizations/eliminate_unsqueeze_gather.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/eliminate_unsqueeze_gather.cpp @@ -4,8 +4,9 @@ #include "transformations/common_optimizations/eliminate_unsqueeze_gather.hpp" -#include -#include +#include +#include +#include #include #include "itt.hpp" @@ -13,26 +14,27 @@ #include "openvino/op/reshape.hpp" #include "openvino/op/unsqueeze.hpp" +using namespace ov; +using namespace ov::op; +using namespace ov::op::util; +using namespace ov::pass::pattern; + ov::pass::EliminateUnsqueezeGather::EliminateUnsqueezeGather() { MATCHER_SCOPE(EliminateUnsqueezeGather); // Remove Unsqueeze + Gather pair, if Gather gathers data by `1` dimension that was previously added by Unsqueeze - const auto unsqueezeAxis = pass::pattern::any_input(); - const auto unsqueezeInput = pass::pattern::any_input(); - const auto unsqueeze = - ngraph::pattern::wrap_type({unsqueezeInput, unsqueezeAxis}, pattern::consumers_count(1)); - const auto gatherIndices = ov::op::v0::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); - const auto gatherAxis = pass::pattern::any_input(); - const auto gather = ngraph::pattern::wrap_type({unsqueeze, gatherIndices, gatherAxis}); - - ov::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto unsqueezeAxis = any_input(); + const auto unsqueezeInput = any_input(); + const auto unsqueeze = wrap_type({unsqueezeInput, unsqueezeAxis}, consumers_count(1)); + const auto gatherIndices = v0::Constant::create(element::i64, Shape{}, {0}); + const auto gatherAxis = any_input(); + const auto gather = wrap_type({unsqueeze, gatherIndices, gatherAxis}); + + ov::matcher_pass_callback callback = [=](Matcher& m) { auto& patternValue = m.get_pattern_value_map(); - const auto& m_unsqueezeAxis = patternValue.at(unsqueezeAxis); const auto& m_gatherAxis = patternValue.at(gatherAxis); - - const auto& unsqueezeAxisNode = - ngraph::as_type_ptr(m_unsqueezeAxis.get_node_shared_ptr()); - const auto& gatherAxisNode = ngraph::as_type_ptr(m_gatherAxis.get_node_shared_ptr()); + const auto& unsqueezeAxisNode = as_type_ptr(m_unsqueezeAxis.get_node_shared_ptr()); + const auto& gatherAxisNode = as_type_ptr(m_gatherAxis.get_node_shared_ptr()); if (!unsqueezeAxisNode || !gatherAxisNode) { return false; @@ -53,57 +55,49 @@ ov::pass::EliminateUnsqueezeGather::EliminateUnsqueezeGather() { const auto& m_unsqueeze = patternValue.at(unsqueeze); const auto& m_unsqueezeInput = patternValue.at(unsqueezeInput); - ngraph::copy_runtime_info(m_gather.get_node_shared_ptr(), m_unsqueeze.get_node_shared_ptr()); + copy_runtime_info(m_gather.get_node_shared_ptr(), m_unsqueeze.get_node_shared_ptr()); m_gather.replace(m_unsqueezeInput); return true; }; - auto m = std::make_shared(gather, matcher_name); + auto m = std::make_shared(gather, matcher_name); register_matcher(m, callback); } +bool scalar_with_one_consumer(const Output& out) { + return 
rank_equals(0)(out) && consumers_count(1)(out); +} + ov::pass::EliminateGatherUnsqueeze::EliminateGatherUnsqueeze() { MATCHER_SCOPE(EliminateGatherUnsqueeze); - const auto are_all_outputs_unsqueezes = [](const Output& out) -> bool { - const auto& target_inputs = out.get_target_inputs(); - bool res = out.get_partial_shape().rank() == 0 && !target_inputs.empty(); - for (const auto& target_input : target_inputs) { - if (!res) { - break; - } - auto unsqueeze = ov::as_type(target_input.get_node()); - res = unsqueeze != nullptr && unsqueeze->output(0).get_partial_shape().rank() == 1; - } - return res; - }; - const auto gather_indices_label = ngraph::pattern::wrap_type(pattern::rank_equals(0)); - const auto gather_axis_label = ngraph::pattern::wrap_type(); - const auto gather_label = ngraph::pattern::wrap_type( - {pass::pattern::any_input(), gather_indices_label, gather_axis_label}, - are_all_outputs_unsqueezes); - ov::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { - auto pattern_nodes = m.get_pattern_map(); + const auto gather_label = wrap_type(scalar_with_one_consumer); + const auto be_label = wrap_type( + {gather_label, any_input()}, + scalar_with_one_consumer); + const auto or_label = std::make_shared(OutputVector{gather_label, be_label}); + const auto unsqueeze_label = wrap_type({or_label, any_input()}, rank_equals(1)); - auto& gather_indices = pattern_nodes.at(gather_indices_label); + ov::matcher_pass_callback callback = [=](Matcher& m) { + auto pattern_nodes = m.get_pattern_map(); auto& gather = pattern_nodes.at(gather_label); - const auto& target_unsqueezes = gather->output(0).get_target_inputs(); - - auto new_indices = - ov::op::util::make_try_fold(gather_indices, - ov::op::v0::Constant::create(element::i32, {1}, {1}), - false); - auto new_gather = gather->clone_with_new_inputs({gather->input_value(0), new_indices, gather->input_value(2)}); - - new_gather->set_friendly_name(gather->get_friendly_name()); - ngraph::copy_runtime_info({gather}, {new_gather, new_indices}); - for (const auto& unsqueeze : target_unsqueezes) { - unsqueeze.get_node()->output(0).replace(new_gather); - } + auto& unsqueeze = pattern_nodes.at(unsqueeze_label); + const auto& indices = op::util::make_try_fold(gather->input_value(1), + v0::Constant::create(element::i32, {1}, {1}), + false); + register_new_node(indices); + gather->input(1).replace_source_output(indices->output(0)); + copy_runtime_info({unsqueeze, gather}, {indices, gather}); + replace_output_update_name(unsqueeze->output(0), unsqueeze->input_value(0)); + + // in order to have correct shapes for other matchers in the same graph rewrite we revalidate nodes + gather->revalidate_and_infer_types(); + if (pattern_nodes.count(be_label)) + pattern_nodes.at(be_label)->revalidate_and_infer_types(); return true; }; - auto m = std::make_shared(gather_label, matcher_name); + auto m = std::make_shared(unsqueeze_label, matcher_name); register_matcher(m, callback); } diff --git a/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp b/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp index 33d7decc8eb1ac..0806a8bc55ee7c 100644 --- a/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp @@ -829,6 +829,45 @@ ov::pass::NopSliceBeforeGatherElements::NopSliceBeforeGatherElements() { register_matcher(m, matcher_pass_callback); } 
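For context on the reworked EliminateGatherUnsqueeze shown above: the matcher now accepts a Gather producing a scalar, optionally followed by a scalar binary elementwise op, then an Unsqueeze or Reshape to rank 1, and instead of rebuilding the Gather it reshapes the Gather indices to Shape{1} so the trailing Unsqueeze becomes removable. Below is a minimal before/after sketch for illustration only; it is not part of this patch, and the op versions and element types are assumptions taken from the tests later in the diff.

#include <memory>
#include "openvino/core/model.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/gather.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/unsqueeze.hpp"

// Before: Gather produces a scalar that is immediately unsqueezed to rank 1.
std::shared_ptr<ov::Model> gather_unsqueeze_before() {
    using namespace ov;
    auto data    = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1});
    auto indices = std::make_shared<op::v0::Parameter>(element::i32, PartialShape{});
    auto axis    = op::v0::Constant::create(element::i32, Shape{}, {0});
    auto gather    = std::make_shared<op::v8::Gather>(data, indices, axis);  // scalar output
    auto unsqueeze = std::make_shared<op::v0::Unsqueeze>(gather, axis);      // rank-1 output
    return std::make_shared<Model>(OutputVector{unsqueeze}, ParameterVector{data, indices});
}

// After: the scalar indices are reshaped to Shape{1}, so Gather itself yields
// the rank-1 tensor and the Unsqueeze can be dropped.
std::shared_ptr<ov::Model> gather_unsqueeze_after() {
    using namespace ov;
    auto data    = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1});
    auto indices = std::make_shared<op::v0::Parameter>(element::i32, PartialShape{});
    auto axis    = op::v0::Constant::create(element::i32, Shape{}, {0});
    auto indices_1d = std::make_shared<op::v1::Reshape>(
        indices, op::v0::Constant::create(element::i32, Shape{1}, {1}), false);
    auto gather = std::make_shared<op::v8::Gather>(data, indices_1d, axis);
    return std::make_shared<Model>(OutputVector{gather}, ParameterVector{data, indices});
}
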
+ov::pass::PrepareShapeOpsForEliminationAroundBE::PrepareShapeOpsForEliminationAroundBE() { + MATCHER_SCOPE(PrepareShapeOpsForEliminationAroundBE); + auto first_label = pattern::wrap_type(pattern::rank_equals(0)); + auto other_input_label = pattern::any_input(pattern::rank_equals(0)); + auto binary_op_label = pattern::wrap_type({first_label, other_input_label}, + pattern::consumers_count(1)); + auto second_label = pattern::wrap_type({binary_op_label, pattern::any_input()}, + pattern::rank_equals(1)); + + ov::matcher_pass_callback matcher_pass_callback = [=](pattern::Matcher& m) { + const auto& pattern_to_node = m.get_pattern_map(); + + auto second_node = pattern_to_node.at(second_label); + auto binary = pattern_to_node.at(binary_op_label); + + auto lhs_node = + ov::op::util::clone_try_fold(second_node, {binary->input_value(0), second_node->input_value(1)}); + auto rhs_node = + ov::op::util::clone_try_fold(second_node, {binary->input_value(1), second_node->input_value(1)}); + + register_new_node(lhs_node); + register_new_node(rhs_node); + + binary->input(0).replace_source_output(lhs_node->output(0)); + binary->input(1).replace_source_output(rhs_node->output(0)); + binary->validate_and_infer_types(); + + ov::copy_runtime_info(second_node, {lhs_node, rhs_node}); + + replace_output_update_name(second_node->output(0), binary->output(0)); + return true; + }; + + auto m = std::make_shared(second_label, matcher_name); + register_matcher(m, matcher_pass_callback); +} + ov::pass::NopElimination::NopElimination(bool use_shape_for_elimination) { // shape-agnostic transformations ADD_MATCHER_FOR_THIS(EliminatePad) @@ -847,6 +886,7 @@ ov::pass::NopElimination::NopElimination(bool use_shape_for_elimination) { ADD_MATCHER_FOR_THIS(EliminateReshape) ADD_MATCHER_FOR_THIS(EliminateSqueeze) ADD_MATCHER_FOR_THIS(EliminateUnsqueeze) + ADD_MATCHER_FOR_THIS(PrepareShapeOpsForEliminationAroundBE) ADD_MATCHER_FOR_THIS(EliminateBroadcast) ADD_MATCHER_FOR_THIS(EliminateNopBroadcast) ADD_MATCHER_FOR_THIS(NopSliceBeforeGatherElements) diff --git a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp index fed8479ea2d8ad..3f3d3546b3b8be 100644 --- a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp @@ -12,6 +12,7 @@ #include "itt.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/result.hpp" +#include "openvino/op/slice.hpp" #include "openvino/op/strided_slice.hpp" #include "openvino/op/util/sub_graph_base.hpp" #include "openvino/op/variadic_split.hpp" @@ -260,6 +261,134 @@ bool ov::pass::GroupedStridedSliceOptimizer::run_on_model(const std::shared_ptr< return graph_rewritten; } +struct SliceAttrs { + int64_t start, stop, axis; +}; + +struct SliceWithAttrs { + std::shared_ptr slice; + SliceAttrs attrs; +}; + +bool slice_is_suitable_for_optimization(const std::shared_ptr& op, SliceAttrs& attrs) { + const auto& data_rank = op->get_input_partial_shape(0).rank(); + if (op->get_input_size() != 5 || data_rank.is_dynamic()) + return false; + + for (size_t i = 1; i < 5; ++i) { + auto input_as_constant = ov::as_type_ptr(op->get_input_node_shared_ptr(i)); + if (!input_as_constant) + return false; + if (shape_size(input_as_constant->get_shape()) != 1) + return false; + + int64_t value = input_as_constant->cast_vector()[0]; + + if (((i == 1 
|| i == 2) && value < 0) || (i == 3 && value != 1)) + return false; + else if (i == 1) + attrs.start = value; + else if (i == 2) + attrs.stop = value; + else if (i == 4) + attrs.axis = value >= 0 ? value : value + data_rank.get_length(); + } + if (attrs.axis < 0 || op->get_input_partial_shape(0)[attrs.axis].is_dynamic()) + return false; + return true; +} + +bool ov::pass::GroupedSliceToVSplitOptimization::run_on_model(const std::shared_ptr& model) { + RUN_ON_FUNCTION_SCOPE(GroupedSliceToVSplitOptimization); + bool graph_rewritten = false; + + using OutputWithAxis = std::pair, int64_t>; + + std::map> source_to_op_with_attrs; + + std::vector ordered_outputs; + for (const auto& node : model->get_ordered_ops()) { + // Recursively apply transformation for sub-graph based operations + if (auto multi_subgraph_op = std::dynamic_pointer_cast(node)) { + for (const auto& sub_graph : multi_subgraph_op->get_functions()) { + if (sub_graph) + graph_rewritten |= run_on_model(sub_graph); + } + } + if (auto op = ov::as_type_ptr(node)) { + SliceAttrs attributes{}; + if (slice_is_suitable_for_optimization(op, attributes)) { + OutputWithAxis current_output = {op->input_value(0), attributes.axis}; + source_to_op_with_attrs[current_output].push_back({op, attributes}); + if (std::find(ordered_outputs.begin(), ordered_outputs.end(), current_output) == ordered_outputs.end()) + ordered_outputs.push_back(current_output); + } + } + } + // optimizing in reverse topological order for case if such VSplit-like Slices are chained + std::reverse(ordered_outputs.begin(), ordered_outputs.end()); + for (const auto& output_with_axis : ordered_outputs) { + const auto& output = output_with_axis.first; + const auto& axis = output_with_axis.second; + auto attributes = source_to_op_with_attrs[output_with_axis]; + + std::sort(attributes.begin(), attributes.end(), [](const SliceWithAttrs& lhs, const SliceWithAttrs& rhs) { + if (lhs.attrs.start == rhs.attrs.start) + return lhs.attrs.stop < rhs.attrs.stop; + return lhs.attrs.start < rhs.attrs.start; + }); + + const int64_t& dimension = output.get_partial_shape()[axis].get_length(); + int64_t dimension_length_left = dimension; + std::vector split_lengths; + + int64_t prev_stop = 0; + bool valid_for_replacement = true; + + // they shouldn't overlap and no holes while slicing + for (auto& slice_with_attrs : attributes) { + const auto &start = slice_with_attrs.attrs.start, &stop = slice_with_attrs.attrs.stop; + if (prev_stop != start) { + valid_for_replacement = false; + break; + } + int64_t sliced = stop - start; + split_lengths.push_back((sliced > dimension_length_left ? 
-1 : sliced)); + dimension_length_left -= sliced; + prev_stop = stop; + } + if (!valid_for_replacement) + continue; + if (std::count(split_lengths.begin(), split_lengths.end(), -1) > 1) + continue; + + int64_t current_sum = 0; + for (const auto& i : split_lengths) + if (i != -1) + current_sum += i; + for (auto& i : split_lengths) + if (i == -1) { + i = dimension - current_sum; + current_sum = dimension; // we resolve -1 into actual value since we can use shape data + } + if (current_sum != dimension) + continue; + auto split_lengths_const = + op::v0::Constant::create(ngraph::element::i64, ngraph::Shape{split_lengths.size()}, split_lengths); + auto axis_const = op::v0::Constant::create(ngraph::element::i64, ngraph::Shape{}, {axis}); + auto variadic_split = std::make_shared(output, axis_const, split_lengths_const); + + auto i = 0; + for (auto& slice_with_attrs : attributes) { + graph_rewritten |= + ov::replace_output_update_name(slice_with_attrs.slice->output(0), variadic_split->output(i)); + ov::copy_runtime_info(slice_with_attrs.slice, variadic_split); + ++i; + } + } + return graph_rewritten; +} + ov::pass::StridedSliceOptimization::StridedSliceOptimization(bool use_shapes) { m_use_shapes = use_shapes; } @@ -278,6 +407,7 @@ bool ov::pass::StridedSliceOptimization::run_on_model(const std::shared_ptr #include +#include +#include #include #include +#include #include +#include #include +#include #include #include @@ -17,15 +22,17 @@ using namespace std; using namespace ov; using namespace ov::op; -bool shared_node_optimization(const shared_ptr& model, - const unordered_map& rules) { +namespace { +using rules_t = unordered_map; + +bool shared_node_optimization(const shared_ptr& model, const rules_t& rules) { bool rewritten = false; for (const auto& op : model->get_ordered_ops()) { // Recursively apply transformation for sub-graph based operations if (auto multi_subgraph_op = dynamic_pointer_cast(op)) { - for (size_t i = 0; i < multi_subgraph_op->get_internal_subgraphs_size(); i++) { - if (auto sub_graph = multi_subgraph_op->get_function(i)) + for (const auto& sub_graph : multi_subgraph_op->get_functions()) { + if (sub_graph) rewritten |= shared_node_optimization(sub_graph, rules); } } @@ -34,17 +41,15 @@ bool shared_node_optimization(const shared_ptr& model, if (target_inputs.size() <= 1) continue; // nothing to optimize unordered_map> type_to_node; - for (const auto& input : target_inputs) { - auto node = input.get_node(); - if (node && rules.count(node->get_type_info())) - type_to_node[node->get_type_info()].push_back(node); - } + for (const auto& input : target_inputs) + if (auto node = input.get_node()) + if (rules.count(node->get_type_info())) + type_to_node[node->get_type_info()].push_back(node); for (auto& item : type_to_node) { - const auto& shared_nodes = item.second; + auto& shared_nodes = item.second; if (shared_nodes.size() < 2) continue; - const auto& ops_type = item.first; - const auto& are_equal = rules.at(ops_type); + const auto& are_equal = rules.at(item.first); std::vector visited_nodes(shared_nodes.size(), false); for (size_t i = 0; i < visited_nodes.size(); ++i) { @@ -71,6 +76,8 @@ bool shared_node_optimization(const shared_ptr& model, bool inputs_from_same_source_or_equal_constants(const Node* lhs, const Node* rhs) { if (lhs->get_input_size() != rhs->get_input_size()) return false; + if (lhs->get_type_info() != rhs->get_type_info()) + return false; size_t input_size = lhs->get_input_size(); for (size_t i = 0; i < input_size; ++i) { if (lhs->input_value(i) == 
rhs->input_value(i)) @@ -122,21 +129,86 @@ bool reshapes_are_equal(const Node* lhs, const Node* rhs) { inputs_from_same_source_or_equal_constants(lhs, rhs); } +bool shapeof_are_equal(const Node* lhs, const Node* rhs) { + auto lhs_output_et = element::i64, rhs_output_et = element::i64; + if (const auto shape = as_type(lhs)) { + lhs_output_et = shape->get_output_type(); + } else if (!as_type(lhs)) { + return false; + } + if (const auto shape = as_type(rhs)) { + rhs_output_et = shape->get_output_type(); + } else if (!as_type(rhs)) { + return false; + } + return lhs_output_et == rhs_output_et && inputs_from_same_source_or_equal_constants(lhs, rhs); +} + +bool gathers_are_equal(const Node* lhs, const Node* rhs) { + const auto l_gather = as_type(lhs); + const auto r_gather = as_type(rhs); + if (!l_gather || !r_gather) + return false; + return l_gather->get_batch_dims() == r_gather->get_batch_dims() && + inputs_from_same_source_or_equal_constants(lhs, rhs); +} + +bool converts_are_equal(const Node* lhs, const Node* rhs) { + const auto l_convert = as_type(lhs); + const auto r_convert = as_type(rhs); + if (!l_convert || !r_convert) + return false; + return l_convert->get_destination_type() == r_convert->get_destination_type() && + inputs_from_same_source_or_equal_constants(lhs, rhs); +} + +bool shape_of_upgrade(const shared_ptr& model) { + bool rewritten = false; + for (const auto& op : model->get_ordered_ops()) { + // Recursively apply transformation for sub-graph based operations + if (auto multi_subgraph_op = dynamic_pointer_cast(op)) { + for (const auto& sub_graph : multi_subgraph_op->get_functions()) { + if (sub_graph) + rewritten |= shape_of_upgrade(sub_graph); + } + } else if (auto v1_shape_of = ov::as_type_ptr(op)) { + auto v3_shape_of = std::make_shared(v1_shape_of->input_value(0), element::i64); + v3_shape_of->set_friendly_name(v1_shape_of->get_friendly_name()); + ov::replace_output_update_name(v1_shape_of, v3_shape_of); + rewritten = true; + } + } + return rewritten; +} + +} // namespace bool pass::SharedOpOptimization::run_on_model(const shared_ptr& model) { RUN_ON_FUNCTION_SCOPE(SharedOpOptimization); +#define RECORD_NO_ATTRIBUTES(operation) \ + { operation::get_type_info_static(), inputs_from_same_source_or_equal_constants } #define RECORD(operation, func) \ { operation::get_type_info_static(), func } - const unordered_map rules = { + const rules_t rules = { // no attributes - RECORD(v8::Slice, inputs_from_same_source_or_equal_constants), - RECORD(v0::Tile, inputs_from_same_source_or_equal_constants), + RECORD_NO_ATTRIBUTES(v8::Slice), + RECORD_NO_ATTRIBUTES(v0::Squeeze), + RECORD_NO_ATTRIBUTES(v0::Tile), + RECORD_NO_ATTRIBUTES(v0::Unsqueeze), // with attributes RECORD(v0::Concat, concats_are_equal), + RECORD(v0::Convert, converts_are_equal), + RECORD(v1::Gather, gathers_are_equal), + RECORD(v7::Gather, gathers_are_equal), + RECORD(v8::Gather, gathers_are_equal), RECORD(v6::GatherElements, gather_elements_are_equal), RECORD(v1::Reshape, reshapes_are_equal), - + RECORD(v0::ShapeOf, shapeof_are_equal), + RECORD(v3::ShapeOf, shapeof_are_equal), }; // TODO: use visit_attributes to uniformly perform attributes check in the future and get rid of rules table - return shared_node_optimization(model, rules); + + bool rewritten = shape_of_upgrade(model); + rewritten |= shared_node_optimization(model, rules); + return rewritten; } diff --git a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp 
b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp index e63d9f88ed88ce..aa490f99c31566 100644 --- a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp @@ -3,12 +3,12 @@ // #include -#include -#include #include +#include #include #include #include +#include #include #include #include @@ -21,52 +21,17 @@ #include "openvino/op/gather.hpp" #include "openvino/op/reshape.hpp" #include "openvino/op/shape_of.hpp" +#include "transformations/common_optimizations/shared_ops_optimization.hpp" -static constexpr size_t index_for_int32 = 0; -static constexpr size_t index_for_int64 = 1; - -bool ov::pass::SharedShapeOf::run_on_model(const std::shared_ptr& f) { - RUN_ON_FUNCTION_SCOPE(SharedShapeOf); - bool graph_rewritten = false; - - std::map, std::vector>> source_to_shape_of; - for (const auto& node : f->get_ordered_ops()) { - // Recursively apply transformation for sub-graph based operations - if (auto sub_graph_node = std::dynamic_pointer_cast(node)) - if (auto sub_graph = sub_graph_node->get_function()) - graph_rewritten |= run_on_model(sub_graph); - - if (ov::is_type(node) || ov::is_type(node)) - source_to_shape_of[node->input_value(0)].push_back(node); - } - - for (const auto& pair : source_to_shape_of) { - if (pair.second.size() < 2) - continue; - - NodeVector nodes_for_different_types[2]; - for (const auto& child : pair.second) { - const auto& type_of_output = child->get_output_element_type(0); - size_t index = (type_of_output == element::i32) ? index_for_int32 : index_for_int64; - nodes_for_different_types[index].push_back(child); - } - for (const auto& v : nodes_for_different_types) { - if (v.empty()) - continue; - const auto& root_ss = v[0]; - for (const auto& child_ss : v) - if (root_ss->get_instance_id() != child_ss->get_instance_id()) - graph_rewritten |= replace_output_update_name(child_ss->output(0), root_ss->output(0)); - } - } - return graph_rewritten; -} +using namespace ov; +using namespace ov::op; +using namespace ov::pass::pattern; -ov::pass::GroupedGatherElimination::GroupedGatherElimination() { +pass::GroupedGatherElimination::GroupedGatherElimination() { MATCHER_SCOPE(GroupedGatherElimination); - auto concat_label = ngraph::pattern::wrap_type(pattern::rank_equals(1)); + auto concat_label = wrap_type(rank_equals(1)); - ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { + matcher_pass_callback callback = [=](Matcher& m) { auto concat = m.get_match_root(); OutputVector inputs = concat->input_values(); NodeVector new_ops; @@ -74,8 +39,7 @@ ov::pass::GroupedGatherElimination::GroupedGatherElimination() { while (inputs.size() > i + 1) { auto curr = inputs[i].get_node_shared_ptr(), next = inputs[i + 1].get_node_shared_ptr(); if (curr->get_type_info() != next->get_type_info() || - (!ov::is_type(curr) && !ov::is_type(curr) && - !ov::is_type(curr)) || + (!is_type(curr) && !is_type(curr) && !is_type(curr)) || (curr->input_value(0) != next->input_value(0))) { ++i; continue; @@ -89,25 +53,21 @@ ov::pass::GroupedGatherElimination::GroupedGatherElimination() { } // curr and next are the same type of gather which takes data from the same source - auto joint_indices = ov::op::util::make_try_fold( - OutputVector{curr->input_value(1), next->input_value(1)}, - 0); + auto joint_indices = + op::util::make_try_fold(OutputVector{curr->input_value(1), next->input_value(1)}, 0); 
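+            // For instance, Concat(Gather(shape, {0}), Gather(shape, {2})) along axis 0
+            // collapses into Gather(shape, {0, 2}): the joint indices folded above feed
+            // the single new_gather constructed below, and the Concat input pair shrinks.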
std::shared_ptr new_gather; - if (ov::is_type(curr)) { - new_gather = register_new_node( - curr->input_value(0), - joint_indices->output(0), - ov::op::v0::Constant::create(element::i64, {}, {0})->output(0)); - } else if (ov::is_type(curr)) { - new_gather = register_new_node( - curr->input_value(0), - joint_indices->output(0), - ov::op::v0::Constant::create(element::i64, {}, {0})->output(0)); - } else if (ov::is_type(curr)) { - new_gather = register_new_node( - curr->input_value(0), - joint_indices->output(0), - ov::op::v0::Constant::create(element::i64, {}, {0})->output(0)); + if (is_type(curr)) { + new_gather = register_new_node(curr->input_value(0), + joint_indices->output(0), + v0::Constant::create(element::i64, {}, {0})->output(0)); + } else if (is_type(curr)) { + new_gather = register_new_node(curr->input_value(0), + joint_indices->output(0), + v0::Constant::create(element::i64, {}, {0})->output(0)); + } else if (is_type(curr)) { + new_gather = register_new_node(curr->input_value(0), + joint_indices->output(0), + v0::Constant::create(element::i64, {}, {0})->output(0)); } else { OPENVINO_THROW("Unexpected Gather version"); } @@ -116,31 +76,29 @@ ov::pass::GroupedGatherElimination::GroupedGatherElimination() { inputs.erase(inputs.begin() + i); inputs[i] = new_gather->output(0); } - ngraph::copy_runtime_info(concat, new_ops); + ov::copy_runtime_info(concat, new_ops); if (inputs.size() == 1) // we can optimize out concat return replace_output_update_name(concat->output(0), inputs[0]); if (original_inputs_size > inputs.size()) { - auto new_concat = std::make_shared(inputs, 0); + auto new_concat = std::make_shared(inputs, 0); new_concat->set_friendly_name(concat->get_friendly_name()); - ngraph::copy_runtime_info(concat, new_concat); - ngraph::replace_node(concat, new_concat); + ov::copy_runtime_info(concat, new_concat); + ov::replace_node(concat, new_concat); return true; } return false; }; - auto m = std::make_shared(concat_label, matcher_name); + auto m = std::make_shared(concat_label, matcher_name); this->register_matcher(m, callback); } -ov::pass::GatherNopElimination::GatherNopElimination() { +pass::GatherNopElimination::GatherNopElimination() { MATCHER_SCOPE(GatherNopElimination); - const auto gather_label = - ngraph::pattern::wrap_type({pass::pattern::any_input(pattern::has_static_shape()), - ngraph::pattern::wrap_type(), - ngraph::pattern::wrap_type()}); + const auto gather_label = wrap_type( + {any_input(has_static_shape()), wrap_type(), wrap_type()}); - ov::matcher_pass_callback callback = [](pattern::Matcher& m) { + matcher_pass_callback callback = [](Matcher& m) { auto gather = m.get_match_root(); const auto& number_of_indices = shape_size(gather->get_input_shape(1)); if (gather->get_input_shape(0) != gather->get_output_shape(0) || shape_size(gather->get_input_shape(2)) != 1 || @@ -157,55 +115,52 @@ ov::pass::GatherNopElimination::GatherNopElimination() { } return replace_output_update_name(gather->output(0), gather->input_value(0)); }; - auto m = std::make_shared(gather_label, matcher_name); + auto m = std::make_shared(gather_label, matcher_name); this->register_matcher(m, callback); } -ov::pass::SimplifyGatherShapeOf::SimplifyGatherShapeOf() { +pass::SimplifyGatherShapeOf::SimplifyGatherShapeOf() { MATCHER_SCOPE(SimplifyGatherShapeOf); - const auto gather_pattern = ngraph::pattern::wrap_type(); - const auto shape_of_pattern = - ngraph::pattern::wrap_type({gather_pattern}); + const auto gather_pattern = wrap_type(); + const auto shape_of_pattern = wrap_type({gather_pattern}); - 
ov::matcher_pass_callback callback = [](pattern::Matcher& m) { + matcher_pass_callback callback = [](Matcher& m) { auto node = m.get_match_root(); - auto gather = ov::as_type_ptr(node->input_value(0).get_node_shared_ptr()); + auto gather = as_type_ptr(node->input_value(0).get_node_shared_ptr()); if (!gather) { return false; } auto gather_in_rank = gather->get_input_partial_shape(0).rank(); auto indices_rank = gather->get_input_partial_shape(1).rank(); auto axis = gather->get_axis(); - if (gather_in_rank.is_dynamic() || indices_rank.is_dynamic() || - axis == ov::op::v1::Gather::AXIS_NOT_SET_VALUE) { + if (gather_in_rank.is_dynamic() || indices_rank.is_dynamic() || axis == v1::Gather::AXIS_NOT_SET_VALUE) { return false; } - auto zero_axis = ov::op::v0::Constant::create(element::i64, Shape{}, {0}); + auto zero_axis = v0::Constant::create(element::i64, Shape{}, {0}); NodeVector new_ops; - auto new_shapeof = - std::make_shared(gather->input_value(0), node->get_output_element_type(0)); + auto new_shapeof = std::make_shared(gather->input_value(0), node->get_output_element_type(0)); new_ops.push_back(new_shapeof); std::shared_ptr replace_op; if (indices_rank.get_length() == 0) { std::vector vi(gather_in_rank.get_length()); std::iota(vi.begin(), vi.end(), 0); vi.erase(vi.begin() + axis); - auto new_indices = ov::op::v0::Constant::create(element::i64, Shape{vi.size()}, vi); - replace_op = std::make_shared(new_shapeof, new_indices, zero_axis); + auto new_indices = v0::Constant::create(element::i64, Shape{vi.size()}, vi); + replace_op = std::make_shared(new_shapeof, new_indices, zero_axis); new_ops.push_back(replace_op); } else { NodeVector concat_inputs; if (axis > 0) { std::vector vi(axis); std::iota(vi.begin(), vi.end(), 0); - auto indices = ov::op::v0::Constant::create(element::i64, Shape{vi.size()}, vi); - auto new_gather = std::make_shared(new_shapeof, indices, zero_axis); + auto indices = v0::Constant::create(element::i64, Shape{vi.size()}, vi); + auto new_gather = std::make_shared(new_shapeof, indices, zero_axis); new_ops.push_back(new_gather); concat_inputs.push_back(new_gather); } auto shapeof_indices = - std::make_shared(gather->input_value(1), node->get_output_element_type(0)); + std::make_shared(gather->input_value(1), node->get_output_element_type(0)); new_ops.push_back(shapeof_indices); concat_inputs.push_back(shapeof_indices); @@ -213,12 +168,12 @@ ov::pass::SimplifyGatherShapeOf::SimplifyGatherShapeOf() { if (gather_in_rank.get_length() - 1 > axis) { std::vector vi(gather_in_rank.get_length() - (axis + 1)); std::iota(vi.begin(), vi.end(), axis + 1); - auto indices = ov::op::v0::Constant::create(element::i64, Shape{vi.size()}, vi); - auto new_gather = std::make_shared(new_shapeof, indices, zero_axis); + auto indices = v0::Constant::create(element::i64, Shape{vi.size()}, vi); + auto new_gather = std::make_shared(new_shapeof, indices, zero_axis); new_ops.push_back(new_gather); concat_inputs.push_back(new_gather); } - replace_op = std::make_shared(concat_inputs, 0); + replace_op = std::make_shared(concat_inputs, 0); new_ops.push_back(replace_op); } replace_op->set_friendly_name(node->get_friendly_name()); @@ -227,27 +182,27 @@ ov::pass::SimplifyGatherShapeOf::SimplifyGatherShapeOf() { return true; }; - auto m = std::make_shared(shape_of_pattern, matcher_name); + auto m = std::make_shared(shape_of_pattern, matcher_name); this->register_matcher(m, callback); } -ov::pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { 
+pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { MATCHER_SCOPE(SimplifySecondInputOfReshape); - const auto input = pattern::any_input(); + const auto input = any_input(); auto has_static_1d_shape = [](const Output& output) { - return pattern::has_static_shape()(output) && pattern::rank_equals(1)(output); + return has_static_shape()(output) && rank_equals(1)(output); }; - const auto concat = pattern::wrap_type(has_static_1d_shape); - const auto reshape_pattern = pattern::wrap_type({input, concat}); + const auto concat = wrap_type(has_static_1d_shape); + const auto reshape_pattern = wrap_type({input, concat}); - ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { + matcher_pass_callback callback = [=](Matcher& m) { auto node = m.get_match_root(); - const auto reshape = as_type_ptr(node); + const auto reshape = as_type_ptr(node); if (!reshape || reshape->get_special_zero() == false) { return false; } - const auto concat = as_type_ptr(reshape->get_input_node_shared_ptr(1)); + const auto concat = as_type_ptr(reshape->get_input_node_shared_ptr(1)); if (!concat) return false; @@ -255,14 +210,14 @@ ov::pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { OPENVINO_ASSERT(concat_axis == 0 || concat_axis == -1, "axis is not valid for matched Concat with 1D output"); auto data = m.get_pattern_value_map().at(input); - if (is_type(data.get_node_shared_ptr()) || + if (is_type(data.get_node_shared_ptr()) || op::util::is_unary_elementwise_arithmetic(data.get_node_shared_ptr())) { data = data.get_node_shared_ptr()->input_value(0); } auto check_shape_of_gather = [&](const std::shared_ptr& gather) { auto shape_of = gather->get_input_node_shared_ptr(0); - if (!is_type(shape_of) && !is_type(shape_of)) { + if (!is_type(shape_of) && !is_type(shape_of)) { return false; } return shape_of->input_value(0) == data; @@ -284,7 +239,7 @@ ov::pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { // that change the arrangement of dimensions in the reshape pattern for (auto& concat_input : new_concat_inputs) { if (const auto gather = as_type_ptr(concat_input.get_node_shared_ptr())) { - auto indices_constant = as_type_ptr(gather->get_input_node_shared_ptr(1)); + auto indices_constant = as_type_ptr(gather->get_input_node_shared_ptr(1)); if (!indices_constant || !check_shape_of_gather(gather)) { update_expected_gather_location(gather); continue; @@ -302,7 +257,7 @@ ov::pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { if (gather_can_be_fused) { const size_t num_of_unchanged_dimensions = indices.size(); const auto subgraph_et = gather->get_input_element_type(0); - concat_input = ov::op::v0::Constant::create(subgraph_et, Shape{num_of_unchanged_dimensions}, {0}); + concat_input = v0::Constant::create(subgraph_et, Shape{num_of_unchanged_dimensions}, {0}); gather_folded = true; } } else { @@ -314,7 +269,7 @@ ov::pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { return false; } - const auto new_concat = op::util::make_try_fold(new_concat_inputs, concat_axis); + const auto new_concat = op::util::make_try_fold(new_concat_inputs, concat_axis); new_concat->set_friendly_name(concat->get_friendly_name()); copy_runtime_info(concat, new_concat); @@ -326,18 +281,17 @@ ov::pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { return true; }; - auto m = std::make_shared(reshape_pattern, matcher_name); + auto m = std::make_shared(reshape_pattern, matcher_name); this->register_matcher(m, callback); } -bool 
ov::pass::SimplifyShapeOfSubGraph::run_on_model(const std::shared_ptr& f) { +bool pass::SimplifyShapeOfSubGraph::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(SimplifyShapeOfSubGraph); - ov::pass::Manager manager; + Manager manager; manager.set_per_pass_validation(false); - using namespace ov::pass; - REGISTER_PASS(manager, EliminateGatherUnsqueeze) - REGISTER_PASS(manager, SharedShapeOf) + REGISTER_PASS(manager, SharedOpOptimization) + REGISTER_PASS(manager, EliminateGatherUnsqueeze) // should run after SharedOpOptimization REGISTER_PASS(manager, GroupedGatherElimination) // GatherNopElimination depends on shape, so it requires shape propagation // if previous transformations has resolved some dynamic shapes. diff --git a/src/common/transformations/tests/common_optimizations/eliminate_unsqueeze_gather.cpp b/src/common/transformations/tests/common_optimizations/eliminate_unsqueeze_gather.cpp index 2d19223305d1a9..64925b5a7fcd26 100644 --- a/src/common/transformations/tests/common_optimizations/eliminate_unsqueeze_gather.cpp +++ b/src/common/transformations/tests/common_optimizations/eliminate_unsqueeze_gather.cpp @@ -3,75 +3,217 @@ // #include -#include -#include #include +#include +#include #include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace ov; +using namespace ov::op; namespace { -using TensorType = ngraph::element::Type_t; -using TensorShape = ngraph::Shape; +using TensorType = ov::element::Type_t; +using TensorShape = ov::Shape; -class EliminateUnsqueezeGatherTest : public ov::test::TestsCommon, +class EliminateUnsqueezeGatherTest : public TransformationTestsF, public testing::WithParamInterface> { public: void SetUp() override { + TransformationTestsF::SetUp(); const auto& parameters = GetParam(); const auto& inType = std::get<0>(parameters); const auto& inShape = std::get<1>(parameters); const auto& axis = std::get<2>(parameters); - - ngraph::helpers::CompareFunctions(*transform(inShape, inType, axis), *reference(inShape, inType, axis)); + model = transform(inShape, inType, axis); + model_ref = reference(inShape, inType); + manager.register_pass(); } protected: - std::shared_ptr transform(const TensorShape& inShape, - const TensorType& inType, - size_t axis) { - const auto parameter = std::make_shared(inType, inShape); + static std::shared_ptr transform(const TensorShape& inShape, const TensorType& inType, size_t axis) { + const auto parameter = std::make_shared(inType, inShape); + const auto unsqueeze = + std::make_shared(parameter, v0::Constant::create(element::i64, Shape{1}, {axis})); + const auto gather = std::make_shared(unsqueeze, + v0::Constant::create(element::i64, Shape{1}, {0}), + v0::Constant::create(element::i64, Shape{1}, {axis})); + const auto relu = std::make_shared(gather); + return std::make_shared(NodeVector{relu}, ParameterVector{parameter}, "Actual"); + } - const auto unsqueeze = std::make_shared( - parameter, - ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {axis})); + static std::shared_ptr reference(const TensorShape& inShape, const TensorType& inType) { + const auto parameter = std::make_shared(inType, inShape); + const auto relu = std::make_shared(parameter); + return std::make_shared(NodeVector{relu}, ParameterVector{parameter}, "Reference"); + } +}; - const auto gather = std::make_shared( - unsqueeze, - ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0}), - ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {axis})); 
+TEST_P(EliminateUnsqueezeGatherTest, CompareFunctions) {} - const auto function = std::make_shared(ngraph::NodeVector{gather}, - ngraph::ParameterVector{parameter}, - "Actual"); +INSTANTIATE_TEST_SUITE_P( + smoke_NGraph, + EliminateUnsqueezeGatherTest, + testing::Combine(testing::Values(element::f16, element::f32, element::i32, element::i64, element::u8), + testing::Values(TensorShape{3, 128, 256}), + testing::Values(0, 1, 2, 3))); - ngraph::pass::Manager manager; - manager.register_pass(); - manager.run_passes(function); +} // namespace - return function; +TEST_F(TransformationTestsF, GatherUnsqueeze) { + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis); + auto unsqueeze = std::make_shared(gather, axis); + const auto relu = std::make_shared(unsqueeze); + model = std::make_shared(OutputVector{relu}, ParameterVector{data, indices}); + manager.register_pass(); + } + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{}, {0}); + + auto updated_indices = + std::make_shared(indices, v0::Constant::create(element::i32, {1}, {1}), false); + auto gather = std::make_shared(data, updated_indices, axis); + const auto relu = std::make_shared(gather); + model_ref = std::make_shared(OutputVector{relu}, ParameterVector{data, indices}); + } +} + +TEST_F(TransformationTestsF, GatherUnsqueezeReshape) { + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis); + auto unsqueeze = + std::make_shared(gather, v0::Constant::create(element::i32, Shape{1}, {1}), false); + const auto relu = std::make_shared(unsqueeze); + model = std::make_shared(OutputVector{relu}, ParameterVector{data, indices}); + manager.register_pass(); } + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{}, {0}); + + auto updated_indices = + std::make_shared(indices, v0::Constant::create(element::i32, {1}, {1}), false); + auto gather = std::make_shared(data, updated_indices, axis); + const auto relu = std::make_shared(gather); + model_ref = std::make_shared(OutputVector{relu}, ParameterVector{data, indices}); + } +} + +TEST_F(TransformationTestsF, GatherUnsqueezeMul) { + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis); - std::shared_ptr reference(const TensorShape& inShape, - const TensorType& inType, - size_t axis) { - const auto parameter = std::make_shared(inType, inShape); + auto scalar = std::make_shared(element::dynamic, PartialShape{}); + auto bea = std::make_shared(gather, scalar); - return std::make_shared(ngraph::NodeVector{parameter}, - ngraph::ParameterVector{parameter}, - "Reference"); + auto unsqueeze = std::make_shared(bea, axis); + const auto relu = std::make_shared(unsqueeze); + model = std::make_shared(OutputVector{relu}, 
ParameterVector{data, indices, scalar}); + manager.register_pass(); } -}; + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{}, {0}); -TEST_P(EliminateUnsqueezeGatherTest, CompareFunctions) {} + auto updated_indices = + std::make_shared(indices, v0::Constant::create(element::i32, {1}, {1}), false); + auto gather = std::make_shared(data, updated_indices, axis); -INSTANTIATE_TEST_SUITE_P(smoke_NGraph, - EliminateUnsqueezeGatherTest, - testing::Combine(testing::Values(ngraph::element::f16, - ngraph::element::f32, - ngraph::element::i32, - ngraph::element::i64, - ngraph::element::u8), - testing::Values(TensorShape{3, 128, 256}), - testing::Values(0, 1, 2, 3))); + auto scalar = std::make_shared(element::dynamic, PartialShape{}); + auto bea = std::make_shared(gather, scalar); -} // namespace + const auto relu = std::make_shared(bea); + model_ref = std::make_shared(OutputVector{relu}, ParameterVector{data, indices, scalar}); + } +} + +TEST_F(TransformationTestsF, GatherUnsqueezesMul) { + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis); + + auto scalar = std::make_shared(element::dynamic, PartialShape{}); + auto bea = std::make_shared(gather, scalar); + + auto unsqueeze_0 = std::make_shared(bea, axis); + auto unsqueeze_1 = std::make_shared(bea, axis); + auto unsqueeze_2 = std::make_shared(bea, axis); + + auto concat = std::make_shared(OutputVector{unsqueeze_0, unsqueeze_1, unsqueeze_2}, 0); + + model = std::make_shared(OutputVector{concat}, ParameterVector{data, indices, scalar}); + manager.register_pass(); + } + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{}, {0}); + + auto updated_indices = + std::make_shared(indices, v0::Constant::create(element::i32, {1}, {1}), false); + auto gather = std::make_shared(data, updated_indices, axis); + + auto scalar = std::make_shared(element::dynamic, PartialShape{}); + auto bea = std::make_shared(gather, scalar); + + auto concat = std::make_shared(OutputVector{bea, bea, bea}, 0); + + model_ref = std::make_shared(OutputVector{concat}, ParameterVector{data, indices, scalar}); + } +} +TEST_F(TransformationTestsF, GatherUnsqueezes) { + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{1}, {0}); + + auto gather = std::make_shared(data, indices, axis); + + auto unsqueeze_0 = std::make_shared(gather, axis); + auto unsqueeze_1 = std::make_shared(gather, axis); + auto unsqueeze_2 = std::make_shared(gather, axis); + + auto concat = std::make_shared(OutputVector{unsqueeze_0, unsqueeze_1, unsqueeze_2, axis}, 0); + + model = std::make_shared(OutputVector{concat}, ParameterVector{data, indices}); + manager.register_pass(); + manager.register_pass(); + } + { + auto data = std::make_shared(element::dynamic, PartialShape{-1}); + auto indices = std::make_shared(element::dynamic, PartialShape{}); + auto axis = v0::Constant::create(element::i32, Shape{1}, {0}); + + auto updated_indices = + std::make_shared(indices, 
v0::Constant::create(element::i32, {1}, {1}), false); + auto gather = std::make_shared(data, updated_indices, axis); + + auto concat = std::make_shared(OutputVector{gather, gather, gather, axis}, 0); + + model_ref = std::make_shared(OutputVector{concat}, ParameterVector{data, indices}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/nop_elimination.cpp b/src/common/transformations/tests/common_optimizations/nop_elimination.cpp index 0e1adebc1d6fb7..3f234a37b1925a 100644 --- a/src/common/transformations/tests/common_optimizations/nop_elimination.cpp +++ b/src/common/transformations/tests/common_optimizations/nop_elimination.cpp @@ -1422,3 +1422,28 @@ TEST_F(TransformationTestsF, NopSliceBeforeGatherElements) { model_ref = std::make_shared(ResultVector{result}, ParameterVector{data, indices}); } } + +TEST_F(TransformationTestsF, SqueezeBinaryReshape) { + { + auto data = std::make_shared(element::f32, PartialShape{1}); + + auto axis = op::v0::Constant::create(element::i32, Shape{1}, {0}); + auto squeeze = std::make_shared(data, axis); + + auto binary = + std::make_shared(squeeze, op::v0::Constant::create(element::f32, Shape{}, {0.2})); + + auto reshape = + std::make_shared(binary, op::v0::Constant::create(element::i32, Shape{1}, {1}), false); + + auto relu = std::make_shared(reshape); + model = std::make_shared(OutputVector{relu}, ParameterVector{data}); + manager.register_pass(); + } + { + auto data = std::make_shared(element::f32, PartialShape{1}); + auto binary = std::make_shared(data, op::v0::Constant::create(element::f32, Shape{1}, {0.2})); + auto relu = std::make_shared(binary); + model_ref = std::make_shared(OutputVector{relu}, ParameterVector{data}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp b/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp index a02d0f02ac45d2..e7b71560615e1e 100644 --- a/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp +++ b/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp @@ -1015,3 +1015,179 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_slice_all_use_shapes_false) { comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); } + +ov::Output make_slice(const ov::Output& out, + const int64_t& start, + const int64_t& stop, + const int64_t& step, + const int64_t& axis) { + return std::make_shared(out, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {start}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {stop}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {step}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {axis})); +} + +ov::OutputVector make_vsplit(const ov::Output& out, + const int64_t& axis, + const std::vector& split_length) { + return std::make_shared( + out, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {axis}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{split_length.size()}, split_length)) + ->outputs(); +} + +TEST_F(TransformationTestsF, GroupedSliceToVSplit) { + { + auto data = std::make_shared(ov::element::f32, ov::PartialShape{-1, 3, -1, -1}); + auto relu = std::make_shared(data); + + auto slice_0 = make_slice(relu, 0, 1, 1, -3); + auto slice_1 = make_slice(relu, 1, 2, 1, 1); + auto slice_2 = make_slice(relu, 2, 7, 1, 1); + + auto concat = std::make_shared(ov::OutputVector{slice_0, 
slice_2, slice_1}, 1); + + model = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{data}); + manager.register_pass(); + } + { + auto data = std::make_shared(ov::element::f32, ov::PartialShape{-1, 3, -1, -1}); + auto relu = std::make_shared(data); + + auto vsplit = make_vsplit(relu, 1, {1, 1, 1}); + + auto concat = std::make_shared(ov::OutputVector{vsplit[0], vsplit[2], vsplit[1]}, 1); + + model_ref = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{data}); + } +} + +TEST_F(TransformationTestsF, GroupedSliceToVSplitChained) { + { + auto data = std::make_shared(ov::element::f32, ov::PartialShape{-1, 10, -1, -1}); + auto relu = std::make_shared(data); + + // dimension == 10 on axis == 1 aka -3 + + auto slice_0 = make_slice(relu, 0, 3, 1, -3); // slices 0, 1, 2 + auto slice_1 = make_slice(relu, 3, 7, 1, 1); // slices 3, 4, 5, 6 + auto slice_2 = make_slice(relu, 7, 15, 1, 1); // slices 7, 8, 9 + + auto slice_0_0 = make_slice(slice_0, 0, 1, 1, 1); // slices 0 + auto slice_0_1 = make_slice(slice_0, 1, 100, 1, 1); // slices 1, 2 + + auto slice_1_0 = make_slice(slice_1, 0, 2, 1, 1); // slices 3, 4 + auto slice_1_1 = make_slice(slice_1, 2, 2, 1, 1); // slices empty tensor + auto slice_1_2 = make_slice(slice_1, 2, 4, 1, 1); // slices 5, 6 + + auto slice_2_0 = make_slice(slice_2, 0, 2, -1, 1); // negative case as step is negative + auto slice_2_1 = make_slice(slice_2, 2, 10, 1, 1); + + auto slice_2_0_0 = make_slice(slice_2, 0, 3, 1, 1); // negative case as slices overlap + auto slice_2_1_0 = make_slice(slice_2, 2, 10, 1, 1); + + auto concat = std::make_shared(ov::OutputVector{slice_0, + slice_2, + slice_1, + slice_0_0, + slice_1_0, + slice_0_1, + slice_1_1, + slice_1_2, + slice_2_0, + slice_2_1, + slice_2_0_0, + slice_2_1_0}, + 1); + + model = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{data}); + manager.register_pass(); + } + { + auto data = std::make_shared(ov::element::f32, ov::PartialShape{-1, 10, -1, -1}); + auto relu = std::make_shared(data); + + auto vsplit = make_vsplit(relu, 1, {3, 4, 3}); + + auto slice_0 = vsplit[0]; + auto slice_1 = vsplit[1]; + auto slice_2 = vsplit[2]; + + auto vsplit_0 = make_vsplit(slice_0, 1, {1, 2}); + + auto slice_0_0 = vsplit_0[0]; + auto slice_0_1 = vsplit_0[1]; + + auto vsplit_1 = make_vsplit(slice_1, 1, {2, 0, 2}); + + auto slice_1_0 = vsplit_1[0]; + auto slice_1_1 = vsplit_1[1]; + auto slice_1_2 = vsplit_1[2]; + + auto slice_2_0 = make_slice(slice_2, 0, 2, -1, 1); // negative case as step is negative + auto slice_2_1 = make_slice(slice_2, 2, 10, 1, 1); + + auto slice_2_0_0 = make_slice(slice_2, 0, 3, 1, 1); // negative case as slices overlap + auto slice_2_1_0 = make_slice(slice_2, 2, 10, 1, 1); + + auto concat = std::make_shared(ov::OutputVector{slice_0, + slice_2, + slice_1, + slice_0_0, + slice_1_0, + slice_0_1, + slice_1_1, + slice_1_2, + slice_2_0, + slice_2_1, + slice_2_0_0, + slice_2_1_0}, + 1); + + model_ref = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{data}); + } +} + +TEST_F(TransformationTestsF, GroupedSliceToVSplitSameSourceDifferentAxis) { + { + auto data = std::make_shared(ov::element::f32, ov::PartialShape{-1, 3, 10, -1}); + auto relu = std::make_shared(data); + + // axis == 1 aka -3 + auto slice_0 = make_slice(relu, 0, 1, 1, -3); + auto slice_1 = make_slice(relu, 1, 7, 1, 1); + + // axis == 2 aka -2 + auto slice_2 = make_slice(relu, 0, 5, 1, -2); + auto slice_3 = make_slice(relu, 5, 10, 1, 2); + + auto concat_0 = std::make_shared(ov::OutputVector{slice_1, slice_0}, 1); + auto concat_1 = 
std::make_shared(ov::OutputVector{slice_2, slice_3}, 2); + + auto concat_2 = std::make_shared(ov::OutputVector{concat_0, concat_1}, 0); + + model = std::make_shared(ov::NodeVector{concat_2}, ov::ParameterVector{data}); + manager.register_pass(); + } + { + auto data = std::make_shared(ov::element::f32, ov::PartialShape{-1, 3, 10, -1}); + auto relu = std::make_shared(data); + + auto vsplit_0 = make_vsplit(relu, 1, {1, 2}); + auto slice_0 = vsplit_0[0]; + auto slice_1 = vsplit_0[1]; + + auto vsplit_1 = make_vsplit(relu, 2, {5, 5}); + auto slice_2 = vsplit_1[0]; + auto slice_3 = vsplit_1[1]; + + auto concat_0 = std::make_shared(ov::OutputVector{slice_1, slice_0}, 1); + auto concat_1 = std::make_shared(ov::OutputVector{slice_2, slice_3}, 2); + + auto concat_2 = std::make_shared(ov::OutputVector{concat_0, concat_1}, 0); + + model_ref = std::make_shared(ov::NodeVector{concat_2}, ov::ParameterVector{data}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp index de94c06591070c..1dd93874cdd8e9 100644 --- a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp +++ b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp @@ -7,8 +7,10 @@ #include "common_test_utils/ngraph_test_utils.hpp" #include "openvino/op/concat.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/reshape.hpp" +#include "openvino/op/shape_of.hpp" #include "openvino/op/slice.hpp" #include "openvino/op/tile.hpp" @@ -258,3 +260,176 @@ TEST_F(SharedTransformationTestsF, SharedConcatCheckOpWithResultIsntReplaced) { manager.register_pass(); } } + +TEST_F(SharedTransformationTestsF, SharedShapeOfTest) { + Shape input_shape{120, 4}; + { + auto input = std::make_shared(element::f32, input_shape); + + auto shapeof1_i32 = std::make_shared(input, element::i32); + auto shapeof2_i64 = std::make_shared(input, element::i64); + auto shapeof3_i32 = std::make_shared(input, element::i32); + auto shapeof4_i32 = std::make_shared(input, element::i32); + auto shapeof5_i64 = std::make_shared(input, element::i64); + auto shapeof6_i32 = std::make_shared(input, element::i32); + auto shapeof7_i32 = std::make_shared(input, element::i32); + + auto shapeof1_i32_convert = std::make_shared(shapeof1_i32, element::i64); + auto shapeof3_i32_convert = std::make_shared(shapeof3_i32, element::i64); + auto shapeof4_i32_convert = std::make_shared(shapeof4_i32, element::i64); + auto shapeof6_i32_convert = std::make_shared(shapeof6_i32, element::i64); + auto shapeof7_i32_convert = std::make_shared(shapeof7_i32, element::i64); + + OutputVector inputs_of_concat{shapeof1_i32_convert, + shapeof2_i64, + shapeof3_i32_convert, + shapeof4_i32_convert, + shapeof5_i64, + shapeof6_i32_convert, + shapeof7_i32_convert}; + + auto concat = std::make_shared(inputs_of_concat, 0); + model = std::make_shared(NodeVector{concat}, ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(element::f32, input_shape); + + auto shapeof1_i32 = std::make_shared(input, element::i32); + auto shapeof2_i64 = std::make_shared(input, element::i64); + + auto shapeof1_i32_convert = std::make_shared(shapeof1_i32, element::i64); + + OutputVector inputs_of_concat{shapeof1_i32_convert, + shapeof2_i64, + shapeof1_i32_convert, + shapeof1_i32_convert, + shapeof2_i64, + shapeof1_i32_convert, + shapeof1_i32_convert}; + + auto concat = 
std::make_shared(inputs_of_concat, 0); + model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); + } +} + +TEST_F(SharedTransformationTestsF, SharedShapeOfTestI64Only) { + Shape input_shape{120, 4}; + { + auto input = std::make_shared(element::f32, input_shape); + + auto shapeof1_i64 = std::make_shared(input, element::i64); + auto shapeof2_i64 = std::make_shared(input, element::i64); + auto shapeof3_i64 = std::make_shared(input, element::i64); + + OutputVector inputs_of_concat{shapeof1_i64, shapeof2_i64, shapeof3_i64}; + + auto concat = std::make_shared(inputs_of_concat, 0); + model = std::make_shared(NodeVector{concat}, ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(element::f32, input_shape); + auto shapeof1_i64 = std::make_shared(input, element::i64); + + OutputVector inputs_of_concat{shapeof1_i64, shapeof1_i64, shapeof1_i64}; + + auto concat = std::make_shared(inputs_of_concat, 0); + model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); + } +} + +TEST_F(SharedTransformationTestsF, SharedShapeOfTestI32Only) { + Shape input_shape{120, 4}; + { + auto input = std::make_shared(element::f32, input_shape); + + auto shapeof1_i32 = std::make_shared(input, element::i32); + auto shapeof2_i32 = std::make_shared(input, element::i32); + auto shapeof3_i32 = std::make_shared(input, element::i32); + auto shapeof4_i32 = std::make_shared(input, element::i32); + auto shapeof5_i32 = std::make_shared(input, element::i32); + + auto shapeof1_i32_convert = std::make_shared(shapeof1_i32, element::i64); + auto shapeof2_i32_convert = std::make_shared(shapeof2_i32, element::i64); + auto shapeof3_i32_convert = std::make_shared(shapeof3_i32, element::i64); + auto shapeof4_i32_convert = std::make_shared(shapeof4_i32, element::i64); + auto shapeof5_i32_convert = std::make_shared(shapeof5_i32, element::i64); + + OutputVector inputs_of_concat{shapeof1_i32_convert, + shapeof2_i32_convert, + shapeof3_i32_convert, + shapeof4_i32_convert, + shapeof5_i32_convert}; + + auto concat = std::make_shared(inputs_of_concat, 0); + model = std::make_shared(NodeVector{concat}, ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(element::f32, input_shape); + + auto shapeof1_i32 = std::make_shared(input, element::i32); + + auto shapeof1_i32_convert = std::make_shared(shapeof1_i32, element::i64); + + OutputVector inputs_of_concat{shapeof1_i32_convert, + shapeof1_i32_convert, + shapeof1_i32_convert, + shapeof1_i32_convert, + shapeof1_i32_convert}; + + auto concat = std::make_shared(inputs_of_concat, 0); + model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); + } +} + +TEST_F(SharedTransformationTestsF, SharedShapeOfTestMixed) { + Shape input_shape{120, 4}; + { + auto input = std::make_shared(element::f32, input_shape); + + auto shapeof1 = std::make_shared(input); + auto shapeof2_i64 = std::make_shared(input, element::i64); + auto shapeof3_i32 = std::make_shared(input, element::i32); + auto shapeof4 = std::make_shared(input); + auto shapeof5_i64 = std::make_shared(input, element::i64); + auto shapeof6_i32 = std::make_shared(input, element::i32); + auto shapeof7_i32 = std::make_shared(input, element::i32); + + auto shapeof3_i32_convert = std::make_shared(shapeof3_i32, element::i64); + auto shapeof6_i32_convert = std::make_shared(shapeof6_i32, element::i64); + auto shapeof7_i32_convert = std::make_shared(shapeof7_i32, element::i64); + + OutputVector inputs_of_concat{shapeof1, + shapeof2_i64, + 
shapeof3_i32_convert, + shapeof4, + shapeof5_i64, + shapeof6_i32_convert, + shapeof7_i32_convert}; + + auto concat = std::make_shared(inputs_of_concat, 0); + model = std::make_shared(NodeVector{concat}, ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(element::f32, input_shape); + + auto shapeof1 = std::make_shared(input, element::i64); + auto shapeof2_i32 = std::make_shared(input, element::i32); + + auto shapeof3_i32_convert = std::make_shared(shapeof2_i32, element::i64); + + OutputVector inputs_of_concat{shapeof1, + shapeof1, + shapeof3_i32_convert, + shapeof1, + shapeof1, + shapeof3_i32_convert, + shapeof3_i32_convert}; + + auto concat = std::make_shared(inputs_of_concat, 0); + model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/shared_shapeof_test.cpp b/src/common/transformations/tests/common_optimizations/shared_shapeof_test.cpp deleted file mode 100644 index b7e9ce5da72d5b..00000000000000 --- a/src/common/transformations/tests/common_optimizations/shared_shapeof_test.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common_test_utils/ngraph_test_utils.hpp" - -using namespace testing; - -TEST_F(TransformationTestsF, SharedShapeOfTest) { - ngraph::Shape input_shape{120, 4}; - { - auto input = std::make_shared(ngraph::element::f32, input_shape); - - auto shapeof1_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof2_i64 = std::make_shared(input, ngraph::element::i64); - auto shapeof3_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof4_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof5_i64 = std::make_shared(input, ngraph::element::i64); - auto shapeof6_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof7_i32 = std::make_shared(input, ngraph::element::i32); - - auto shapeof1_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof3_i32_convert = std::make_shared(shapeof3_i32, ngraph::element::i64); - auto shapeof4_i32_convert = std::make_shared(shapeof4_i32, ngraph::element::i64); - auto shapeof6_i32_convert = std::make_shared(shapeof6_i32, ngraph::element::i64); - auto shapeof7_i32_convert = std::make_shared(shapeof7_i32, ngraph::element::i64); - - ngraph::OutputVector inputs_of_concat{shapeof1_i32_convert, - shapeof2_i64, - shapeof3_i32_convert, - shapeof4_i32_convert, - shapeof5_i64, - shapeof6_i32_convert, - shapeof7_i32_convert}; - - auto concat = std::make_shared(inputs_of_concat, 0); - function = std::make_shared(ngraph::NodeVector{concat}, ngraph::ParameterVector{input}); - manager.register_pass(); - } - { - auto input = std::make_shared(ngraph::element::f32, input_shape); - - auto shapeof1_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof2_i64 = std::make_shared(input, ngraph::element::i64); - - auto shapeof1_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof3_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof4_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof6_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof7_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - - ngraph::OutputVector 
inputs_of_concat{shapeof1_i32_convert, - shapeof2_i64, - shapeof3_i32_convert, - shapeof4_i32_convert, - shapeof2_i64, - shapeof6_i32_convert, - shapeof7_i32_convert}; - - auto concat = std::make_shared(inputs_of_concat, 0); - function_ref = std::make_shared(ngraph::NodeVector{concat}, ngraph::ParameterVector{input}); - } -} - -TEST_F(TransformationTestsF, SharedShapeOfTestI64Only) { - ngraph::Shape input_shape{120, 4}; - { - auto input = std::make_shared(ngraph::element::f32, input_shape); - - auto shapeof1_i64 = std::make_shared(input, ngraph::element::i64); - auto shapeof2_i64 = std::make_shared(input, ngraph::element::i64); - auto shapeof3_i64 = std::make_shared(input, ngraph::element::i64); - - ngraph::OutputVector inputs_of_concat{shapeof1_i64, shapeof2_i64, shapeof3_i64}; - - auto concat = std::make_shared(inputs_of_concat, 0); - function = std::make_shared(ngraph::NodeVector{concat}, ngraph::ParameterVector{input}); - manager.register_pass(); - } - { - auto input = std::make_shared(ngraph::element::f32, input_shape); - auto shapeof1_i64 = std::make_shared(input, ngraph::element::i64); - - ngraph::OutputVector inputs_of_concat{shapeof1_i64, shapeof1_i64, shapeof1_i64}; - - auto concat = std::make_shared(inputs_of_concat, 0); - function_ref = std::make_shared(ngraph::NodeVector{concat}, ngraph::ParameterVector{input}); - } -} - -TEST_F(TransformationTestsF, SharedShapeOfTestI32Only) { - ngraph::Shape input_shape{120, 4}; - { - auto input = std::make_shared(ngraph::element::f32, input_shape); - - auto shapeof1_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof2_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof3_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof4_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof5_i32 = std::make_shared(input, ngraph::element::i32); - - auto shapeof1_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof2_i32_convert = std::make_shared(shapeof2_i32, ngraph::element::i64); - auto shapeof3_i32_convert = std::make_shared(shapeof3_i32, ngraph::element::i64); - auto shapeof4_i32_convert = std::make_shared(shapeof4_i32, ngraph::element::i64); - auto shapeof5_i32_convert = std::make_shared(shapeof5_i32, ngraph::element::i64); - - ngraph::OutputVector inputs_of_concat{shapeof1_i32_convert, - shapeof2_i32_convert, - shapeof3_i32_convert, - shapeof4_i32_convert, - shapeof5_i32_convert}; - - auto concat = std::make_shared(inputs_of_concat, 0); - function = std::make_shared(ngraph::NodeVector{concat}, ngraph::ParameterVector{input}); - manager.register_pass(); - } - { - auto input = std::make_shared(ngraph::element::f32, input_shape); - - auto shapeof1_i32 = std::make_shared(input, ngraph::element::i32); - - auto shapeof1_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof2_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof3_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof4_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - auto shapeof5_i32_convert = std::make_shared(shapeof1_i32, ngraph::element::i64); - - ngraph::OutputVector inputs_of_concat{shapeof1_i32_convert, - shapeof2_i32_convert, - shapeof3_i32_convert, - shapeof4_i32_convert, - shapeof5_i32_convert}; - - auto concat = std::make_shared(inputs_of_concat, 0); - function_ref = std::make_shared(ngraph::NodeVector{concat}, ngraph::ParameterVector{input}); - } -} - -TEST_F(TransformationTestsF, 
SharedShapeOfTestMixed) { - ngraph::Shape input_shape{120, 4}; - { - auto input = std::make_shared(ngraph::element::f32, input_shape); - - auto shapeof1 = std::make_shared(input); - auto shapeof2_i64 = std::make_shared(input, ngraph::element::i64); - auto shapeof3_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof4 = std::make_shared(input); - auto shapeof5_i64 = std::make_shared(input, ngraph::element::i64); - auto shapeof6_i32 = std::make_shared(input, ngraph::element::i32); - auto shapeof7_i32 = std::make_shared(input, ngraph::element::i32); - - auto shapeof3_i32_convert = std::make_shared(shapeof3_i32, ngraph::element::i64); - auto shapeof6_i32_convert = std::make_shared(shapeof6_i32, ngraph::element::i64); - auto shapeof7_i32_convert = std::make_shared(shapeof7_i32, ngraph::element::i64); - - ngraph::OutputVector inputs_of_concat{shapeof1, - shapeof2_i64, - shapeof3_i32_convert, - shapeof4, - shapeof5_i64, - shapeof6_i32_convert, - shapeof7_i32_convert}; - - auto concat = std::make_shared(inputs_of_concat, 0); - function = std::make_shared(ngraph::NodeVector{concat}, ngraph::ParameterVector{input}); - manager.register_pass(); - } - { - auto input = std::make_shared(ngraph::element::f32, input_shape); - - auto shapeof1 = std::make_shared(input); - auto shapeof2_i32 = std::make_shared(input, ngraph::element::i32); - - auto shapeof3_i32_convert = std::make_shared(shapeof2_i32, ngraph::element::i64); - auto shapeof6_i32_convert = std::make_shared(shapeof2_i32, ngraph::element::i64); - auto shapeof7_i32_convert = std::make_shared(shapeof2_i32, ngraph::element::i64); - - ngraph::OutputVector inputs_of_concat{shapeof1, - shapeof1, - shapeof3_i32_convert, - shapeof1, - shapeof1, - shapeof6_i32_convert, - shapeof7_i32_convert}; - - auto concat = std::make_shared(inputs_of_concat, 0); - function_ref = std::make_shared(ngraph::NodeVector{concat}, ngraph::ParameterVector{input}); - } -} diff --git a/src/common/transformations/tests/utils/compress_quantize_weights.cpp b/src/common/transformations/tests/utils/compress_quantize_weights.cpp index df5f60ece775ea..55c35b7205af10 100644 --- a/src/common/transformations/tests/utils/compress_quantize_weights.cpp +++ b/src/common/transformations/tests/utils/compress_quantize_weights.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "common_test_utils/ngraph_test_utils.hpp" @@ -41,8 +42,10 @@ class CompressQuantizeWeightsTests std::tie(param, data_prc) = GetParam(); { std::shared_ptr data = opset8::Constant::create(data_prc, param.shape, param.weights); - if (data_prc == element::f16) + if (data_prc == element::f16) { data = std::make_shared(data, element::f32); + ov::mark_as_decompression(data); + } auto input_low = opset8::Constant::create(element::f32, Shape{}, {param.in_low}); auto input_high = opset8::Constant::create(element::f32, Shape{}, {param.in_high}); auto output_low = opset8::Constant::create(element::f32, Shape{}, {param.out_low}); @@ -159,6 +162,41 @@ TEST_F(TransformationTestsF, CompressQuantizeWeightsWithDequantizationSubgraph) comparator.enable(FunctionsComparator::CmpValues::ACCURACY); } +TEST_F(TransformationTestsF, CompressQuantizeWeightsWithDequantizationSubgraphFP16) { + { + auto data = opset8::Constant::create(element::f16, Shape{2, 4, 1, 1}, {-1, 0, 1, 2, 3, 4, 5, 11}); + auto convert_to_f32 = std::make_shared(data, element::f32); + ov::mark_as_decompression(convert_to_f32); + auto input_low = opset8::Constant::create(element::f32, Shape{}, {1}); + auto input_high = 
opset8::Constant::create(element::f32, Shape{}, {9}); + auto output_low = opset8::Constant::create(element::f32, Shape{}, {-128}); + auto output_high = opset8::Constant::create(element::f32, Shape{}, {127}); + auto fq = + std::make_shared(convert_to_f32, input_low, input_high, output_low, output_high, 256); + auto convert = std::make_shared(fq, element::i8); + auto second_convert = std::make_shared(convert, element::f32); + auto scale = opset8::Constant::create(element::f32, Shape{}, {10.0 / 255}); + auto zero_point = opset8::Constant::create(element::f32, Shape{}, {2 - 255.0 / 10}); + auto sub = std::make_shared(second_convert, zero_point); + auto mul = std::make_shared(sub, scale); + + function = std::make_shared(NodeVector{mul}, ParameterVector{}); + + manager.register_pass(); + } + { + auto data = opset8::Constant::create(element::i8, Shape{2, 4, 1, 1}, {-128, -128, -128, -96, -64, -32, 0, 127}); + auto convert = std::make_shared(data, element::f32); + auto scale = opset8::Constant::create(element::f32, Shape{}, {10.0 / 255}); + auto zero_point = opset8::Constant::create(element::f32, Shape{}, {2 - 255.0 / 10}); + auto sub = std::make_shared(convert, zero_point); + auto mul = std::make_shared(sub, scale); + function_ref = std::make_shared(NodeVector{mul}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ACCURACY); +} + TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointOptimizer) { { auto data = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.144816, 0.0858578, 0.110928}); diff --git a/src/core/include/openvino/op/generate_proposals.hpp b/src/core/include/openvino/op/generate_proposals.hpp index 44f85eca68dae6..7863e88770e91e 100644 --- a/src/core/include/openvino/op/generate_proposals.hpp +++ b/src/core/include/openvino/op/generate_proposals.hpp @@ -60,6 +60,8 @@ class OPENVINO_API GenerateProposals : public Op { return m_attrs; } + void set_attrs(Attributes attrs); + const element::Type& get_roi_num_type() const { return m_roi_num_type; } diff --git a/src/core/shape_inference/include/generate_proposals_shape_inference.hpp b/src/core/shape_inference/include/generate_proposals_shape_inference.hpp index e5f5a7bff56def..ddd536fe7377e8 100644 --- a/src/core/shape_inference/include/generate_proposals_shape_inference.hpp +++ b/src/core/shape_inference/include/generate_proposals_shape_inference.hpp @@ -22,106 +22,120 @@ std::vector shape_infer(const GenerateProposals* op, const std::vector< const auto& scores_shape = input_shapes[3]; const auto im_info_shape_rank = im_info_shape.rank(); auto num_batches = Dimension::dynamic(); - NODE_VALIDATION_CHECK(op, - im_info_shape_rank.compatible(2), - "The 'input_im_info' input is expected to be a 2D. Got: ", - im_info_shape); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + im_info_shape_rank.compatible(2), + "The 'input_im_info' input is expected to be a 2D. Got: ", + im_info_shape); if (im_info_shape_rank.is_static()) { - NODE_VALIDATION_CHECK(op, - (im_info_shape[1].compatible(3) || im_info_shape[1].compatible(4)), - "The 'input_im_info' shape[1] is expected to be a compatible with [3] or [4]. Got: ", - im_info_shape); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + (im_info_shape[1].compatible(3) || im_info_shape[1].compatible(4)), + "The 'input_im_info' shape[1] is expected to be a compatible with [3] or [4]. 
Got: ", + im_info_shape); Dimension::merge(num_batches, im_info_shape[0], num_batches); } const auto anchors_shape_rank = anchors_shape.rank(); - NODE_VALIDATION_CHECK(op, - anchors_shape_rank.compatible(4), - "The 'input_anchors' input is expected to be a 4D. Got: ", - anchors_shape); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + anchors_shape_rank.compatible(4), + "The 'input_anchors' input is expected to be a 4D. Got: ", + anchors_shape); if (anchors_shape_rank.is_static()) { - NODE_VALIDATION_CHECK(op, - anchors_shape[3].compatible(4), - "The fourth dimension of 'input_anchors' should be compatible with 4. Got: ", - anchors_shape[3]); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + anchors_shape[3].compatible(4), + "The fourth dimension of 'input_anchors' should be compatible with 4. Got: ", + anchors_shape[3]); } const auto deltas_shape_rank = deltas_shape.rank(); const auto scores_shape_rank = scores_shape.rank(); - NODE_VALIDATION_CHECK(op, - deltas_shape_rank.compatible(4), - "The 'input_deltas' input is expected to be a 4D. Got: ", - deltas_shape); - NODE_VALIDATION_CHECK(op, - scores_shape_rank.compatible(4), - "The 'input_scores' input is expected to be a 4D. Got: ", - scores_shape); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + deltas_shape_rank.compatible(4), + "The 'input_deltas' input is expected to be a 4D. Got: ", + deltas_shape); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + scores_shape_rank.compatible(4), + "The 'input_scores' input is expected to be a 4D. Got: ", + scores_shape); if (deltas_shape_rank.is_static()) Dimension::merge(num_batches, deltas_shape[0], num_batches); if (scores_shape_rank.is_static()) Dimension::merge(num_batches, scores_shape[0], num_batches); if (deltas_shape_rank.is_static() && scores_shape_rank.is_static()) { - NODE_VALIDATION_CHECK(op, - deltas_shape[0].compatible(scores_shape[0]), - "Batch for inputs 'input_deltas' and 'input_scores' should be " - "equal. Got: ", - deltas_shape[0], - scores_shape[0]); - - NODE_VALIDATION_CHECK(op, - deltas_shape[1].compatible(scores_shape[1] * 4), - "Anchor number for inputs 'input_deltas' and 'input_scores' should be " - "equal. Got: ", - deltas_shape[1] / 4, - scores_shape[1]); - - NODE_VALIDATION_CHECK(op, - deltas_shape[2].compatible(scores_shape[2]), - "Heights for inputs 'input_deltas' and 'input_scores' should be " - "equal. Got: ", - deltas_shape[2], - scores_shape[2]); - - NODE_VALIDATION_CHECK(op, - deltas_shape[3].compatible(scores_shape[3]), - "Width for inputs 'input_deltas' and 'input_scores' should be " - "equal. Got: ", - deltas_shape[3], - scores_shape[3]); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + deltas_shape[0].compatible(scores_shape[0]), + "Batch for inputs 'input_deltas' and 'input_scores' should be " + "equal. Got: ", + deltas_shape[0], + scores_shape[0]); + + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + deltas_shape[1].compatible(scores_shape[1] * 4), + "Anchor number for inputs 'input_deltas' and 'input_scores' should be " + "equal. Got: ", + deltas_shape[1] / 4, + scores_shape[1]); + + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + deltas_shape[2].compatible(scores_shape[2]), + "Heights for inputs 'input_deltas' and 'input_scores' should be " + "equal. Got: ", + deltas_shape[2], + scores_shape[2]); + + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + deltas_shape[3].compatible(scores_shape[3]), + "Width for inputs 'input_deltas' and 'input_scores' should be " + "equal. 
Got: ", + deltas_shape[3], + scores_shape[3]); if (im_info_shape_rank.is_static()) { - NODE_VALIDATION_CHECK(op, - deltas_shape[0].compatible(im_info_shape[0]), - "Batch for inputs 'im_info' and 'input_deltas' should be " - "equal. Got: ", - deltas_shape[0], - im_info_shape[0]); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + deltas_shape[0].compatible(im_info_shape[0]), + "Batch for inputs 'im_info' and 'input_deltas' should be " + "equal. Got: ", + deltas_shape[0], + im_info_shape[0]); } } if (scores_shape_rank.is_static() && anchors_shape_rank.is_static()) { - NODE_VALIDATION_CHECK(op, - anchors_shape[0].compatible(scores_shape[2]), - "Heights for inputs 'input_anchors' and 'input_scores' should be " - "equal. Got: ", - anchors_shape[0], - scores_shape[2]); - - NODE_VALIDATION_CHECK(op, - anchors_shape[1].compatible(scores_shape[3]), - "Width for inputs 'input_anchors' and 'input_scores' should be " - "equal. Got: ", - anchors_shape[1], - scores_shape[3]); - - NODE_VALIDATION_CHECK(op, - anchors_shape[2].compatible(scores_shape[1]), - "Anchor number for inputs 'input_anchors' and 'input_scores' should be " - "equal. Got: ", - anchors_shape[2], - scores_shape[1]); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + anchors_shape[0].compatible(scores_shape[2]), + "Heights for inputs 'input_anchors' and 'input_scores' should be " + "equal. Got: ", + anchors_shape[0], + scores_shape[2]); + + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + anchors_shape[1].compatible(scores_shape[3]), + "Width for inputs 'input_anchors' and 'input_scores' should be " + "equal. Got: ", + anchors_shape[1], + scores_shape[3]); + + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + anchors_shape[2].compatible(scores_shape[1]), + "Anchor number for inputs 'input_anchors' and 'input_scores' should be " + "equal. Got: ", + anchors_shape[2], + scores_shape[1]); } auto num_rois = Dimension(0, (num_batches * op->get_attrs().post_nms_count).get_max_length()); diff --git a/src/core/shape_inference/include/tensor_data_accessor.hpp b/src/core/shape_inference/include/tensor_data_accessor.hpp index 41c48641ad9ab2..300d859561b6da 100644 --- a/src/core/shape_inference/include/tensor_data_accessor.hpp +++ b/src/core/shape_inference/include/tensor_data_accessor.hpp @@ -18,6 +18,9 @@ class ITensorAccessor { * @return Tensor to data at port. 
*/ virtual Tensor operator()(size_t port) const = 0; + +protected: + ~ITensorAccessor() = default; }; /** diff --git a/src/core/src/op/generate_proposals.cpp b/src/core/src/op/generate_proposals.cpp index 7e40b0b668752c..8dcd6c5e696010 100644 --- a/src/core/src/op/generate_proposals.cpp +++ b/src/core/src/op/generate_proposals.cpp @@ -66,4 +66,9 @@ void op::v9::GenerateProposals::validate_and_infer_types() { "The third output type must be int64 or int32."); set_output_type(2, roi_num_type, output_shapes[2]); } + +void op::v9::GenerateProposals::set_attrs(Attributes attrs) { + m_attrs = std::move(attrs); +} + } // namespace ov diff --git a/src/core/tests/type_prop/generate_proposals.cpp b/src/core/tests/type_prop/generate_proposals.cpp index efb30312840a41..3bc8c0ed22b1d0 100644 --- a/src/core/tests/type_prop/generate_proposals.cpp +++ b/src/core/tests/type_prop/generate_proposals.cpp @@ -9,10 +9,53 @@ #include "ngraph/ngraph.hpp" using namespace ngraph; +using namespace testing; using GenerateProposals = op::v9::GenerateProposals; using Attrs = op::v9::GenerateProposals::Attributes; +TEST(type_prop, generate_proposals_default_ctor) { + struct ShapesAndAttrs { + PartialShape im_info_shape; + PartialShape anchors_shape; + PartialShape deltas_shape; + PartialShape scores_shape; + size_t post_nms_count; + PartialShape expected_shape_0; + PartialShape expected_shape_1; + PartialShape expected_shape_2; + }; + + const auto dyn_dim = Dimension::dynamic(); + const auto dyn_shape = PartialShape::dynamic(); + + ShapesAndAttrs s = + {{1, 3}, {200, 336, 3, 4}, {1, 12, 200, 336}, {1, 3, 200, 336}, 1000, {{0, 1000}, 4}, {{0, 1000}}, {1}}; + Attrs attrs; + attrs.min_size = 0.0f; + attrs.nms_threshold = 0.699999988079071f; + attrs.post_nms_count = static_cast(s.post_nms_count); + attrs.pre_nms_count = 1000; + + auto im_info = std::make_shared(element::f32, s.im_info_shape); + auto anchors = std::make_shared(element::f32, s.anchors_shape); + auto deltas = std::make_shared(element::f32, s.deltas_shape); + auto scores = std::make_shared(element::f32, s.scores_shape); + + auto proposals = std::make_shared(); + proposals->set_arguments(OutputVector{im_info, anchors, deltas, scores}); + proposals->set_attrs(attrs); + proposals->validate_and_infer_types(); + + EXPECT_EQ(proposals->get_output_size(), 3); + EXPECT_EQ(proposals->get_output_element_type(0), element::f32); + EXPECT_EQ(proposals->get_output_element_type(1), element::f32); + EXPECT_EQ(proposals->get_output_element_type(2), element::i64); + EXPECT_EQ(proposals->get_output_partial_shape(0), s.expected_shape_0); + EXPECT_EQ(proposals->get_output_partial_shape(1), s.expected_shape_1); + EXPECT_EQ(proposals->get_output_partial_shape(2), s.expected_shape_2); +} + TEST(type_prop, generate_proposals) { Attrs attrs; attrs.min_size = 0.0f; @@ -114,6 +157,30 @@ TEST(type_prop, generate_proposals_dynamic) { std::vector shapes = { {{1, 3}, {200, 336, 3, 4}, {1, 12, 200, 336}, {1, 3, 200, 336}, 1000, {{0, 1000}, 4}, {{0, 1000}}, {1}}, + {{{2, 4}, 3}, + {200, 336, 3, 4}, + {{2, 4}, 12, 200, 336}, + {{2, 4}, 3, 200, 336}, + 1000, + {{0, 4000}, 4}, + {{0, 4000}}, + {{2, 4}}}, + {{{2, 8}, 3}, + {200, 336, 3, 4}, + {{2, 6}, 12, 200, 336}, + {{6, 8}, 3, 200, 336}, + 1000, + {{0, 6000}, 4}, + {{0, 6000}}, + {{6}}}, + {{dyn_dim, 3}, + {200, 336, 3, 4}, + {dyn_dim, 12, 200, 336}, + {dyn_dim, 3, 200, 336}, + 1000, + {dyn_dim, 4}, + {dyn_dim}, + {dyn_dim}}, {{2, 3}, {200, 336, 3, 4}, {2, 12, 200, 336}, dyn_shape, 500, {{0, 1000}, 4}, {{0, 1000}}, {2}}, {{1, 3}, {200, 336, 3, 
4}, dyn_shape, {1, 3, 200, 336}, 700, {{0, 700}, 4}, {{0, 700}}, {1}}, {{2, 3}, {200, 336, 3, 4}, dyn_shape, dyn_shape, 300, {{0, 600}, 4}, {{0, 600}}, {2}}, @@ -197,13 +264,27 @@ TEST(type_prop, generate_proposals_dynamic) { {{1}}}, }; - for (const auto& s : shapes) { + for (auto& s : shapes) { Attrs attrs; attrs.min_size = 0.0f; attrs.nms_threshold = 0.699999988079071f; attrs.post_nms_count = static_cast(s.post_nms_count); attrs.pre_nms_count = 1000; + if (s.im_info_shape.rank().is_static()) + set_shape_labels(s.im_info_shape, 10); + if (s.anchors_shape.rank().is_static()) + set_shape_labels(s.anchors_shape, 20); + if (s.deltas_shape.rank().is_static()) + set_shape_labels(s.deltas_shape, 30); + if (s.scores_shape.rank().is_static()) + set_shape_labels(s.scores_shape, 40); + + ov::label_t expected_batch_label = + s.im_info_shape.rank().is_static() + ? 10 + : s.deltas_shape.rank().is_static() ? 30 : s.scores_shape.rank().is_static() ? 40 : ov::no_label; + auto im_info = std::make_shared(element::f32, s.im_info_shape); auto anchors = std::make_shared(element::f32, s.anchors_shape); auto deltas = std::make_shared(element::f32, s.deltas_shape); @@ -211,11 +292,15 @@ TEST(type_prop, generate_proposals_dynamic) { auto proposals = std::make_shared(im_info, anchors, deltas, scores, attrs); - ASSERT_EQ(proposals->get_output_element_type(0), element::f32); - ASSERT_EQ(proposals->get_output_element_type(1), element::f32); - ASSERT_EQ(proposals->get_output_element_type(2), element::i64); + EXPECT_EQ(proposals->get_output_size(), 3); + EXPECT_EQ(proposals->get_output_element_type(0), element::f32); + EXPECT_EQ(proposals->get_output_element_type(1), element::f32); + EXPECT_EQ(proposals->get_output_element_type(2), element::i64); EXPECT_EQ(proposals->get_output_partial_shape(0), s.expected_shape_0); EXPECT_EQ(proposals->get_output_partial_shape(1), s.expected_shape_1); EXPECT_EQ(proposals->get_output_partial_shape(2), s.expected_shape_2); + EXPECT_THAT(get_shape_labels(proposals->get_output_partial_shape(0)), ElementsAre(ov::no_label, ov::no_label)); + EXPECT_THAT(get_shape_labels(proposals->get_output_partial_shape(1)), ElementsAre(ov::no_label)); + EXPECT_THAT(get_shape_labels(proposals->get_output_partial_shape(2)), ElementsAre(expected_batch_label)); } } diff --git a/src/frontends/ir/tests/frontend_test_mmap.cpp b/src/frontends/ir/tests/frontend_test_mmap.cpp new file mode 100644 index 00000000000000..260f28b941ea92 --- /dev/null +++ b/src/frontends/ir/tests/frontend_test_mmap.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/file_utils.hpp" +#include "frontend_test.hpp" +#include "openvino/opsets/opset1.hpp" + +#ifndef __APPLE__ // TODO: add getVmRSSInKB() for Apple platform + +class IRFrontendMMapTestsAdvanced : public ::testing::Test, public IRFrontendTestsImpl { +protected: + size_t binsize, REF_RSS; + + void SetUp() override { + size_t SIZE_MB = 32; + size_t CONST_SIZE = SIZE_MB * 1024 * 1024 / sizeof(ov::element::f32); + auto parameter = std::make_shared(ov::element::f32, ov::Shape{CONST_SIZE}); + auto constant = std::make_shared(ov::element::f32, + ov::Shape{CONST_SIZE}, + std::vector(CONST_SIZE, 0)); + auto add = std::make_shared(parameter, constant); + auto result = std::make_shared(add); + auto model = std::make_shared(ov::NodeVector{result}, ov::ParameterVector{parameter}); + + auto filePrefix = ov::test::utils::generateTestFilePrefix(); + xmlFileName = filePrefix + "_IrFrontendTestModel.xml"; + 
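+ // serialize the model below so that the .bin size can be compared against the process RSS growth measured by the tests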
binFileName = filePrefix + "_IrFrontendTestModel.bin"; + ov::serialize(model, xmlFileName); + binsize = ov::test::utils::fileSize(binFileName) / 1024; + + // In case of enabled `mmap` RAM should not increase more than 50% of .bin size + // Otherwise RAM should increase on at least 50% of .bin size + REF_RSS = binsize / 2; + } + + void TearDown() override { + RemoveTemporalFiles(); + } +}; + +TEST_F(IRFrontendMMapTestsAdvanced, core_enable_mmap_property) { + // Test checks that with enabled `mmap` .bin file + // isn't read into RAM on `read_model` stage. + // Otherwise, with disabled `mmap` .bin file should + // be in RAM + + auto test = [&](const bool& is_mmap) { + core.set_property(ov::enable_mmap(is_mmap)); + + auto rss_init = ov::test::utils::getVmRSSInKB(); + auto model = core.read_model(xmlFileName); + auto rss_read = ov::test::utils::getVmRSSInKB(); + + bool is_weights_read = (rss_read - rss_init) > REF_RSS; + if (is_mmap == is_weights_read) { + std::cerr << "Test failed: mmap is " << (is_mmap ? "enabled" : "disabled") << ", but weights are " + << (is_weights_read ? "read" : "not read") << " in RAM" << std::endl; + exit(1); + } + std::cerr << "Test passed" << std::endl; + exit(0); + }; + + for (const auto is_mmap : {true, false}) + // Run test in a separate process to not affect RAM values by previous tests + EXPECT_EXIT(test(is_mmap), ::testing::ExitedWithCode(0), "Test passed"); +} + +TEST_F(IRFrontendMMapTestsAdvanced, fe_read_ir_by_default) { + // Test checks that IR FE `read` IR by default, + // so .bin file should be loaded to RAM + + auto test = [&]() { + ov::frontend::InputModel::Ptr input_model; + std::shared_ptr model; + + auto rss_init = ov::test::utils::getVmRSSInKB(); + auto FE = manager.load_by_model(xmlFileName); + if (FE) + input_model = FE->load(xmlFileName); + if (input_model) + model = FE->convert(input_model); + auto rss_read = ov::test::utils::getVmRSSInKB(); + + bool is_weights_read = (rss_read - rss_init) > REF_RSS; + if (!is_weights_read) { + std::cerr << "Test failed: weights are not read; RAM consumption is less than expected" << std::endl; + exit(1); + } + std::cerr << "Test passed" << std::endl; + exit(0); + }; + + // Run test in a separate process to not affect RAM values by previous tests + ASSERT_EXIT(test(), ::testing::ExitedWithCode(0), "Test passed"); +} + +TEST_F(IRFrontendMMapTestsAdvanced, core_mmap_ir_by_default) { + // Test checks that Core uses `mmap` by default, + // so .bin file should not be loaded to RAM + + auto test = [&]() { + auto rss_init = ov::test::utils::getVmRSSInKB(); + auto model = core.read_model(xmlFileName, binFileName); + auto rss_read = ov::test::utils::getVmRSSInKB(); + + bool is_weights_mapped = (rss_read - rss_init) < REF_RSS; + if (!is_weights_mapped) { + std::cerr << "Test failed: weights are not mapped; RAM consumption is more than expected" << std::endl; + exit(1); + } + std::cerr << "Test passed" << std::endl; + exit(0); + }; + + // Run test in a separate process to not affect RAM values by previous tests + ASSERT_EXIT(test(), ::testing::ExitedWithCode(0), "Test passed"); +} + +#endif diff --git a/src/frontends/onnx/tests/onnx_reader_external_data.cpp b/src/frontends/onnx/tests/onnx_reader_external_data.cpp index 837b7e1c05d020..6d1bc69d621af1 100644 --- a/src/frontends/onnx/tests/onnx_reader_external_data.cpp +++ b/src/frontends/onnx/tests/onnx_reader_external_data.cpp @@ -15,6 +15,7 @@ #include "common_test_utils/test_case.hpp" #include "common_test_utils/unicode_utils.hpp" #include "ie_blob.h" +#include 
"ie_common.h" #include "ie_core.hpp" #include "ngraph/ngraph.hpp" #include "openvino/frontend/manager.hpp" @@ -65,7 +66,7 @@ TEST(ONNX_Reader_Tests, ImportModelWithExternalDataFromStringException) { stream.close(); try { auto cnnNetwork = ie.ReadNetwork(modelAsString, weights); - } catch (const ngraph::ngraph_error& e) { + } catch (const InferenceEngine::Exception& e) { EXPECT_PRED_FORMAT2(testing::IsSubstring, std::string("invalid external data:"), e.what()); EXPECT_PRED_FORMAT2(testing::IsSubstring, diff --git a/src/frontends/pytorch/src/op/fake_quantize.cpp b/src/frontends/pytorch/src/op/fake_quantize.cpp index fdb3548156b01f..0cc0113ded77c7 100644 --- a/src/frontends/pytorch/src/op/fake_quantize.cpp +++ b/src/frontends/pytorch/src/op/fake_quantize.cpp @@ -67,7 +67,7 @@ OutputVector translate_fake_quantize_per_channel_affine(const NodeContext& conte auto rank = std::get<1>(get_shape_rank(context, input_node)); auto ones = std::make_shared(const_1, rank); - auto normalized_axis = normalize_axis(context, axis, input_node); + auto normalized_axis = normalize_axis(context, axis, rank); // Create vector of length of rank filled with ones, except single -1 value at place selected by axis element. auto new_shape = std::make_shared(ones, normalized_axis, const_neg_1, const_0); // Reshape scale and zero point to tensor of the same rank as input, having shape 1 everywhere except dimension diff --git a/src/frontends/pytorch/src/op/flatten.cpp b/src/frontends/pytorch/src/op/flatten.cpp index 6022661c3aa8cf..3ff896cefd83d5 100644 --- a/src/frontends/pytorch/src/op/flatten.cpp +++ b/src/frontends/pytorch/src/op/flatten.cpp @@ -21,14 +21,6 @@ using namespace ov::op; OutputVector translate_flatten(const NodeContext& context) { num_inputs_check(context, 1, 3); auto x = context.get_input(0); - int64_t start_dim = 0; - int64_t end_dim = -1; - if (!context.input_is_none(1)) { - start_dim = context.const_input(1); - } - if (!context.input_is_none(2)) { - end_dim = context.const_input(2); - } Output shape; Output rank; std::tie(shape, rank) = get_shape_rank(context, x, true); @@ -38,20 +30,16 @@ OutputVector translate_flatten(const NodeContext& context) { if (!context.input_is_none(1)) { start_dim_node = context.get_input(1); } else { - start_dim_node = v0::Constant::create(element::i32, Shape{}, {start_dim}); + start_dim_node = v0::Constant::create(element::i32, Shape{}, {0}); } if (!context.input_is_none(2)) { end_dim_node = context.get_input(2); } else { - end_dim_node = v0::Constant::create(element::i32, Shape{}, {end_dim}); - } - if (start_dim < 0) { - start_dim_node = context.mark_node(std::make_shared(rank, start_dim_node)); - } - if (end_dim < 0) { - end_dim_node = context.mark_node(std::make_shared(rank, end_dim_node)); + end_dim_node = v0::Constant::create(element::i32, Shape{}, {-1}); } - // Slice shape from begin and end, then concat with -1, if slice return empty tensor concat shuold still be able to + start_dim_node = normalize_axis(context, start_dim_node, rank); + end_dim_node = normalize_axis(context, end_dim_node, rank); + // Slice shape from begin and end, then concat with -1, if slice return empty tensor concat should still be able to // work with it auto zero = v0::Constant::create(element::i32, Shape{1}, {0}); auto one = v0::Constant::create(element::i32, Shape{1}, {1}); diff --git a/src/frontends/pytorch/src/op/linspace.cpp b/src/frontends/pytorch/src/op/linspace.cpp new file mode 100644 index 00000000000000..c2233bee15ee24 --- /dev/null +++ b/src/frontends/pytorch/src/op/linspace.cpp 
@@ -0,0 +1,79 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/subtract.hpp" +#include "pt_framework_node.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_linspace(const NodeContext& context) { + num_inputs_check(context, 3, 7); + // "aten::linspace(Scalar start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? + // device=None, bool? pin_memory=None) -> Tensor" + + // "aten::linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)" + auto start = context.mark_node(std::make_shared(context.get_input(0), element::f32)); + auto end = context.mark_node(std::make_shared(context.get_input(1), element::f32)); + auto steps = context.mark_node(std::make_shared(context.get_input(2), element::f32)); + auto out_tensor = context.get_input(1); + auto apply_dtype = true; + auto dtype = element::f32; + if (!context.input_is_none(3) && context.get_input_size() == 7) { + // Case where dtype is provided directly in dtype input. + if (std::dynamic_pointer_cast(context.get_input_from_visible_context(3).get_node_shared_ptr())) { + dtype = convert_dtype(context.const_input(3)); + apply_dtype = true; + } else if (const auto& fw_node = cast_fw_node(context.get_input(3).get_node_shared_ptr(), "prim::dtype")) { + out_tensor = fw_node->input_value(0); + apply_dtype = false; + } else { + FRONT_END_OP_CONVERSION_CHECK(false, "Couldn't get dtype input"); + } + } else if (!context.input_is_none(3) && context.get_input_size() == 4) { + // Case where dtype is inherited from out tensor. 
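+ // the 4-input overload (aten::linspace.out) passes the output tensor at position 3, so its element type is reused for the result (ConvertLike below)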
+ out_tensor = context.get_input(3); + apply_dtype = false; + } + + auto const_0 = v0::Constant::create(element::f32, Shape{}, {0}); + auto const_1 = v0::Constant::create(element::f32, Shape{}, {1}); + auto step_range = context.mark_node(std::make_shared(const_0, steps, const_1, element::f32)); + + auto sub_end_start = context.mark_node(std::make_shared(end, start)); + auto sub_steps_1 = context.mark_node(std::make_shared(steps, const_1)); + auto step_multiplier = context.mark_node(std::make_shared(sub_end_start, sub_steps_1)); + auto is_single_step = context.mark_node(std::make_shared(steps, const_1)); + auto select_multiplier = context.mark_node(std::make_shared(is_single_step, const_0, step_multiplier)); + auto step_values = context.mark_node(std::make_shared(step_range, select_multiplier)); + + auto linspace = context.mark_node(std::make_shared(step_values, start)); + if (apply_dtype) { + linspace = context.mark_node(std::make_shared(linspace, dtype)); + } else { + linspace = context.mark_node(std::make_shared(linspace, out_tensor)); + } + + return {linspace}; +}; + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/masked_scatter.cpp b/src/frontends/pytorch/src/op/masked_scatter.cpp new file mode 100644 index 00000000000000..2c3ff50af640e5 --- /dev/null +++ b/src/frontends/pytorch/src/op/masked_scatter.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/non_zero.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scatter_nd_update.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_masked_scatter(const NodeContext& context) { + // aten::masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor + // aten::masked_scatter.out(Tensor self, Tensor mask, Tensor source, *, Tensor(a!) out) -> Tensor(a!) + // aten::masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!) 
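+ // approach: broadcast the mask to the shape of `self`, convert it to ND indices with NonZero + Transpose, then ScatterNDUpdate the input with the leading elements of the flattened `source`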
+ num_inputs_check(context, 3, 4); + auto x = context.get_input(0); + auto mask = context.get_input(1); + auto source = context.get_input(2); + // mask should be broadcastable to x shape + auto x_shape = context.mark_node(std::make_shared(x, element::i32)); + auto expanded_mask = + context.mark_node(std::make_shared(mask, x_shape, BroadcastType::BIDIRECTIONAL)); + auto index = context.mark_node(std::make_shared(expanded_mask, element::i32)); + auto input_order = context.mark_node(v0::Constant::create(element::i32, Shape{2}, {1, 0})); + index = context.mark_node(std::make_shared(index, input_order)); + // source can be of arbitrary shape, select only relevant data + auto const_minus_one = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1})); + auto flatten_source = context.mark_node(std::make_shared(source, const_minus_one, false)); + auto const_zero = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0})); + auto index_shape = context.mark_node(std::make_shared(index, element::i32)); + auto index_dim_zero = context.mark_node(std::make_shared(index_shape, const_zero, const_zero)); + auto slice_steps = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1})); + auto sliced_source = context.mark_node( + std::make_shared(flatten_source, const_zero, index_dim_zero, slice_steps, const_zero)); + + auto update = context.mark_node(std::make_shared(x, index, sliced_source)); + if (!context.input_is_none(3)) { + context.mutate_input(3, update); + } + return {update}; +}; + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/frontends/pytorch/src/op/transpose.cpp b/src/frontends/pytorch/src/op/transpose.cpp index df20593fc90f98..3ea5ef17283ffe 100644 --- a/src/frontends/pytorch/src/op/transpose.cpp +++ b/src/frontends/pytorch/src/op/transpose.cpp @@ -8,6 +8,8 @@ #include "openvino/op/add.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/if.hpp" #include "openvino/op/range.hpp" #include "openvino/op/scatter_elements_update.hpp" #include "openvino/op/unsqueeze.hpp" @@ -22,19 +24,12 @@ using namespace ov::op; OutputVector translate_transpose(const NodeContext& context) { num_inputs_check(context, 3, 3); - auto dim0 = context.const_input(1); - auto dim1 = context.const_input(2); Output rank; std::tie(std::ignore, rank) = get_shape_rank(context, context.get_input(0), true); - // Use opset::If for dim normalization auto dim0_node = context.get_input(1); auto dim1_node = context.get_input(2); - if (dim0 < 0) { - dim0_node = std::make_shared(rank, dim0_node); - } - if (dim1 < 0) { - dim1_node = std::make_shared(rank, dim1_node); - } + dim0_node = normalize_axis(context, dim0_node, rank); + dim1_node = normalize_axis(context, dim1_node, rank); auto start = v0::Constant::create(element::i32, {}, {0}); auto step = v0::Constant::create(element::i32, {}, {1}); auto range = std::make_shared(start, rank, step, element::i32); @@ -53,11 +48,39 @@ OutputVector translate_t(const NodeContext& context) { num_inputs_check(context, 1, 1); auto input = context.get_input(0); - if (input.get_partial_shape().rank().is_dynamic() || input.get_partial_shape().rank().get_length() < 2) { - return {input}; + if (input.get_partial_shape().rank().is_static()) { + if (input.get_partial_shape().rank().get_length() < 2) { + return {input}; + } + auto dims =
context.mark_node(v0::Constant::create(element::i32, Shape{2}, {1, 0})); + return {context.mark_node(std::make_shared(input, dims))}; + } else { + // If rank is not known we create If operation + Output rank; + std::tie(std::ignore, rank) = get_shape_rank(context, input, true); + auto const_2 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {2})); + auto cond = context.mark_node(std::make_shared(rank, const_2)); + + // then body + auto param_then = std::make_shared(element::dynamic, PartialShape::dynamic()); + auto dims = context.mark_node(v0::Constant::create(element::i32, Shape{2}, {1, 0})); + auto transpose = context.mark_node(std::make_shared(param_then, dims)); + auto result_then = std::make_shared(transpose); + auto then_body = std::make_shared(ResultVector{result_then}, ParameterVector{param_then}); + + // else body + auto param_else = std::make_shared(element::dynamic, PartialShape::dynamic()); + auto result_else = std::make_shared(param_else); + auto else_body = std::make_shared(ResultVector{result_else}, ParameterVector{param_else}); + + // If op creation + auto if_node = std::make_shared(cond); + context.mark_node(if_node); + if_node->set_then_body(then_body); + if_node->set_else_body(else_body); + if_node->set_input(input, param_then, param_else); + return {if_node->set_output(result_then, result_else)}; } - auto dims = context.mark_node(v0::Constant::create(element::i32, Shape{2}, {1, 0})); - return {context.mark_node(std::make_shared(input, dims))}; } } // namespace op diff --git a/src/frontends/pytorch/src/op/unflatten.cpp b/src/frontends/pytorch/src/op/unflatten.cpp index 673efbc1480161..a913398eb35a51 100644 --- a/src/frontends/pytorch/src/op/unflatten.cpp +++ b/src/frontends/pytorch/src/op/unflatten.cpp @@ -28,11 +28,13 @@ OutputVector translate_unflatten(const NodeContext& context) { if (context.get_input_type(2).is()) { sizes = concat_list_construct(sizes); } - auto input_shape = context.mark_node(std::make_shared(input, element::i32)); + Output input_shape; + Output rank; + std::tie(input_shape, rank) = get_shape_rank(context, input); auto zero_1d = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0})); auto one_1d = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1})); dim = context.mark_node(std::make_shared(dim, element::i32)); - dim = normalize_axis(context, dim, input); + dim = normalize_axis(context, dim, rank); sizes = context.mark_node(std::make_shared(sizes, element::i32)); auto max_int = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {std::numeric_limits::max()})); auto dim_plus_one = context.mark_node(std::make_shared(dim, one_1d)); diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 67a5492bde7296..f3815d17369b87 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -84,6 +84,7 @@ OP_CONVERTER(translate_linalg_norm); OP_CONVERTER(translate_linalg_matrix_norm); OP_CONVERTER(translate_linalg_vector_norm); OP_CONVERTER(translate_linear); +OP_CONVERTER(translate_linspace); OP_CONVERTER(translate_list_construct); OP_CONVERTER(translate_list_unpack); OP_CONVERTER(translate_log); @@ -92,6 +93,7 @@ OP_CONVERTER(translate_log2); OP_CONVERTER(translate_logsumexp); OP_CONVERTER(translate_loop); OP_CONVERTER(translate_masked_fill); +OP_CONVERTER(translate_masked_scatter); OP_CONVERTER(translate_max); OP_CONVERTER(translate_max_poolnd); OP_CONVERTER(translate_mean); @@ -331,6 +333,7 @@ const std::map 
get_supported_ops_ts() { {"aten::linalg_matrix_norm", op::translate_linalg_matrix_norm}, {"aten::linalg_vector_norm", op::translate_linalg_vector_norm}, {"aten::linear", op::translate_linear}, + {"aten::linspace", op::translate_linspace}, {"aten::log", op::translate_log}, {"aten::log_", op::inplace_op}, {"aten::log_softmax", op::translate_log_softmax}, @@ -339,6 +342,8 @@ const std::map get_supported_ops_ts() { {"aten::lt", op::translate_1to1_match_2_inputs_align_types}, {"aten::masked_fill", op::translate_masked_fill}, {"aten::masked_fill_", op::inplace_op}, + {"aten::masked_scatter", op::translate_masked_scatter}, + {"aten::masked_scatter_", op::inplace_op}, {"aten::matmul", op::translate_1to1_match_2_inputs}, {"aten::max", op::translate_max}, {"aten::max_pool1d", op::quantizable_op}, diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index 929ac8b251d454..2c26feee8981ff 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -117,11 +117,7 @@ std::shared_ptr get_axes_range(const NodeContext& context, int input_id) { return context.mark_node(std::make_shared(start, reduced_rank, step, element::i32)); }; -std::shared_ptr normalize_axis(const NodeContext& context, - const Output& axis, - const Output& input_node) { - Output rank; - std::tie(std::ignore, rank) = get_shape_rank(context, input_node); +Output normalize_axis(const NodeContext& context, const Output& axis, const Output& rank) { auto axis_rank = context.mark_node(std::make_shared(axis, rank)); auto is_less = context.mark_node(std::make_shared(axis_rank, rank)); auto new_axis = context.mark_node(std::make_shared(is_less, axis_rank, axis)); diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp index 175f80e77d9181..565476e7974ad8 100644 --- a/src/frontends/pytorch/src/utils.hpp +++ b/src/frontends/pytorch/src/utils.hpp @@ -39,9 +39,7 @@ Output reshape_kernel_for_group(const NodeContext& context, const Output get_axes_range(const NodeContext& context, int input_id); -std::shared_ptr normalize_axis(const NodeContext& context, - const Output& axis, - const Output& input_node); +Output normalize_axis(const NodeContext& context, const Output& axis, const Output& input_node); std::shared_ptr numel(const NodeContext& context, const Output& x); diff --git a/src/frontends/pytorch/src/utils_quantize.cpp b/src/frontends/pytorch/src/utils_quantize.cpp index a8820b8ffe473d..5af546f3d5be5d 100644 --- a/src/frontends/pytorch/src/utils_quantize.cpp +++ b/src/frontends/pytorch/src/utils_quantize.cpp @@ -90,7 +90,7 @@ Output quantize(const NodeContext& context, const auto rank = std::get<1>(get_shape_rank(context, input_convert, false, element::i32)); const auto ones = context.mark_node(std::make_shared(one, rank)); - const auto normalized_axis = normalize_axis(context, axis_convert, input_convert); + const auto normalized_axis = normalize_axis(context, axis_convert, rank); const auto new_shape = context.mark_node(std::make_shared(ones, normalized_axis, neg_one, zero)); @@ -156,8 +156,8 @@ Output quantize(const NodeContext& context, FRONT_END_OP_CONVERSION_CHECK(false, "Failed to convert a node to QuantizedPtNode"); } -std::shared_ptr cast_quantized_fw_node(Output node) { - auto quant_node = std::dynamic_pointer_cast(node.get_node_shared_ptr()); +std::shared_ptr cast_quantized_fw_node(std::shared_ptr node) { + auto quant_node = std::dynamic_pointer_cast(node); if (!quant_node) { return nullptr; } @@ -168,19 +168,6 @@ std::shared_ptr 
cast_quantized_fw_node(Output node) { return quant_node; } -std::shared_ptr cast_quantized_fw_node(Output node, const std::string& type) { - auto quant_node = std::dynamic_pointer_cast(node.get_node_shared_ptr()); - if (!quant_node) { - return nullptr; - } - const auto& attrs = quant_node->get_attrs(); - if (attrs.find(QuantizedPtNode::quantized_node_type_key) == attrs.end() || - attrs.at(QuantizedPtNode::quantized_node_type_key) != type) { - return nullptr; - } - return quant_node; -} - } // namespace pytorch } // namespace frontend } // namespace ov diff --git a/src/frontends/pytorch/src/utils_quantize.hpp b/src/frontends/pytorch/src/utils_quantize.hpp index a78855ca2b3eb5..0d5219f00885f7 100644 --- a/src/frontends/pytorch/src/utils_quantize.hpp +++ b/src/frontends/pytorch/src/utils_quantize.hpp @@ -140,8 +140,7 @@ Output quantize(const NodeContext& context, const Output& zero_point, const Output& quantized_node); -std::shared_ptr cast_quantized_fw_node(Output node); -std::shared_ptr cast_quantized_fw_node(Output node, const std::string& type); +std::shared_ptr cast_quantized_fw_node(std::shared_ptr node); namespace op { /** diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index 6ab15f47ea47e7..08abacac63cfdd 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -115,6 +115,7 @@ const std::map get_supported_ops() { // Separate translators: {"AddN", CreatorFunction(translate_add_n_op)}, + {"AdjustContrastv2", CreatorFunction(translate_adjust_contrast_op)}, {"ArgMax", CreatorFunction(translate_arg_max_op)}, {"ArgMin", CreatorFunction(translate_arg_min_op)}, {"Assert", CreatorFunction(translate_no_op)}, @@ -128,6 +129,8 @@ const std::map get_supported_ops() { {"Bucketize", CreatorFunction(translate_bucketize_op)}, {"BiasAdd", CreatorFunction(translate_bias_add_op)}, {"Cast", CreatorFunction(translate_cast_op)}, + {"CheckNumerics", CreatorFunction(translate_identity_op)}, + {"CheckNumericsV2", CreatorFunction(translate_identity_op)}, {"ClipByValue", CreatorFunction(translate_clip_by_value_op)}, {"Concat", CreatorFunction(translate_concat_op)}, {"ConcatV2", CreatorFunction(translate_concat_op)}, @@ -140,12 +143,14 @@ const std::map get_supported_ops() { {"CTCGreedyDecoder", CreatorFunction(translate_ctc_greedy_decoder_op)}, {"CTCLoss", CreatorFunction(translate_ctc_loss_op)}, {"Cumsum", CreatorFunction(translate_cumsum_op)}, + {"DivNoNan", CreatorFunction(translate_div_no_nan_op)}, {"DepthToSpace", CreatorFunction(translate_depth_to_space_op)}, {"DepthwiseConv2dNative", CreatorFunction(translate_depthwise_conv_2d_native_op)}, {"DynamicPartition", CreatorFunction(translate_dynamic_partition_op)}, {"Einsum", CreatorFunction(translate_einsum_op)}, {"Elu", CreatorFunction(translate_elu_op)}, {"EmptyTensorList", CreatorFunction(translate_tensor_list_reserve_op)}, + {"EnsureShape", CreatorFunction(translate_identity_op)}, {"ExpandDims", CreatorFunction(translate_expand_dims_op)}, {"ExtractImagePatches", CreatorFunction(translate_extract_image_patches_op)}, {"FakeQuantWithMinMaxVars", CreatorFunction(translate_fake_quant_op)}, @@ -169,6 +174,7 @@ const std::map get_supported_ops() { {"Iterator", CreatorFunction(translate_iterator_op)}, {"IteratorGetNext", CreatorFunction(translate_iterator_get_next_op)}, {"IteratorV2", CreatorFunction(translate_iterator_op)}, + {"InvertPermutation", CreatorFunction(translate_invert_permutation_op)}, {"output_arg", CreatorFunction(translate_output_arg_op)}, 
{"L2Loss", CreatorFunction(translate_l2_loss_op)}, {"LeakyRelu", CreatorFunction(translate_leaky_relu_op)}, @@ -184,6 +190,7 @@ const std::map get_supported_ops() { {"MaxPool", CreatorFunction(translate_max_pool_op)}, {"MaxPoolV2", CreatorFunction(translate_max_pool_op)}, {"MaxPool3D", CreatorFunction(translate_max_pool_op)}, + {"MaxPoolWithArgmax", CreatorFunction(translate_max_pool_op)}, {"Merge", CreatorFunction(translate_merge_op)}, {"MirrorPad", CreatorFunction(translate_mirror_pad_op)}, {"MutableHashTable", CreatorFunction(translate_hash_table_op)}, @@ -263,6 +270,7 @@ const std::map get_supported_ops() { {"TopKV2", CreatorFunction(translate_top_k_v2_op)}, {"Transpose", CreatorFunction(translate_transpose_op)}, {"Unpack", CreatorFunction(translate_unpack_op)}, + {"UnravelIndex", CreatorFunction(translate_unravel_index_op)}, {"While", CreatorFunction(translate_while_op)}, {"Where", CreatorFunction(translate_where_op)}, {"Xdivy", CreatorFunction(translate_x_div_y_op)}, diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index f90c54fbf3cf80..7ed0a39555ae3f 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -32,6 +32,7 @@ OP_T_CONVERTER(translate_binary_op); OP_T_CONVERTER(translate_direct_reduce_op); OP_CONVERTER(translate_add_n_op); +OP_CONVERTER(translate_adjust_contrast_op); OP_CONVERTER(translate_arg_max_op); OP_CONVERTER(translate_arg_min_op); OP_CONVERTER(translate_avg_pool_op); @@ -55,6 +56,7 @@ OP_CONVERTER(translate_cumsum_op); OP_CONVERTER(translate_crop_and_resize_op); OP_CONVERTER(translate_depth_to_space_op); OP_CONVERTER(translate_depthwise_conv_2d_native_op); +OP_CONVERTER(translate_div_no_nan_op); OP_CONVERTER(translate_dynamic_partition_op); OP_CONVERTER(translate_einsum_op); OP_CONVERTER(translate_elu_op); @@ -70,6 +72,7 @@ OP_CONVERTER(translate_gather_nd_op); OP_CONVERTER(translate_identity_op); OP_CONVERTER(translate_identity_n_op); OP_CONVERTER(translate_input_arg_op); +OP_CONVERTER(translate_invert_permutation_op); OP_CONVERTER(translate_output_arg_op); OP_CONVERTER(translate_interpolate_op); OP_CONVERTER(translate_is_finite_op); @@ -137,6 +140,7 @@ OP_CONVERTER_NAMED(translate_top_k_op); OP_CONVERTER_NAMED(translate_top_k_v2_op); OP_CONVERTER(translate_transpose_op); OP_CONVERTER(translate_unpack_op); +OP_CONVERTER(translate_unravel_index_op); OP_CONVERTER(translate_where_op); OP_CONVERTER(translate_x_div_y_op); OP_CONVERTER(translate_zeros_like_op); diff --git a/src/frontends/tensorflow_common/src/op/adjust_contrast.cpp b/src/frontends/tensorflow_common/src/op/adjust_contrast.cpp new file mode 100644 index 00000000000000..a3bcfa08596c6c --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/adjust_contrast.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/reduce_mean.hpp" +#include "openvino/op/subtract.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_adjust_contrast_op(const NodeContext& node) { + default_op_checks(node, 2, {"AdjustContrastv2"}); + auto images = node.get_input(0); + auto 
contrast_factor = node.get_input(1); + auto node_name = node.get_name(); + + // compute mean per channel for each image + // it reduces the spatial dimensions of images in the [batch, height, width, channel] format + auto reduce_axes = make_shared(element::i32, Shape{2}, vector{-3, -2}); + auto means = make_shared(images, reduce_axes, true); + + // cast contrast_factor since its type can be different + contrast_factor = make_shared(contrast_factor, images); + + // adjust contrast by a formula: (images - means) * contrast_factor + means + auto adjust_contrast = make_shared(images, means)->output(0); + adjust_contrast = make_shared(adjust_contrast, contrast_factor); + adjust_contrast = make_shared(adjust_contrast, means); + + set_node_name(node_name, adjust_contrast.get_node_shared_ptr()); + + return {adjust_contrast}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/div_no_nan.cpp b/src/frontends/tensorflow_common/src/op/div_no_nan.cpp new file mode 100644 index 00000000000000..1464b5a7b8c589 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/div_no_nan.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/select.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_div_no_nan_op(const NodeContext& node) { + default_op_checks(node, 2, {"DivNoNan"}); + auto numer = node.get_input(0); + auto denom = node.get_input(1); + + // prepare auxiliary zero and one constants of the same type as the inputs + auto zero = make_shared(element::f32, Shape{}, 0.0f)->output(0); + auto one = make_shared(element::f32, Shape{}, 1.0f)->output(0); + zero = make_shared(zero, denom); + one = make_shared(one, denom); + + // compute a mask to get positions of NaN values in the division result (where the denominator is zero) + auto is_zero = make_shared(denom, zero); + + // fix zeros in the denominator to avoid undefined behaviour + auto fixed_denom = make_shared(is_zero, one, denom); + + // compute the division without risk of division by zero + // since all zero denominators have been fixed + auto div = make_shared(numer, fixed_denom); + + // set the result to zero where the denominator was initially zero + auto div_no_nan = make_shared(is_zero, zero, div); + set_node_name(node.get_name(), div_no_nan); + return div_no_nan->outputs(); +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/identity.cpp b/src/frontends/tensorflow_common/src/op/identity.cpp index a2b36f1f1a6083..add9c45243a071 100644 --- a/src/frontends/tensorflow_common/src/op/identity.cpp +++ b/src/frontends/tensorflow_common/src/op/identity.cpp @@ -3,10 +3,8 @@ // #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" using namespace std; -using namespace ov::opset8; namespace ov { namespace frontend { @@ -14,7 +12,10 @@ namespace tensorflow { namespace op { OutputVector translate_identity_op(const NodeContext& node) { - vector supported_ops = {"Identity", + vector supported_ops = {"CheckNumerics", + "CheckNumericsV2", + "EnsureShape", + "Identity", "PreventGradient", "Snapshot", "StopGradient", diff --git
a/src/frontends/tensorflow_common/src/op/interpolate.cpp b/src/frontends/tensorflow_common/src/op/interpolate.cpp index c2c09295e4a024..a46acf773e5376 100644 --- a/src/frontends/tensorflow_common/src/op/interpolate.cpp +++ b/src/frontends/tensorflow_common/src/op/interpolate.cpp @@ -2,12 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/interpolate.hpp" + #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" using namespace std; using namespace ov; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -30,58 +36,48 @@ OutputVector translate_interpolate_op(const NodeContext& node) { " is True, the attribute align_corners must be False."); // prepare attributes for OpenVINO Interpolate operation - Interpolate::InterpolateAttrs interpolate_attrs; - interpolate_attrs.shape_calculation_mode = Interpolate::ShapeCalcMode::SIZES; + v11::Interpolate::InterpolateAttrs interpolate_attrs; + interpolate_attrs.shape_calculation_mode = v11::Interpolate::ShapeCalcMode::SIZES; if (op_type == "ResizeNearestNeighbor") { - interpolate_attrs.mode = Interpolate::InterpolateMode::NEAREST; - interpolate_attrs.nearest_mode = Interpolate::NearestMode::FLOOR; + interpolate_attrs.mode = v11::Interpolate::InterpolateMode::NEAREST; + interpolate_attrs.nearest_mode = v11::Interpolate::NearestMode::FLOOR; } else if (op_type == "ResizeBilinear") { auto input_rank = images.get_partial_shape().rank(); if (input_rank.is_static() && input_rank.get_length() == 4) { - interpolate_attrs.mode = Interpolate::InterpolateMode::LINEAR_ONNX; + interpolate_attrs.mode = v11::Interpolate::InterpolateMode::LINEAR_ONNX; } else { - interpolate_attrs.mode = Interpolate::InterpolateMode::LINEAR; + interpolate_attrs.mode = v11::Interpolate::InterpolateMode::LINEAR; } - interpolate_attrs.nearest_mode = Interpolate::NearestMode::ROUND_PREFER_FLOOR; + interpolate_attrs.nearest_mode = v11::Interpolate::NearestMode::ROUND_PREFER_FLOOR; } if (tf_align_corners) { - interpolate_attrs.coordinate_transformation_mode = Interpolate::CoordinateTransformMode::ALIGN_CORNERS; - if (interpolate_attrs.mode == Interpolate::InterpolateMode::NEAREST) { - interpolate_attrs.nearest_mode = Interpolate::NearestMode::ROUND_PREFER_CEIL; + interpolate_attrs.coordinate_transformation_mode = v11::Interpolate::CoordinateTransformMode::ALIGN_CORNERS; + if (interpolate_attrs.mode == v11::Interpolate::InterpolateMode::NEAREST) { + interpolate_attrs.nearest_mode = v11::Interpolate::NearestMode::ROUND_PREFER_CEIL; } } else if (tf_half_pixel_centers) { - if (interpolate_attrs.mode == Interpolate::InterpolateMode::NEAREST) { + if (interpolate_attrs.mode == v11::Interpolate::InterpolateMode::NEAREST) { interpolate_attrs.coordinate_transformation_mode = - Interpolate::CoordinateTransformMode::TF_HALF_PIXEL_FOR_NN; + v11::Interpolate::CoordinateTransformMode::TF_HALF_PIXEL_FOR_NN; } else { - interpolate_attrs.coordinate_transformation_mode = Interpolate::CoordinateTransformMode::HALF_PIXEL; + interpolate_attrs.coordinate_transformation_mode = v11::Interpolate::CoordinateTransformMode::HALF_PIXEL; } } else { - interpolate_attrs.coordinate_transformation_mode = Interpolate::CoordinateTransformMode::ASYMMETRIC; + interpolate_attrs.coordinate_transformation_mode = v11::Interpolate::CoordinateTransformMode::ASYMMETRIC; } - // prepare 
scales input - auto images_shape = make_shared(images, element::i32); - auto spatial_shape = make_shared(images_shape, - make_shared(element::i64, Shape{1}, std::vector{1}), - make_shared(element::i64, Shape{1}, std::vector{3}), - make_shared(element::i64, Shape{1}, std::vector{1}), - make_shared(element::i64, Shape{1}, std::vector{0})); - auto scales = make_shared(make_shared(size, element::f32), - make_shared(spatial_shape, element::f32)); - // since Interpolate is layout agnostic // we can avoid Transpose operation by specifying axes = {1, 2} for original NHWC layout - auto axes = make_shared(element::i32, Shape{2}, std::vector({1, 2})); + auto axes = make_shared(element::i32, Shape{2}, std::vector({1, 2})); // according to the specification of ResizeBilinear, // it always returns FP32 output type so we immediately align input type for it if (op_type == "ResizeBilinear") { - images = make_shared(images, element::f32); + images = make_shared(images, element::f32); } - auto interpolate = make_shared(images, size, scales, axes, interpolate_attrs); + auto interpolate = make_shared(images, size, axes, interpolate_attrs); set_node_name(node.get_name(), interpolate); return {interpolate}; } diff --git a/src/frontends/tensorflow_common/src/op/invert_permutation.cpp b/src/frontends/tensorflow_common/src/op/invert_permutation.cpp new file mode 100644 index 00000000000000..07b2233a22628a --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/invert_permutation.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/scatter_update.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_invert_permutation_op(const NodeContext& node) { + default_op_checks(node, 1, {"InvertPermutation"}); + auto x = node.get_input(0); + auto node_name = node.get_name(); + + // compute a number of elements in x + // by definition, x is 1D tensor + auto x_shape = make_shared(x, element::i64); + auto squeeze_dim = make_shared(element::i32, Shape{1}, 0); + auto n = make_shared(x_shape, squeeze_dim); + + // generate a range [0, n) + auto zero = make_shared(element::i64, Shape{}, 0); + auto one = make_shared(element::i64, Shape{}, 1); + auto values = make_shared(zero, n, one, element::i64)->output(0); + values = make_shared(values, x); + + // compute inverted permutation + auto axis = make_shared(element::i32, Shape{1}, 0); + auto invert_permutation = make_shared(x, x, values, axis); + + set_node_name(node_name, invert_permutation); + return {invert_permutation}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/max_pool.cpp b/src/frontends/tensorflow_common/src/op/max_pool.cpp index b2a4520249eb78..d64ac1a17fbafe 100644 --- a/src/frontends/tensorflow_common/src/op/max_pool.cpp +++ b/src/frontends/tensorflow_common/src/op/max_pool.cpp @@ -2,12 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/max_pool.hpp" + #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/divide.hpp" 
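As a side note on the InvertPermutation translation above: the ScatterUpdate over a range [0, n) implements the usual inverse-permutation relation result[x[i]] = i. A minimal scalar sketch of that semantics, assuming a valid permutation as input and using the invented helper name invert_permutation_ref, looks like this:

#include <cstdint>
#include <cstdio>
#include <vector>

// Reference semantics of TensorFlow InvertPermutation, mirroring the
// ScatterUpdate(data = x, indices = x, updates = range(0, n)) decomposition above:
// for every position i, the inverted permutation satisfies result[x[i]] = i.
static std::vector<int64_t> invert_permutation_ref(const std::vector<int64_t>& x) {
    std::vector<int64_t> result(x.size(), 0);
    for (int64_t i = 0; i < static_cast<int64_t>(x.size()); ++i) {
        result[x[i]] = i;
    }
    return result;
}

int main() {
    // x = [2, 0, 1] -> inverse = [1, 2, 0]
    for (int64_t v : invert_permutation_ref({2, 0, 1}))
        std::printf("%lld ", static_cast<long long>(v));
    std::printf("\n");
}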
+#include "openvino/op/gather.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/subtract.hpp" #include "utils.hpp" using namespace std; using namespace ov; +using namespace ov::op; using namespace ov::frontend::tensorflow; namespace ov { @@ -17,21 +26,25 @@ namespace op { OutputVector translate_max_pool_util(const NodeContext& node, size_t spatial_dims_num, - const std::vector& tf_kernel_sizes, - const std::vector& tf_strides) { - default_op_checks(node, 1, {"MaxPool", "MaxPoolV2", "MaxPool3D"}); + const vector& tf_kernel_sizes, + const vector& tf_strides, + element::Type indices_element_type = element::i64, + int64_t axis = 0, + bool set_friendly_name = true, + bool with_indices = false) { + default_op_checks(node, 1, {"MaxPool", "MaxPoolV2", "MaxPool3D", "MaxPoolWithArgmax"}); TENSORFLOW_OP_VALIDATION(node, spatial_dims_num == 2 || spatial_dims_num == 3, - "Only MaxPool, MaxPoolV2 and MaxPool3D are supported."); + "Only MaxPool, MaxPoolV2, MaxPool3D and MaxPoolWithArgmax are supported."); auto input = node.get_input(0); - auto tf_padding_type = node.get_attribute("padding"); - ov::op::PadType auto_pad = convert_tf_padding(node, tf_padding_type); - auto tf_data_format = node.get_attribute("data_format", spatial_dims_num == 2 ? "NHWC" : "NDHWC"); + auto tf_padding_type = node.get_attribute("padding"); + PadType auto_pad = convert_tf_padding(node, tf_padding_type); + auto tf_data_format = node.get_attribute("data_format", spatial_dims_num == 2 ? "NHWC" : "NDHWC"); - auto tf_explicit_paddings = std::vector{}; - if (auto_pad == ov::op::PadType::EXPLICIT) { - tf_explicit_paddings = node.get_attribute>("explicit_paddings", {}); + auto tf_explicit_paddings = vector{}; + if (auto_pad == PadType::EXPLICIT) { + tf_explicit_paddings = node.get_attribute>("explicit_paddings", {}); } bool is_nhwc = true; @@ -48,40 +61,51 @@ OutputVector translate_max_pool_util(const NodeContext& node, } // prepare attributes for OpenVINO MaxPool operation - ov::Strides strides(spatial_dims_num); - ov::Strides dilations = (spatial_dims_num == 2 ? ov::Strides({1, 1}) : ov::Strides({1, 1, 1})); - ov::Shape kernel_sizes(spatial_dims_num); - ov::frontend::tensorflow::convert_nhwc_to_hw(is_nhwc, tf_strides, strides); - ov::frontend::tensorflow::convert_nhwc_to_hw(is_nhwc, tf_kernel_sizes, kernel_sizes); - - ov::CoordinateDiff pads_begin; - ov::CoordinateDiff pads_end; - if (auto_pad == ov::op::PadType::EXPLICIT) { + Strides strides(spatial_dims_num); + Strides dilations = (spatial_dims_num == 2 ? 
Strides({1, 1}) : Strides({1, 1, 1})); + Shape kernel_sizes(spatial_dims_num); + convert_nhwc_to_hw(is_nhwc, tf_strides, strides); + convert_nhwc_to_hw(is_nhwc, tf_kernel_sizes, kernel_sizes); + + CoordinateDiff pads_begin; + CoordinateDiff pads_end; + if (auto_pad == PadType::EXPLICIT) { fill_explicit_pads_vectors(node, is_nhwc, spatial_dims_num, tf_explicit_paddings, pads_begin, pads_end); } // prepare input to MaxPool - convert_nhwc_to_nchw(is_nhwc, input, ov::Rank(spatial_dims_num + 2)); - - auto max_pool_node = std::make_shared(input, - strides, - dilations, - ov::Shape(pads_begin.begin(), pads_begin.end()), - ov::Shape(pads_end.begin(), pads_end.end()), - kernel_sizes, - ov::op::RoundingType::FLOOR, - auto_pad); + convert_nhwc_to_nchw(is_nhwc, input, Rank(spatial_dims_num + 2)); + + auto max_pool_node = make_shared(input, + strides, + dilations, + Shape(pads_begin.begin(), pads_begin.end()), + Shape(pads_end.begin(), pads_end.end()), + kernel_sizes, + RoundingType::FLOOR, + auto_pad, + indices_element_type, + axis); auto max_pool = max_pool_node->output(0); - ov::frontend::tensorflow::convert_nchw_to_nhwc(is_nhwc, max_pool, ov::Rank(spatial_dims_num + 2)); - ov::frontend::tensorflow::set_node_name(node.get_name(), max_pool.get_node_shared_ptr()); + convert_nchw_to_nhwc(is_nhwc, max_pool, Rank(spatial_dims_num + 2)); + if (set_friendly_name) { + set_node_name(node.get_name(), max_pool.get_node_shared_ptr()); + } else { + set_out_name(node.get_name() + ":0", max_pool); + } + + if (with_indices) { + auto output_indices = max_pool_node->output(1); + return OutputVector{max_pool, output_indices}; + } return {max_pool}; } OutputVector translate_max_pool(const NodeContext& node, size_t spatial_dims_num) { // MaxPool2D and MaxPool3D have ksize and strides as attributes // retrieve attributes - auto strides = node.get_attribute>("strides"); - auto kernel_sizes = node.get_attribute>("ksize"); + auto strides = node.get_attribute>("strides"); + auto kernel_sizes = node.get_attribute>("ksize"); return translate_max_pool_util(node, spatial_dims_num, kernel_sizes, strides); } @@ -104,6 +128,81 @@ OutputVector translate_max_pool_v2(const NodeContext& node) { return translate_max_pool_util(node, 2, ksize_vector, strides_vector); } +OutputVector translate_max_pool_with_argmax(const NodeContext& node) { + // MaxPoolWithArgmax has just one input. ksize and strides are attributes + TENSORFLOW_OP_VALIDATION(node, + node.get_input_size() > 0, + "MaxPoolWithArgmax operation must have at least one input."); + auto include_batch_in_index = node.get_attribute("include_batch_in_index", false); + auto targmax = node.get_attribute("Targmax", element::i64); + auto ksize = node.get_attribute>("ksize"); + auto strides = node.get_attribute>("strides"); + auto images = node.get_input(0); + auto node_name = node.get_name(); + + // the axis defines from which dimension the output indices are counted + int64_t axis = include_batch_in_index ? 
0 : 1; + + auto max_pool_with_indices = translate_max_pool_util(node, 2, ksize, strides, targmax, axis, false, true); + TENSORFLOW_OP_VALIDATION(node, + max_pool_with_indices.size() == 2, + "[TensorFlow Frontend] internal error: expect two outputs for MaxPoolWithArgmax."); + auto max_pool = max_pool_with_indices[0]; + auto output_indices_nchw = max_pool_with_indices[1]; + + auto tf_data_format = node.get_attribute("data_format", "NHWC"); + Output output_indices; + if (tf_data_format != "NHWC") { + output_indices = output_indices_nchw; + } else { + output_indices = output_indices_nchw; + // adjust output indices to have them for NHWC layout + // now it is computed for NCHW layout + // 1. compute all dimensions N, H, W, C + auto images_shape = make_shared(images, targmax); + auto const_zero = make_shared(element::i32, Shape{1}, 0); + auto const_one = make_shared(element::i32, Shape{1}, 1); + auto const_two = make_shared(element::i32, Shape{1}, 2); + auto const_three = make_shared(element::i32, Shape{1}, 3); + auto N = make_shared(images_shape, const_zero, const_zero); + auto H = make_shared(images_shape, const_one, const_zero); + auto W = make_shared(images_shape, const_two, const_zero); + auto C = make_shared(images_shape, const_three, const_zero); + + // 2. compute complex index for NCHW layout, i.e. n, h, w, c + auto HW = make_shared(H, W); + Output n; + if (include_batch_in_index) { + auto CHW = make_shared(C, HW); + n = make_shared(output_indices_nchw, CHW); + auto nCHW = make_shared(n, CHW); + output_indices_nchw = make_shared(output_indices_nchw, nCHW); + } else { + n = make_shared(targmax, Shape{1}, 0); + } + auto c = make_shared(output_indices_nchw, HW); + auto cHW = make_shared(c, HW); + output_indices_nchw = make_shared(output_indices_nchw, cHW); + auto h = make_shared(output_indices_nchw, W); + auto hW = make_shared(h, W); + auto w = make_shared(output_indices_nchw, hW); + + // transform them into flatten form for NHWC layout + auto WC = make_shared(W, C); + auto HWC = make_shared(H, WC); + output_indices = make_shared(n, HWC); + auto hWC = make_shared(h, WC); + output_indices = make_shared(output_indices, hWC); + auto wC = make_shared(w, C); + output_indices = make_shared(output_indices, wC); + output_indices = make_shared(output_indices, c); + convert_nchw_to_nhwc(true, output_indices, 4); + } + + set_out_name(node_name + ":1", output_indices); + return {max_pool, output_indices}; +} + OutputVector translate_max_pool_op(const NodeContext& node) { if (node.get_op_type() == "MaxPool") { return translate_max_pool(node, 2); @@ -111,6 +210,8 @@ OutputVector translate_max_pool_op(const NodeContext& node) { return translate_max_pool_v2(node); } else if (node.get_op_type() == "MaxPool3D") { return translate_max_pool(node, 3); + } else if (node.get_op_type() == "MaxPoolWithArgmax") { + return translate_max_pool_with_argmax(node); } else { TENSORFLOW_OP_VALIDATION(node, false, "Only MaxPool2D, MaxPoolV2 and MaxPool3D are supported."); } diff --git a/src/frontends/tensorflow_common/src/op/unravel_index.cpp b/src/frontends/tensorflow_common/src/op/unravel_index.cpp new file mode 100644 index 00000000000000..f649393d5d4c95 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/unravel_index.cpp @@ -0,0 +1,96 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" 
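Stepping back to the MaxPoolWithArgmax translation above: the chain of Gather, Multiply, Divide and Subtract nodes converts an argmax index flattened for the NCHW layout into one flattened for NHWC. A scalar sketch of that arithmetic follows; it is an illustrative helper rather than OpenVINO code, and nchw_to_nhwc_index is an invented name:

#include <cstdint>
#include <cstdio>

// An index flattened for the NCHW layout is decomposed into (n, c, h, w)
// and re-flattened for NHWC, as the graph above does element-wise.
// If include_batch_in_index is false, the index carries no batch term.
static int64_t nchw_to_nhwc_index(int64_t idx, int64_t C, int64_t H, int64_t W, bool include_batch_in_index) {
    const int64_t HW = H * W;
    int64_t n = 0;
    if (include_batch_in_index) {
        n = idx / (C * HW);
        idx -= n * C * HW;
    }
    const int64_t c = idx / HW;
    idx -= c * HW;
    const int64_t h = idx / W;
    const int64_t w = idx - h * W;
    return n * H * W * C + h * W * C + w * C + c;
}

int main() {
    // element (n=0, c=1, h=2, w=3) in a 4x4 image with 2 channels:
    // NCHW flat index = 1*16 + 2*4 + 3 = 27, NHWC flat index = 2*4*2 + 3*2 + 1 = 23
    std::printf("%lld\n", static_cast<long long>(nchw_to_nhwc_index(27, 2, 4, 4, false)));
}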
+#include "openvino/op/divide.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/floor_mod.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reduce_prod.hpp" +#include "openvino/op/roll.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/unsqueeze.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_unravel_index_op(const NodeContext& node) { + default_op_checks(node, 2, {"UnravelIndex"}); + auto indices = node.get_input(0); + auto dims = node.get_input(1); + auto node_name = node.get_name(); + + // create auxiliary constant + auto const_one_same_type = make_shared(element::i32, Shape{1}, 1)->output(0); + const_one_same_type = make_shared(const_one_same_type, dims); + auto num_dims = make_shared(dims, element::i32)->output(0); + auto const_zero = make_shared(element::i32, Shape{1}, 0); + auto const_one = make_shared(element::i32, Shape{1}, 1); + + // generate upper triangular matrice from dims + // for example, if dims = [3, 4, 5] it generates the following matrice: + // [3 4 5] + // [1 4 5] + // [1 1 5] + // 1. unsqueeze dims to have it of a shape [1, n] + dims = make_shared(dims, const_zero); + // 2. create a constant of ones with a shape [n, 1] + auto shape_n1 = make_shared(OutputVector{num_dims, const_one}, 0); + auto const_one_n1 = make_shared(const_one_same_type, shape_n1); + // 3. generate a mask to have upper triangular matric + auto scalar_zero = make_shared(element::i32, Shape{}, 0); + auto scalar_num = make_shared(num_dims, const_zero); + auto scalar_one = make_shared(element::i32, Shape{}, 1); + auto rng0n = make_shared(scalar_zero, scalar_num, scalar_one, element::i32); + auto rng0n_1n = make_shared(rng0n, const_zero); + auto rng0n_n1 = make_shared(rng0n, const_one); + auto mask = make_shared(rng0n_n1, rng0n_1n); + // 4. generate the upper triangular matrice + auto upper_trig_matrice = make_shared(mask, dims, const_one_n1); + + // compute reduce prod to understand how many elements are place in each slice + // for example, if dims = [3, 4, 5], slice by highest dimension has 20 elements + // lower dimension has 5 elements, etc. 
+ // this way it computes [60, 20, 5] where 60 is a number of all elements in example tensor + auto num_elems_by_slice = make_shared(upper_trig_matrice, const_one, false)->output(0); + + // pad the resulted product with one and exclude the first element in the product + // the number of elements in the whole tensor is not needed + // for example, it computes div_coeffs = [20, 5, 1] and mod_coeffs = [60, 20, 5] by shifting + auto coeffs = make_shared(OutputVector{num_elems_by_slice, const_one_same_type}, 0); + auto stop_slice = make_shared(ov::element::i32, Shape{1}, numeric_limits::max()); + auto div_coeffs = make_shared(coeffs, const_one, stop_slice, const_one)->output(0); + auto mod_coeffs = num_elems_by_slice; + + // using computed div_coeffs and mod_coeffs, compute indices of each element by its index in the flattened tensor + // the resulted reduce product will be used for indices computation + // for example, the product is a vector + // each index will be computed by formula: (index % mod_coeff) / div_coeff + indices = make_shared(indices, const_zero); + div_coeffs = make_shared(div_coeffs, const_one); + mod_coeffs = make_shared(mod_coeffs, const_one); + auto result_indices = make_shared(indices, mod_coeffs)->output(0); + result_indices = make_shared(result_indices, div_coeffs); + set_node_name(node_name, result_indices.get_node_shared_ptr()); + + return {result_indices}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/utils.cpp b/src/frontends/tensorflow_common/src/utils.cpp index 05aca5ad9ba6c6..83c2f6e8796471 100644 --- a/src/frontends/tensorflow_common/src/utils.cpp +++ b/src/frontends/tensorflow_common/src/utils.cpp @@ -42,6 +42,7 @@ PadType convert_tf_padding(const frontend::NodeContext& node, const string& tf_p "MaxPool", "MaxPoolV2", "MaxPool3D", + "MaxPoolWithArgmax", "ExtractImagePatches", "DepthwiseConv2dNative", "AvgPool", @@ -68,8 +69,8 @@ PadType convert_tf_padding(const frontend::NodeContext& node, const string& tf_p return PadType::SAME_LOWER; } } else if (op_type == "Conv2D" || op_type == "Conv3D" || op_type == "MaxPool" || op_type == "MaxPoolV2" || - op_type == "MaxPool3D" || op_type == "ExtractImagePatches" || op_type == "DepthwiseConv2dNative" || - op_type == "AvgPool" || op_type == "AvgPool3D") { + op_type == "MaxPool3D" || op_type == "MaxPoolWithArgmax" || op_type == "ExtractImagePatches" || + op_type == "DepthwiseConv2dNative" || op_type == "AvgPool" || op_type == "AvgPool3D") { if (tf_padding == "SAME") { // According to the formulas for calculating auto_pad values of the // Conv layer in the Operation specification, diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index e438bb782e3618..41805b59234655 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -10,6 +10,7 @@ #pragma once #include "openvino/runtime/properties.hpp" +#include "openvino/runtime/threading/istreams_executor.hpp" namespace ov { @@ -42,6 +43,33 @@ static constexpr Property exclusive_async_requests */ static constexpr Property config_device_id{"CONFIG_DEVICE_ID"}; +/** + * @brief The name for setting CPU affinity per thread option. 
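Returning to the UnravelIndex translation above: the upper triangular matrix and ReduceProd produce per-dimension slice sizes, from which the mod and div coefficients are derived so that each coordinate equals (flat_index % mod_coeff) / div_coeff. A scalar sketch of that computation, with the invented helper name unravel_index_ref, is:

#include <cstdint>
#include <cstdio>
#include <vector>

// From dims = [d0, d1, ..., dn-1] build mod_coeffs[k] = dk * dk+1 * ... * dn-1
// and div_coeffs = mod_coeffs shifted left with a trailing 1; then
// coordinate_k = (flat_index % mod_coeffs[k]) / div_coeffs[k], as in the graph above.
static std::vector<int64_t> unravel_index_ref(int64_t flat, const std::vector<int64_t>& dims) {
    const size_t n = dims.size();
    if (n == 0)
        return {};
    std::vector<int64_t> mod_coeffs(n), div_coeffs(n);
    int64_t prod = 1;
    for (size_t k = n; k-- > 0;) {           // suffix products, e.g. dims = [3, 4, 5] -> [60, 20, 5]
        prod *= dims[k];
        mod_coeffs[k] = prod;
    }
    for (size_t k = 0; k + 1 < n; ++k)
        div_coeffs[k] = mod_coeffs[k + 1];   // e.g. [20, 5, ...]
    div_coeffs[n - 1] = 1;                   // ..., 1]
    std::vector<int64_t> result(n);
    for (size_t k = 0; k < n; ++k)
        result[k] = (flat % mod_coeffs[k]) / div_coeffs[k];
    return result;
}

int main() {
    // flat index 23 in a [3, 4, 5] tensor unravels to (1, 0, 3)
    for (int64_t v : unravel_index_ref(23, {3, 4, 5}))
        std::printf("%lld ", static_cast<long long>(v));
    std::printf("\n");
}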
+ * + * It is passed to Core::get_property() + * PluginConfigParams::NO (no pinning for CPU inference threads) + * PluginConfigParams::YES, which is default on the conventional CPUs (pinning threads to cores, best for static + * benchmarks), + * + * the following options are implemented only for the TBB as a threading option + * ov::threading::IStreamsExecutor::ThreadBindingType::NUMA (pinning threads to NUMA nodes, best for real-life, + * contented cases) on the Windows and MacOS* this option behaves as YES + * ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE (let the runtime to do pinning to the cores types, + * e.g. prefer the "big" cores for latency tasks) on the hybrid CPUs this option is default + * + * Also, the settings are ignored, if the OpenVINO compiled with OpenMP and any affinity-related OpenMP's + * environment variable is set (as affinity is configured explicitly) + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property cpu_bind_thread{ + "CPU_BIND_THREAD"}; + +/** + * @brief Limit \#threads that are used by IStreamsExecutor to execute `parallel_for` calls + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property threads_per_stream{"THREADS_PER_STREAM"}; + } // namespace internal OPENVINO_DEPRECATED( "This property is deprecated and will be removed soon. Use ov::internal::caching_properties instead of it.") diff --git a/src/inference/include/ie/vpu/myriad_config.hpp b/src/inference/include/ie/vpu/myriad_config.hpp index 7fc7d774941526..52d490b87d1b86 100644 --- a/src/inference/include/ie/vpu/myriad_config.hpp +++ b/src/inference/include/ie/vpu/myriad_config.hpp @@ -60,7 +60,7 @@ INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_VPU_CONFIG(MYRIAD_PCIE); INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_VPU_CONFIG(MYRIAD_USB); /** - * @brief Optimize vpu plugin execution to maximize throughput. + * @brief Optimize MYRIAD plugin execution to maximize throughput. * This option should be used with integer value which is the requested number of streams. * The only possible values are: * 1 diff --git a/src/inference/include/openvino/runtime/intel_gpu/properties.hpp b/src/inference/include/openvino/runtime/intel_gpu/properties.hpp index 4cbe906d47db81..7f661d5b67a74a 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/properties.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/properties.hpp @@ -63,6 +63,14 @@ static constexpr Property, PropertyMutability::R */ static constexpr Property enable_loop_unrolling{"GPU_ENABLE_LOOP_UNROLLING"}; +/** + * @brief Turning on this key disables winograd convolution. + * Winograd convolution has different characteristics for accuracy and performance compared to other convolution + * implementations. 
+ * @ingroup ov_runtime_ocl_gpu_prop_cpp_api + */ +static constexpr Property disable_winograd_convolution{"GPU_DISABLE_WINOGRAD_CONVOLUTION"}; + namespace hint { /** * @brief This enum represents the possible value of ov::intel_gpu::hint::queue_throttle property: diff --git a/src/inference/src/cnn_network_ngraph_impl.cpp b/src/inference/src/cnn_network_ngraph_impl.cpp index 85572d8787ec34..cb26e63e7c2626 100644 --- a/src/inference/src/cnn_network_ngraph_impl.cpp +++ b/src/inference/src/cnn_network_ngraph_impl.cpp @@ -16,9 +16,9 @@ #include "blob_factory.hpp" #include "cpp/ie_cnn_network.h" #include "ie_common.h" -#include "ie_itt.hpp" #include "ie_memcpy.h" #include "ie_ngraph_utils.hpp" +#include "itt.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/pass/manager.hpp" #include "openvino/cc/pass/itt.hpp" @@ -266,7 +266,7 @@ void CNNNetworkNGraphImpl::validate(int version) { StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, size_t outputIndex, ResponseDesc* resp) noexcept { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetworkNGraphImpl::addOutput"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "CNNNetworkNGraphImpl::addOutput"); try { for (const auto& layer : _ngraph_function->get_ops()) { @@ -471,7 +471,7 @@ void collect_dynamism_signature(const std::shared_ptr& ov_model, #endif void CNNNetworkNGraphImpl::reshape(const std::map& inputShapes) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetworkNGraphImpl::reshape"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "CNNNetworkNGraphImpl::reshape"); auto params = _ngraph_function->get_parameters(); diff --git a/src/inference/src/compilation_context.cpp b/src/inference/src/compilation_context.cpp index 3668a4d932e1b8..e4a373f1c0223b 100644 --- a/src/inference/src/compilation_context.cpp +++ b/src/inference/src/compilation_context.cpp @@ -15,7 +15,7 @@ #include "cpp/ie_cnn_network.h" #include "details/ie_exception.hpp" #include "file_utils.h" -#include "ie_itt.hpp" +#include "itt.hpp" #include "ngraph/opsets/opset6.hpp" #include "openvino/pass/manager.hpp" #include "transformations/fix_rt_info.hpp" @@ -79,7 +79,7 @@ std::string ModelCache::calculate_file_info(const std::string& filePath) { } std::string ModelCache::compute_hash(const std::shared_ptr& model, const ov::AnyMap& compileOptions) { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "ModelCache::compute_hash - Model"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "ModelCache::compute_hash - Model"); OPENVINO_ASSERT(model); @@ -145,7 +145,7 @@ std::string ModelCache::compute_hash(const std::shared_ptr& mod } std::string ModelCache::compute_hash(const std::string& modelName, const ov::AnyMap& compileOptions) { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "ModelCache::compute_hash - ModelName"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "ModelCache::compute_hash - ModelName"); uint64_t seed = 0; try { seed = hash_combine(seed, FileUtils::absoluteFilePath(modelName)); @@ -162,7 +162,7 @@ std::string ModelCache::compute_hash(const std::string& modelName, const ov::Any std::string ModelCache::compute_hash(const std::string& modelStr, const ov::Tensor& tensor, const ov::AnyMap& compileOptions) { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "ModelCache::compute_hash - Model Memory"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "ModelCache::compute_hash - Model Memory"); uint64_t seed = 0; // model string seed = hash_combine(seed, modelStr); diff --git a/src/inference/src/core.cpp 
b/src/inference/src/core.cpp index 2a60056fbee4fe..a1efd394a0e860 100644 --- a/src/inference/src/core.cpp +++ b/src/inference/src/core.cpp @@ -8,7 +8,7 @@ #include "cnn_network_ngraph_impl.hpp" #include "dev/converter_utils.hpp" #include "dev/core_impl.hpp" -#include "ie_itt.hpp" +#include "itt.hpp" #include "openvino/core/so_extension.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/iremote_context.hpp" @@ -211,7 +211,7 @@ void Core::add_extension(const std::vector>& exte } CompiledModel Core::import_model(std::istream& modelStream, const std::string& device_name, const AnyMap& config) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::import_model"); OV_CORE_CALL_STATEMENT({ auto exec = _impl->import_model(modelStream, device_name, config); return {exec._ptr, exec._so}; @@ -219,7 +219,7 @@ CompiledModel Core::import_model(std::istream& modelStream, const std::string& d } CompiledModel Core::import_model(std::istream& modelStream, const RemoteContext& context, const AnyMap& config) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::import_model"); OV_CORE_CALL_STATEMENT({ auto exec = _impl->import_model(modelStream, ov::SoPtr{context._impl, context._so}, config); diff --git a/src/inference/src/cpp/ie_cnn_network.cpp b/src/inference/src/cpp/ie_cnn_network.cpp index 178a2de4a68396..db92bfe921850b 100644 --- a/src/inference/src/cpp/ie_cnn_network.cpp +++ b/src/inference/src/cpp/ie_cnn_network.cpp @@ -6,7 +6,7 @@ #include "cnn_network_ngraph_impl.hpp" #include "exception2status.hpp" -#include "ie_itt.hpp" +#include "itt.hpp" namespace InferenceEngine { @@ -21,7 +21,7 @@ CNNNetwork::CNNNetwork(std::shared_ptr network) : network(network) } CNNNetwork::CNNNetwork(const std::shared_ptr& graph, const std::vector& exts) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetwork::CNNNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "CNNNetwork::CNNNetwork"); if (graph == nullptr) { IE_THROW() << "CNNNetwork was not initialized: 'graph' object is empty"; diff --git a/src/inference/src/dev/converter_utils.cpp b/src/inference/src/dev/converter_utils.cpp index a733ef7c13d5f2..07de5f321ecf46 100644 --- a/src/inference/src/dev/converter_utils.cpp +++ b/src/inference/src/dev/converter_utils.cpp @@ -430,6 +430,26 @@ class IExecutableNetworkWrapper : public InferenceEngine::IExecutableNetworkInte } InferenceEngine::Parameter GetMetric(const std::string& name) const override { + // Add legacy supported properties + if (METRIC_KEY(SUPPORTED_METRICS) == name || METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) { + try { + return m_model->get_property(name); + } catch (const ov::Exception&) { + auto props = m_model->get_property(ov::supported_properties.name()).as>(); + std::vector legacy_properties; + for (const auto& prop : props) { + if ((METRIC_KEY(SUPPORTED_METRICS) == name && !prop.is_mutable()) || + (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name && prop.is_mutable())) + legacy_properties.emplace_back(prop); + } + if (METRIC_KEY(SUPPORTED_METRICS) == name) { + legacy_properties.emplace_back(METRIC_KEY(SUPPORTED_METRICS)); + legacy_properties.emplace_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + } + + return legacy_properties; + } + } return m_model->get_property(name); } diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index f9997b545c7097..c31b905869628f 100644 --- a/src/inference/src/dev/core_impl.cpp +++ 
b/src/inference/src/dev/core_impl.cpp @@ -15,10 +15,10 @@ #include "dev/converter_utils.hpp" #include "dev/icompiled_model_wrapper.hpp" #include "file_utils.h" -#include "ie_itt.hpp" #include "ie_network_reader.hpp" #include "ie_ngraph_utils.hpp" #include "iplugin_wrapper.hpp" +#include "itt.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/pass/constant_folding.hpp" #include "openvino/core/any.hpp" @@ -300,8 +300,13 @@ ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, else if (it->second == parsed_device_priority) { // do nothing } else { - IE_THROW() << "Device priority / ID mismatch: " << parsed_device_priority << " (from " << deviceName - << ") vs " << it->second.as() << " (from config)"; + OPENVINO_THROW("Device priority / ID mismatch: ", + parsed_device_priority, + " (from ", + deviceName, + ") vs ", + it->second.as(), + " (from config)"); } }; @@ -485,7 +490,7 @@ void ov::CoreImpl::register_plugins_in_registry(const std::string& xml_config_fi auto parse_result = ParseXml(xml_config_file.c_str()); if (!parse_result.error_msg.empty()) { - IE_THROW() << parse_result.error_msg; + OPENVINO_THROW(parse_result.error_msg); } pugi::xml_document& xmlDoc = *parse_result.xml; @@ -497,10 +502,10 @@ void ov::CoreImpl::register_plugins_in_registry(const std::string& xml_config_fi FOREACH_CHILD (pluginNode, devicesNode, "plugin") { std::string deviceName = GetStrAttr(pluginNode, "name"); if (pluginRegistry.find(deviceName) != pluginRegistry.end()) { - IE_THROW() << "Device with \"" << deviceName << "\" is already registered in the OpenVINO Runtime"; + OPENVINO_THROW("Device with \"", deviceName, "\" is already registered in the OpenVINO Runtime"); } if (deviceName.find('.') != std::string::npos) { - IE_THROW() << "Device name must not contain dot '.' symbol"; + OPENVINO_THROW("Device name must not contain dot '.' 
symbol"); } ov::util::FilePath pluginPath = @@ -539,7 +544,7 @@ void ov::CoreImpl::register_plugins_in_registry(const std::string& xml_config_fi } ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { - OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "CoreImpl::get_plugin"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "CoreImpl::get_plugin"); auto deviceName = pluginName; if (deviceName == ov::DEFAULT_DEVICE_NAME) @@ -555,9 +560,9 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { it = pluginRegistry.find(deviceName); if (it == pluginRegistry.end()) { if (pluginName == ov::DEFAULT_DEVICE_NAME) - IE_THROW() << "No device is provided, so AUTO device is used by default, which failed loading."; + OPENVINO_THROW("No device is provided, so AUTO device is used by default, which failed loading."); else - IE_THROW() << "Device with \"" << deviceName << "\" name is not registered in the OpenVINO Runtime"; + OPENVINO_THROW("Device with \"", deviceName, "\" name is not registered in the OpenVINO Runtime"); } } std::lock_guard lock(get_mutex(deviceName)); @@ -715,17 +720,30 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { return plugins.emplace(deviceName, plugin).first->second; } catch (const InferenceEngine::Exception& ex) { - IE_THROW() << "Failed to create plugin " << ov::util::from_file_path(desc.libraryLocation) << " for device " - << deviceName << "\n" - << "Please, check your environment\n" - << ex.what() << "\n"; + OPENVINO_THROW("Failed to create plugin ", + ov::util::from_file_path(desc.libraryLocation), + " for device ", + deviceName, + "\n", + "Please, check your environment\n", + ex.what(), + "\n"); + } catch (const ov::Exception& ex) { + OPENVINO_THROW("Failed to create plugin ", + ov::util::from_file_path(desc.libraryLocation), + " for device ", + deviceName, + "\n", + "Please, check your environment\n", + ex.what(), + "\n"); } } ov::SoPtr ov::CoreImpl::compile_model(const std::shared_ptr& model_, const std::string& device_name, const ov::AnyMap& config) const { - OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::compile_model::model"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::model"); std::string deviceName = device_name; ov::AnyMap config_with_batch = config; // if auto-batching is applicable, the below function will patch the device name and config accordingly: @@ -756,10 +774,9 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::shared_ptr< ov::SoPtr ov::CoreImpl::compile_model(const std::shared_ptr& model_, const ov::SoPtr& context, const ov::AnyMap& config) const { - OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::compile_model::RemoteContext"); - if (!context) { - IE_THROW() << "Remote context is null"; - } + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::RemoteContext"); + if (!context) + OPENVINO_THROW("Remote context is null"); std::string deviceName = context->get_device_name(); ov::AnyMap config_with_batch = config; // if auto-batching is applicable, the below function will patch the device name and config accordingly: @@ -807,7 +824,7 @@ ov::SoPtr ov::CoreImpl::compile_model_with_preprocess(ov::Pl ov::SoPtr ov::CoreImpl::compile_model(const std::string& model_path, const std::string& device_name, const ov::AnyMap& config) const { - OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::compile_model::Path"); + OV_ITT_SCOPE(FIRST_INFERENCE, 
ov::itt::domains::LoadTime, "Core::compile_model::Path"); auto parsed = parseDeviceNameIntoConfig(device_name, config); // in case of compile_model(file_name), we need to clear-up core-level properties auto plugin = get_plugin(parsed._deviceName); @@ -842,7 +859,7 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod const ov::Tensor& weights, const std::string& device_name, const ov::AnyMap& config) const { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::compile_model::from_memory"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::compile_model::from_memory"); auto parsed = parseDeviceNameIntoConfig(device_name, config); // in case of compile_model(file_name), we need to clear-up core-level properties auto plugin = get_plugin(parsed._deviceName); @@ -874,7 +891,7 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod ov::SoPtr ov::CoreImpl::import_model(std::istream& model, const std::string& device_name, const ov::AnyMap& config) const { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::import_model"); auto parsed = parseDeviceNameIntoConfig(device_name, config); auto compiled_model = get_plugin(parsed._deviceName).import_model(model, parsed._config); if (auto wrapper = std::dynamic_pointer_cast(compiled_model._ptr)) { @@ -887,7 +904,7 @@ ov::SoPtr ov::CoreImpl::import_model(std::istream& model, ov::SoPtr ov::CoreImpl::import_model(std::istream& modelStream, const ov::SoPtr& context, const ov::AnyMap& config) const { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::import_model"); auto parsed = parseDeviceNameIntoConfig(context->get_device_name(), config); auto compiled_model = get_plugin(parsed._deviceName).import_model(modelStream, context, parsed._config); if (auto wrapper = std::dynamic_pointer_cast(compiled_model._ptr)) { @@ -900,7 +917,7 @@ ov::SoPtr ov::CoreImpl::import_model(std::istream& modelStre ov::SupportedOpsMap ov::CoreImpl::query_model(const std::shared_ptr& model, const std::string& device_name, const ov::AnyMap& config) const { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::query_model"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::query_model"); auto parsed = parseDeviceNameIntoConfig(device_name, config); return get_plugin(parsed._deviceName).query_model(model, parsed._config); } @@ -933,7 +950,7 @@ std::vector ov::CoreImpl::get_available_devices() const { if (is_hidden_device(deviceName)) continue; try { - const ie::Parameter p = GetMetric(deviceName, propertyName); + const ov::Any p = GetMetric(deviceName, propertyName); devicesIDs = p.as>(); } catch (const ie::Exception&) { // plugin is not created by e.g. invalid env @@ -942,11 +959,14 @@ std::vector ov::CoreImpl::get_available_devices() const { } catch (const std::runtime_error&) { // plugin is not created by e.g. invalid env } catch (const std::exception& ex) { - IE_THROW() << "An exception is thrown while trying to create the " << deviceName - << " device and call GetMetric: " << ex.what(); + OPENVINO_THROW("An exception is thrown while trying to create the ", + deviceName, + " device and call GetMetric: ", + ex.what()); } catch (...) 
{ - IE_THROW() << "Unknown exception is thrown while trying to create the " << deviceName - << " device and call GetMetric"; + OPENVINO_THROW("Unknown exception is thrown while trying to create the ", + deviceName, + " device and call GetMetric"); } if (devicesIDs.size() > 1) { @@ -1202,7 +1222,7 @@ void ov::CoreImpl::unload_plugin(const std::string& deviceName) { std::lock_guard lock(get_mutex()); auto it = plugins.find(deviceName); if (it == plugins.end()) { - IE_THROW() << "Device with \"" << deviceName << "\" name is not registered in the OpenVINO Runtime"; + OPENVINO_THROW("Device with \"", deviceName, "\" name is not registered in the OpenVINO Runtime"); } plugins.erase(deviceName); @@ -1216,11 +1236,11 @@ void ov::CoreImpl::register_plugin(const std::string& plugin, auto it = pluginRegistry.find(device_name); // Proxy plugins can be configured in the runtime if (it != pluginRegistry.end() && !is_proxy_device(device_name)) { - IE_THROW() << "Device with \"" << device_name << "\" is already registered in the OpenVINO Runtime"; + OPENVINO_THROW("Device with \"", device_name, "\" is already registered in the OpenVINO Runtime"); } if (device_name.find('.') != std::string::npos) { - IE_THROW() << "Device name must not contain dot '.' symbol"; + OPENVINO_THROW("Device name must not contain dot '.' symbol"); } PluginDescriptor desc{ov::util::get_plugin_path(plugin), properties}; @@ -1277,6 +1297,18 @@ void ov::CoreImpl::set_property_for_device(const ov::AnyMap& configMap, const st coreConfig.set_cache_dir_for_device((cache_it->second).as(), clearDeviceName); } OPENVINO_SUPPRESS_DEPRECATED_END + // apply and remove core properties + auto it = config.find(ov::force_tbb_terminate.name()); + if (it != config.end()) { + auto flag = it->second.as(); + ov::threading::executor_manager()->set_property({{it->first, flag}}); + config.erase(it); + } + + it = config.find(ov::enable_mmap.name()); + if (it != config.end()) { + config.erase(it); + } } auto base_desc = pluginRegistry.find(clearDeviceName); @@ -1297,7 +1329,7 @@ void ov::CoreImpl::set_property_for_device(const ov::AnyMap& configMap, const st } if (!configIsSet && !deviceName.empty()) { - IE_THROW() << "Device with \"" << deviceName << "\" name is not registered in the OpenVINO Runtime"; + OPENVINO_THROW("Device with \"", deviceName, "\" name is not registered in the OpenVINO Runtime"); } // set config for already created plugins @@ -1378,18 +1410,7 @@ bool ov::CoreImpl::device_supports_internal_property(const ov::Plugin& plugin, c } bool ov::CoreImpl::device_supports_model_caching(const ov::Plugin& plugin) const { - auto supportedMetricKeys = plugin.get_property(METRIC_KEY(SUPPORTED_METRICS), {}).as>(); - auto supported = util::contains(supportedMetricKeys, METRIC_KEY(IMPORT_EXPORT_SUPPORT)) && - plugin.get_property(METRIC_KEY(IMPORT_EXPORT_SUPPORT), {}).as(); - if (!supported) { - supported = - device_supports_property(plugin, ov::device::capabilities) && - util::contains(plugin.get_property(ov::device::capabilities), ov::device::capability::EXPORT_IMPORT); - } - if (supported) { - supported = device_supports_internal_property(plugin, ov::internal::caching_properties); - } - return supported; + return plugin.supports_model_caching(); } bool ov::CoreImpl::device_supports_cache_dir(const ov::Plugin& plugin) const { @@ -1407,13 +1428,13 @@ ov::SoPtr ov::CoreImpl::compile_model_and_cache(const std::s const ov::AnyMap& parsedConfig, const ov::SoPtr& context, const CacheContent& cacheContent) const { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, 
"CoreImpl::compile_model_and_cache"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "CoreImpl::compile_model_and_cache"); ov::SoPtr execNetwork; execNetwork = compile_model_with_preprocess(plugin, model, context, parsedConfig); if (cacheContent.cacheManager && device_supports_model_caching(plugin)) { try { // need to export network for further import from "cache" - OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::compile_model::Export"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Export"); cacheContent.cacheManager->write_cache_entry(cacheContent.blobId, [&](std::ostream& networkStream) { networkStream << ov::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber, ov::ModelCache::calculate_file_info(cacheContent.modelPath)); @@ -1440,18 +1461,18 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( try { cacheContent.cacheManager->read_cache_entry(cacheContent.blobId, [&](std::istream& networkStream) { OV_ITT_SCOPE(FIRST_INFERENCE, - InferenceEngine::itt::domains::IE_LT, + ov::itt::domains::LoadTime, "Core::load_model_from_cache::ReadStreamAndImport"); try { ov::CompiledBlobHeader header; networkStream >> header; - if (header.getIeVersion() != InferenceEngine::GetInferenceEngineVersion()->buildNumber) { + if (header.getIeVersion() != ov::get_openvino_version().buildNumber) { // Build number mismatch, don't use this cache - throw InferenceEngine::NetworkNotRead("Version does not match"); + OPENVINO_THROW("Version does not match"); } if (header.getFileInfo() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { // Original file is changed, don't use cache - throw InferenceEngine::NetworkNotRead("Original model file is changed"); + OPENVINO_THROW("Original model file is changed"); } } catch (...) 
{ throw HeaderException(); @@ -1628,7 +1649,7 @@ void ov::CoreImpl::add_mutex(const std::string& dev_name) { } std::shared_ptr ov::CoreImpl::read_model(const std::string& modelPath, const std::string& binPath) const { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "CoreImpl::read_model from file"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "CoreImpl::read_model from file"); return ReadNetwork(modelPath, binPath).getFunction(); } @@ -1639,6 +1660,6 @@ std::shared_ptr ov::CoreImpl::read_model(const std::string& model, if (weights) { blob = tensor_to_blob(get_tensor_impl(weights)); } - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "CoreImpl::read_model from memory"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "CoreImpl::read_model from memory"); return ReadNetwork(model, blob, frontendMode).getFunction(); } diff --git a/src/inference/src/dev/core_impl_ie.cpp b/src/inference/src/dev/core_impl_ie.cpp index c77509262943ed..5ec0ffc751fe2c 100644 --- a/src/inference/src/dev/core_impl_ie.cpp +++ b/src/inference/src/dev/core_impl_ie.cpp @@ -11,9 +11,9 @@ #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" #include "dev/converter_utils.hpp" #include "dev/icompiled_model_wrapper.hpp" -#include "ie_itt.hpp" #include "ie_network_reader.hpp" #include "iplugin_wrapper.hpp" +#include "itt.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/pass/constant_folding.hpp" #include "openvino/itt.hpp" @@ -33,7 +33,7 @@ ov::SoPtr ov::CoreImpl::LoadNetwork ov::Plugin& plugin, const std::map& parsedConfig, const InferenceEngine::RemoteContext::Ptr& context) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CoreImpl::LoadNetworkImpl"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "CoreImpl::LoadNetworkImpl"); ov::SoPtr execNetwork; auto wrapper = std::dynamic_pointer_cast(plugin.m_ptr); OPENVINO_ASSERT(wrapper); @@ -49,7 +49,7 @@ InferenceEngine::RemoteContext::Ptr ov::CoreImpl::GetDefaultContext(const std::s } InferenceEngine::CNNNetwork ov::CoreImpl::ReadNetwork(const std::string& modelPath, const std::string& binPath) const { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "CoreImpl::ReadNetwork from file"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "CoreImpl::ReadNetwork from file"); return InferenceEngine::details::ReadNetwork(modelPath, binPath, extensions, @@ -61,7 +61,7 @@ InferenceEngine::CNNNetwork ov::CoreImpl::ReadNetwork(const std::string& modelPa InferenceEngine::CNNNetwork ov::CoreImpl::ReadNetwork(const std::string& model, const InferenceEngine::Blob::CPtr& weights, bool frontendMode) const { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "CoreImpl::ReadNetwork from memory"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "CoreImpl::ReadNetwork from memory"); return InferenceEngine::details::ReadNetwork(model, weights, extensions, ov_extensions, is_new_api(), frontendMode); } @@ -69,7 +69,7 @@ ov::SoPtr ov::CoreImpl::LoadNetwork const InferenceEngine::CNNNetwork& network, const std::shared_ptr& context, const std::map& config) { - OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::LoadNetwork::RemoteContext"); if (network.getFunction()) { auto ctx = ov::legacy_convert::convert_remote_context(context); auto compiled_model = @@ -90,7 +90,7 @@ InferenceEngine::SoExecutableNetworkInternal ov::CoreImpl::LoadNetwork( const InferenceEngine::CNNNetwork& network, const std::string& 
deviceName, const std::map& config) { - OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::CNN"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::LoadNetwork::CNN"); if (network.getFunction()) { auto compiled_model = compile_model(ov::legacy_convert::convert_model(network, isNewAPI()), deviceName, any_copy(config)); @@ -107,7 +107,7 @@ InferenceEngine::SoExecutableNetworkInternal ov::CoreImpl::LoadNetwork( const std::string& deviceName, const std::map& config, const std::function& val) { - OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::LoadNetwork::Path"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::LoadNetwork::Path"); auto compiled_model = compile_model(modelPath, deviceName, any_copy(config)); return {ov::legacy_convert::convert_compiled_model(compiled_model), compiled_model._so}; @@ -119,7 +119,7 @@ InferenceEngine::SoExecutableNetworkInternal ov::CoreImpl::LoadNetwork( const std::string& deviceName, const std::map& config, const std::function& val) { - OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::Memory"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::LoadNetwork::Memory"); auto compiled_model = compile_model(modelStr, @@ -143,7 +143,7 @@ InferenceEngine::SoExecutableNetworkInternal ov::CoreImpl::ImportNetwork( InferenceEngine::QueryNetworkResult ov::CoreImpl::QueryNetwork(const InferenceEngine::CNNNetwork& network, const std::string& deviceName, const std::map& config) const { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::QueryNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::QueryNetwork"); ie::QueryNetworkResult ret; if (!network.getFunction()) { ret.rc = InferenceEngine::GENERAL_ERROR; diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 88ac29c7535f60..f3effb552e20b4 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -9,6 +9,9 @@ #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" #include "ie_plugin_config.hpp" #include "iplugin_wrapper.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include "openvino/runtime/properties.hpp" +#include "openvino/util/common_util.hpp" #define OV_PLUGIN_CALL_STATEMENT(...) 
\ OPENVINO_ASSERT(m_ptr != nullptr, "OpenVINO Runtime Plugin was not initialized."); \ @@ -140,6 +143,56 @@ ov::Any ov::Plugin::get_property(const std::string& name, const AnyMap& argument return supported_properties; } } + // Add legacy supported properties + if (METRIC_KEY(SUPPORTED_METRICS) == name || METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) { + try { + return {m_ptr->get_property(name, arguments), {m_so}}; + } catch (const ov::Exception&) { + auto props = + m_ptr->get_property(ov::supported_properties.name(), arguments).as>(); + std::vector legacy_properties; + for (const auto& prop : props) { + if ((METRIC_KEY(SUPPORTED_METRICS) == name && !prop.is_mutable()) || + (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name && prop.is_mutable())) + legacy_properties.emplace_back(prop); + } + if (METRIC_KEY(SUPPORTED_METRICS) == name) { + legacy_properties.emplace_back(METRIC_KEY(SUPPORTED_METRICS)); + legacy_properties.emplace_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + } + if (METRIC_KEY(SUPPORTED_METRICS) == name && supports_model_caching(false)) + legacy_properties.emplace_back(METRIC_KEY(IMPORT_EXPORT_SUPPORT)); + + return legacy_properties; + } + } + if (METRIC_KEY(IMPORT_EXPORT_SUPPORT) == name) { + try { + return {m_ptr->get_property(name, arguments), {m_so}}; + } catch (const ov::Exception&) { + if (!supports_model_caching(false)) + throw; + // if device has ov::device::capability::EXPORT_IMPORT it means always true + return true; + } + } return {m_ptr->get_property(name, arguments), {m_so}}; }); } + +bool ov::Plugin::supports_model_caching(bool check_old_api) const { + bool supported(false); + if (check_old_api) { + auto supportedMetricKeys = get_property(METRIC_KEY(SUPPORTED_METRICS), {}).as>(); + supported = util::contains(supportedMetricKeys, METRIC_KEY(IMPORT_EXPORT_SUPPORT)) && + get_property(METRIC_KEY(IMPORT_EXPORT_SUPPORT), {}).as(); + } + if (!supported) { + supported = util::contains(get_property(ov::supported_properties), ov::device::capabilities) && + util::contains(get_property(ov::device::capabilities), ov::device::capability::EXPORT_IMPORT); + } + if (supported) { + supported = util::contains(get_property(ov::internal::supported_properties), ov::internal::caching_properties); + } + return supported; +} diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index b392fad14d6856..64f8ec67bc62c2 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -77,6 +77,7 @@ class Plugin { T get_property(const ov::Property& property, const AnyMap& arguments) const { return get_property(property.name(), arguments).template as(); } + bool supports_model_caching(bool check_old_api = true) const; }; } // namespace ov diff --git a/src/inference/src/dev/threading/istreams_executor.cpp b/src/inference/src/dev/threading/istreams_executor.cpp index 2c63b328c92120..92d297a62ecb30 100644 --- a/src/inference/src/dev/threading/istreams_executor.cpp +++ b/src/inference/src/dev/threading/istreams_executor.cpp @@ -12,6 +12,7 @@ #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "ie_plugin_config.hpp" #include "openvino/core/parallel.hpp" +#include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/threading/cpu_streams_info.hpp" #include "openvino/util/log.hpp" @@ -149,6 +150,8 @@ void IStreamsExecutor::Config::set_property(const ov::AnyMap& property) { ". 
Expected only non negative numbers (#threads)"); } _threadsPerStream = val_i; + } else if (key == ov::internal::threads_per_stream) { + _threadsPerStream = static_cast(value.as()); } else if (key == CONFIG_KEY_INTERNAL(BIG_CORE_STREAMS)) { int val_i; try { @@ -255,6 +258,7 @@ ov::Any IStreamsExecutor::Config::get_property(const std::string& key) const { CONFIG_KEY_INTERNAL(ENABLE_HYPER_THREAD), ov::num_streams.name(), ov::inference_num_threads.name(), + ov::internal::threads_per_stream.name(), ov::affinity.name(), }; OPENVINO_SUPPRESS_DEPRECATED_END @@ -290,7 +294,7 @@ ov::Any IStreamsExecutor::Config::get_property(const std::string& key) const { return {std::to_string(_threads)}; } else if (key == ov::inference_num_threads) { return decltype(ov::inference_num_threads)::value_type{_threads}; - } else if (key == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) { + } else if (key == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM) || key == ov::internal::threads_per_stream) { return {std::to_string(_threadsPerStream)}; } else if (key == CONFIG_KEY_INTERNAL(BIG_CORE_STREAMS)) { return {std::to_string(_big_core_streams)}; diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index 15a9b143455771..97dc6382aaccea 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -26,11 +26,11 @@ #include "file_utils.h" #include "ie_cache_manager.hpp" #include "ie_icore.hpp" -#include "ie_itt.hpp" #include "ie_network_reader.hpp" #include "ie_ngraph_utils.hpp" #include "ie_plugin_config.hpp" #include "ie_remote_context.hpp" +#include "itt.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/ngraph.hpp" #include "ngraph/opsets/opset.hpp" @@ -86,12 +86,16 @@ class Core::Impl : public ov::CoreImpl { Core::Core(const std::string& xmlConfigFile) { _impl = std::make_shared(); - std::string xmlConfigFile_ = ov::find_plugins_xml(xmlConfigFile); - if (!xmlConfigFile_.empty()) - // If XML is default, load default plugins by absolute paths - _impl->register_plugins_in_registry(xmlConfigFile_, xmlConfigFile.empty()); - // Load plugins from pre-compiled list - _impl->register_compile_time_plugins(); + try { + std::string xmlConfigFile_ = ov::find_plugins_xml(xmlConfigFile); + if (!xmlConfigFile_.empty()) + // If XML is default, load default plugins by absolute paths + _impl->register_plugins_in_registry(xmlConfigFile_, xmlConfigFile.empty()); + // Load plugins from pre-compiled list + _impl->register_compile_time_plugins(); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } std::map Core::GetVersions(const std::string& deviceName) const { @@ -101,66 +105,102 @@ std::map Core::GetVersions(const std::string& deviceName) #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT CNNNetwork Core::ReadNetwork(const std::wstring& modelPath, const std::wstring& binPath) const { - return ReadNetwork(ov::util::wstring_to_string(modelPath), ov::util::wstring_to_string(binPath)); + try { + return ReadNetwork(ov::util::wstring_to_string(modelPath), ov::util::wstring_to_string(binPath)); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } #endif CNNNetwork Core::ReadNetwork(const std::string& modelPath, const std::string& binPath) const { - return _impl->ReadNetwork(modelPath, binPath); + try { + return _impl->ReadNetwork(modelPath, binPath); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } CNNNetwork Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights) const { - return _impl->ReadNetwork(model, weights); + try { + return 
_impl->ReadNetwork(model, weights); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, const std::map& config) { - return LoadNetwork(network, ov::DEFAULT_DEVICE_NAME, config); + try { + return LoadNetwork(network, ov::DEFAULT_DEVICE_NAME, config); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, const std::string& deviceName, const std::map& config) { auto valid = ::CheckStatic(network); - OPENVINO_ASSERT(std::get<0>(valid), - "InferenceEngine::Core::LoadNetwork doesn't support inputs having dynamic shapes. ", - "Use ov::Core::compile_model API instead. Dynamic inputs are :", - std::get<1>(valid)); - auto exec = _impl->LoadNetwork(network, deviceName, config); - return {exec._ptr, exec._so}; + try { + OPENVINO_ASSERT(std::get<0>(valid), + "InferenceEngine::Core::LoadNetwork doesn't support inputs having dynamic shapes. ", + "Use ov::Core::compile_model API instead. Dynamic inputs are :", + std::get<1>(valid)); + auto exec = _impl->LoadNetwork(network, deviceName, config); + return {exec._ptr, exec._so}; + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, RemoteContext::Ptr context, const std::map& config) { auto valid = ::CheckStatic(network); - OPENVINO_ASSERT(std::get<0>(valid), - "InferenceEngine::Core::LoadNetwork doesn't support inputs having dynamic shapes. ", - "Use ov::Core::compile_model API instead. Dynamic inputs are :", - std::get<1>(valid)); - auto exec = _impl->LoadNetwork(network, std::dynamic_pointer_cast(context), config); - return {exec._ptr, exec._so}; + try { + OPENVINO_ASSERT(std::get<0>(valid), + "InferenceEngine::Core::LoadNetwork doesn't support inputs having dynamic shapes. ", + "Use ov::Core::compile_model API instead. Dynamic inputs are :", + std::get<1>(valid)); + auto exec = _impl->LoadNetwork(network, std::dynamic_pointer_cast(context), config); + return {exec._ptr, exec._so}; + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } ExecutableNetwork Core::LoadNetwork(const std::string& modelPath, const std::string& deviceName, const std::map& config) { - auto exec = _impl->LoadNetwork(modelPath, deviceName, config, [](const CNNNetwork& network) { - auto valid = ::CheckStatic(network); - OPENVINO_ASSERT(std::get<0>(valid), - "InferenceEngine::Core::LoadNetwork doesn't support inputs having dynamic shapes. ", - "Use ov::Core::compile_model API instead. Dynamic inputs are :", - std::get<1>(valid)); - }); - return {exec._ptr, exec._so}; + try { + auto exec = _impl->LoadNetwork(modelPath, deviceName, config, [](const CNNNetwork& network) { + auto valid = ::CheckStatic(network); + OPENVINO_ASSERT(std::get<0>(valid), + "InferenceEngine::Core::LoadNetwork doesn't support inputs having dynamic shapes. ", + "Use ov::Core::compile_model API instead. 
Dynamic inputs are :", + std::get<1>(valid)); + }); + return {exec._ptr, exec._so}; + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } ExecutableNetwork Core::LoadNetwork(const std::string& modelPath, const std::map& config) { - return LoadNetwork(modelPath, ov::DEFAULT_DEVICE_NAME, config); + try { + return LoadNetwork(modelPath, ov::DEFAULT_DEVICE_NAME, config); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } RemoteContext::Ptr Core::CreateContext(const std::string& deviceName, const ParamMap& params) { - return _impl->CreateContext(deviceName, params); + try { + return _impl->CreateContext(deviceName, params); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) { @@ -173,7 +213,11 @@ RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) { if (deviceName.find("AUTO") == 0) { IE_THROW() << "AUTO device does not support remote context"; } - return _impl->GetDefaultContext(deviceName); + try { + return _impl->GetDefaultContext(deviceName); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } void Core::AddExtension(IExtensionPtr extension, const std::string& deviceName_) { @@ -187,35 +231,51 @@ void Core::AddExtension(IExtensionPtr extension, const std::string& deviceName_) IE_THROW() << "AUTO device does not support extensions. Please, set extensions directly to fallback devices"; } - _impl->AddExtension(extension); + try { + _impl->AddExtension(extension); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } void Core::AddExtension(const IExtensionPtr& extension) { - _impl->AddExtension(extension); + try { + _impl->AddExtension(extension); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const std::string& deviceName, const std::map& config) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::ImportNetwork"); auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ov::any_copy(config)); std::ifstream modelStream(modelFileName, std::ios::binary); if (!modelStream.is_open()) IE_THROW(NetworkNotRead) << "Model file " << modelFileName << " cannot be opened!"; - auto exec = _impl->get_plugin(parsed._deviceName).import_model(modelStream, parsed._config); - return {ov::legacy_convert::convert_compiled_model(exec), exec._so}; + try { + auto exec = _impl->get_plugin(parsed._deviceName).import_model(modelStream, parsed._config); + return {ov::legacy_convert::convert_compiled_model(exec), exec._so}; + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); - auto exec = _impl->ImportNetwork(networkModel, deviceName, config); - return {exec._ptr, exec._so}; + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::ImportNetwork"); + try { + auto exec = _impl->ImportNetwork(networkModel, deviceName, config); + return {exec._ptr, exec._so}; + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::ImportNetwork"); using ExportMagic = std::array; constexpr static const 
ExportMagic exportMagic = {{0x1, 0xE, 0xE, 0x1}}; @@ -232,14 +292,18 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) { } networkModel.seekg(currentPos, networkModel.beg); - auto exec = _impl->get_plugin(deviceName).import_model(networkModel, {}); - return {ov::legacy_convert::convert_compiled_model(exec), exec._so}; + try { + auto exec = _impl->get_plugin(deviceName).import_model(networkModel, {}); + return {ov::legacy_convert::convert_compiled_model(exec), exec._so}; + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const RemoteContext::Ptr& context, const std::map& config) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::ImportNetwork"); if (context == nullptr) { IE_THROW() << "Remote context is null"; @@ -249,22 +313,31 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, ov::DeviceIDParser device(deviceName_); std::string deviceName = device.get_device_name(); - auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ov::any_copy(config)); - auto exec = _impl->get_plugin(deviceName) - .import_model(networkModel, ov::legacy_convert::convert_remote_context(context), parsed._config); - return {ov::legacy_convert::convert_compiled_model(exec), exec._so}; + try { + auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ov::any_copy(config)); + auto exec = + _impl->get_plugin(deviceName) + .import_model(networkModel, ov::legacy_convert::convert_remote_context(context), parsed._config); + return {ov::legacy_convert::convert_compiled_model(exec), exec._so}; + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } QueryNetworkResult Core::QueryNetwork(const CNNNetwork& network, const std::string& deviceName, const std::map& config) const { - auto valid = ::CheckStatic(network); - OPENVINO_ASSERT(std::get<0>(valid), - "InferenceEngine::Core::QueryNetwork doesn't support inputs having dynamic shapes. ", - "Use ov::Core::compile_model API instead. Dynamic inputs are :", - std::get<1>(valid)); + try { + auto valid = ::CheckStatic(network); + OPENVINO_ASSERT(std::get<0>(valid), + "InferenceEngine::Core::QueryNetwork doesn't support inputs having dynamic shapes. ", + "Use ov::Core::compile_model API instead. Dynamic inputs are :", + std::get<1>(valid)); - return _impl->QueryNetwork(network, deviceName, config); + return _impl->QueryNetwork(network, deviceName, config); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } void Core::SetConfig(const std::map& config, const std::string& deviceName) { @@ -287,10 +360,14 @@ void Core::SetConfig(const std::map& config, const std } ov::AnyMap conf = ov::any_copy(config); - if (deviceName.empty()) { - _impl->set_property_for_device(conf, std::string()); - } else { - _impl->set_property_for_device(conf, deviceName); + try { + if (deviceName.empty()) { + _impl->set_property_for_device(conf, std::string()); + } else { + _impl->set_property_for_device(conf, deviceName); + } + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); } } @@ -322,31 +399,55 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name return flag ? 
CONFIG_VALUE(YES) : CONFIG_VALUE(NO); } - auto parsed = ov::parseDeviceNameIntoConfig(deviceName); - return _impl->get_plugin(parsed._deviceName).get_property(name, parsed._config); + try { + auto parsed = ov::parseDeviceNameIntoConfig(deviceName); + return _impl->get_plugin(parsed._deviceName).get_property(name, parsed._config); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } Parameter Core::GetMetric(const std::string& deviceName, const std::string& name, const ParamMap& options) const { - return _impl->GetMetric(deviceName, name, options); + try { + return _impl->GetMetric(deviceName, name, options); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } std::vector Core::GetAvailableDevices() const { - return _impl->GetAvailableDevices(); + try { + return _impl->GetAvailableDevices(); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } void Core::RegisterPlugin(const std::string& pluginName, const std::string& deviceName) { - _impl->register_plugin(pluginName, deviceName, {}); + try { + _impl->register_plugin(pluginName, deviceName, {}); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } void Core::RegisterPlugins(const std::string& xmlConfigFile) { - _impl->register_plugins_in_registry(xmlConfigFile); + try { + _impl->register_plugins_in_registry(xmlConfigFile); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what(); + } } void Core::UnregisterPlugin(const std::string& deviceName_) { - ov::DeviceIDParser parser(deviceName_); - std::string deviceName = parser.get_device_name(); + try { + ov::DeviceIDParser parser(deviceName_); + std::string deviceName = parser.get_device_name(); - _impl->unload_plugin(deviceName); + _impl->unload_plugin(deviceName); + } catch (const ov::Exception& ex) { + IE_THROW() << ex.what() << std::endl; + } } } // namespace InferenceEngine diff --git a/src/inference/src/ie_network_reader.cpp b/src/inference/src/ie_network_reader.cpp index 64de8ca20c7154..564b647eb7430c 100644 --- a/src/inference/src/ie_network_reader.cpp +++ b/src/inference/src/ie_network_reader.cpp @@ -22,7 +22,7 @@ #ifdef ENABLE_IR_V7_READER # include "legacy/ie_ir_version.hpp" #endif -#include "ie_itt.hpp" +#include "itt.hpp" #include "legacy/ie_reader.hpp" #include "legacy_op_extension.hpp" #include "ngraph/function.hpp" @@ -116,7 +116,7 @@ class Reader : public IReader { } bool supportModel(std::istream& model) const override { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Reader::supportModel"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Reader::supportModel"); return ptr->supportModel(model); } @@ -140,7 +140,7 @@ namespace { Reader::Ptr reader_irv7 = nullptr; void registerReaders() { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "registerReaders"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "registerReaders"); static bool initialized = false; static std::mutex readerMutex; std::lock_guard lock(readerMutex); @@ -229,7 +229,7 @@ CNNNetwork load_ir_v7_network(const std::string& modelPath, Blob::Ptr weights = make_shared_blob({Precision::U8, {fileSize}, C}); { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "ReadNetworkWeights"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "ReadNetworkWeights"); weights->allocate(); binStream.read(weights->buffer(), fileSize); binStream.close(); diff --git a/src/inference/src/ie_ngraph_utils.cpp b/src/inference/src/ie_ngraph_utils.cpp index 49292fe12b0eaf..1b30258fb6e06c 100644 --- a/src/inference/src/ie_ngraph_utils.cpp +++ 
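
The legacy `InferenceEngine::Core` methods above all apply the same exception-translation idiom: wrap the call into the new `ov::CoreImpl`, catch `ov::Exception`, and rethrow through `IE_THROW()` so existing callers keep receiving `InferenceEngine::Exception`. The sketch below only illustrates that idiom as a hypothetical helper; the patch itself repeats the `try`/`catch` inline in every method, and the header names follow the legacy API layout.

```cpp
// Illustrative sketch only: a hypothetical helper showing the pattern the
// patch applies inline in each InferenceEngine::Core method.
#include <utility>

#include "ie_common.h"               // IE_THROW, InferenceEngine::Exception (legacy API)
#include "openvino/core/except.hpp"  // ov::Exception

template <typename Fn>
auto translate_ov_exception(Fn&& fn) -> decltype(fn()) {
    try {
        return std::forward<Fn>(fn)();
    } catch (const ov::Exception& ex) {
        // Rethrow as a legacy InferenceEngine exception so old callers
        // never see ov::Exception types escaping the 1.0 API.
        IE_THROW() << ex.what();
    }
}

// Hypothetical usage inside a legacy wrapper method:
// CNNNetwork Core::ReadNetwork(const std::string& modelPath, const std::string& binPath) const {
//     return translate_ov_exception([&] { return _impl->ReadNetwork(modelPath, binPath); });
// }
```
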
b/src/inference/src/ie_ngraph_utils.cpp @@ -5,13 +5,13 @@ #include "ie_ngraph_utils.hpp" #include "cnn_network_ngraph_impl.hpp" -#include "ie_itt.hpp" +#include "itt.hpp" namespace InferenceEngine { namespace details { CNNNetwork cloneNetwork(const CNNNetwork& network) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "cloneNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "cloneNetwork"); if (network.getFunction()) { IE_SUPPRESS_DEPRECATED_START diff --git a/src/inference/src/ie_itt.hpp b/src/inference/src/itt.hpp similarity index 71% rename from src/inference/src/ie_itt.hpp rename to src/inference/src/itt.hpp index afc49ea6ecb2df..230a81bd284777 100644 --- a/src/inference/src/ie_itt.hpp +++ b/src/inference/src/itt.hpp @@ -4,28 +4,20 @@ /** * @brief Defines openvino domains for tracing - * @file ie_itt.hpp + * @file itt.hpp */ #pragma once -#include - -#include - -namespace InferenceEngine { -namespace itt { -namespace domains { -OV_ITT_DOMAIN(IE_LT); -} // namespace domains -} // namespace itt -} // namespace InferenceEngine +#include "openvino/cc/selective_build.h" +#include "openvino/itt.hpp" namespace ov { namespace itt { namespace domains { -OV_ITT_DOMAIN(IE); -OV_ITT_DOMAIN(IE_RT); +OV_ITT_DOMAIN(OV); +OV_ITT_DOMAIN(ReadTime); +OV_ITT_DOMAIN(LoadTime); } // namespace domains } // namespace itt } // namespace ov diff --git a/src/inference/tests/functional/cnn_network_test.cpp b/src/inference/tests/functional/cnn_network_test.cpp index ba461899086f56..497051aa92a88e 100644 --- a/src/inference/tests/functional/cnn_network_test.cpp +++ b/src/inference/tests/functional/cnn_network_test.cpp @@ -101,7 +101,7 @@ TEST_F(CNNNetworkTests, throwsHasDynamicInputs) { try { core.LoadNetwork(network); FAIL() << "LoadNetwork with dynamic inputs shall throw"; - } catch (const ov::AssertFailure& e) { + } catch (const InferenceEngine::Exception& e) { EXPECT_TRUE(std::string(e.what()).find("InferenceEngine::Core::LoadNetwork") != std::string::npos) << e.what(); EXPECT_TRUE(std::string(e.what()).find("p1_1") != std::string::npos) << e.what(); EXPECT_TRUE(std::string(e.what()).find("p1_2") != std::string::npos) << e.what(); @@ -119,7 +119,7 @@ TEST_F(CNNNetworkTests, throwsHasDynamicInputs_remoteContext) { try { core.LoadNetwork(network, InferenceEngine::RemoteContext::Ptr()); FAIL() << "LoadNetwork with dynamic inputs shall throw"; - } catch (const ov::AssertFailure& e) { + } catch (const InferenceEngine::Exception& e) { EXPECT_TRUE(std::string(e.what()).find("InferenceEngine::Core::LoadNetwork") != std::string::npos) << e.what(); EXPECT_TRUE(std::string(e.what()).find("p1_1") != std::string::npos) << e.what(); EXPECT_TRUE(std::string(e.what()).find("p1_2") != std::string::npos) << e.what(); @@ -137,7 +137,7 @@ TEST_F(CNNNetworkTests, throwsHasDynamicInputs_queryNetwork) { try { core.QueryNetwork(network, "mock"); FAIL() << "QueryNetwork with dynamic inputs shall throw"; - } catch (const ov::AssertFailure& e) { + } catch (const InferenceEngine::Exception& e) { EXPECT_TRUE(std::string(e.what()).find("InferenceEngine::Core::QueryNetwork") != std::string::npos) << e.what(); EXPECT_TRUE(std::string(e.what()).find("p1_1") != std::string::npos) << e.what(); EXPECT_TRUE(std::string(e.what()).find("p1_2") != std::string::npos) << e.what(); diff --git a/src/inference/tests/unit/core.cpp b/src/inference/tests/unit/core.cpp index 5ebff30ec7e1ea..8cb1cfc8f74ca7 100644 --- a/src/inference/tests/unit/core.cpp +++ b/src/inference/tests/unit/core.cpp @@ -262,8 +262,7 @@ TEST(CoreTests_parse_device_config, 
get_device_config) { ov::AnyMap{ov::device::id("0.1"), ov::log::level(ov::log::Level::INFO)}); // device ID mismatch - EXPECT_THROW(ov::parseDeviceNameIntoConfig("DEVICE.X", ov::AnyMap{ov::device::id("Y")}), - InferenceEngine::Exception); + EXPECT_THROW(ov::parseDeviceNameIntoConfig("DEVICE.X", ov::AnyMap{ov::device::id("Y")}), ov::Exception); // HETERO check_parsed_config("HETERO:DEVICE", ov::AnyMap{}, "HETERO", ov::AnyMap{ov::device::priorities("DEVICE")}); @@ -300,7 +299,7 @@ TEST(CoreTests_parse_device_config, get_device_config) { ov::device::properties(ov::AnyMap{{"DEVICE", ov::AnyMap{ov::log::level(ov::log::Level::ERR)}}})}); // device priorities mismatch EXPECT_THROW(ov::parseDeviceNameIntoConfig("HETERO:DEVICE", ov::AnyMap{ov::device::priorities("ANOTHER_DEVICE")}), - InferenceEngine::Exception); + ov::Exception); // MULTI check_parsed_config("MULTI:DEVICE", ov::AnyMap{}, "MULTI", ov::AnyMap{ov::device::priorities("DEVICE")}); diff --git a/src/plugins/auto/docs/tests.md b/src/plugins/auto/docs/tests.md index 08ec2ce9ad1db8..c149806f450bb6 100644 --- a/src/plugins/auto/docs/tests.md +++ b/src/plugins/auto/docs/tests.md @@ -49,4 +49,270 @@ Note: Google Test filter = *AutoReleaseHelperTest*cpuLoadFailure_accelerateorLoa [----------] Global test environment tear-down [==========] 1 test from 1 test suite ran. (732 ms total) [ PASSED ] 1 test. -``` \ No newline at end of file +``` + +## Tests AUTO plugin with benchmark_app + +### Performance mode +Benchmark app provides various options for configuring execution parameters on supported devices. This section convers all of the supported performance hints options for AUTO plugin tests. AUTO plugin supports three performance modes setting: including latency, throughput and cumulative_throughput. + +#### Latency + +Example of Running benchmark_app with ``-hint latency`` on AUTO plugin is shown below: + +```bash +openvino/bin/intel64/Release$ ./benchark_app -m openvino/src/core/tests/models/ir/add_abc.xml -d AUTO -hint latency +[Step 1/11] Parsing and validating input arguments +[ INFO ] Parsing input parameters +[Step 2/11] Loading OpenVINO Runtime +[ INFO ] OpenVINO: +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] Device info: +[ INFO ] AUTO +[ INFO ] Build ................................. - +... +[Step 8/11] Querying optimal runtime parameters +[ INFO ] Model: +[ INFO ] NETWORK_NAME: add_abc +[ INFO ] EXECUTION_DEVICES: (CPU) +[ INFO ] PERFORMANCE_HINT: ``LATENCY`` +[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1 +[ INFO ] MULTI_DEVICE_PRIORITIES: GPU,CPU +[ INFO ] CPU: +... +[ INFO ] PERFORMANCE_HINT: LATENCY +... +[Step 11/11] Dumping statistics report +[ INFO ] Execution Devices: [ GPU ] +[ INFO ] Count: 76254 iterations +[ INFO ] Duration: 120002.81 ms +[ INFO ] Latency: +[ INFO ] Median: 1.54 ms +[ INFO ] Average: 1.54 ms +[ INFO ] Min: 0.14 ms +[ INFO ] Max: 3.71 ms +[ INFO ] Throughput: 635.44 FPS +``` + +#### Throughput + +Example of Running benchmark_app with ``-hint throughput`` on AUTO plugin is shown below: +```bash +openvino/bin/intel64/Release$ ./benchark_app -m openvino/src/core/tests/models/ir/add_abc.xml -d AUTO -hint throughput +[Step 1/11] Parsing and validating input arguments +[ INFO ] Parsing input parameters +[Step 2/11] Loading OpenVINO Runtime +[ INFO ] OpenVINO: +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] Device info: +[ INFO ] AUTO +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] +... 
+[Step 8/11] Querying optimal runtime parameters +[ INFO ] Model: +[ INFO ] NETWORK_NAME: add_abc +[ INFO ] EXECUTION_DEVICES: (CPU) +[ INFO ] PERFORMANCE_HINT: ``THROUGHPUT`` +[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 4 +[ INFO ] MULTI_DEVICE_PRIORITIES: GPU,CPU +... +[Step 11/11] Dumping statistics report +[ INFO ] Execution Devices: [ GPU ] +[ INFO ] Count: 168284 iterations +[ INFO ] Duration: 120004.81 ms +[ INFO ] Latency: +[ INFO ] Median: 2.79 ms +[ INFO ] Average: 2.81 ms +[ INFO ] Min: 0.44 ms +[ INFO ] Max: 12.11 ms +[ INFO ] Throughput: 1402.31 FPS +``` +#### Cumulative throughput + +Example of Running benchmark_app with ``-hint cumulative_throughput`` on AUTO plugin is shown below: +```bash +openvino/bin/intel64/Release$ ./benchark_app -m openvino/src/core/tests/models/ir/add_abc.xml -d AUTO -hint cumulative_throughput +[Step 1/11] Parsing and validating input arguments +[ INFO ] Parsing input parameters +[Step 2/11] Loading OpenVINO Runtime +[ INFO ] OpenVINO: +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] Device info: +[ INFO ] AUTO +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] +... +[Step 8/11] Querying optimal runtime parameters +[ INFO ] Model: +[ INFO ] NETWORK_NAME: add_abc +[ INFO ] EXECUTION_DEVICES: CPU GPU +[ INFO ] PERFORMANCE_HINT: ``CUMULATIVE_THROUGHPUT`` +[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 8 +... +[Step 11/11] Dumping statistics report +[ INFO ] Execution Devices: [ CPU GPU ] +[ INFO ] Count: 468448 iterations +[ INFO ] Duration: 120001.31 ms +[ INFO ] Latency: +[ INFO ] Median: 0.36 ms +[ INFO ] Average: 0.36 ms +[ INFO ] Min: 0.22 ms +[ INFO ] Max: 10.48 ms +[ INFO ] Throughput: 3903.69 FPS +``` + +### Enable/Disable CPU as acceleration + +This section shows the setting to AUTO plugin that enables/disables CPU as acceleration (or helper device) at the beginning via the benchmark APP. Configure the property ``ENABLE_STARTUP_FALLBACK`` first in the JSON file ``config.json``. + +Running benchmark_APP with enabling the property ``ENABLE_STARTUP_FALLBACK`` in JSON file ``config.json``. + +```bash +{ + "AUTO": { + "ENABLE_STARTUP_FALLBACK": "YES" + } +} +``` + +The retrieved property ``EXECUTION_DEVICE`` from AUTO will be CPU accelerator (``(CPU)``). + +```bash +openvino/bin/intel64/Release$ ./benchark_app -m openvino/src/core/tests/models/ir/add_abc.xml -d AUTO -load_config ./config.json +[Step 1/11] Parsing and validating input arguments +[ INFO ] Parsing input parameters +[Step 2/11] Loading OpenVINO Runtime +[ INFO ] OpenVINO: +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] Device info: +[ INFO ] AUTO +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] +... +[ INFO ] EXECUTION_DEVICES: (CPU) +... +[ INFO ] First inference took 0.65 ms +... +[ INFO ] Count: 169420 iterations +[ INFO ] Duration: 120004.85 ms +[ INFO ] Latency: +[ INFO ] Median: 2.76 ms +[ INFO ] Average: 2.78 ms +[ INFO ] Min: 0.51 ms +[ INFO ] Max: 8.39 ms +[ INFO ] Throughput: 1411.78 FPS +``` + +Running benchmark_APP with disabling the property ``ENABLE_STARTUP_FALLBACK`` in JSON file ``config.json``. + +```bash +{ + "AUTO": { + "ENABLE_STARTUP_FALLBACK": "NO" + } +} +``` + +The retrieved property ``EXECUTION_DEVICE`` from AUTO will be CPU accelerator (``GPU``). 
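
The same settings exercised by these benchmark_app runs can also be applied programmatically through the OpenVINO C++ API. The sketch below is illustrative only: the property keys are the raw strings taken from the `config.json` and log output above (`PERFORMANCE_HINT`, `ENABLE_STARTUP_FALLBACK`), and ``model.xml`` is a placeholder path.

```cpp
// Minimal sketch (not part of this patch): programmatic equivalent of running
// benchmark_app with "-hint throughput" and a config.json that sets
// ENABLE_STARTUP_FALLBACK to NO on the AUTO device.
#include <iostream>

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path

    auto compiled = core.compile_model(model,
                                       "AUTO",
                                       ov::AnyMap{{"PERFORMANCE_HINT", "THROUGHPUT"},
                                                  {"ENABLE_STARTUP_FALLBACK", "NO"}});

    // The same read-only properties benchmark_app prints in step 8.
    for (const auto& device : compiled.get_property(ov::execution_devices))
        std::cout << "execution device: " << device << '\n';
    std::cout << "optimal requests: " << compiled.get_property(ov::optimal_number_of_infer_requests) << '\n';
}
```

Querying ``ov::execution_devices`` after compilation returns the same device list that shows up as ``EXECUTION_DEVICES`` in the benchmark_app logs.
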
+ +```bash +openvino/bin/intel64/Release$ ./benchark_app -m openvino/src/core/tests/models/ir/add_abc.xml -d AUTO -load_config ./config.json +[Step 1/11] Parsing and validating input arguments +[ INFO ] Parsing input parameters +[Step 2/11] Loading OpenVINO Runtime +[ INFO ] OpenVINO: +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] Device info: +[ INFO ] AUTO +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] +... +[ INFO ] EXECUTION_DEVICES: GPU +... +[ INFO ] First inference took 3.97 ms +... +[ INFO ] Count: 167560 iterations +[ INFO ] Duration: 120003.96 ms +[ INFO ] Latency: +[ INFO ] Median: 2.76 ms +[ INFO ] Average: 2.81 ms +[ INFO ] Min: 0.78 ms +[ INFO ] Max: 5.99 ms +[ INFO ] Throughput: 1396.29 FPS +``` + +### Device selection fallback of the AUTO + +This section will show the fallback of device selection within the AUTO plugin if the device with high priority doesn't support the precision of the inputting model. For example, CPU supports both FP16 and FP32 precision model, while GNA doesn't FP32 precision. Although GNA has higher priority, AUTO plugin will ultimately choose the CPU plugin to load model with FP32 precision. + +AUTO will select GNA if no other device is specified in the device candidate list. + +```bash +openvino/bin/intel64/Release$ ./benchmark_app -m openvino/src/core/tests/models/ir/add_abc.xml -d AUTO:GNA -t 10 +[Step 1/11] Parsing and validating input arguments +[ INFO ] Parsing input parameters +[Step 2/11] Loading OpenVINO Runtime +[ INFO ] OpenVINO: +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] Device info: +[ INFO ] AUTO +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] GNA +[ INFO ] Build ................................. - +... +[ INFO ] GNA: +[ INFO ] OPTIMIZATION_CAPABILITIES: INT16 INT8 EXPORT_IMPORT +... +[Step 11/11] Dumping statistics report +[ INFO ] Execution Devices: [ GNA ] +... +[ INFO ] Latency: +[ INFO ] Median: 0.01 ms +[ INFO ] Average: 0.01 ms +[ INFO ] Min: 0.01 ms +[ INFO ] Max: 0.20 ms +[ INFO ] Throughput: 69131.99 FPS +``` + +Device selection fallback will happen here. CPU will be selected by AUTO as GNA doesn't support FP32 precision model. + +```bash +openvino/bin/intel64/Release$ ./benchmark_app -m openvino/src/core/tests/models/ir/add_abc.xml -d AUTO:GNA,CPU -t 10 +[Step 1/11] Parsing and validating input arguments +[ INFO ] Parsing input parameters +[Step 2/11] Loading OpenVINO Runtime +[ INFO ] OpenVINO: +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] Device info: +[ INFO ] AUTO +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] CPU +[ INFO ] Build ................................. - +[ INFO ] +[ INFO ] GNA +[ INFO ] Build ................................. - +... +[Step 11/11] Dumping statistics report +[ INFO ] Execution Devices: [ CPU ] +... 
+[ INFO ] Latency: +[ INFO ] Median: 8.90 ms +[ INFO ] Average: 8.95 ms +[ INFO ] Min: 5.16 ms +[ INFO ] Max: 32.78 ms +[ INFO ] Throughput: 446.08 FPS +``` diff --git a/src/plugins/auto/src/auto_compiled_model.cpp b/src/plugins/auto/src/auto_compiled_model.cpp index 3bfdd51628d18e..a827072bf3234b 100644 --- a/src/plugins/auto/src/auto_compiled_model.cpp +++ b/src/plugins/auto/src/auto_compiled_model.cpp @@ -201,7 +201,7 @@ ov::Any AutoCompiledModel::get_property(const std::string& name) const { LOG_WARNING_TAG("deduce optimal infer requset num for auto-batch failed :%s", iie.what()); } real = (std::max)(requests, optimal_batch_size); - } else if (device_info.device_name.find("VPU") != std::string::npos) { + } else if (device_info.device_name.find("NPU") != std::string::npos) { real = 8u; } else { real = upper_bound_streams_num ? 2 * upper_bound_streams_num : default_num_for_tput; diff --git a/src/plugins/auto/src/auto_schedule.cpp b/src/plugins/auto/src/auto_schedule.cpp index 4efcd6e0253ed6..36e9c9f54621a5 100644 --- a/src/plugins/auto/src/auto_schedule.cpp +++ b/src/plugins/auto/src/auto_schedule.cpp @@ -301,10 +301,10 @@ void AutoSchedule::try_to_compile_model(AutoCompileContext& context, const std:: } // need to recompile model, unregister it's priority // there maybe potential issue. - // for example they are dGPU, VPU, iGPU, customer want to compile model with - // configure 0 dGPU, 1 VPU, if dGPU compile failed, - // the result will be not sure, maybe two models are compiled into VPU, - // maybe 0 is compiled to VPU, 1 is compiled to iGPU + // for example they are dGPU, NPU, iGPU, customer want to compile model with + // configure 0 dGPU, 1 NPU, if dGPU compile failed, + // the result will be not sure, maybe two models are compiled into NPU, + // maybe 0 is compiled to NPU, 1 is compiled to iGPU m_plugin->unregister_priority(m_context->m_model_priority, context.m_device_info.unique_name); // remove the current device from device_list auto erase_device = deviceChecker().check_and_return_if_device_in_list(device, device_list, true); diff --git a/src/plugins/auto/src/plugin_config.cpp b/src/plugins/auto/src/plugin_config.cpp index 4fcace66dd3085..0f49680856d1a4 100644 --- a/src/plugins/auto/src/plugin_config.cpp +++ b/src/plugins/auto/src/plugin_config.cpp @@ -8,7 +8,7 @@ namespace auto_plugin { // AUTO will enable the blocklist if // 1.No device priority passed to AUTO/MULTI.(eg. core.compile_model(model, "AUTO", configs);) // 2.No valid device parsed out from device priority (eg. core.compile_model(model, "AUTO:-CPU,-GPU", configs);). 
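
The comment above names the two call shapes that make AUTO fall back to its device block list when filtering candidates. The sketch below just restates those two cases in compilable form; the `configs` map is a placeholder and model creation is omitted.

```cpp
// Sketch of the two situations described in the comment above, in which AUTO
// applies its device block list. "configs" is a placeholder property map.
#include "openvino/openvino.hpp"

void blocklist_cases(ov::Core& core, const std::shared_ptr<ov::Model>& model) {
    ov::AnyMap configs;  // placeholder compile-time properties

    // 1. No device priority passed to AUTO/MULTI.
    auto no_priority = core.compile_model(model, "AUTO", configs);

    // 2. A priority list whose '-' exclusions leave no valid device.
    auto all_excluded = core.compile_model(model, "AUTO:-CPU,-GPU", configs);
}
```
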
-const std::set PluginConfig::device_block_list = {"VPU", "GNA", "notIntelGPU"}; +const std::set PluginConfig::device_block_list = {"NPU", "GNA", "notIntelGPU"}; PluginConfig::PluginConfig() { set_default(); diff --git a/src/plugins/auto/tests/unit/compile_model_metric_test.cpp b/src/plugins/auto/tests/unit/compile_model_metric_test.cpp index 67fac96aabe8dd..05161963605675 100644 --- a/src/plugins/auto/tests/unit/compile_model_metric_test.cpp +++ b/src/plugins/auto/tests/unit/compile_model_metric_test.cpp @@ -192,13 +192,13 @@ TEST_P(ExecNetworkget_propertyOptimalNumInferReq, OPTIMAL_NUMBER_OF_INFER_REQUES metaDevices.push_back({actualDeviceName, metaConfig, actualCustomerNum, ""}); // enable autoBatch unsigned int gpuOptimalBatchNum = 8; - unsigned int keembayOptimalBatchNum = 1; + unsigned int npuOptimalBatchNum = 1; ov::hint::PerformanceMode mode = ov::hint::PerformanceMode::THROUGHPUT; std::tuple rangeOfStreams = std::make_tuple(1, 3); ON_CALL(*core, get_property(StrEq(ov::test::utils::DEVICE_GPU), StrEq(ov::optimal_batch_size.name()), _)) .WillByDefault(RETURN_MOCK_VALUE(gpuOptimalBatchNum)); ON_CALL(*core, get_property(StrEq(ov::test::utils::DEVICE_KEEMBAY), StrEq(ov::optimal_batch_size.name()), _)) - .WillByDefault(RETURN_MOCK_VALUE(keembayOptimalBatchNum)); + .WillByDefault(RETURN_MOCK_VALUE(npuOptimalBatchNum)); ON_CALL(*core, get_property(_, StrEq(ov::range_for_streams.name()), _)) .WillByDefault(RETURN_MOCK_VALUE(rangeOfStreams)); ON_CALL(*core, get_property(_, StrEq(ov::hint::performance_mode.name()), _)) diff --git a/src/plugins/auto/tests/unit/get_device_list.cpp b/src/plugins/auto/tests/unit/get_device_list.cpp index 50837ae17f1d38..5fc8d4eedb4b43 100644 --- a/src/plugins/auto/tests/unit/get_device_list.cpp +++ b/src/plugins/auto/tests/unit/get_device_list.cpp @@ -7,8 +7,8 @@ using Config = std::map; using namespace ov::mock_auto_plugin; -const std::vector availableDevs = {"CPU", "GPU", "VPU"}; -const std::vector availableDevsWithId = {"CPU", "GPU.0", "GPU.1", "VPU"}; +const std::vector availableDevs = {"CPU", "GPU", "NPU"}; +const std::vector availableDevsWithId = {"CPU", "GPU.0", "GPU.1", "NPU"}; using Params = std::tuple; using ConfigParams = std::tuple< std::vector, // Available devices retrieved from Core @@ -96,8 +96,8 @@ const std::vector testConfigsWithId = {Params{" ", " "}, Params{"CPU,,GPU", "CPU,GPU.0,GPU.1"}, Params{"CPU, ,GPU", "CPU, ,GPU.0,GPU.1"}, Params{"CPU,GPU,GPU.1", "CPU,GPU.0,GPU.1"}, - Params{"CPU,GPU,VPU,INVALID_DEVICE", "CPU,GPU.0,GPU.1,VPU,INVALID_DEVICE"}, - Params{"VPU,GPU,CPU,-GPU.0", "VPU,GPU.1,CPU"}, + Params{"CPU,GPU,NPU,INVALID_DEVICE", "CPU,GPU.0,GPU.1,NPU,INVALID_DEVICE"}, + Params{"NPU,GPU,CPU,-GPU.0", "NPU,GPU.1,CPU"}, Params{"-GPU.0,GPU,CPU", "GPU.1,CPU"}, Params{"-GPU.0,GPU", "GPU.1"}, Params{"-GPU,GPU.0", "GPU.0"}, @@ -131,13 +131,13 @@ const std::vector testConfigs = {Params{" ", " "}, Params{"CPU,GPU,GPU.0", "CPU,GPU"}, Params{"CPU,GPU,GPU.1", "CPU,GPU,GPU.1"}, Params{"CPU,GPU.1,GPU", "CPU,GPU.1,GPU"}, - Params{"CPU,VPU", "CPU,VPU"}, - Params{"CPU,-VPU", "CPU"}, + Params{"CPU,NPU", "CPU,NPU"}, + Params{"CPU,-NPU", "CPU"}, Params{"INVALID_DEVICE", "INVALID_DEVICE"}, Params{"CPU,-INVALID_DEVICE", "CPU"}, Params{"CPU,INVALID_DEVICE", "CPU,INVALID_DEVICE"}, Params{"-CPU,INVALID_DEVICE", "INVALID_DEVICE"}, - Params{"CPU,GPU,VPU", "CPU,GPU,VPU"}}; + Params{"CPU,GPU,NPU", "CPU,GPU,NPU"}}; const std::vector testConfigsWithIdNotInteldGPU = {Params{" ", " "}, Params{"", "CPU,GPU.0"}, @@ -147,8 +147,8 @@ const std::vector 
testConfigsWithIdNotInteldGPU = {Params{" ", " "}, Params{"CPU,,GPU", "CPU,GPU.0,GPU.1"}, Params{"CPU, ,GPU", "CPU, ,GPU.0,GPU.1"}, Params{"CPU,GPU,GPU.1", "CPU,GPU.0,GPU.1"}, - Params{"CPU,GPU,VPU,INVALID_DEVICE", "CPU,GPU.0,GPU.1,VPU,INVALID_DEVICE"}, - Params{"VPU,GPU,CPU,-GPU.0", "VPU,GPU.1,CPU"}, + Params{"CPU,GPU,NPU,INVALID_DEVICE", "CPU,GPU.0,GPU.1,NPU,INVALID_DEVICE"}, + Params{"NPU,GPU,CPU,-GPU.0", "NPU,GPU.1,CPU"}, Params{"-GPU.0,GPU,CPU", "GPU.1,CPU"}, Params{"-GPU.0,GPU", "GPU.1"}, Params{"-GPU,GPU.0", "GPU.0"}, diff --git a/src/plugins/auto/tests/unit/select_device_failed_test.cpp b/src/plugins/auto/tests/unit/select_device_failed_test.cpp index 5559621260d45d..92afffef4b6f82 100644 --- a/src/plugins/auto/tests/unit/select_device_failed_test.cpp +++ b/src/plugins/auto/tests/unit/select_device_failed_test.cpp @@ -127,7 +127,7 @@ TEST_P(AutoLoadFailedTest, LoadCNNetWork) { metaDevices.push_back(std::move(devInfo)); // set the return value of SelectDevice - // for example if there are three device, if will return GPU on the first call, and then MYRIAD + // for example if there are three device, if will return GPU on the first call, and then NPU // at last CPU ON_CALL(*plugin, select_device(Property(&std::vector::size, Eq(selDevsSize)), _, _)) .WillByDefault(Return(metaDevices[deviceConfigs.size() - selDevsSize])); @@ -181,12 +181,12 @@ TEST_P(AutoLoadFailedTest, LoadCNNetWork) { // { true, false, GENERAL, 3 device, 2, 3, 2} // // there are three devices for loading -// CPU load for accelerator success, but GPU will load faild and then select MYRIAD and load again +// CPU load for accelerator success, but GPU will load faild and then select NPU and load again // LoadExeNetworkImpl will not throw exception and can continue to run, -// it will select twice, first select GPU, second select MYRIAD -// it will load network three times(CPU, GPU, MYRIAD) +// it will select twice, first select GPU, second select NPU +// it will load network three times(CPU, GPU, NPU) // the inference request num is loadSuccessCount * optimalNum, in this test case optimalNum is 2 -// so inference request num is 4 (CPU 2, MYRIAD 2) +// so inference request num is 4 (CPU 2, NPU 2) // const std::vector testConfigs = { ConfigParams{true, diff --git a/src/plugins/hetero/src/compiled_model.cpp b/src/plugins/hetero/src/compiled_model.cpp index 46e2419fd44b7a..7b8bf399286a8b 100644 --- a/src/plugins/hetero/src/compiled_model.cpp +++ b/src/plugins/hetero/src/compiled_model.cpp @@ -11,10 +11,13 @@ #include "ie_plugin_config.hpp" #include "itt.hpp" #include "openvino/op/util/op_types.hpp" +#include "openvino/pass/constant_folding.hpp" +#include "openvino/pass/manager.hpp" #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/util/common_util.hpp" #include "plugin.hpp" +#include "properties.hpp" #include "xml_parse_utils.h" template @@ -55,6 +58,14 @@ ov::hetero::CompiledModel::CompiledModel(const std::shared_ptr& model if (std::getenv("OPENVINO_HETERO_VISUALIZE")) dumpDotFile = true; + // Calling of ConstantFolding in HETERO plugin is required because + // in some cases topology split is happening after constant subgraph. 
+ // It may cause replacement of Constant by Parameter in such operations + // like Reshape/Transpose/Gather and lead to unexpected dynamism or exception + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(model); + ov::SupportedOpsMap queryNetworkResult; auto orderedOps = model->get_ordered_ops(); @@ -664,7 +675,8 @@ ov::Any ov::hetero::CompiledModel::get_property(const std::string& name) const { std::vector ro_properties{ov::model_name, ov::optimal_number_of_infer_requests, ov::execution_devices, - ov::loaded_from_cache}; + ov::loaded_from_cache, + ov::hetero::number_of_submodels}; return ro_properties; }; const auto& to_string_vector = [](const std::vector& properties) { @@ -723,6 +735,8 @@ ov::Any ov::hetero::CompiledModel::get_property(const std::string& name) const { device_names.push_back(comp_model_desc.device); } return decltype(ov::execution_devices)::value_type{device_names}; + } else if (ov::hetero::number_of_submodels == name) { + return decltype(ov::hetero::number_of_submodels)::value_type{m_compiled_submodels.size()}; } return m_cfg.get(name); OPENVINO_SUPPRESS_DEPRECATED_END diff --git a/src/plugins/hetero/src/properties.hpp b/src/plugins/hetero/src/properties.hpp index 799d07dbe7be1a..8ecd488bff98ca 100644 --- a/src/plugins/hetero/src/properties.hpp +++ b/src/plugins/hetero/src/properties.hpp @@ -13,5 +13,10 @@ namespace hetero { */ static constexpr Property caching_device_properties{"CACHING_DEVICE_PROPERTIES"}; +/** + * @brief Read-only property showing number of compiled submodels + */ +static constexpr Property number_of_submodels{"HETERO_NUMBER_OF_SUBMODELS"}; + } // namespace hetero } // namespace ov diff --git a/src/plugins/hetero/tests/functional/compile_model_tests.cpp b/src/plugins/hetero/tests/functional/compile_model_tests.cpp index 9fb2745f5944ff..bfcd0ca1c8bb90 100644 --- a/src/plugins/hetero/tests/functional/compile_model_tests.cpp +++ b/src/plugins/hetero/tests/functional/compile_model_tests.cpp @@ -42,6 +42,20 @@ TEST_F(HeteroTests, compile_without_device_priorities_throw) { EXPECT_THROW(core.compile_model(model, "HETERO"), ov::Exception); } +TEST_F(HeteroTests, compile_dynamic_model_fail) { + // Change device priority + core.set_property("HETERO", ov::device::priorities("MOCK0,MOCK1")); + auto model = create_model_with_subtract_reshape(true); + EXPECT_THROW(core.compile_model(model, "HETERO"), ov::Exception); +} + +TEST_F(HeteroTests, compile_model_shapeof) { + // Change device priority + core.set_property("HETERO", ov::device::priorities("MOCK0,MOCK1")); + auto model = create_model_with_subtract_shapeof_reshape(); + EXPECT_NO_THROW(core.compile_model(model, "HETERO")); +} + TEST_F(HeteroTests, compile_with_device_properties) { ov::AnyMap config = {ov::device::priorities("MOCK0,MOCK1"), ov::device::properties("MOCK0", ov::num_streams(4), ov::enable_profiling(false)), diff --git a/src/plugins/hetero/tests/functional/hetero_tests.cpp b/src/plugins/hetero/tests/functional/hetero_tests.cpp index f8d1f5ea91c814..4228a5c14ce8e6 100644 --- a/src/plugins/hetero/tests/functional/hetero_tests.cpp +++ b/src/plugins/hetero/tests/functional/hetero_tests.cpp @@ -12,6 +12,8 @@ #include "openvino/core/any.hpp" #include "openvino/core/except.hpp" #include "openvino/opsets/opset11.hpp" +#include "openvino/pass/constant_folding.hpp" +#include "openvino/pass/manager.hpp" #include "openvino/pass/serialize.hpp" #include "openvino/runtime/exec_model_info.hpp" #include "openvino/runtime/internal_properties.hpp" @@ -22,6 +24,7 @@ #include 
"openvino/runtime/properties.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" +#include "transformations/init_node_info.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" namespace { @@ -67,8 +70,9 @@ ov::Tensor ov::hetero::tests::HeteroTests::create_and_fill_tensor(const ov::elem OPENVINO_THROW("Cannot generate tensor. Unsupported element type."); } -std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_subtract() { - auto param = std::make_shared(ov::element::i64, ov::Shape{1, 3, 2, 2}); +std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_subtract(bool dynamic) { + int64_t bs = dynamic ? -1 : 1; + auto param = std::make_shared(ov::element::i64, ov::PartialShape{bs, 3, 2, 2}); param->set_friendly_name("input"); auto const_value = ov::opset11::Constant::create(ov::element::i64, ov::Shape{1, 1, 1, 1}, {1}); const_value->set_friendly_name("const_val"); @@ -81,8 +85,9 @@ std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_sub return std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); } -std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_subtract_reshape() { - auto param = std::make_shared(ov::element::i64, ov::Shape{1, 3, 2, 2}); +std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_subtract_reshape(bool dynamic) { + int64_t bs = dynamic ? -1 : 1; + auto param = std::make_shared(ov::element::i64, ov::PartialShape{bs, 3, 2, 2}); param->set_friendly_name("input"); auto const_value = ov::opset11::Constant::create(ov::element::i64, ov::Shape{1, 1, 1, 1}, {1}); const_value->set_friendly_name("const_val"); @@ -99,8 +104,9 @@ std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_sub return std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); } -std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_subtract_reshape_relu() { - auto param = std::make_shared(ov::element::i64, ov::Shape{1, 3, 2, 2}); +std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_subtract_reshape_relu(bool dynamic) { + int64_t bs = dynamic ? -1 : 1; + auto param = std::make_shared(ov::element::i64, ov::PartialShape{bs, 3, 2, 2}); param->set_friendly_name("input"); auto const_value = ov::opset11::Constant::create(ov::element::i64, ov::Shape{1, 1, 1, 1}, {1}); const_value->set_friendly_name("const_val"); @@ -119,8 +125,9 @@ std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_sub return std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); } -std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_reshape() { - auto param = std::make_shared(ov::element::i64, ov::Shape{1, 3, 2, 2}); +std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_reshape(bool dynamic) { + int64_t bs = dynamic ? -1 : 1; + auto param = std::make_shared(ov::element::i64, ov::PartialShape{bs, 3, 2, 2}); param->set_friendly_name("input"); auto const_value = ov::opset11::Constant::create(ov::element::i64, ov::Shape{1, 1, 1, 1}, {1}); const_value->set_friendly_name("const_val"); @@ -135,6 +142,27 @@ std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_res return std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); } +std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_subtract_shapeof_reshape(bool dynamic) { + int64_t bs = dynamic ? 
-1 : 1; + auto param = std::make_shared(ov::element::i64, ov::PartialShape{bs, 3, 2, 2}); + param->set_friendly_name("input"); + auto reshape_val0 = ov::opset11::Constant::create(ov::element::i64, ov::Shape{2}, {bs, 12}); + reshape_val0->set_friendly_name("reshape_val0"); + auto reshape0 = std::make_shared(param, reshape_val0, true); + reshape0->set_friendly_name("reshape0"); + auto const_value = ov::opset11::Constant::create(ov::element::i64, ov::Shape{1, 1}, {1}); + const_value->set_friendly_name("const_val"); + auto subtract = std::make_shared(reshape0, const_value); + subtract->set_friendly_name("sub"); + auto shape_of = std::make_shared(param); + shape_of->set_friendly_name("shape_of"); + auto reshape1 = std::make_shared(subtract, shape_of, true); + reshape1->set_friendly_name("reshape1"); + auto result = std::make_shared(reshape1); + result->set_friendly_name("res"); + return std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); +} + // Mock plugins class MockCompiledModel : public ov::ICompiledModel { @@ -386,8 +414,11 @@ class MockCustomRemoteContext : public ov::IRemoteContext { class MockPluginBase : public ov::IPlugin { public: - MockPluginBase(const std::string& name, const std::unordered_set& supported_ops) - : m_supported_ops(supported_ops) { + MockPluginBase(const std::string& name, + const std::unordered_set& supported_ops, + bool dynamism_supported = false) + : m_supported_ops(supported_ops), + m_dynamism_supported(dynamism_supported) { set_device_name(name); } @@ -501,10 +532,24 @@ class MockPluginBase : public ov::IPlugin { auto device_id = properties.count(ov::device::id.name()) ? properties.at(ov::device::id.name()).as() : m_default_device_id; - for (const auto& op : model->get_ordered_ops()) { - if (m_supported_ops.find(op->get_type_info().name) == m_supported_ops.end()) - continue; - res[op->get_friendly_name()] = get_device_name() + "." + device_id; + + auto supported = ov::get_supported_nodes( + model, + [&](std::shared_ptr& model) { + ov::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(model); + }, + [&](const std::shared_ptr& op) { + if (op->is_dynamic() && !m_dynamism_supported) + return false; + if (m_supported_ops.find(op->get_type_info().name) == m_supported_ops.end()) + return false; + return true; + }); + for (auto&& op_name : supported) { + res.emplace(op_name, get_device_name() + "." 
+ device_id); } return res; } @@ -512,6 +557,7 @@ class MockPluginBase : public ov::IPlugin { protected: std::string m_default_device_id = "0"; std::unordered_set m_supported_ops; + bool m_dynamism_supported = false; bool m_profiling = false; bool m_loaded_from_cache{false}; }; @@ -519,7 +565,7 @@ class MockPluginBase : public ov::IPlugin { class MockPluginReshape : public MockPluginBase { public: MockPluginReshape(const std::string& name) - : MockPluginBase(name, {"Parameter", "Result", "Add", "Constant", "Reshape"}) {} + : MockPluginBase(name, {"Parameter", "Result", "Add", "Constant", "Reshape"}, true) {} const ov::Version& get_const_version() override { static const ov::Version version = {CI_BUILD_NUMBER, "openvino_mock_reshape_plugin"}; diff --git a/src/plugins/hetero/tests/functional/hetero_tests.hpp b/src/plugins/hetero/tests/functional/hetero_tests.hpp index b2af29f19472a5..7b6e5f85fad0ee 100644 --- a/src/plugins/hetero/tests/functional/hetero_tests.hpp +++ b/src/plugins/hetero/tests/functional/hetero_tests.hpp @@ -20,10 +20,11 @@ class HeteroTests : public ::testing::Test { void SetUp() override; - std::shared_ptr create_model_with_subtract(); - std::shared_ptr create_model_with_subtract_reshape(); - std::shared_ptr create_model_with_subtract_reshape_relu(); - std::shared_ptr create_model_with_reshape(); + std::shared_ptr create_model_with_subtract(bool dynamic = false); + std::shared_ptr create_model_with_subtract_reshape(bool dynamic = false); + std::shared_ptr create_model_with_subtract_reshape_relu(bool dynamic = false); + std::shared_ptr create_model_with_reshape(bool dynamic = false); + std::shared_ptr create_model_with_subtract_shapeof_reshape(bool dynamic = false); ov::Tensor create_and_fill_tensor(const ov::element::Type& type, const ov::Shape& shape); private: diff --git a/src/plugins/hetero/tests/functional/query_model_tests.cpp b/src/plugins/hetero/tests/functional/query_model_tests.cpp index 9df8c2b110abcf..4563cdd726b7f0 100644 --- a/src/plugins/hetero/tests/functional/query_model_tests.cpp +++ b/src/plugins/hetero/tests/functional/query_model_tests.cpp @@ -37,8 +37,14 @@ TEST_F(HeteroTests, query_model_on_mock1) { EXPECT_EQ(op.second, dev_name); names.erase(op.first); } - EXPECT_EQ(1, names.size()); - EXPECT_EQ("reshape", *names.begin()); + const std::vector unmarked_names = {"reshape_val", "reshape", "res"}; + EXPECT_EQ(unmarked_names.size(), names.size()); + for (auto& name : unmarked_names) { + auto it = names.find(name); + if (it != names.end()) + names.erase(it); + } + EXPECT_EQ(0, names.size()); } TEST_F(HeteroTests, query_model_on_mixed) { @@ -65,3 +71,30 @@ TEST_F(HeteroTests, query_model_on_mixed) { } EXPECT_EQ(0, names.size()); } + +TEST_F(HeteroTests, query_dynamic_model_on_mixed) { + const std::string dev_name0 = "MOCK0.3"; + const std::string dev_name1 = "MOCK1.2"; + ov::AnyMap config = {ov::device::priorities(dev_name0 + "," + dev_name1)}; + const auto model = create_model_with_subtract_reshape(true); + std::set supported_ops_mock0; + for (auto& op : core.query_model(model, dev_name0)) { + if (op.second == dev_name0) + supported_ops_mock0.insert(op.first); + } + const auto supported_ops = core.query_model(model, "HETERO", config); + std::unordered_set names; + for (const auto& op : model->get_ops()) { + names.insert(op->get_friendly_name()); + } + for (const auto& op : supported_ops) { + if (supported_ops_mock0.count(op.first)) + EXPECT_EQ(op.second, dev_name0); + else + EXPECT_EQ(op.second, dev_name1); + names.erase(op.first); + } + EXPECT_EQ(1, 
names.size()); + // fallback plugin doesn't support dynamism + ASSERT_TRUE(names.count("sub")); +} diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index ff074c03d58b43..77f0062817ef84 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -406,6 +406,11 @@ int get_model_prefer_threads(const int num_streams, // latency if (num_streams <= sockets && num_streams > 0) { if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 && proc_type_table[0][MAIN_CORE_PROC] > 0) { +#ifdef __APPLE__ + if ((proc_type_table.size() == 1) && (proc_type_table[0][EFFICIENT_CORE_PROC] > 0)) { + model_prefer = proc_type_table[0][ALL_PROC]; + } +#else bool fp_intesive = !ov::op::util::has_op_with_type(ngraphFunc); const int int8_threshold = 4; // ~relative efficiency of the VNNI-intensive code for Big vs Little cores; const int fp32_threshold = 2; // ~relative efficiency of the AVX2 fp32 code for Big vs Little cores; @@ -414,6 +419,7 @@ int get_model_prefer_threads(const int num_streams, (fp_intesive ? fp32_threshold : int8_threshold)) ? proc_type_table[0][MAIN_CORE_PROC] : proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC]; +#endif } } else { // throughput model_prefer = config.modelPreferThreads; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index de4100517fbb53..6d523ef8347721 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1163,15 +1163,15 @@ class UpdateNodesBase : public IUpdateNodes { m_completion.store(true, std::memory_order::memory_order_relaxed); throw; } - m_prepareCounter.store(stop_indx, std::memory_order::memory_order_release); - m_completion.store(true, std::memory_order::memory_order_relaxed); + m_prepareCounter.store(stop_indx, std::memory_order::memory_order_relaxed); + m_completion.store(true, std::memory_order::memory_order_release); } void updateDynParams(size_t node_indx, size_t /*unused*/) { size_t local_counter = node_indx; while (true) { - bool completion = m_completion.load(std::memory_order::memory_order_relaxed); - size_t prepareCounter = m_prepareCounter.load(std::memory_order::memory_order_acquire); + const bool completion = m_completion.load(std::memory_order::memory_order_acquire); + const size_t prepareCounter = m_prepareCounter.load(std::memory_order::memory_order_relaxed); if (completion && local_counter == prepareCounter) { break; } diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index d40c7df913f1a3..d6956665bd24a7 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -95,6 +95,10 @@ void GraphOptimizer::ApplyCommonGraphOptimizations(Graph &graph) { FuseFCAndConvertOnWeights(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseFCAndTransposeOnWeights"); + FuseFCAndTransposeOnWeights(graph); + graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseDeconvolutionAndSimpleOperation"); FuseDeconvolutionAndSimpleOperation(graph); graph.RemoveDroppedNodes(); @@ -805,10 +809,20 @@ void GraphOptimizer::FuseFCAndConvertOnWeights(Graph& graph) { // (e.g. 
fuse conversion with weights reordering) auto& graphNodes = graph.GetNodes(); + auto isSuitablePattern = [](NodePtr parent) { + bool res = true && parent->getType() == Type::Convert + && parent->getChildEdges().size() == 1 + && parent->getChildEdgeAt(0)->getOutputNum() == 1 + && parent->getChildEdgeAt(0)->getChild()->getType() == Type::FullyConnected + && one_of(parent->getOriginalInputPrecisionAtPort(0), Precision::FP16) + && one_of(parent->getOriginalOutputPrecisionAtPort(0), Precision::FP32, Precision::BF16) + && parent->isConstant(); + return res; + }; + for (auto parent : graphNodes) { - if (parent->getType() == Type::Convert && parent->isConstant() && parent->getChildEdgeAt(0)->getChild()->getType() == Type::FullyConnected - && parent->getOriginalInputPrecisionAtPort(0) == Precision::FP16 - && one_of(parent->getOriginalOutputPrecisionAtPort(0), Precision::FP32, Precision::BF16)) { + if (isSuitablePattern(parent)) { + CPU_GRAPH_OPTIMIZER_SCOPE(FuseFCAndConvertOnWeights); auto childNode = parent->getChildEdgeAt(0)->getChild(); // set correct weight precision childNode->setOriginalInputPrecisionAtPort(1, parent->getOriginalInputPrecisionAtPort(0)); @@ -817,6 +831,31 @@ void GraphOptimizer::FuseFCAndConvertOnWeights(Graph& graph) { } } +void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) { + // This optimization allows us to avoid transposing the weights in Transpose node and do it directly along with reordering in FC node + auto& graphNodes = graph.GetNodes(); + + auto isSuitablePattern = [](NodePtr parent) { + bool res = true && parent->getType() == Type::Transpose + && parent->getChildEdges().size() == 1 + && parent->getChildEdgeAt(0)->getOutputNum() == 1 + && parent->getChildEdgeAt(0)->getChild()->getType() == Type::FullyConnected + && parent->getOutputShapeAtPort(0).getRank() == 2 + && parent->isConstant(); + return res; + }; + + for (auto parent : graphNodes) { + if (isSuitablePattern(parent)) { + CPU_GRAPH_OPTIMIZER_SCOPE(FuseFCAndTransposeOnWeights); + auto fcNode = std::dynamic_pointer_cast(parent->getChildEdgeAt(0)->getChild()); + fcNode->keepWeightsNonTransposed(true); + auto transposeNode = std::dynamic_pointer_cast(parent); + transposeNode->setOptimized(true); + } + } +} + void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { auto& graphNodes = graph.GetNodes(); diff --git a/src/plugins/intel_cpu/src/graph_optimizer.h b/src/plugins/intel_cpu/src/graph_optimizer.h index fe4991deb86fe6..bb6494758f38be 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.h +++ b/src/plugins/intel_cpu/src/graph_optimizer.h @@ -27,6 +27,7 @@ class GraphOptimizer { void FuseMultiplyAndAdd(Graph &graph); void MergeConvertAndScaleShift(Graph& graph); void FuseFCAndConvertOnWeights(Graph& graph); + void FuseFCAndTransposeOnWeights(Graph& graph); void FuseFullyConnectedAndSimpleOperation(Graph &graph); void FuseMatMulAndSimpleOperation(Graph &graph); void FuseConvolutionAndSimpleOperationThroughMaxPool(Graph &graph); diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index d9c83a644e5c30..e8fe6b89a00afc 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -865,28 +865,30 @@ void Node::prepareMemory(dnnl::primitive_desc_iterator& itpd) { Node::prepareMemory(intDescs); } -MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { +MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr dstWeightDesc, DnnlMemoryDescPtr srcWeightDesc) { if (!getParentEdgeAt(1)->getParent()->isConstant()) IE_THROW() << 
"Weight input is not const for node " << getName() << "."; auto edgeMem = getParentEdgeAt(1)->getMemoryPtr(); if (!edgeMem) IE_THROW() << "Cannot get const weights edgeMem for node " << getName() << "."; - auto constDnnlMemOutDesc = edgeMem->getDescWithType(); - auto weightSrcDesc = constDnnlMemOutDesc->getDnnlDesc(); - weightSrcDesc = weightSrcDesc.reshape(weightDesc->getDnnlDesc().get_dims()); - auto create = [&] () { - auto newSrcDesc = DnnlExtensionUtils::makeDescriptor(weightSrcDesc); + if (!srcWeightDesc) { + auto constDnnlMemOutDesc = edgeMem->getDescWithType(); + auto weightSrcDesc = constDnnlMemOutDesc->getDnnlDesc(); + weightSrcDesc = weightSrcDesc.reshape(dstWeightDesc->getDnnlDesc().get_dims()); + srcWeightDesc = DnnlExtensionUtils::makeDescriptor(weightSrcDesc); + } - Memory srcMemory{ getEngine(), newSrcDesc, edgeMem->getData() }; - MemoryPtr _ptr = std::make_shared(getEngine(), weightDesc); + auto create = [&] () { + Memory srcMemory{ getEngine(), srcWeightDesc, edgeMem->getData() }; + MemoryPtr _ptr = std::make_shared(getEngine(), dstWeightDesc); node::Reorder::reorderData(srcMemory, *_ptr, context->getParamsCache()); return _ptr; }; MemoryPtr ptr; - const auto& format = weightDesc->serializeFormat(); + const auto& format = dstWeightDesc->serializeFormat(); auto itr = privateWeightCache.find(format); if (privateWeightCache.end() != itr) { ptr = itr->second; diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index cffa74050778aa..5becbfa9863f70 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -652,7 +652,7 @@ class Node { void prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx); void prepareMemory(dnnl::primitive_desc_iterator& itpd); - MemoryPtr prepareWeightMemory(DnnlMemoryDescPtr weightDesc); + MemoryPtr prepareWeightMemory(DnnlMemoryDescPtr dstWeightDesc, DnnlMemoryDescPtr srcWeightDesc = nullptr); bool isDynamic = false; diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index a3edf6682edcf5..07cbc38bd66300 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -775,21 +775,23 @@ void Convolution::initSupportedPrimitiveDescriptors() { auto& desc = descs[dIdx]; auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); + auto add_supported_desc = [&](dnnl::primitive_desc& desc) { + addSupportedPrimitiveDescriptor(desc); + descIdx.push_back(dIdx); + }; + const bool first_match = customImplPriorities.empty(); DnnlExtensionUtils::for_each_implementation(desc, first_match, [&](impl_desc_type implType) { return contains(getImplPriority(), implType); }, - [&](dnnl::primitive_desc& desc) { - addSupportedPrimitiveDescriptor(desc); - descIdx.push_back(dIdx); - }); + add_supported_desc); // fallback. 
if none of the primitive types is present in the priority list just add first implementation // @todo this fallback is not necessary if primitive priority list is filled correctly if (supportedPrimitiveDescriptors.empty()) - addSupportedPrimitiveDescriptor(first_desc); + add_supported_desc(first_desc); } } diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_convert.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_convert.cpp index 06b13e700ddb15..afed4031768f2f 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_convert.cpp @@ -5,36 +5,42 @@ #include "acl_convert.hpp" #include "acl_utils.hpp" -bool ov::intel_cpu::ACLConvertExecutor::init(const ov::intel_cpu::ConvertParams& convertParams, - const MemoryDescPtr& srcDesc, - const MemoryDescPtr& dstDesc, - const dnnl::primitive_attr& attr) { +namespace ov { +namespace intel_cpu { + +using namespace arm_compute; +using namespace InferenceEngine; + +bool ACLConvertExecutor::init(const ConvertParams& convertParams, + const MemoryDescPtr& srcDesc, + const MemoryDescPtr& dstDesc, + const dnnl::primitive_attr& attr) { aclConvertParams = convertParams; auto srcPrecision = precisionToAclDataType(aclConvertParams.srcPrc); auto dstPrecision = precisionToAclDataType(aclConvertParams.dstPrc); isCopyOp = aclConvertParams.srcPrc == aclConvertParams.dstPrc; // NECast does not support S8. It could be replaced with QASYMM8_SIGNED - if (!isCopyOp && srcPrecision == arm_compute::DataType::S8) { - srcPrecision = arm_compute::DataType::QASYMM8_SIGNED; + if (!isCopyOp && srcPrecision == DataType::S8) { + srcPrecision = DataType::QASYMM8_SIGNED; } - if (!isCopyOp && dstPrecision == arm_compute::DataType::S8) { - dstPrecision = arm_compute::DataType::QASYMM8_SIGNED; + if (!isCopyOp && dstPrecision == DataType::S8) { + dstPrecision = DataType::QASYMM8_SIGNED; } auto srcDims = srcDesc->getShape().getStaticDims(); auto dstDims = dstDesc->getShape().getStaticDims(); auto srcDataLayout = getAclDataLayoutByMemoryDesc(srcDesc); auto dstDataLayout = getAclDataLayoutByMemoryDesc(dstDesc); - auto srcTensorInfo = arm_compute::TensorInfo(shapeCast(srcDims), 1, srcPrecision, srcDataLayout); - auto dstTensorInfo = arm_compute::TensorInfo(shapeCast(dstDims), 1, dstPrecision, dstDataLayout); + auto srcTensorInfo = TensorInfo(shapeCast(collapse_dims_to_max_rank(srcDims)), 1, srcPrecision, srcDataLayout); + auto dstTensorInfo = TensorInfo(shapeCast(collapse_dims_to_max_rank(dstDims)), 1, dstPrecision, dstDataLayout); if (isCopyOp) { - arm_compute::Status s = arm_compute::NECopy::validate(&srcTensorInfo, &dstTensorInfo); + Status s = NECopy::validate(&srcTensorInfo, &dstTensorInfo); if (!s) { DEBUG_LOG("NECopy validation failed: ", s.error_description()); return false; } } else { - arm_compute::Status s = arm_compute::NECast::validate(&srcTensorInfo, &dstTensorInfo, arm_compute::ConvertPolicy::SATURATE); + Status s = NECast::validate(&srcTensorInfo, &dstTensorInfo, ConvertPolicy::SATURATE); if (!s) { DEBUG_LOG("NECast validation failed: ", s.error_description()); return false; @@ -45,16 +51,16 @@ bool ov::intel_cpu::ACLConvertExecutor::init(const ov::intel_cpu::ConvertParams& dstTensor.allocator()->init(dstTensorInfo); if (isCopyOp) { - acl_copy = std::make_unique(); + acl_copy = std::make_unique(); acl_copy->configure(&srcTensor, &dstTensor); } else { - acl_cast = std::make_unique(); - acl_cast->configure(&srcTensor, &dstTensor, arm_compute::ConvertPolicy::SATURATE); + acl_cast = 
std::make_unique(); + acl_cast->configure(&srcTensor, &dstTensor, ConvertPolicy::SATURATE); } return true; } -void ov::intel_cpu::ACLConvertExecutor::exec(const MemoryCPtr& src, const MemoryPtr& dst) { +void ACLConvertExecutor::exec(const MemoryCPtr& src, const MemoryPtr& dst) { srcTensor.allocator()->import_memory(src->getData()); dstTensor.allocator()->import_memory(dst->getData()); @@ -68,57 +74,60 @@ void ov::intel_cpu::ACLConvertExecutor::exec(const MemoryCPtr& src, const Memory dstTensor.allocator()->free(); } -bool ov::intel_cpu::ACLConvertExecutorBuilder::isSupported(const ConvertParams& convertParams, - const MemoryDescPtr& srcDesc, - const MemoryDescPtr& dstDesc) const { +bool ACLConvertExecutorBuilder::isSupported(const ConvertParams& convertParams, + const MemoryDescPtr& srcDesc, + const MemoryDescPtr& dstDesc) const { if (convertParams.srcPrc != convertParams.dstPrc) { if (!one_of(convertParams.srcPrc, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::U16, - InferenceEngine::Precision::I16, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::FP32)) { + Precision::I8, + Precision::U8, + Precision::U16, + Precision::I16, + Precision::FP16, + Precision::I32, + Precision::FP32)) { DEBUG_LOG("NECopy does not support source precision: ", convertParams.srcPrc.name()); return false; } - if ((convertParams.srcPrc == InferenceEngine::Precision::I8 && !one_of(convertParams.dstPrc, - InferenceEngine::Precision::I16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32)) || - (convertParams.srcPrc == InferenceEngine::Precision::U8 && !one_of(convertParams.dstPrc, - InferenceEngine::Precision::U16, - InferenceEngine::Precision::I16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32)) || - (convertParams.srcPrc == InferenceEngine::Precision::U16 && !one_of(convertParams.dstPrc, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::U32)) || - (convertParams.srcPrc == InferenceEngine::Precision::I16 && !one_of(convertParams.dstPrc, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::I32)) || - (convertParams.srcPrc == InferenceEngine::Precision::FP16 && !one_of(convertParams.dstPrc, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::U8)) || - (convertParams.srcPrc == InferenceEngine::Precision::I32 && !one_of(convertParams.dstPrc, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::U8)) || - (convertParams.srcPrc == InferenceEngine::Precision::FP32 && !one_of(convertParams.dstPrc, - InferenceEngine::Precision::BF16, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32))) { + if ((convertParams.srcPrc == Precision::I8 && !one_of(convertParams.dstPrc, + Precision::I16, + Precision::I32, + Precision::FP16, + Precision::FP32)) || + (convertParams.srcPrc == Precision::U8 && !one_of(convertParams.dstPrc, + Precision::U16, + Precision::I16, + Precision::I32, + Precision::FP16, + Precision::FP32)) || + (convertParams.srcPrc == Precision::U16 && !one_of(convertParams.dstPrc, + Precision::U8, + Precision::U32)) || + (convertParams.srcPrc == Precision::I16 && !one_of(convertParams.dstPrc, + Precision::I8, + Precision::U8, + Precision::I32)) || + 
(convertParams.srcPrc == Precision::FP16 && !one_of(convertParams.dstPrc, + Precision::I8, + Precision::FP32, + Precision::I32, + Precision::U8)) || + (convertParams.srcPrc == Precision::I32 && !one_of(convertParams.dstPrc, + Precision::I8, + Precision::FP16, + Precision::FP32, + Precision::U8)) || + (convertParams.srcPrc == Precision::FP32 && !one_of(convertParams.dstPrc, + Precision::BF16, + Precision::FP16, + Precision::I32))) { DEBUG_LOG("NECopy does not support passed combination of source and destination precisions. ", "source precision: ", convertParams.srcPrc.name(), " destination precsion: ", convertParams.dstPrc.name()); return false; } } return true; -} \ No newline at end of file +} + +} // namespace intel_cpu +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp index 42b35970993b2b..8133a23ee29769 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp @@ -10,6 +10,28 @@ namespace ov { namespace intel_cpu { +/** +* @brief ACL supports arm_compute::MAX_DIMS maximum. The method squashes the last +* dimensions in order to comply with this limitation +* @param dims vector of dimensions to squash +* @return vector of dimensions that complies to ACL +*/ +inline VectorDims collapse_dims_to_max_rank(VectorDims dims) { + const size_t MAX_NUM_SHAPE = arm_compute::MAX_DIMS; + VectorDims result_dims(MAX_NUM_SHAPE - 1); + if (dims.size() >= MAX_NUM_SHAPE) { + for (size_t i = 0; i < MAX_NUM_SHAPE - 1; i++) { + result_dims[i] = dims[i]; + } + for (size_t i = MAX_NUM_SHAPE - 1; i < dims.size(); i++) { + result_dims[MAX_NUM_SHAPE - 2] *= dims[i]; + } + } else { + result_dims = dims; + } + return result_dims; +} + /** * @brief ACL handles NHWC specifically, it thinks it is NCHW, so we need to change layout manually: * NCHW (0, 1, 2, 3) -> NHWC (0, 2, 3, 1) diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 25af6f996f61d5..823e8448b28695 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -294,12 +294,12 @@ void FullyConnected::prepackMLASWeight() { MemoryPtr ptr; auto create = [&]() { float* weightPtr = reinterpret_cast(weightsMem->getData()); - size_t ldb = K; + size_t ldb = weightsNonTransposed ? N : K; MemoryPtr _ptr = std::make_shared(getEngine(), intel_cpu::CpuBlockedMemoryDesc(Precision::I8, intel_cpu::Shape{packedBsize})); float* prepackedDst = reinterpret_cast(_ptr->getData()); - mlas_sgemm_pack("T", N, K, ldb, weightPtr, prepackedDst); + mlas_sgemm_pack(weightsNonTransposed ? 
"F" : "T", N, K, ldb, weightPtr, prepackedDst); return _ptr; }; @@ -316,7 +316,7 @@ void FullyConnected::prepackMLASWeight() { return ptr; }; const auto& wgtDims = getParentEdgeAt(WEIGHTS_ID)->getMemoryPtr()->getStaticDims(); - // Weight is transpoed by MatMulConstTransposesExtraction + // Weights are transposed by MatMulConstTransposesExtraction // K is the IC of weight // the weight is reshaped to [-1, K] in ConvertMatMulToFC K = wgtDims[1]; @@ -470,7 +470,11 @@ void FullyConnected::prepareParams() { } if (!prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) { - primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->getPrimitive(); + if (weightsNonTransposed) { + primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc(), makeTransposedWeightDescriptor())->getPrimitive(); + } else { + primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->getPrimitive(); + } } // changed shapes may also cause the kernel type changed selected_pd->setImplementationType(execPtr->getImplementationType()); @@ -1074,6 +1078,22 @@ void FullyConnected::fuseDecompressionConstant(const NodePtr& constData, std::ve Precision::FP32, elementsCount); } + +DnnlMemoryDescPtr FullyConnected::makeTransposedWeightDescriptor() { + if (!getParentEdgeAt(1)->getParent()->isConstant()) + IE_THROW() << "Weight input is not const for node " << getName() << "."; + auto edgeMem = getParentEdgeAt(1)->getMemoryPtr(); + if (!edgeMem) + IE_THROW() << "Cannot get const weights edgeMem for node " << getName() << "."; + + auto constDnnlMemOutDesc = edgeMem->getDescWithType(); + auto weightSrcDesc = constDnnlMemOutDesc->getDnnlDesc(); + weightSrcDesc = {weightSrcDesc.get_dims(), weightSrcDesc.get_data_type(), memory::format_tag::ba}; + weightSrcDesc = weightSrcDesc.reshape(execPtr->getWeightDesc()->getDnnlDesc().get_dims()); + + return DnnlExtensionUtils::makeDescriptor(weightSrcDesc); +} + } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h index a344466678885f..9192d741cf6ebd 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h @@ -56,6 +56,9 @@ class FullyConnected : public Node { void prepareParams() override; void executeDynamicImpl(dnnl::stream strm) override; bool canBeExecutedInInt8() const override; + void keepWeightsNonTransposed(bool weightsNonTransposed) { + this->weightsNonTransposed = weightsNonTransposed; + } void fuseDecompressionMultiply(const NodePtr& constData); const std::vector& getDecompressionMultiply() const { return decompressionMultiply; } @@ -118,6 +121,10 @@ class FullyConnected : public Node { bool useWeightsDecompressionImpl = false; std::vector decompressionSubtract; std::vector decompressionMultiply; + + // FC with transposed weights + bool weightsNonTransposed = false; + DnnlMemoryDescPtr makeTransposedWeightDescriptor(); }; } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index 2652447d0acd48..2379680e763028 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -76,7 +76,7 @@ void Transpose::initSupportedPrimitiveDescriptors() { config.inConfs[INPUT_ORDER_IDX].constant(isInputOrderConst); config.inConfs[INPUT_ORDER_IDX].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( 
Precision::I32, getInputShapeAtPort(INPUT_ORDER_IDX))); - config.outConfs[0].inPlace(-1); + config.outConfs[0].inPlace(isOptimized ? 0 : -1); config.outConfs[0].constant(false); transpose_context = std::make_shared(context, getImplPriority()); @@ -125,7 +125,7 @@ void Transpose::initSupportedPrimitiveDescriptors() { } bool Transpose::isExecutable() const { - return !isInputTensorAtPortEmpty(0); + return !isInputTensorAtPortEmpty(0) && !isOptimized; } bool Transpose::needPrepareParams() const { @@ -133,6 +133,9 @@ bool Transpose::needPrepareParams() const { } void Transpose::prepareParams() { + if (isOptimized) + return; + if (performAsReorder) { // Transpose(order={0,3,1,2}) can be performed as Reorder(acdb=>abcd) auto srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); @@ -191,6 +194,9 @@ void Transpose::prepareParams() { } void Transpose::createPrimitive() { + if (isOptimized) + return; + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -223,6 +229,9 @@ void Transpose::createPrimitive() { } void Transpose::execute(dnnl::stream strm) { + if (isOptimized) + return; + if (prim) { prim.execute(strm, primArgs); } else if (execPtr) { diff --git a/src/plugins/intel_cpu/src/nodes/transpose.h b/src/plugins/intel_cpu/src/nodes/transpose.h index 5fb7e9f76570bf..0712e48c65d33e 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.h +++ b/src/plugins/intel_cpu/src/nodes/transpose.h @@ -38,6 +38,10 @@ class Transpose : public Node { bool needPrepareParams() const override; void prepareParams() override; + void setOptimized(bool isOptimized) { + this->isOptimized = isOptimized; + } + protected: void executeDynamicImpl(dnnl::stream strm) override; std::shared_ptr transpose_context; @@ -56,6 +60,7 @@ class Transpose : public Node { static constexpr size_t INPUT_ORDER_IDX = 1lu; bool performAsReorder = false; + bool isOptimized = false; }; } // namespace node diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp index 6e4d2cc61ae35b..35ea7736d92cc0 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp @@ -113,12 +113,13 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() { std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2)); auto transpose_const = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{ transpose_order.size() }, transpose_order); - auto transpose = ov::op::util::make_try_fold(node, transpose_const); + auto transpose = std::make_shared(node, transpose_const); if (!ngraph::is_type(transpose)) { new_ops.push_back(transpose_const); MatcherPass::register_new_node(transpose); } transpose->set_friendly_name(transpose_name); + ov::disable_constant_folding(transpose); new_ops.push_back(transpose); return transpose; }; diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 0cde3ca82831bf..f8356a8f793e76 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -30,6 +30,7 @@ #include "transformations/common_optimizations/augru_cell_fusion.hpp" #include 
"transformations/common_optimizations/common_optimizations.hpp" #include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp" +#include "transformations/common_optimizations/matmul_const_transposes_extraction.hpp" #include "transformations/control_flow/unroll_tensor_iterator.hpp" #include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include "transformations/op_conversions/convert_batch_to_space.hpp" @@ -399,6 +400,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertTopK3); CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertTopK11ToTopK3); CPU_DISABLE_PASS_COMMON(manager, ov::pass::HSwishDecomposition); + CPU_DISABLE_PASS_COMMON(manager, ov::pass::MatMulConstTransposesExtraction); CPU_DISABLE_PASS_X64(manager, ov::pass::HSigmoidDecomposition); CPU_DISABLE_PASS_X64(manager, ov::pass::ReduceL1Decomposition); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.cpp index 3b475d3af39fd3..b4b9d7516cdaf5 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.cpp @@ -136,6 +136,14 @@ const std::vector& inShapes_4D_static() { return inShapes_4D_static; } +const std::vector& inShapes_7D_static() { + static const std::vector inShapes_7D_static = { + {{1, 2, 3, 4, 5, 6, 7}, {{1, 2, 3, 4, 5, 6, 7}}}, + {{1, 1, 1, 1, 1, 1080, 1920}, {{1, 1, 1, 1, 1, 1080, 1920}}}, + }; + return inShapes_7D_static; +} + const std::vector& inShapes_4D_dynamic() { static const std::vector inShapes_4D_dynamic = { { @@ -162,6 +170,32 @@ const std::vector& inShapes_4D_dynamic() { return inShapes_4D_dynamic; } +const std::vector& inShapes_7D_dynamic() { + static const std::vector inShapes_7D_dynamic = { + { + // dynamic + {{-1, -1, -1, -1, -1, -1, -1}}, + // target + { + {2, 4, 4, 4, 3, 3, 1}, + {2, 17, 5, 4, 3, 2, 1}, + {1, 2, 3, 4, 5, 6, 7} + } + }, + { + // dynamic + {{{1, 5}, {2, 22}, {2, 9}, {1, 4}, {1, 4}, {1, 4}, {1, 4}}}, + // target + { + {2, 17, 5, 4, 3, 1, 2}, + {5, 2, 3, 2, 4, 1, 3}, + {1, 10, 4, 1, 4, 2, 3}, + } + } + }; + return inShapes_7D_dynamic; +} + const std::vector& precisions() { static const std::vector precisions = { Precision::U8, diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.hpp index 10c331a0ff255d..8418a919a6e4d9 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.hpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.hpp @@ -38,6 +38,8 @@ class ConvertCPULayerTest : public testing::WithParamInterface& inShapes_4D_static(); const std::vector& inShapes_4D_dynamic(); + const std::vector& inShapes_7D_static(); + const std::vector& inShapes_7D_dynamic(); const std::vector& precisions(); } // namespace Conversion } // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/arm/conversion.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/arm/conversion.cpp new file mode 100644 index 00000000000000..e7e8c25dee812c --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/arm/conversion.cpp @@ -0,0 +1,34 @@ +// 
Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/conversion.hpp" +#include "shared_test_classes/single_layer/conversion.hpp" +#include "test_utils/cpu_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace Conversion { + +INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_7D_Dynamic, ConvertCPULayerTest, + ::testing::Combine( + ::testing::ValuesIn(inShapes_7D_dynamic()), + ::testing::ValuesIn(precisions()), + ::testing::ValuesIn(precisions()), + ::testing::Values(CPUSpecificParams({}, {}, {}, {}))), + ConvertCPULayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_7D_Static, ConvertCPULayerTest, + ::testing::Combine( + ::testing::ValuesIn(inShapes_7D_static()), + ::testing::ValuesIn(precisions()), + ::testing::ValuesIn(precisions()), + ::testing::Values(CPUSpecificParams({}, {}, {}, {}))), + ConvertCPULayerTest::getTestCaseName); + +} // namespace Conversion +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/conversion.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/conversion.cpp index cf2af8b8a70bba..4a29a6d542cdcc 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/conversion.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/conversion.cpp @@ -29,7 +29,7 @@ std::vector memForm4D_dynamic = { CPUSpecificParams({nhwc}, {nhwc}, {}, expectedPrimitiveType()), }; -INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_Dynamic, ConvertCPULayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_4D_Dynamic, ConvertCPULayerTest, ::testing::Combine( ::testing::ValuesIn(inShapes_4D_dynamic()), ::testing::ValuesIn(precisions()), @@ -42,7 +42,7 @@ std::vector memForm4D_static_common = { CPUSpecificParams({nhwc}, {nhwc}, {}, {}), }; -INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_4D_Static, ConvertCPULayerTest, ::testing::Combine( ::testing::ValuesIn(inShapes_4D_static()), ::testing::ValuesIn(precisions()), diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_dw_conv.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_dw_conv.cpp new file mode 100644 index 00000000000000..8416c34dd8aa93 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_dw_conv.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +using namespace ngraph; +namespace SubgraphTestsDefinitions { +class ConvDWConv : virtual public ov::test::SubgraphBaseTest { +protected: + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_CPU; + const auto precision = ov::element::f32; + ov::test::InputShape input_shape{{}, {{1, 32, 112, 112}}}; + init_input_shapes({input_shape}); + + + auto params = ngraph::builder::makeDynamicParams(precision, inputDynamicShapes); + auto conv_weights = ngraph::builder::makeConstant(precision, std::vector{32, 32, 1, 
1}, std::vector{}, true); + auto conv = ngraph::builder::makeConvolution(params[0], + conv_weights, + precision, + std::vector{1, 1}, + std::vector{1, 1}, + ov::CoordinateDiff{0, 0}, + ov::CoordinateDiff{0, 0}, + std::vector{1, 1}, + ngraph::op::PadType::EXPLICIT, + 32, + true); + + auto dw_conv_weights = ngraph::builder::makeConstant(precision, std::vector{32, 1, 1, 3, 3}, std::vector{}, true); + auto dw_conv = ngraph::builder::makeGroupConvolution(conv, + dw_conv_weights, + precision, + std::vector{1, 1}, + ov::CoordinateDiff{1, 1}, + ov::CoordinateDiff{1, 1}, + std::vector{1, 1}, + ngraph::op::PadType::EXPLICIT); + auto bias_const = ngraph::builder::makeConstant(precision, {1, 32 , 1, 1}, std::vector{}, true); + auto bias = std::make_shared(dw_conv, bias_const); + function = std::make_shared(bias, params, "ConvDWConv"); + } +}; + +TEST_F(ConvDWConv, smoke_CompareWithRefs) { + run(); +} + +} // namespace SubgraphTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_decompress_convert.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_decompress_convert.cpp index 81a7def84adaa6..bc2082d98366f0 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_decompress_convert.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_decompress_convert.cpp @@ -14,9 +14,11 @@ using namespace ov::test; namespace SubgraphTestsDefinitions { -/* This test checks that the ConvertMatMulToFC transformation should work and the MatMul node is converted to the FC node. - * The Convert node should be removed on the CPU plugin side. +/* This test checks MatMul weights constant folding on CPU plugin side and cover two optimizations: + 1. Decompressing Convert FP16 -> FP32 CF (FuseFCAndConvertOnWeights in cpu graph optimizer) + 2. Transpose CF (FuseFCAndTransposeOnWeights in cpu graph optimizer) + * 1. Graph with decompressing Convert FP16 -> FP32. The Convert node should be removed on the CPU plugin side. * Graph before: ------------ ------------ |Input(f32)| |Input(f16)| @@ -46,12 +48,46 @@ namespace SubgraphTestsDefinitions { -------- |Output| -------- + + * 2. Graph with Transpose. In case of (transpose_b == false), ConvertMatMulToFC() transformation should insert Transpose on weights. + * It must not fold and must remain in the execution graph. 
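 * (Illustrative sketch, not part of this patch; variable names here are hypothetical. Using the
 * same ngraph::builder helpers the test below already relies on, the case 2 pattern is roughly:
 *   auto weights = ngraph::builder::makeConstant(ov::element::f32, ov::Shape{3, 4}, {}, true);
 *   auto matmul  = ngraph::builder::makeMatMul(input, weights, false, false);  // transpose_b == false
 * ConvertMatMulToFC() then inserts a Transpose on the weights input and disables constant folding
 * for it, so the Transpose survives into the execution graph and is fused later by
 * FuseFCAndTransposeOnWeights in the CPU graph optimizer.)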
+ * Graph before: + ------------ ------------ + |Input(f32)| |Input(f32)| + ------------ ------------ + | | + ------------------------------------- + | MatMul(transpose_b == false) | + ------------------------------------- + | + -------- + |Output| + -------- + + * Exec graph: + ------------ ------------ + |Input(f32)| |Input(f32)| + ------------ ------------ + | | + | ------------- + | | Transpose | + | ------------- + | | + ---------------------------- + | FullyConnected | + ---------------------------- + | + -------- + |Output| + -------- */ using MatMulDecompressConvertParams = std::tuple< - std::vector, // input shapes - std::pair, // transposeA, transposeB - std::map // additional config + std::vector, // input shapes + std::pair, // transposeA, transposeB + ElementType, // weights precision + std::map, // additional config + CPUSpecificParams >; class MatMulDecompressConvertTest : public testing::WithParamInterface, @@ -60,9 +96,11 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface obj) { std::vector inputShapes; std::pair transpose; + ElementType weiElemType; std::map additionalConfig; + CPUSpecificParams cpuParams; - std::tie(inputShapes, transpose, additionalConfig) = obj.param; + std::tie(inputShapes, transpose, weiElemType, additionalConfig, cpuParams) = obj.param; std::ostringstream result; for (const auto& shape : inputShapes) { @@ -82,12 +120,16 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface std::string { auto it = rtInfo.find(paramName); IE_ASSERT(rtInfo.end() != it); @@ -110,8 +152,8 @@ class MatMulDecompressConvertTest : public testing::WithParamInterfaceget_ops()) { if (getExecValue(fcNode->get_rt_info(), ExecGraphInfoSerialization::LAYER_TYPE) == "FullyConnected") { const auto &constNode = fcNode->get_input_node_shared_ptr(1); - ASSERT_EQ(getExecValue(constNode->get_rt_info(), ExecGraphInfoSerialization::LAYER_TYPE), "Const"); - ASSERT_EQ(getExecValue(constNode->get_rt_info(), ExecGraphInfoSerialization::OUTPUT_PRECISIONS), "FP16"); + element::Type expectedType(getExecValue(constNode->get_rt_info(), ExecGraphInfoSerialization::OUTPUT_PRECISIONS)); + ASSERT_EQ(expectedType, weiConstElemType); } } } @@ -122,14 +164,19 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface inputShapes; std::pair transpose; std::map additionalConfig; + CPUSpecificParams cpuParams; - std::tie(inputShapes, transpose, additionalConfig) = this->GetParam(); + std::tie(inputShapes, transpose, weiConstElemType, additionalConfig, cpuParams) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; init_input_shapes(inputShapes); bool transpA = transpose.first; bool transpB = transpose.second; + if (transpA) transposesCount++; + if (!transpB) transposesCount++; + if (transpA) { transposeShape(inputDynamicShapes[0]); for (auto& shapes : targetStaticShapes) { @@ -148,32 +195,47 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface(params)); - - auto matrixB = ngraph::builder::makeConstant(element::f16, inShapeB.get_shape(), {}, true); - auto convert = std::make_shared(matrixB, inType); - mark_as_decompression(convert); - auto matMul = builder::makeMatMul(paramOuts[0], convert, transpA, transpB); + std::shared_ptr inputB = builder::makeConstant(weiConstElemType, inShapeB.get_shape(), {}, true); + if (weiConstElemType == ElementType::f16) { + inputB = std::make_shared(inputB, convertOutType); + mark_as_decompression(inputB); + } + auto matMul = builder::makeMatMul(paramOuts[0], inputB, 
transpA, transpB); function = CPUTestsBase::makeNgraphFunction(netType, params, matMul, cpuNodeType); } + + void CheckExecutionGraph() { + CheckPluginRelatedResults(compiledModel, "FullyConnected"); + CheckNumberOfNodesWithType(compiledModel, "FullyConnected", 1); + CheckNumberOfNodesWithType(compiledModel, "Transpose", transposesCount); + CheckNumberOfNodesWithType(compiledModel, "Convert", 0); + CheckNumberOfNodesWithType(compiledModel, "Reorder", 0); + CheckFCWeightsPrecision(); + } + + size_t transposesCount = 0; + ElementType weiConstElemType = ElementType::f32; }; TEST_P(MatMulDecompressConvertTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); run(); - CheckNumberOfNodesWithType(compiledModel, "FullyConnected", 1); - CheckNumberOfNodesWithType(compiledModel, "Convert", 0); - CheckNumberOfNodesWithType(compiledModel, "Reorder", 0); - CheckConstFP16(); + CheckExecutionGraph(); } namespace { @@ -185,32 +247,148 @@ const std::vector> transposeParams = { {true, true}, }; +const std::vector> inputShapes2D = { + static_shapes_to_test_representation({{2, 3}, {3, 4}}), + { + {{-1, -1}, {{2, 3}, {5, 3}}}, + {{3, 4}, {{3, 4}, {3, 4}}} + }, +}; + +const std::vector> inputShapes3D = { + static_shapes_to_test_representation({{2, 2, 3}, {3, 4}}), + static_shapes_to_test_representation({{2, 3}, {1, 3, 4}}), + static_shapes_to_test_representation({{1, 2, 3}, {1, 3, 4}}), + { + {{-1, -1, -1}, {{2, 2, 3}, {3, 5, 3}}}, + {{3, 4}, {{3, 4}, {3, 4}}} + }, + { + {{-1, -1}, {{2, 3}, {5, 3}}}, + {{1, 3, 4}, {{1, 3, 4}, {1, 3, 4}}} + }, + { + {{-1, -1, -1}, {{1, 2, 3}, {1, 5, 3}}}, + {{1, 3, 4}, {{1, 3, 4}, {1, 3, 4}}} + }, +}; + + std::vector> filterAdditionalConfig() { std::vector> additionalConfig; +#ifndef OV_CPU_WITH_MLAS additionalConfig.push_back(std::map{/* empty config */}); +#endif + return additionalConfig; +} + +std::vector> filterAdditionalConfig_BF16() { + std::vector> additionalConfig; if (with_cpu_x86_avx512_core()) { additionalConfig.push_back({{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}); } + return additionalConfig; +} +std::vector> filterAdditionalConfig_MLAS() { + std::vector> additionalConfig; + additionalConfig.push_back(std::map{/* empty config */}); return additionalConfig; } +std::vector filterSpecificParams() { + std::vector specificParams; + if (with_cpu_x86_avx512_core()) { + specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx512"}, "brgemm_avx512"}); + } else if (with_cpu_x86_avx2()) { + specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx2"}, "brgemm_avx2"}); + } + return specificParams; +} + + +std::vector filterSpecificParams_BF16() { + std::vector specificParams; + specificParams.push_back(CPUSpecificParams{{}, {}, {"jit_gemm"}, "jit_gemm"}); + return specificParams; +} + + +std::vector filterSpecificParams_MLAS() { + std::vector specificParams; + specificParams.push_back(CPUSpecificParams{{}, {}, {"gemm_mlas"}, "gemm_mlas"}); + return specificParams; +} + + +#ifdef OV_CPU_WITH_MLAS +const auto testParams2D_MLAS_smoke = ::testing::Combine( + ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(transposeParams), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterAdditionalConfig_MLAS()), + ::testing::ValuesIn(filterSpecificParams_MLAS())); + +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_MLAS, MatMulDecompressConvertTest, testParams2D_MLAS_smoke, + MatMulDecompressConvertTest::getTestCaseName); +#endif + + const auto testParams2D_smoke = ::testing::Combine( - 
::testing::Values(static_shapes_to_test_representation({{2, 3}, {3, 4}})), - ::testing::ValuesIn(transposeParams), - ::testing::ValuesIn(filterAdditionalConfig())); + ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(transposeParams), + ::testing::Values(ElementType::f32, ElementType::f16), + ::testing::ValuesIn(filterAdditionalConfig()), + ::testing::ValuesIn(filterSpecificParams())); INSTANTIATE_TEST_SUITE_P(smoke_FC_2D, MatMulDecompressConvertTest, testParams2D_smoke, - MatMulDecompressConvertTest::getTestCaseName); + MatMulDecompressConvertTest::getTestCaseName); + + +const auto testParams2D_BF16_smoke = ::testing::Combine( + ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(transposeParams), + ::testing::Values(ElementType::f32, ElementType::f16), + ::testing::ValuesIn(filterAdditionalConfig_BF16()), + ::testing::ValuesIn(filterSpecificParams_BF16())); + +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_BF16, MatMulDecompressConvertTest, testParams2D_BF16_smoke, + MatMulDecompressConvertTest::getTestCaseName); + + +#ifdef OV_CPU_WITH_MLAS +const auto testParams3D_MLAS_smoke = ::testing::Combine( + ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(transposeParams), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterAdditionalConfig_MLAS()), + ::testing::ValuesIn(filterSpecificParams_MLAS())); + +INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_MLAS, MatMulDecompressConvertTest, testParams3D_MLAS_smoke, + MatMulDecompressConvertTest::getTestCaseName); +#endif + const auto testParams3D_smoke = ::testing::Combine( - ::testing::Values(static_shapes_to_test_representation({{1, 2, 3}, {3, 4}}), - static_shapes_to_test_representation({{2, 3}, {1, 3, 4}})), - ::testing::ValuesIn(transposeParams), - ::testing::ValuesIn(filterAdditionalConfig())); + ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(transposeParams), + ::testing::Values(ElementType::f32, ElementType::f16), + ::testing::ValuesIn(filterAdditionalConfig()), + ::testing::ValuesIn(filterSpecificParams())); INSTANTIATE_TEST_SUITE_P(smoke_FC_3D, MatMulDecompressConvertTest, testParams3D_smoke, - MatMulDecompressConvertTest::getTestCaseName); + MatMulDecompressConvertTest::getTestCaseName); + + +const auto testParams3D_BF16_smoke = ::testing::Combine( + ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(transposeParams), + ::testing::Values(ElementType::f32, ElementType::f16), + ::testing::ValuesIn(filterAdditionalConfig_BF16()), + ::testing::ValuesIn(filterSpecificParams_BF16())); + +INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulDecompressConvertTest, testParams3D_BF16_smoke, + MatMulDecompressConvertTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/unit/ngraph_transformations/convert_matmul_test.cpp b/src/plugins/intel_cpu/tests/unit/ngraph_transformations/convert_matmul_test.cpp index 9574a6c76d27d5..b6cf23a024f1d1 100644 --- a/src/plugins/intel_cpu/tests/unit/ngraph_transformations/convert_matmul_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/ngraph_transformations/convert_matmul_test.cpp @@ -35,10 +35,14 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest1) { } { auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{ 3, 2, 2 }); - auto transpose_constant = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{ 3 }, { 0, 2, 1 }); - auto transpose = std::make_shared(input1, transpose_constant); + auto transpose_constant1 = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{ 3 }, { 0, 2, 1 }); + auto transpose1 = 
std::make_shared(input1, transpose_constant1); + auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 2, 2 }, { 1 }); - auto matmul = std::make_shared(transpose, input2, ngraph::Rank(3)); + auto transpose_constant2 = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{ 2 }, { 1, 0 }); + auto transpose2 = std::make_shared(input2, transpose_constant2); + + auto matmul = std::make_shared(transpose1, transpose2, ngraph::Rank(3)); function_ref = std::make_shared(ngraph::NodeVector{ matmul }, ngraph::ParameterVector{ input1 }); } @@ -311,8 +315,12 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_0) { } { auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{ 3, 2, 2 }); - auto input2 = ngraph::opset1::Constant::create(ngraph::element::f16, ngraph::Shape{2, 2 }, { 1 }); - auto convert = std::make_shared(input2, ngraph::element::f32); + + auto input2 = ngraph::opset1::Constant::create(ngraph::element::f16, ngraph::Shape{ 2, 2 }, { 1 }); + auto transpose_constant = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{ 2 }, { 1, 0 }); + auto transpose = std::make_shared(input2, transpose_constant); + auto convert = std::make_shared(transpose, ngraph::element::f32); + auto matmul = std::make_shared(input1, convert, ngraph::Rank(3)); function_ref = std::make_shared(ngraph::NodeVector{ matmul }, ngraph::ParameterVector{ input1 }); @@ -332,11 +340,15 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_1) { } { auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{ 3, 2, 2 }); - auto transpose_constant = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{ 3 }, { 0, 2, 1 }); - auto transpose = std::make_shared(input1, transpose_constant); - auto input2 = ngraph::opset1::Constant::create(ngraph::element::f16, ngraph::Shape{2, 2 }, { 1 }); - auto convert = std::make_shared(input2, ngraph::element::f32); - auto matmul = std::make_shared(transpose, convert, ngraph::Rank(3)); + auto transpose_constant1 = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{ 3 }, { 0, 2, 1 }); + auto transpose1 = std::make_shared(input1, transpose_constant1); + + auto input2 = ngraph::opset1::Constant::create(ngraph::element::f16, ngraph::Shape{ 2, 2 }, { 1 }); + auto transpose_constant2 = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{ 2 }, { 1, 0 }); + auto transpose2 = std::make_shared(input2, transpose_constant2); + auto convert = std::make_shared(transpose2, ngraph::element::f32); + + auto matmul = std::make_shared(transpose1, convert, ngraph::Rank(3)); function_ref = std::make_shared(ngraph::NodeVector{ matmul }, ngraph::ParameterVector{ input1 }); } diff --git a/src/plugins/intel_cpu/thirdparty/mlas b/src/plugins/intel_cpu/thirdparty/mlas index 1d68240b511432..c215a2c97b47ed 160000 --- a/src/plugins/intel_cpu/thirdparty/mlas +++ b/src/plugins/intel_cpu/thirdparty/mlas @@ -1 +1 @@ -Subproject commit 1d68240b5114326604c3f5af47ac1c098e30b254 +Subproject commit c215a2c97b47ed01be7675f75bc463c820ac6e6d diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp index 82efa923997c9b..106b7696ccec40 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -100,5 +100,7 @@ std::vector 
disabledTestPatterns() { R"(.*smoke_MultiHeteroOVGetMetricPropsTest.*OVGetMetricPropsTest.*(AVAILABLE_DEVICES|OPTIMIZATION_CAPABILITIES|RANGE_FOR_ASYNC_INFER_REQUESTS|RANGE_FOR_STREAMS).*)", // TODO: Issue: 111556 R"(.*SplitConvTest.CompareWithRefImpl.*IS=\(1.(128|256)\).*IC=4.*OC=4.*configItem=GNA_DEVICE_MODE_GNA_SW_FP32)", + // TODO: Issue: 114149 + R"(.*smoke_Decompose2DConv.*)", }; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index 746028e26cd535..9f52d033a687de 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -283,6 +283,7 @@ struct program { bool is_body_program; std::unique_ptr _impls_cache; const size_t _impls_cache_capacity = 10000; + const int _num_async_build_threads = 1; std::unique_ptr _compilation_context; std::map> nodes_map; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/batch_to_space.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/batch_to_space.hpp index 023209d5ebe013..fab49940fbf5f3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/batch_to_space.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/batch_to_space.hpp @@ -61,18 +61,32 @@ struct batch_to_space : public primitive_base { block_shape(block_shape), crops_begin(crops_begin), crops_end(crops_end), - out_size(out_size) {} + out_size(out_size), + shape_constant(1) {} + + batch_to_space(const primitive_id& id, + const std::vector& inputs, + const tensor& out_size, + const padding& output_padding = padding()) + : primitive_base(id, inputs, {output_padding}), + block_shape(tensor()), + crops_begin(tensor()), + crops_end(tensor()), + out_size(out_size), + shape_constant(0) {} tensor block_shape; tensor crops_begin; tensor crops_end; tensor out_size; + int64_t shape_constant; size_t hash() const override { size_t seed = primitive::hash(); seed = hash_combine(seed, block_shape.hash()); seed = hash_combine(seed, crops_begin.hash()); seed = hash_combine(seed, crops_end.hash()); + seed = hash_combine(seed, shape_constant); return seed; } @@ -93,6 +107,7 @@ struct batch_to_space : public primitive_base { ob << crops_begin; ob << crops_end; ob << out_size; + ob << shape_constant; } void load(BinaryInputBuffer& ib) override { @@ -101,6 +116,7 @@ struct batch_to_space : public primitive_base { ib >> crops_begin; ib >> crops_end; ib >> out_size; + ib >> shape_constant; } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/random_uniform.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/random_uniform.hpp index 3bdc74faea71d2..02c86cb80d0af1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/random_uniform.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/random_uniform.hpp @@ -20,8 +20,7 @@ struct random_uniform : public primitive_base { random_uniform() : primitive_base("", {}), global_seed(0), op_seed(0), - output_shape{}, - output_format(format::type::any) {} + output_shape{} {} DECLARE_OBJECT_TYPE_SERIALIZATION @@ -36,20 +35,26 @@ struct random_uniform : public primitive_base { */ random_uniform(const primitive_id &id, const std::vector &inputs, const data_types &data_type, const uint64_t global_seed, - const uint64_t op_seed, const tensor output_shape, - const format output_format, + const uint64_t op_seed, const ov::Shape output_shape, const padding &output_padding = padding()) : primitive_base(id, inputs, {output_padding}, 
{optional_data_type{data_type}}), global_seed(global_seed), op_seed(op_seed), - output_shape(output_shape), - output_format(output_format) {} + output_shape(output_shape) {} + + random_uniform(const primitive_id &id, const std::vector &inputs, + const data_types &data_type, const uint64_t global_seed, + const uint64_t op_seed, const padding &output_padding = padding()) + : primitive_base(id, inputs, {output_padding}, + {optional_data_type{data_type}}), + global_seed(global_seed), + op_seed(op_seed), + output_shape() {} const uint64_t global_seed; const uint64_t op_seed; - const tensor output_shape; - const format output_format; + const ov::Shape output_shape; size_t hash() const override { size_t seed = primitive::hash(); @@ -73,17 +78,13 @@ struct random_uniform : public primitive_base { ob << global_seed; ob << op_seed; ob << output_shape; - ob << make_data(&output_format.value, sizeof(format::type)); } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); ib >> *const_cast(&global_seed); ib >> *const_cast(&op_seed); - ib >> *const_cast(&output_shape); - format::type tmp_type = format::type::any; - ib >> make_data(&tmp_type, sizeof(format::type)); - *const_cast(&output_format) = format(tmp_type); + ib >> *const_cast(&output_shape); } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/space_to_batch.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/space_to_batch.hpp index 14eae7b2e20f7b..d7cf654bab30b7 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/space_to_batch.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/space_to_batch.hpp @@ -58,18 +58,32 @@ struct space_to_batch : public primitive_base { block_shape(block_shape), pads_begin(pads_begin), pads_end(pads_end), - out_size(out_size) {} + out_size(out_size), + shape_constant(1) {} + + space_to_batch(const primitive_id& id, + const std::vector& inputs, + const tensor& out_size, + const padding& output_padding = padding()) + : primitive_base(id, inputs, {output_padding}), + block_shape(tensor()), + pads_begin(tensor()), + pads_end(tensor()), + out_size(out_size), + shape_constant(0) {} tensor block_shape; tensor pads_begin; tensor pads_end; tensor out_size; + int64_t shape_constant; size_t hash() const override { size_t seed = primitive::hash(); seed = hash_combine(seed, block_shape.hash()); seed = hash_combine(seed, pads_begin.hash()); seed = hash_combine(seed, pads_end.hash()); + seed = hash_combine(seed, shape_constant); return seed; } @@ -81,7 +95,8 @@ struct space_to_batch : public primitive_base { return block_shape == rhs_casted.block_shape && pads_begin == rhs_casted.pads_begin && - pads_end == rhs_casted.pads_end; + pads_end == rhs_casted.pads_end && + shape_constant == rhs_casted.shape_constant; } void save(BinaryOutputBuffer& ob) const override { @@ -90,6 +105,7 @@ struct space_to_batch : public primitive_base { ob << pads_begin; ob << pads_end; ob << out_size; + ob << shape_constant; } void load(BinaryInputBuffer& ib) override { @@ -98,6 +114,7 @@ struct space_to_batch : public primitive_base { ib >> pads_begin; ib >> pads_end; ib >> out_size; + ib >> shape_constant; } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index d6365b691382d4..febf41a557f6e9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -94,40 
+94,42 @@ class debug_configuration { public: static const char *prefix; - int help; // Print help messages - int verbose; // Verbose execution - int verbose_color; // Print verbose color - int list_layers; // Print list layers - int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitimive - int disable_usm; // Disable usm usage - int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU) - int disable_onednn_opt_post_ops; // Disable onednn optimize post operators - std::string dump_profiling_data; // Enables dump of extended performance profiling to specified dir - std::string dump_graphs; // Dump optimized graph - std::string dump_sources; // Dump opencl sources - std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path - std::vector dump_layers; // Dump intermediate buffers of specified layers only - std::string dry_run_path; // Dry run and serialize execution graph into the specified path - int dump_layers_dst_only; // Dump only output of layers - int dump_layers_result; // Dump result layers - int dump_layers_input; // Dump input layers - int dump_layers_limit_batch; // Limit the size of batch to dump - int dump_layers_raw; // Dump raw data. - int dump_layers_binary; // Dump binary data. - int dump_runtime_memory_pool; // Dump memory pool status at each iteration - int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation - std::vector after_proc; // Start inference after the listed processes - int serialize_compile; // Serialize creating primitives and compiling kernels - std::vector forced_impl_types; // Force implementation type either ocl or onednn - int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels - int disable_async_compilation; // Disable async compilation - int disable_dynamic_impl; // Disable dynamic implementation - int disable_runtime_buffer_fusing; // Disable runtime buffer fusing - int disable_memory_reuse; // Disable memmory reuse among layers - int disable_build_time_weight_reorder_for_dynamic_nodes; // Disable build time weight reordering for dynamic nodes - int disable_runtime_skip_reorder; // Disable runtime skip reorder - std::set dump_iteration; // Dump n-th execution of network. 
- std::vector load_layers_raw_dump; // List of layers to load dumped raw binary and filenames + int help; // Print help messages + int verbose; // Verbose execution + int verbose_color; // Print verbose color + int list_layers; // Print list layers + int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitimive + int disable_usm; // Disable usm usage + int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU) + int disable_onednn_opt_post_ops; // Disable onednn optimize post operators + std::string dump_profiling_data; // Enables dump of extended performance profiling to specified dir + std::string dump_graphs; // Dump optimized graph + std::string dump_sources; // Dump opencl sources + std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path + std::vector dump_layers; // Dump intermediate buffers of specified layers only + std::string dry_run_path; // Dry run and serialize execution graph into the specified path + int dump_layers_dst_only; // Dump only output of layers + int dump_layers_result; // Dump result layers + int dump_layers_input; // Dump input layers + int dump_layers_limit_batch; // Limit the size of batch to dump + int dump_layers_raw; // Dump raw data. + int dump_layers_binary; // Dump binary data. + int dump_runtime_memory_pool; // Dump memory pool status at each iteration + int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation + std::vector after_proc; // Start inference after the listed processes + int serialize_compile; // Serialize creating primitives and compiling kernels + std::vector forced_impl_types; // Force implementation type either ocl or onednn + int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels + int disable_async_compilation; // Disable async compilation + int disable_winograd_conv; // Disable Winograd conv + int disable_dynamic_impl; // Disable dynamic implementation + int disable_runtime_buffer_fusing; // Disable runtime buffer fusing + int disable_memory_reuse; // Disable memmory reuse among layers + int disable_build_time_weight_reorder_for_dynamic_nodes; // Disable build time weight reordering for dynamic nodes + int disable_runtime_skip_reorder; // Disable runtime skip reorder + int disable_primitive_fusing; // Disable primitive fusing + std::set dump_iteration; // Dump n-th execution of network. 
+ std::vector load_layers_raw_dump; // List of layers to load dumped raw binary and filenames static const debug_configuration *get_instance(); std::vector get_filenames_for_matched_layer_loading_binaries(const std::string& id) const; std::string get_name_for_dump(const std::string& file_name) const; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp index 2e6ceedbee7105..647a10581b4b65 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp @@ -9,8 +9,6 @@ #include "event.hpp" #include "engine_configuration.hpp" -#include "ngraph/runtime/host_tensor.hpp" - #include #ifdef ENABLE_ONEDNN_FOR_GPU @@ -247,16 +245,4 @@ inline std::vector read_vector(cldnn::memory::ptr mem, const cldnn::stream& s return out_vecs; } -inline std::shared_ptr make_host_tensor(layout l, void* memory_pointer) { - ov::element::Type et = data_type_to_element_type(l.data_type); - - return std::make_shared(et, l.get_shape(), memory_pointer); -} - -inline ov::Tensor make_tensor(layout l, void* memory_pointer) { - ov::element::Type et = data_type_to_element_type(l.data_type); - - return ov::Tensor(et, l.get_shape(), memory_pointer); -} - } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor_accessor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor_accessor.hpp new file mode 100644 index 00000000000000..1072c1bdf7fe80 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor_accessor.hpp @@ -0,0 +1,89 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/tensor.hpp" +#include "tensor_data_accessor.hpp" + +#include "memory.hpp" +#include "layout.hpp" + +namespace cldnn { + +inline ov::Tensor make_tensor(const layout& l, void* memory_pointer) { + ov::element::Type et = data_type_to_element_type(l.data_type); + + return ov::Tensor(et, l.get_shape(), memory_pointer); +} + +struct TensorsContainer final { + using MemoryMap = std::unordered_map; + using TensorsMap = std::unordered_map; + + TensorsContainer(const cldnn::stream* stream, const std::map& deps_map = {}) + : m_stream(stream) + , m_memories(deps_map.begin(), deps_map.end()) { } + + ~TensorsContainer() { + for (auto& port : m_locked_memories) { + m_memories.at(port)->unlock(*m_stream); + } + } + + void emplace(size_t port, cldnn::memory::ptr mem) { + m_memories.emplace(port, mem); + } + + void emplace(size_t port, const ov::Tensor& tensor) { + auto res = m_tensors.emplace(port, tensor); + OPENVINO_ASSERT(res.first != m_tensors.end()); + } + + template + void emplace(size_t port, std::vector& vector, data_types dt = data_types::i64) { + ov::Shape shape{vector.size()}; + auto tensor = make_tensor({shape, dt, format::bfyx}, static_cast(vector.data())); + m_tensors.emplace(port, tensor); + } + + size_t size() const { return m_tensors.size(); } + ov::Tensor operator[](std::size_t port) const { + if (m_memories.count(port) > 0) { + m_locked_memories.insert(port); + auto mem = m_memories.at(port); + auto ptr = mem->lock(*m_stream, cldnn::mem_lock_type::read); + return make_tensor(mem->get_layout(), ptr); + } else if (m_tensors.count(port) > 0) { + return m_tensors.at(port); + } else { + OPENVINO_THROW("[GPU] Can't get tensor for ", port, " port!\n"); + } + } + +private: + const cldnn::stream* m_stream; + MemoryMap m_memories; + TensorsMap m_tensors; + + mutable std::set 
m_locked_memories = {}; +}; + +class TensorAccessor final : public ov::ITensorAccessor { +public: + explicit TensorAccessor(const TensorsContainer& container) : m_container(container) { } + + ov::Tensor operator()(size_t port) const override { + return m_container[port]; + } + +private: + const TensorsContainer& m_container; +}; + +inline cldnn::TensorAccessor make_tensor_accessor(const TensorsContainer& c) { + return cldnn::TensorAccessor(c); +} + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/CMakeLists.txt b/src/plugins/intel_gpu/src/graph/CMakeLists.txt index efefc017cc6a0e..1f46bdfa82bb48 100644 --- a/src/plugins/intel_gpu/src/graph/CMakeLists.txt +++ b/src/plugins/intel_gpu/src/graph/CMakeLists.txt @@ -33,10 +33,9 @@ target_include_directories(${TARGET_NAME} PUBLIC target_compile_options(${TARGET_NAME} PRIVATE $<$:$,/Os,-Os>>) -target_link_libraries(${TARGET_NAME} PUBLIC OpenCL::OpenCL) +target_link_libraries(${TARGET_NAME} PUBLIC OpenCL::OpenCL ov_shape_inference) target_link_libraries(${TARGET_NAME} PRIVATE openvino_intel_gpu_kernels openvino_intel_gpu_runtime - ov_shape_inference openvino::itt openvino::runtime::dev openvino::runtime) diff --git a/src/plugins/intel_gpu/src/graph/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/arg_max_min.cpp index 28e3d41cae04d4..47303ece86cd82 100644 --- a/src/plugins/intel_gpu/src/graph/arg_max_min.cpp +++ b/src/plugins/intel_gpu/src/graph/arg_max_min.cpp @@ -87,17 +87,17 @@ std::vector arg_max_min_inst::calc_output_layouts(arg_max_min_node const auto& constant_mem = impl_param.memory_deps; if (desc->top_k > 0) { - std::map const_data; + std::unordered_map const_data; auto topk = desc->top_k; - auto top_k_tensor = std::make_shared(ov::element::u32, ov::Shape{1}, static_cast(&topk)); + auto top_k_tensor = ov::Tensor(ov::element::u32, ov::Shape{1}, static_cast(&topk)); const_data = { {1, top_k_tensor} }; output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); } else if (constant_mem.count(1)) { - std::map const_data; + std::unordered_map const_data; auto target_shape_mem = constant_mem.at(1); cldnn::mem_lock target_shape_lock(target_shape_mem, impl_param.get_stream()); - const_data.emplace(1, make_host_tensor(target_shape_mem->get_layout(), target_shape_lock.data())); + const_data.emplace(1, make_tensor(target_shape_mem->get_layout(), target_shape_lock.data())); output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); } else { diff --git a/src/plugins/intel_gpu/src/graph/batch_to_space.cpp b/src/plugins/intel_gpu/src/graph/batch_to_space.cpp index c6adbd34802467..b09f2bab6b9bbd 100644 --- a/src/plugins/intel_gpu/src/graph/batch_to_space.cpp +++ b/src/plugins/intel_gpu/src/graph/batch_to_space.cpp @@ -10,6 +10,8 @@ #include #include +#include "batch_to_space_shape_inference.hpp" + namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(batch_to_space) @@ -66,6 +68,94 @@ layout batch_to_space_inst::calc_output_layout(batch_to_space_node const& node, return layout{output_type, input_format, desc->out_size}; } +static std::vector tensor_to_vec(const tensor& t, const format f) { + std::vector vec(cldnn::format::dimension(f)); + for (size_t i = 0; i < vec.size(); ++i) { + vec[i] = t.sizes()[i]; + } + std::reverse(vec.begin() + 2, vec.end()); + return vec; +} + +template +std::vector batch_to_space_inst::calc_output_layouts(batch_to_space_node const& /*node*/, const kernel_impl_params& impl_param) { + auto desc = impl_param.typed_desc(); + auto input0_layout = 
impl_param.get_input_layout(0); + auto input0_shape = input0_layout.get(); + auto input0_size = input0_shape.size(); + auto input0_format = input0_layout.format; + + auto& constant_mem = impl_param.memory_deps; + auto block_data = desc->block_shape; + auto begin_data = desc->crops_begin; + auto end_data = desc->crops_end; + + auto output_type = desc->output_data_types[0].value_or(input0_layout.data_type); + if (impl_param.has_fused_primitives()) + output_type = impl_param.get_fused_output_layout().data_type; + + if (desc->shape_constant == 0 && (!constant_mem.count(1) || !constant_mem.count(2) || !constant_mem.count(3))) { + auto out_shape = ov::PartialShape::dynamic(input0_size); + return { layout{out_shape, output_type, input0_format } }; + } + + ShapeType block_shape = desc->shape_constant == 0 ? impl_param.get_input_layout(1).get() : ov::Shape{ input0_size }; + ShapeType begin_shape = desc->shape_constant == 0 ? impl_param.get_input_layout(2).get() : ov::Shape{ input0_size }; + ShapeType end_shape = desc->shape_constant == 0 ? impl_param.get_input_layout(3).get() : ov::Shape{ input0_size }; + + ov::op::v1::BatchToSpace op; + std::vector output_shapes = {ShapeType{}}; + std::vector input_shapes = { + input0_shape, + block_shape, + begin_shape, + end_shape + }; + + std::unordered_map const_data; + if (desc->shape_constant) { + auto block_sizes = tensor_to_vec(block_data, input0_format); + auto begin_sizes = tensor_to_vec(begin_data, input0_format); + auto end_sizes = tensor_to_vec(end_data, input0_format); + + auto block_values = static_cast(block_sizes.data()); + auto begin_values = static_cast(begin_sizes.data()); + auto end_values = static_cast(end_sizes.data()); + + auto block_tensor = make_tensor({ block_shape, data_types::i32, input0_format }, block_values); + auto begin_tensor = make_tensor({ begin_shape, data_types::i32, input0_format }, begin_values); + auto end_tensor = make_tensor({ end_shape, data_types::i32, input0_format }, end_values); + + const_data.emplace(1, block_tensor); + const_data.emplace(2, begin_tensor); + const_data.emplace(3, end_tensor); + + output_shapes = ov::op::v1::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); + } else { + auto block_mem = constant_mem.at(1); + auto begin_mem = constant_mem.at(2); + auto end_mem = constant_mem.at(3); + + cldnn::mem_lock lock1(block_mem, impl_param.get_stream()); + cldnn::mem_lock lock2(begin_mem, impl_param.get_stream()); + cldnn::mem_lock lock3(end_mem, impl_param.get_stream()); + + auto block_tensor = make_tensor(block_mem->get_layout(), lock1.data()); + auto begin_tensor = make_tensor(begin_mem->get_layout(), lock2.data()); + auto end_tensor = make_tensor(end_mem->get_layout(), lock3.data()); + + const_data.emplace(1, block_tensor); + const_data.emplace(2, begin_tensor); + const_data.emplace(3, end_tensor); + + output_shapes = ov::op::v1::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); + } + + return { layout{output_shapes[0], output_type, input0_format} }; +} + +template std::vector batch_to_space_inst::calc_output_layouts(batch_to_space_node const& node, const kernel_impl_params& impl_param); + std::string batch_to_space_inst::to_string(batch_to_space_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/border.cpp b/src/plugins/intel_gpu/src/graph/border.cpp index 2f5eef40093c4d..2e662781b01d4a 100644 --- a/src/plugins/intel_gpu/src/graph/border.cpp +++ 
b/src/plugins/intel_gpu/src/graph/border.cpp @@ -3,6 +3,7 @@ // #include "border_inst.h" +#include "intel_gpu/runtime/tensor_accessor.hpp" #include "pad_shape_inference.hpp" #include "intel_gpu/runtime/error_handler.hpp" @@ -41,85 +42,55 @@ std::vector border_inst::calc_output_layouts(border_node const& /*node*/ output_type = impl_param.get_fused_output_layout().data_type; } + size_t in_rank = input0_layout.get_partial_shape().size(); + ov::op::v1::Pad op; op.set_pad_mode(desc->pad_mode); const bool is_begin_mem = (desc->non_constant_input_mask & border::PAD_NON_CONST_INPUT::BEGIN); const bool is_end_mem = (desc->non_constant_input_mask & border::PAD_NON_CONST_INPUT::END); - layout pads_begin_layout, pads_end_layout; - if (is_begin_mem) { - pads_begin_layout = impl_param.get_input_layout(1); - } - if (is_end_mem) { - pads_end_layout = is_begin_mem ? impl_param.get_input_layout(2) : impl_param.get_input_layout(1); + const size_t begin_mem_idx = is_begin_mem ? 1 : 0; + const size_t end_mem_idx = is_begin_mem ? 2 : 1; + + auto& memory_deps = impl_param.memory_deps; + if ((is_begin_mem && memory_deps.count(begin_mem_idx) == 0) || + (is_end_mem && memory_deps.count(end_mem_idx) == 0)) { + return {layout{ShapeType::dynamic(static_cast(in_rank)), input0_layout.data_type, input0_layout.format}}; } - ShapeType pads_begin_shape = is_begin_mem ? pads_begin_layout.get() : ov::Shape{ desc->pads_begin.size() }; - ShapeType pads_end_shape = is_end_mem ? pads_end_layout.get() : ov::Shape{ desc->pads_end.size() }; - std::vector output_shapes; + int64_t begin_size = desc->pads_begin.size(); + int64_t end_size = desc->pads_end.size(); + + layout pads_begin_layout = is_begin_mem ? impl_param.get_input_layout(begin_mem_idx) : layout({ begin_size }, data_types::i64, format::bfyx); + layout pads_end_layout = is_end_mem ? 
impl_param.get_input_layout(end_mem_idx) : layout({ end_size }, data_types::i64, format::bfyx); + std::vector input_shapes = { input0_layout.get(), - pads_begin_shape, - pads_end_shape, + pads_begin_layout.get(), + pads_end_layout.get(), }; - auto& memory_deps = impl_param.memory_deps; - std::map const_data; - auto ta = ov::make_tensor_accessor(const_data); - - if ((is_begin_mem && memory_deps.count(1)) && (is_end_mem && memory_deps.count(2))) { - auto pads_begin_mem = memory_deps.at(1); - cldnn::mem_lock pads_begin_lock(pads_begin_mem, impl_param.get_stream()); - const_data.emplace(1, make_host_tensor(pads_begin_mem->get_layout(), pads_begin_lock.data())); - - auto pads_end_mem = memory_deps.at(2); - cldnn::mem_lock pads_end_lock(pads_end_mem, impl_param.get_stream()); - const_data.emplace(2, make_host_tensor(pads_end_mem->get_layout(), pads_end_lock.data())); - - output_shapes = ov::op::shape_infer(&op, input_shapes, ta); - } else if ((is_begin_mem || is_end_mem) && memory_deps.count(1)) { - if (is_begin_mem) { - auto pads_begin_mem = memory_deps.at(1); - cldnn::mem_lock pads_begin_lock(pads_begin_mem, impl_param.get_stream()); - const_data.emplace(1, make_host_tensor(pads_begin_mem->get_layout(), pads_begin_lock.data())); - - auto pads_end_data = desc->pads_end; - auto pads_end_tensor = make_host_tensor({pads_end_shape, data_types::i64, format::bfyx}, static_cast(pads_end_data.data())); - const_data.emplace(2, pads_end_tensor); - - output_shapes = ov::op::shape_infer(&op, input_shapes, ta); - } else { - auto pads_begin_data = desc->pads_begin; - auto pads_begin_tensor = make_host_tensor({pads_begin_shape, data_types::i64, format::bfyx}, static_cast(pads_begin_data.data())); - const_data.emplace(1, pads_begin_tensor); - - auto pads_end_mem = memory_deps.at(1); - cldnn::mem_lock pads_end_lock(pads_end_mem, impl_param.get_stream()); - const_data.emplace(2, make_host_tensor(pads_end_mem->get_layout(), pads_end_lock.data())); - - output_shapes = ov::op::shape_infer(&op, input_shapes, ta); - } - } else { - std::ptrdiff_t val = desc->pad_value; + TensorsContainer const_data(&impl_param.get_stream()); - auto pads_begin_data = desc->pads_begin; - if (is_begin_mem && desc->pad_mode == ov::op::PadMode::CONSTANT) { - pads_begin_data = {val, val, val, val}; - } - auto pads_begin_tensor = make_host_tensor({pads_begin_shape, data_types::i64, format::bfyx}, static_cast(pads_begin_data.data())); - const_data.emplace(1, pads_begin_tensor); + auto pads_begin_data = desc->pads_begin; + auto pads_end_data = desc->pads_end; - auto pads_end_data = desc->pads_end; - if (is_end_mem && desc->pad_mode == ov::op::PadMode::CONSTANT) { - pads_end_data = {val, val, val, val}; - } - auto pads_end_tensor = make_host_tensor({pads_end_shape, data_types::i64, format::bfyx}, static_cast(pads_end_data.data())); - const_data.emplace(2, pads_end_tensor); + if (is_begin_mem) { + const_data.emplace(1, memory_deps.at(begin_mem_idx)); + } else { + const_data.emplace(1, make_tensor(pads_begin_layout, static_cast(pads_begin_data.data()))); + } - output_shapes = ov::op::shape_infer(&op, input_shapes, ta); + if (is_end_mem) { + const_data.emplace(2, memory_deps.at(end_mem_idx)); + } else { + const_data.emplace(2, make_tensor(pads_end_layout, static_cast(pads_end_data.data()))); } + auto ta = cldnn::make_tensor_accessor(const_data); + std::vector output_shapes = ov::op::shape_infer(&op, input_shapes, ta); + format output_format = format::adjust_to_rank(input0_layout.format, output_shapes[0].size()); return { layout{output_shapes[0], 
output_type, output_format} }; diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp index bc4eaab28a9816..fdce54766b26dd 100644 --- a/src/plugins/intel_gpu/src/graph/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp @@ -61,11 +61,11 @@ std::vector broadcast_inst::calc_output_layouts(broadcast_node const& /* auto axes_mapping = desc->axes_mapping.to_vector(); ShapeType axes_mapping_shape = ov::Shape{axes_mapping.size()}; - std::map const_data; + std::unordered_map const_data; if (third_input_needed) { input_shapes.emplace_back(axes_mapping_shape); - auto axes_mapping_tensor = make_host_tensor({axes_mapping_shape, data_types::i64, format::bfyx}, + auto axes_mapping_tensor = make_tensor({axes_mapping_shape, data_types::i64, format::bfyx}, static_cast(axes_mapping.data())); const_data.emplace(2, axes_mapping_tensor); } @@ -74,12 +74,11 @@ std::vector broadcast_inst::calc_output_layouts(broadcast_node const& /* if (constant_mem.count(1)) { auto target_shape_mem = constant_mem.at(1); cldnn::mem_lock target_shape_lock(target_shape_mem, impl_param.get_stream()); - const_data.emplace(1, make_host_tensor(target_shape_mem->get_layout(), target_shape_lock.data())); + const_data.emplace(1, make_tensor(target_shape_mem->get_layout(), target_shape_lock.data())); output_shapes = ov::op::v3::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); } else if (impl_param.input_layouts.size() == 1) { // predefined pattern shape - auto target_shape_tensor = make_host_tensor({pattern_shape, data_types::i64, format::bfyx}, - static_cast(target_shape.data())); + auto target_shape_tensor = make_tensor({pattern_shape, data_types::i64, format::bfyx}, static_cast(target_shape.data())); const_data.emplace(1, target_shape_tensor); output_shapes = ov::op::v3::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); } else if (impl_param.input_layouts.size() >= 2) { diff --git a/src/plugins/intel_gpu/src/graph/compilation_context.cpp b/src/plugins/intel_gpu/src/graph/compilation_context.cpp index 128cd927a796b1..c1f483200c9a38 100644 --- a/src/plugins/intel_gpu/src/graph/compilation_context.cpp +++ b/src/plugins/intel_gpu/src/graph/compilation_context.cpp @@ -2,18 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // - #include "compilation_context.hpp" #include #include #include +#include #include "intel_gpu/runtime/utils.hpp" namespace cldnn { class CompilationContext : public ICompilationContext { public: CompilationContext(ov::threading::IStreamsExecutor::Config task_executor_config) : _task_executor_config(task_executor_config) { - _task_executor_config._streams = 4; _task_executor = std::make_shared(_task_executor_config); } @@ -21,11 +20,19 @@ class CompilationContext : public ICompilationContext { if (_stop_compilation) return; + auto promise = std::make_shared>(); + std::lock_guard lock(_mutex); + futures.emplace_back(promise->get_future()); + if (_task_keys.find(key) == _task_keys.end()) { _task_keys.insert(key); - if (_task_executor != nullptr) - _task_executor->run(task); + if (_task_executor != nullptr) { + _task_executor->run([task, promise] { + task(); + promise->set_value(); + }); + } } } @@ -61,12 +68,19 @@ class CompilationContext : public ICompilationContext { } } + void wait_all() override { + for (auto&& future : futures) { + future.wait(); + } + } + private: ov::threading::IStreamsExecutor::Config _task_executor_config; std::shared_ptr _task_executor; std::mutex _mutex; std::unordered_set _task_keys; 
std::atomic_bool _stop_compilation{false}; + std::vector> futures; }; std::unique_ptr ICompilationContext::create(ov::threading::IStreamsExecutor::Config task_executor_config) { diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index d3ea387f02c1c6..f103738baeb864 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -56,27 +56,32 @@ std::vector crop_inst::calc_output_layouts(const crop_node& /*node*/, co // TODO: calling shape_infer for all cropped outpus is redundant... Need to optimize. if (desc->op_mode == cldnn::crop_ngraph_op_mode::variadic_split) { - std::map const_data; + std::unordered_map const_data; OPENVINO_ASSERT(impl_param.memory_deps.count(1) > 0, "[GPU] Can't find Crop(ngraph VariadicSplit op mode) axis values memory dependency"); auto axis_values_mem = impl_param.memory_deps.at(1); cldnn::mem_lock axis_values_mem_lock(axis_values_mem, impl_param.get_stream()); - const_data.emplace(1, make_host_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data())); - - OPENVINO_ASSERT(impl_param.memory_deps.count(2) > 0, "[GPU] Can't find Crop(ngraph VariadicSplit op mode) split length values memory dependency"); - auto split_length_mem = impl_param.memory_deps.at(2); - cldnn::mem_lock split_length_mem_lock(split_length_mem, impl_param.get_stream()); - const_data.emplace(2, make_host_tensor(split_length_mem->get_layout(), split_length_mem_lock.data())); - - ov::op::v1::VariadicSplit op; - output_shapes = shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); + const_data.emplace(1, make_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data())); + + if (impl_param.memory_deps.count(2) > 0) { + auto split_length_mem = impl_param.memory_deps.at(2); + cldnn::mem_lock split_length_mem_lock(split_length_mem, impl_param.get_stream()); + const_data.emplace(2, make_tensor(split_length_mem->get_layout(), split_length_mem_lock.data())); + + ov::op::v1::VariadicSplit op; + output_shapes = shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); + } else { + auto input0_layout = impl_param.get_input_layout(0); + auto out_shape = ov::PartialShape::dynamic(input0_layout.get_partial_shape().size()); + return { layout{out_shape, input0_layout.data_type, input0_layout.format } }; + } } else if (desc->op_mode == cldnn::crop_ngraph_op_mode::split) { - std::map const_data; + std::unordered_map const_data; OPENVINO_ASSERT(impl_param.memory_deps.count(1) > 0, "[GPU] Can't find Crop(ngraph Split op mode) axis values memory dependency"); auto axis_values_mem = impl_param.memory_deps.at(1); cldnn::mem_lock axis_values_mem_lock(axis_values_mem, impl_param.get_stream()); - const_data.emplace(1, make_host_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data())); + const_data.emplace(1, make_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data())); ov::op::v1::Split op; op.set_num_splits(desc->num_splits); diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index daa3416ad3235d..985ac85131dd05 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -88,8 +88,8 @@ std::vector gather_inst::calc_output_layouts(gather_node const& /*node*/ int64_t axis = desc->axis; - auto axis_tensor = std::make_shared(ov::element::i64, ov::Shape{1}, static_cast(&axis)); - std::map> const_data = {{2, axis_tensor}}; + auto axis_tensor = ov::Tensor(ov::element::i64, 
ov::Shape{1}, static_cast(&axis)); + std::unordered_map const_data = {{2, axis_tensor}}; output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); format output_format = format::adjust_to_rank(input0_layout.format, output_shapes[0].size()); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 57e973ecc4dc64..a11964c38787ae 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -9,6 +9,7 @@ #include "convert_color_inst.h" #include "fully_connected_inst.h" #include "assign_inst.h" +#include "mvn_inst.h" #include "tensor_type.h" #include @@ -26,11 +27,14 @@ If not than required reorder is added to the network. /* Add a reorder in between node and usr */ -void add_required_reorders::add_reorder(program& p, program_node* node, program_node* usr) { +void add_required_reorders::add_reorder(program& p, program_node* node, program_node* usr, bool keep_original_dt) { layout reorder_layout = node->get_output_layout(); reorder_layout.format = usr->get_output_layout().format; reorder_layout.data_type = usr->get_output_layout().data_type; + if (keep_original_dt) + reorder_layout.data_type = node->get_output_layout().data_type; + auto new_reorder = std::make_shared(node->id() + "_reorder_" + usr->id(), node->id(), reorder_layout); auto& new_reorder_node = p.get_or_create(new_reorder); new_reorder_node.set_output_layout(reorder_layout, false); @@ -75,14 +79,15 @@ void add_required_reorders::run(program& p) { } } - if (usr->is_type() && usr->is_in_shape_of_subgraph()) { + if (usr->is_type()) { for (size_t i = 0; i < usr->get_dependencies().size(); i++) { auto& dep = usr->get_dependency(i); if (!dep.is_in_data_flow() || dep.is_constant()) continue; auto dep_layout = dep.get_output_layout(); auto out_layout = usr->get_output_layout(); - bool required_reorder = out_layout.data_type != dep_layout.data_type; + bool required_reorder = (format::dimension(out_layout.format) != format::dimension(dep_layout.format)) || + (usr->is_in_shape_of_subgraph() && (out_layout.data_type != dep_layout.data_type)); if (required_reorder) { auto new_reorder = std::make_shared(dep.id() + "_reorder_" + usr->id(), dep.id(), out_layout.format, out_layout.data_type); auto& new_reorder_node = p.get_or_create(new_reorder); @@ -130,6 +135,22 @@ void add_required_reorders::run(program& p) { } } + // Remove padded-inputs in spatial axes not to use ref kernel which causes huge perf drop + if (usr->is_type() && usr->as().input().is_padded_spatial()) { + auto out_layout = usr->get_output_layout(); + // Check formats of implemented opt kernels without a spatial padding support + if (out_layout.format == format::b_fs_yx_fsv16 || out_layout.format == format::b_fs_zyx_fsv16 || + out_layout.format == format::bs_fs_yx_bsv32_fsv16 || out_layout.format == format::bs_fs_yx_bsv32_fsv32) { + auto& dep = usr->as().input(); + cldnn::layout layout_wo_padding = dep.get_output_layout(); + layout_wo_padding.data_padding = cldnn::padding{}; + auto new_reorder = std::make_shared(dep.id() + "_no_pad_reorder", dep.id(), layout_wo_padding); + auto& new_reorder_node = p.get_or_create(new_reorder); + p.add_intermediate(new_reorder_node, *usr, dep); + new_reorder_node.recalc_output_layout(false); + } + } + if (usr->type()->does_an_implementation_exist(*usr)) { if (usr->get_preferred_impl_type() != 
impl_types::onednn) { continue; @@ -358,8 +379,16 @@ void add_required_reorders::run(program& p) { continue; } - if (usr->get_output_layout() != node.first->get_output_layout()) - add_reorder(p, node.first, usr); + if (usr->get_output_layout() != node.first->get_output_layout()) { + // Preserve original data type to prevent Convolution input data type from changing + // in the following sequence: Node(U8, unsupported format) -> Conv(FP16, bfyx). + // Without this condition, inserted reorder will change Conv's input to FP16, instead of + // expected U8 format. + bool keep_original_dt = false; + if (usr->is_type()) + keep_original_dt = true; + add_reorder(p, node.first, usr, keep_original_dt); + } } } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index 3790b299a25cdb..5aade76ae00035 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -44,7 +44,7 @@ void post_optimize_weights::optimize_weights(T& node, program& p) { } // TODO: To relax current limitation w.r.t the future optimization of weight reorder process // In dynamic shape, selected weight format can change in runtime. However reordering blocked format to blocked format is not fully verified yet. - // So we need to enable other primiives such as convolution with verifying reorder b/w the possible layouts + // So we need to enable other primitives such as convolution with verifying reorder b/w the possible layouts // Also we skip weight reorder for onednn impl because onednn fully connected layer is using simple format, therefore // reordering to cldnn shape_agnostic_kernel's preferred blocked format at build time does not helpful for the performance. // This situation might be changed once onednn shape agnostic kernel is used in the future. 
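The calc_output_layouts changes above all converge on the same pattern: constant inputs are no longer handed to shape inference as a std::map of HostTensor pointers but are looked up per port through ov::make_tensor_accessor over an unordered_map of ov::Tensor (or through cldnn::TensorsContainer). The fragment below is a minimal standalone sketch of that lookup idea only; it deliberately uses plain std types as stand-ins for ov::Tensor and ov::ITensorAccessor, so every name in it is illustrative rather than plugin API.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

using PortTensor   = std::vector<int64_t>;                  // stand-in for ov::Tensor
using ConstDataMap = std::unordered_map<size_t, PortTensor>;

// Stand-in for a tensor accessor: returns the constant bound to a port,
// or nullptr when the value is not known at shape-inference time.
struct MapTensorAccessor {
    const ConstDataMap& data;
    const PortTensor* operator()(size_t port) const {
        auto it = data.find(port);
        return it == data.end() ? nullptr : &it->second;
    }
};

// Toy "shape_infer": needs the pads from ports 1 and 2 to produce a static
// output shape, otherwise reports "unknown" so the caller can stay dynamic.
std::vector<int64_t> infer_padded_shape(const std::vector<int64_t>& in_shape,
                                        const MapTensorAccessor& ta) {
    const PortTensor* begin = ta(1);
    const PortTensor* end   = ta(2);
    if (!begin || !end)
        return {};
    std::vector<int64_t> out(in_shape);
    for (size_t i = 0; i < out.size(); ++i)
        out[i] += (*begin)[i] + (*end)[i];
    return out;
}

int main() {
    ConstDataMap const_data;
    const_data.emplace(1, PortTensor{0, 0, 1, 1});
    const_data.emplace(2, PortTensor{0, 0, 1, 1});
    for (auto d : infer_padded_shape({1, 3, 32, 32}, MapTensorAccessor{const_data}))
        std::cout << d << ' ';                              // prints: 1 3 34 34
    std::cout << '\n';
}

The value of the indirection is that shape inference pulls only the constant data it actually needs, and a port whose value is not available simply comes back empty, which is what lets the callers above return a dynamic output layout instead of failing.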
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index fe9296db3cc95a..5cd3c68ff72ad6 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -117,7 +117,7 @@ bool concat_in_place_optimization::match(const program_node& concat_node, // if an input is marked as network output, prevent optimizations // which would affect a form of its output (unless debug flag is set), // we also need to restrict input types to those which support padding on all axis - if (pred.first->is_dynamic() && is_runtime) { + if (!pred.first->is_dynamic() || is_runtime) { if (!pred.first->is_padding_supported(concat_axis, lower_padd_in_axis)) return false; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp index 7bc9953859da2b..499c8eee49fde5 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp @@ -155,7 +155,10 @@ void prepare_padding::run(program& p) { continue; auto conv = node.get_primitive(); - if (node.is_dynamic()) continue; + + if (node.is_dynamic() && !node.use_explicit_padding()) + continue; + auto& conv_input_node = node.get_dependency(0); auto conv_layout = node.get_output_layout(); @@ -197,7 +200,8 @@ void prepare_padding::run(program& p) { layout filter_layout = filter_node.get_output_layout().convert_to_weights_layout(conv->grouped_weights_shape); // Compute initial required paddings for primitive used as input for convolution. - auto pad = conv->padding_begin; + auto padding_begin = conv->padding_begin; + auto padding_end = conv->padding_end; auto stride = conv->stride; auto dilation = conv->dilation; uint32_t stride_z = stride.size() >= 3 ? static_cast(stride[stride.size() - 3]) : 1; @@ -208,23 +212,40 @@ void prepare_padding::run(program& p) { uint32_t dilation_y = dilation.size() >= 2 ? static_cast(dilation[dilation.size() - 2]) : 1; uint32_t dilation_x = dilation.size() >= 1 ? static_cast(dilation[dilation.size() - 1]) : 1; - tensor::value_type pad_z = pad.size() >= 3 ? pad[pad.size() - 3] : 0; - tensor::value_type pad_y = pad.size() >= 2 ? pad[pad.size() - 2] : 0; - tensor::value_type pad_x = pad.size() >= 1 ? pad[pad.size() - 1] : 0; - - auto input_limit_x = -pad_x + (conv_layout.spatial(0) - 1) * stride_x + - (filter_layout.spatial(0) - 1) * dilation_x + 1; - auto input_limit_y = -pad_y + (conv_layout.spatial(1) - 1) * stride_y + - (filter_layout.spatial(1) - 1) * dilation_y + 1; - auto input_limit_z = -pad_z + (conv_layout.spatial(2) - 1) * stride_z + - (filter_layout.spatial(2) - 1) * dilation_z + 1; - - auto padding_begin_x = std::max(pad_x, 0); - auto padding_begin_y = std::max(pad_y, 0); - auto padding_begin_z = std::max(pad_z, 0); - auto padding_end_x = std::max(input_limit_x - prev_prim_output_layout.spatial(0), 0); - auto padding_end_y = std::max(input_limit_y - prev_prim_output_layout.spatial(1), 0); - auto padding_end_z = std::max(input_limit_z - prev_prim_output_layout.spatial(2), 0); + tensor::value_type pad_z = padding_begin.size() >= 3 ? padding_begin[padding_begin.size() - 3] : 0; + tensor::value_type pad_y = padding_begin.size() >= 2 ? padding_begin[padding_begin.size() - 2] : 0; + tensor::value_type pad_x = padding_begin.size() >= 1 ? 
padding_begin[padding_begin.size() - 1] : 0; + + tensor::value_type padding_begin_x, padding_begin_y, padding_begin_z; + tensor::value_type padding_end_x, padding_end_y, padding_end_z; + + if (node.is_dynamic() && node.use_explicit_padding()) { + padding_begin_x = std::max(pad_x, 0); + padding_begin_y = std::max(pad_y, 0); + padding_begin_z = std::max(pad_z, 0); + + pad_z = padding_end.size() >= 3 ? padding_end[padding_end.size() - 3] : 0; + pad_y = padding_end.size() >= 2 ? padding_end[padding_end.size() - 2] : 0; + pad_x = padding_end.size() >= 1 ? padding_end[padding_end.size() - 1] : 0; + + padding_end_x = std::max(pad_x, 0); + padding_end_y = std::max(pad_y, 0); + padding_end_z = std::max(pad_z, 0); + } else { + auto input_limit_x = -pad_x + (conv_layout.spatial(0) - 1) * stride_x + + (filter_layout.spatial(0) - 1) * dilation_x + 1; + auto input_limit_y = -pad_y + (conv_layout.spatial(1) - 1) * stride_y + + (filter_layout.spatial(1) - 1) * dilation_y + 1; + auto input_limit_z = -pad_z + (conv_layout.spatial(2) - 1) * stride_z + + (filter_layout.spatial(2) - 1) * dilation_z + 1; + + padding_begin_x = std::max(pad_x, 0); + padding_begin_y = std::max(pad_y, 0); + padding_begin_z = std::max(pad_z, 0); + padding_end_x = std::max(input_limit_x - prev_prim_output_layout.spatial(0), 0); + padding_end_y = std::max(input_limit_y - prev_prim_output_layout.spatial(1), 0); + padding_end_z = std::max(input_limit_z - prev_prim_output_layout.spatial(2), 0); + } // Adjust right padding, so entire buffer size in X dimension is properly aligned. // TODO: NOTE: Will be reenabled with next check-in once heuristic for line-aligned algorithm will be added. diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp index a482925dfd8bec..e4318334b17060 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp @@ -46,8 +46,8 @@ struct range_impl : public typed_primitive_impl { auto ev = stream.create_user_event(false); auto params = instance.get_impl_params(); - ov::HostTensorVector input_host_tensors; - ov::HostTensorVector output_host_tensors; + ov::TensorVector input_host_tensors; + ov::TensorVector output_host_tensors; std::vector input_mem_ptrs; for (size_t i = 0; i < instance.dependencies().size(); i++) @@ -58,9 +58,9 @@ struct range_impl : public typed_primitive_impl { cldnn::mem_lock output_lock(output_mem_ptr, stream); for (size_t i = 0; i < input_mem_ptrs.size(); i++) - input_host_tensors.push_back(make_host_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); + input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); - output_host_tensors.push_back(make_host_tensor(params->output_layouts[0], output_lock.data())); + output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data())); if (!op) { const auto output_dt = params->get_output_layout().data_type; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp index 3960b73e797465..629930b85d4390 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp @@ -26,9 +26,34 @@ struct batch_to_space_impl : typed_primitive_impl_ocl { auto params = get_default_params(impl_param); auto optional_params = 
get_default_optional_params(impl_param.get_program()); - params.block_shape = convert_dim_vector(primitive->block_shape); - params.crops_begin = convert_dim_vector(primitive->crops_begin); - params.crops_end = convert_dim_vector(primitive->crops_end); + if (primitive->shape_constant) { + params.block_type = kernel_selector::base_params::ArgType::Constant; + params.block_shape = convert_dim_vector(primitive->block_shape); + + params.begin_type = kernel_selector::base_params::ArgType::Constant; + params.crops_begin = convert_dim_vector(primitive->crops_begin); + + params.end_type = kernel_selector::base_params::ArgType::Constant; + params.crops_end = convert_dim_vector(primitive->crops_end); + } else { + params.block_input_index = 1; + params.block_type = kernel_selector::base_params::ArgType::Input; + auto block_layout = impl_param.get_input_layout(params.block_input_index); + params.inputs.push_back(convert_data_tensor(block_layout)); + params.block_dims = block_layout.count(); + + params.begin_input_index = 2; + params.begin_type = kernel_selector::base_params::ArgType::Input; + auto begin_layout = impl_param.get_input_layout(params.begin_input_index); + params.inputs.push_back(convert_data_tensor(begin_layout)); + params.begin_dims = begin_layout.count(); + + params.end_input_index = 3; + params.end_type = kernel_selector::base_params::ArgType::Input; + auto end_layout = impl_param.get_input_layout(params.end_input_index); + params.inputs.push_back(convert_data_tensor(end_layout)); + params.end_dims = end_layout.count(); + } return {params, optional_params}; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index 16072a5e53ee2c..185071ac889669 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -85,6 +85,7 @@ struct convolution_impl : typed_primitive_impl_ocl { ov::CoordinateDiff pads_begin(primitive->padding_begin.begin(), primitive->padding_begin.end()); ov::CoordinateDiff pads_end(primitive->padding_end.begin(), primitive->padding_end.end()); const auto auto_pad = primitive->auto_pad; + conv_params.has_explicit_paddings = primitive->auto_pad == ov::op::PadType::EXPLICIT; if (auto_pad == ov::op::PadType::SAME_UPPER || auto_pad == ov::op::PadType::SAME_LOWER) { pads_begin.clear(); pads_end.clear(); @@ -110,10 +111,15 @@ struct convolution_impl : typed_primitive_impl_ocl { uint32_t kz = weights_layout.spatial(2); conv_params.filterSize = { kx, ky, kz }; - uint32_t pad_z = std::max(pads_begin.size() >= 3 ? pads_begin[pads_begin.size() - 3] : 0, 0); - uint32_t pad_y = std::max(pads_begin.size() >= 2 ? pads_begin[pads_begin.size() - 2] : 0, 0); - uint32_t pad_x = std::max(pads_begin.size() >= 1 ? pads_begin[pads_begin.size() - 1] : 0, 0); - conv_params.padding = {pad_x, pad_y, pad_z}; + uint32_t pad_begin_z = std::max(pads_begin.size() >= 3 ? pads_begin[pads_begin.size() - 3] : 0, 0); + uint32_t pad_begin_y = std::max(pads_begin.size() >= 2 ? pads_begin[pads_begin.size() - 2] : 0, 0); + uint32_t pad_begin_x = std::max(pads_begin.size() >= 1 ? pads_begin[pads_begin.size() - 1] : 0, 0); + conv_params.padding_begin = {pad_begin_x, pad_begin_y, pad_begin_z}; + + uint32_t pad_end_z = std::max(pads_end.size() >= 3 ? pads_end[pads_end.size() - 3] : 0, 0); + uint32_t pad_end_y = std::max(pads_end.size() >= 2 ? pads_end[pads_end.size() - 2] : 0, 0); + uint32_t pad_end_x = std::max(pads_end.size() >= 1 ? 
pads_end[pads_end.size() - 1] : 0, 0); + conv_params.padding_end = {pad_end_x, pad_end_y, pad_end_z}; uint32_t stride_z = stride.size() >= 3 ? static_cast(stride[stride.size() - 3]) : 1; uint32_t stride_y = stride.size() >= 2 ? static_cast(stride[stride.size() - 2]) : 1; @@ -183,7 +189,7 @@ struct convolution_impl : typed_primitive_impl_ocl { } } conv_params.filterSize = { ky, kx, kz }; - conv_params.padding = {pad_y, pad_x, pad_z}; + conv_params.padding_begin = {pad_begin_y, pad_begin_x, pad_begin_z}; conv_params.stride = {stride_y, stride_x, stride_z}; conv_params.dilation = {dilation_y, dilation_x, dilation_z}; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp index a3c57be1b83fc4..9184701b8e70a5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp @@ -100,7 +100,7 @@ struct deformable_interp_impl : typed_primitive_impl_ocl { uint32_t pad_y = std::max(pad.size() >= 2 ? pad[pad.size() - 2] : 0, 0); uint32_t pad_x = std::max(pad.size() >= 1 ? pad[pad.size() - 1] : 0, 0); - params.padding = {pad_x, pad_y, pad_z}; + params.padding_begin = {pad_x, pad_y, pad_z}; uint32_t stride_z = stride.size() >= 3 ? static_cast(stride[stride.size() - 3]) : 1; uint32_t stride_y = stride.size() >= 2 ? static_cast(stride[stride.size() - 2]) : 1; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index b2ab89d3b11ae4..ca60e4a16a369a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -572,6 +572,8 @@ kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped) { return kernel_selector::weights_layout::gs_oizyx_gsv32; case format::gyxio: return kernel_selector::weights_layout::gyxio; + case format::gi_yxs_os_yxsv2_osv16: + return kernel_selector::weights_layout::gi_yxs_os_yxsv2_osv16; case format::g_is_os_zyx_isv16_osv16: return kernel_selector::weights_layout::g_is_os_zyx_isv16_osv16; case format::g_is_os_yx_isv16_osv16: diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp index 688bdcdb8ceda7..141116e04beef3 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp @@ -22,17 +22,47 @@ struct space_to_batch_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param); auto optional_params = get_default_optional_params(impl_param.get_program()); - params.block_shape = convert_dim_vector(primitive->block_shape); - params.pads_begin = convert_dim_vector(primitive->pads_begin); - params.pads_end = convert_dim_vector(primitive->pads_end); + if (primitive->shape_constant) { + params.block_type = kernel_selector::base_params::ArgType::Constant; + params.block_shape = convert_dim_vector(primitive->block_shape); + + params.begin_type = kernel_selector::base_params::ArgType::Constant; + params.pads_begin = 
convert_dim_vector(primitive->pads_begin); + + params.end_type = kernel_selector::base_params::ArgType::Constant; + params.pads_end = convert_dim_vector(primitive->pads_end); + } else { + params.block_input_index = 1; + params.block_type = kernel_selector::base_params::ArgType::Input; + auto block_layout = impl_param.get_input_layout(params.block_input_index); + params.inputs.push_back(convert_data_tensor(block_layout)); + params.block_dims = block_layout.count(); + + params.begin_input_index = 2; + params.begin_type = kernel_selector::base_params::ArgType::Input; + auto begin_layout = impl_param.get_input_layout(params.begin_input_index); + params.inputs.push_back(convert_data_tensor(begin_layout)); + params.begin_dims = begin_layout.count(); + + params.end_input_index = 3; + params.end_type = kernel_selector::base_params::ArgType::Input; + auto end_layout = impl_param.get_input_layout(params.end_input_index); + params.inputs.push_back(convert_data_tensor(end_layout)); + params.end_dims = end_layout.count(); + } return {params, optional_params}; } + + void update_dispatch_data(const kernel_impl_params& impl_param) override { + auto kernel_params = get_kernel_params(impl_param, true); + (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); + } }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h b/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h index 6ea68a6607a490..dc37e7a1f4e09e 100644 --- a/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h @@ -10,6 +10,16 @@ namespace cldnn { +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } + std::vector get_shape_infer_dependencies() const override { return {1, 2, 3}; } +}; using batch_to_space_node = typed_program_node; template <> @@ -18,8 +28,20 @@ class typed_primitive_inst : public typed_primitive_inst_base + static std::vector calc_output_layouts(batch_to_space_node const& /*node*/, const kernel_impl_params& impl_param); static layout calc_output_layout(batch_to_space_node const& node, kernel_impl_params const& impl_param); static std::string to_string(batch_to_space_node const& node); + + bool need_reset_output_memory() const override { + const auto desc = _impl_params->typed_desc(); + if (desc->shape_constant) { + return true; + } + + return false; + } + typed_primitive_inst(network& network, batch_to_space_node const& desc); }; diff --git a/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h index 4ec2f8dcf32da8..1a910bcf81e499 100644 --- a/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h @@ -42,12 +42,12 @@ class typed_primitive_inst : public typed_primitive_inst_bas static std::string to_string(binary_convolution_node const& node); typed_primitive_inst(network& network, binary_convolution_node const& node); - bool need_reset_input_memory() const override { + bool need_reset_input_memory(size_t idx = 0) const override { + if (idx != 0) + return false; + auto input_layout = _deps[0].first->_impl_params->get_output_layout(0); - if (input_layout.data_padding) { - return true; - } - return false; + return input_layout.data_padding ? 
true : false; } bool need_reset_output_memory() const override { diff --git a/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp b/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp index e5c157cabdef8c..be8d65c6aa5ecc 100644 --- a/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp +++ b/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp @@ -19,6 +19,7 @@ class ICompilationContext { virtual ~ICompilationContext() = default; virtual bool is_stopped() = 0; virtual void cancel() = 0; + virtual void wait_all() = 0; static std::unique_ptr create(ov::threading::IStreamsExecutor::Config task_executor_config); }; diff --git a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h index 6266dae364496e..7033df0905ac4c 100644 --- a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h @@ -128,12 +128,12 @@ class typed_primitive_inst : public typed_primitive_inst_base_impl_params->get_output_layout(0); - if (input_layout.data_padding) { - return true; - } - return false; + return input_layout.data_padding ? true : false; } bool need_reset_output_memory() const override { diff --git a/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h b/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h index a35c43a58f38d0..195c00d22c3b67 100644 --- a/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h @@ -60,12 +60,12 @@ class typed_primitive_inst : public typed_primitive_inst_base_impl_params->get_output_layout(0); - if (input_layout.data_padding) { - return true; - } - return false; + return input_layout.data_padding ? 
true : false; } bool need_reset_output_memory() const override { diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index 2ea466764d5131..de9ec170096d22 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -57,7 +57,7 @@ class add_required_reorders : public base_pass { private: void run(program& p) override; - void add_reorder(program& p, program_node* node, program_node* usr); + void add_reorder(program& p, program_node* node, program_node* usr, bool keep_original_dt = false); }; class add_reshape_to_primitives : public base_pass { diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 8148ebb9343336..3244ad20b49869 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -8,6 +8,7 @@ #include "intel_gpu/runtime/event.hpp" #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/lru_cache.hpp" +#include "intel_gpu/runtime/tensor_accessor.hpp" #include "intel_gpu/graph/network.hpp" #include "intel_gpu/runtime/utils.hpp" #include "program_node.h" @@ -351,6 +352,7 @@ class primitive_inst { virtual void update_shape(); virtual event::ptr update_weights(); + bool use_async_compilation(); // if primitive_inst doesn't replace impl to new impl(static impl with opt kerenl or dynamic impl), return false bool update_impl(); event::ptr realloc_if_needed(); @@ -387,17 +389,26 @@ class primitive_inst { return { layout(in_layout.get(), output_type, in_layout.format) }; } - virtual bool need_reset_input_memory() const { + virtual bool need_reset_input_memory(size_t) const { return false; } virtual bool need_reset_output_memory() const { - std::vector users; + std::vector> users; for (auto u : _node->get_users()) - users.push_back(u->id()); - - for (const auto& u : _network.get_primitives(users)) { - if (u->need_reset_input_memory()) + users.emplace_back(u->id(), u->get_dependency_index(*_node)); + + for (const auto& u : users) { + auto user_inst = _network.get_primitive(u.first); + // Check users of optimized_out inst, as the optimized out inst will not be able to + // reset it's memory + if (user_inst->can_be_optimized()) { + if (user_inst->need_reset_output_memory()) + return true; + continue; + } + + if (user_inst->need_reset_input_memory(u.second)) return true; } return false; diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index d54bba00412a55..fb254ba757888d 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -320,6 +320,8 @@ struct program_node { bool support_padding(int axis) const { return _support_padding_in_axis[axis]; } // Checks whether with current format specified padding is supported; bool is_padding_supported(int axis, int padding) const; + // Check if layout has padding in any spatial axis + bool is_padded_spatial(size_t idx = 0) const; primitive_id get_org_primitive_id() const { return org_id; } @@ -384,8 +386,16 @@ struct program_node { std::vector& get_fused_primitives() { return fused_prims; } #ifdef ENABLE_ONEDNN_FOR_GPU - const std::shared_ptr& get_onednn_primitive_attributes() const { return onednn_attrs; } - std::shared_ptr& get_onednn_primitive_attributes() { return onednn_attrs; } + const std::shared_ptr& 
get_onednn_primitive_attributes() const { + if (onednn_attrs == nullptr) + const_cast(this)->init_onednn_primitive_attributes(); + return onednn_attrs; + } + std::shared_ptr& get_onednn_primitive_attributes() { + if (onednn_attrs == nullptr) + init_onednn_primitive_attributes(); + return onednn_attrs; + } const std::vector& get_fused_primitives_onednn() const { return fused_prims_onednn; } std::vector& get_fused_primitives_onednn() { return fused_prims_onednn; } diff --git a/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h b/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h index 4768a702d0833b..fa44522d019bee 100644 --- a/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h @@ -9,6 +9,17 @@ namespace cldnn { +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } + std::vector get_shape_infer_dependencies() const override { return {0}; } +}; + using random_uniform_node = typed_program_node; template<> @@ -17,6 +28,8 @@ class typed_primitive_inst : public typed_primitive_inst_base + static std::vector calc_output_layouts(random_uniform_node const& /*node*/, const kernel_impl_params& impl_param); static layout calc_output_layout(random_uniform_node const &node, kernel_impl_params const& impl_param); static std::string to_string(random_uniform_node const &node); diff --git a/src/plugins/intel_gpu/src/graph/include/reduce_inst.h b/src/plugins/intel_gpu/src/graph/include/reduce_inst.h index 438ef4a4bcd823..02809fb6b33275 100644 --- a/src/plugins/intel_gpu/src/graph/include/reduce_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reduce_inst.h @@ -33,7 +33,10 @@ class typed_primitive_inst : public typed_primitive_inst_base { static layout calc_output_layout(reduce_node const& node, kernel_impl_params const& impl_param); static std::string to_string(reduce_node const& node); - bool need_reset_input_memory() const override { + bool need_reset_input_memory(size_t idx = 0) const override { + if (idx != 0) + return false; + auto input_layout = _deps[0].first->_impl_params->get_output_layout(_deps[0].second); if (!format::format::is_simple_data_format(input_layout.format) && input_layout.feature() % 16 != 0) { return true; diff --git a/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h b/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h index 9663677589fd94..e5cfd5087db9cb 100644 --- a/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h @@ -11,6 +11,17 @@ namespace cldnn { +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } + std::vector get_shape_infer_dependencies() const override { return {1, 2, 3}; } +}; + using space_to_batch_node = typed_program_node; template <> @@ -19,9 +30,20 @@ class typed_primitive_inst : public typed_primitive_inst_base + static std::vector calc_output_layouts(space_to_batch_node const& /*node*/, const kernel_impl_params& impl_param); static layout calc_output_layout(space_to_batch_node const& node, kernel_impl_params const& impl_param); static std::string to_string(space_to_batch_node const& node); + bool 
need_reset_output_memory() const override { + const auto desc = _impl_params->typed_desc(); + if (!desc->shape_constant) { + return true; + } + + return false; + } + typed_primitive_inst(network& network, space_to_batch_node const& desc); }; diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index e27e66ee8ceeae..005b9a218233a5 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -496,11 +496,20 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, reorder_node } namespace { -bool should_use_winograd_2x3_s1(std::shared_ptr const& prim, +bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { + bool disable_winograd_conv = node.get_program().get_config().get_property(ov::intel_gpu::disable_winograd_convolution); + if (disable_winograd_conv) + return false; + // cases when NOT to use winograd + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->disable_winograd_conv == 1) + return false; + + auto prim = node.get_primitive(); if (input_layout.data_type != data_types::f16 || input_layout.feature() % 64 != 0 // current algorithm is effective for ifm to be multiply of 64 || weights_layout.spatial(0) != 3 // weights have to be 3x3 by definiton @@ -589,7 +598,7 @@ bool layout_optimizer::convolution_byxf_opt(const layout& input_layout, all_zeroes(conv->padding_begin) && all_zeroes(conv->padding_end)) || // Winograd - should_use_winograd_2x3_s1(conv, input_layout, weights_layout, _output_size_handling_enabled)) + should_use_winograd_2x3_s1(node, input_layout, weights_layout, _output_size_handling_enabled)) return true; return false; @@ -1035,11 +1044,16 @@ format layout_optimizer::get_expected_format(convolution_node const& node) { auto output_layout = node.get_output_layout(0); auto weights_layout = node.weights().get_output_layout().convert_to_weights_layout(prim->grouped_weights_shape); auto expected_format = output_layout.format; + bool i8_u8_input = input_layout.data_type == data_types::u8 || input_layout.data_type == data_types::i8; if (prim->deformable_mode) { return format::adjust_to_rank(format::bfyx, output_layout.get_partial_shape().size()); } + // Use planar bfyx format for dynamic convolutions with explicit padding + if (node.is_dynamic() && output_layout.get_partial_shape().size() == 4 && node.use_explicit_padding() && !i8_u8_input) + return format::bfyx; + if (input_layout.is_dynamic() || output_layout.is_dynamic()) { if (input_layout.get_partial_shape().size() <= 4) expected_format = format::b_fs_yx_fsv16; @@ -1052,7 +1066,6 @@ format layout_optimizer::get_expected_format(convolution_node const& node) { bool onednn_valid_post_ops = get_post_ops_count(node) <= 32; bool use_onednn_impls = _optimization_attributes.use_onednn_impls && input_layout.data_type != data_types::f32; - bool i8_u8_input = input_layout.data_type == data_types::u8 || input_layout.data_type == data_types::i8; if (use_onednn_impls && onednn_valid_post_ops) { expected_format = node.get_preferred_output_fmt(); @@ -2000,7 +2013,7 @@ bool layout_optimizer::is_format_optimized(const convolution_node& node, const f case format::b_fs_yx_fsv16: return convolution_b_fs_yx_fsv16_opt(input_layout, output_layout, weights_layout, prim, use_weak_restrictions) && // Work-around for inability to use b_fs_yx_fsv16 and winograd together - 
!should_use_winograd_2x3_s1(prim, input_layout, weights_layout, _output_size_handling_enabled); + !should_use_winograd_2x3_s1(node, input_layout, weights_layout, _output_size_handling_enabled); case format::b_fs_zyx_fsv16: case format::bs_fs_zyx_bsv16_fsv16: return convolution_b_fs_zyx_fsv16_opt(input_layout, output_layout, weights_layout, prim); diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index 0ed005788965d4..bc156ccc75fda4 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -38,12 +38,12 @@ std::vector non_max_suppression_inst::calc_output_layouts(non_max_suppre }; auto& memory_deps = impl_param.memory_deps; - std::map const_data; + std::unordered_map const_data; if (memory_deps.count(2)) { auto max_output_boxes_per_class_mem = memory_deps.at(2); cldnn::mem_lock max_output_boxes_per_class_lock(max_output_boxes_per_class_mem, impl_param.get_stream()); - auto max_output_boxes_per_class_tensor = make_host_tensor(max_output_boxes_per_class_mem->get_layout(), + auto max_output_boxes_per_class_tensor = make_tensor(max_output_boxes_per_class_mem->get_layout(), max_output_boxes_per_class_lock.data()); const_data.emplace(2, max_output_boxes_per_class_tensor); diff --git a/src/plugins/intel_gpu/src/graph/one_hot.cpp b/src/plugins/intel_gpu/src/graph/one_hot.cpp index 32dd93633bed0c..01116bd987c263 100644 --- a/src/plugins/intel_gpu/src/graph/one_hot.cpp +++ b/src/plugins/intel_gpu/src/graph/one_hot.cpp @@ -67,8 +67,8 @@ std::vector one_hot_inst::calc_output_layouts(const one_hot_node& /*node int64_t depth = desc->depth; - auto depth_tensor = std::make_shared(ov::element::i64, ov::Shape{1}, static_cast(&depth)); - std::map> const_data = { + auto depth_tensor = ov::Tensor(ov::element::i64, ov::Shape{1}, static_cast(&depth)); + std::unordered_map const_data = { {1, depth_tensor} }; std::vector output_shapes = diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index ed49310dc17016..730a62d53beaa1 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -19,6 +19,7 @@ #include "eltwise_inst.h" #include "deconvolution_inst.h" #include "shape_of_inst.h" +#include "softmax_inst.h" #include "gemm_inst.h" #include "assign_inst.h" #include "read_value_inst.h" @@ -349,7 +350,8 @@ void primitive_inst::update_shape() { _impl_params->memory_deps = memory_deps; auto update_output_layout = [&](layout& layout, size_t idx) { - layout.data_padding = padding::max(_node->get_primitive()->output_paddings[idx], layout.data_padding); + auto data_padding = padding::max(_impl_params->get_output_layout(idx).data_padding, layout.data_padding); + layout.data_padding = padding::max(_node->get_primitive()->output_paddings[idx], data_padding); if (_impl_params->get_output_layout(idx) != layout) { GPU_DEBUG_TRACE_DETAIL << id() << ": update shape: was: " << _impl_params->get_output_layout(idx).to_short_string() << " now: " << layout.to_short_string() << std::endl; @@ -436,7 +438,7 @@ event::ptr primitive_inst::realloc_if_needed() { _outputs[0]->set_reused(true); _outputs[0] = _network.get_engine().reinterpret_buffer(*_outputs[0], actual_layout); } - if (need_reset_output_memory()) { + if (need_reset_output_memory() && !can_be_optimized()) { ev = _outputs[0]->fill(_network.get_stream()); } } else { @@ -478,6 +480,17 @@ event::ptr 
primitive_inst::realloc_if_needed() { return ev; } +bool primitive_inst::use_async_compilation() { + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->disable_async_compilation) { + return false; + } + return (_node->is_type() || + _node->is_type() || + _node->is_type() || + _node->is_type()); +} + bool primitive_inst::update_impl() { GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_implementation); auto prev_impl_str = _impl != nullptr ? _impl->get_kernel_name() : "nullptr"; @@ -575,7 +588,8 @@ bool primitive_inst::update_impl() { o.data_padding.set_dynamic_pad(tensor(0)); } - auto& cache = get_network().get_program()->get_implementations_cache(); + const auto& prog = get_network().get_program(); + auto& cache = prog->get_implementations_cache(); std::shared_ptr cached_impl = nullptr; { cached_impl = cache.get(updated_params_no_dyn_pad); @@ -590,15 +604,8 @@ bool primitive_inst::update_impl() { } if (!cached_impl) { if (_dynamic_impl) { - auto use_async_compilation = [&]() { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_async_compilation) { - return false; - } - return true; - }; if (use_async_compilation()) { - auto& compilation_context = get_network().get_program()->get_compilation_context(); + auto& compilation_context = prog->get_compilation_context(); compilation_context.push_task(updated_params_no_dyn_pad, [this, &compilation_context, updated_params_no_dyn_pad]() { if (compilation_context.is_stopped()) return; @@ -611,10 +618,12 @@ bool primitive_inst::update_impl() { return; } - auto impl = _node->type()->choose_impl(*_node, updated_params_no_dyn_pad); if (!can_be_optimized()) { - auto kernels = _program->get_kernels_cache().compile(updated_params_no_dyn_pad, impl->get_kernels_source()); - impl->set_kernels(kernels); + auto impl = _node->type()->choose_impl(*_node, updated_params_no_dyn_pad); + if (impl->get_kernels_source().size() > 0) { + auto kernels = _program->get_kernels_cache().compile(updated_params_no_dyn_pad, impl->get_kernels_source()); + impl->set_kernels(kernels); + } cache.add(updated_params_no_dyn_pad, impl->clone()); } }); @@ -629,7 +638,7 @@ bool primitive_inst::update_impl() { _impl = _node->type()->choose_impl(*_node, updated_params_no_dyn_pad); _impl->set_node_params(*_node); if (!can_be_optimized()) { - auto& kernels_cache = get_network().get_program()->get_kernels_cache(); + auto& kernels_cache = prog->get_kernels_cache(); auto kernels = kernels_cache.compile(updated_params_no_dyn_pad, _impl->get_kernels_source()); _impl->set_kernels(std::move(kernels)); cache.add(updated_params_no_dyn_pad, _impl->clone()); @@ -988,6 +997,7 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool if (_impl) { _impl->set_node_params(node); if (_impl->is_dynamic() && !_impl->is_cpu()) { + GPU_DEBUG_TRACE_DETAIL << id() << ": initialize impl with dynamic impl " << _impl->get_kernel_name() << std::endl; _dynamic_impl = _impl->clone(); // Actual shape info layout is the following: // input_0 -> input_1, ..., fused_dep_0, fused_dep1, ..., output_0, output_1, ... 
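primitive_inst::update_impl above hands asynchronous kernel builds to the program's CompilationContext, whose new wait_all() support (introduced earlier in this patch) works by wrapping every queued task with a promise and keeping the resulting futures. Below is a self-contained sketch of that wrapping pattern only, assuming nothing about the plugin: a detached std::thread stands in for the streams executor and all names are illustrative.

#include <atomic>
#include <functional>
#include <future>
#include <mutex>
#include <thread>
#include <vector>

class TaskTracker {
public:
    void push_task(std::function<void()> task) {
        auto promise = std::make_shared<std::promise<void>>();
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_futures.emplace_back(promise->get_future());
        }
        // The real context forwards the wrapped callable to a streams executor;
        // a detached thread plays that role here.
        std::thread([task, promise] {
            task();
            promise->set_value();
        }).detach();
    }

    // Blocks until every task submitted so far has finished.
    void wait_all() {
        for (auto& f : m_futures)
            f.wait();
    }

private:
    std::mutex m_mutex;
    std::vector<std::future<void>> m_futures;
};

int main() {
    TaskTracker tracker;
    std::atomic<int> done{0};
    for (int i = 0; i < 4; ++i)
        tracker.push_task([&done] { done.fetch_add(1); });
    tracker.wait_all();   // returns only after all four tasks have run
    return done.load() == 4 ? 0 : 1;
}

Keeping one future per submitted task is what makes wait_all() independent of which executor thread ends up running each compilation callback.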
diff --git a/src/plugins/intel_gpu/src/graph/prior_box.cpp b/src/plugins/intel_gpu/src/graph/prior_box.cpp index 191d3cc947dba0..899f0db6f2ba4a 100644 --- a/src/plugins/intel_gpu/src/graph/prior_box.cpp +++ b/src/plugins/intel_gpu/src/graph/prior_box.cpp @@ -440,7 +440,7 @@ std::vector prior_box_inst::calc_output_layouts(prior_box_node const& /* impl_param.get_input_layout(1).get() }; std::vector output_shapes = {ShapeType()}; - std::map const_data; + std::unordered_map const_data; auto& memory_deps = impl_param.memory_deps; @@ -451,7 +451,7 @@ std::vector prior_box_inst::calc_output_layouts(prior_box_node const& /* cldnn::mem_lock output_size_lock(output_size_mem, impl_param.get_stream()); cldnn::mem_lock img_size_lock(img_size_mem, impl_param.get_stream()); - const_data.emplace(0, make_host_tensor(output_size_mem->get_layout(), output_size_lock.data())); + const_data.emplace(0, make_tensor(output_size_mem->get_layout(), output_size_lock.data())); auto p_param = const_cast(&impl_param); if (output_size_mem->get_layout().data_type == cldnn::data_types::i64) { diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index f3e9c97b21bfbe..6d69b30fa3d53b 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -124,9 +124,9 @@ static void adjust_num_cores(ov::threading::IStreamsExecutor::Config& config) { config._streams = std::min(config._streams, num_cores); } -static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags) { +static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { ov::threading::IStreamsExecutor::Config task_executor_config(tags, 1); - task_executor_config._streams = config.get_property(ov::compilation_num_threads); + task_executor_config._streams = (num_streams > 0) ? 
num_streams : config.get_property(ov::compilation_num_threads); auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority); switch (priority) { case ov::hint::Priority::LOW: task_executor_config._threadPreferredCoreType = ov::threading::IStreamsExecutor::Config::LITTLE; break; @@ -214,7 +214,7 @@ void program::init_program() { kernel_selector::KernelBase::get_db().get_batch_header_str())); _compilation_context = ICompilationContext::create(make_task_executor_config(_config, - "Task executor config for CompilationContext in GPU plugin")); + "Task executor config for CompilationContext in GPU plugin", _num_async_build_threads)); _impls_cache = cldnn::make_unique(_impls_cache_capacity); // Remove items of compilation context's internal queue when some impl is popped in kernels_cache @@ -543,11 +543,24 @@ void program::pre_optimize_graph(bool is_internal) { reorder_factory rf; if (optimize_data) { - apply_opt_pass(); + GPU_DEBUG_GET_INSTANCE(debug_config); +#ifdef GPU_DEBUG_CONFIG + GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { +#else + { +#endif + apply_opt_pass(); + } apply_opt_pass(lo); - apply_opt_pass(lo); +#ifdef GPU_DEBUG_CONFIG + GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { +#else + { +#endif + apply_opt_pass(lo); + } apply_opt_pass(lo); diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 7166872646fb50..22ec3e144f5c84 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -481,7 +481,14 @@ bool program_node::is_padding_supported(int axis, int padding) const { return true; } - void program_node::set_selected_impl(std::unique_ptr impl) { +bool program_node::is_padded_spatial(size_t idx) const { + auto lower_size = get_output_layout(idx).data_padding.lower_size(); + auto upper_size = get_output_layout(idx).data_padding.upper_size(); + return std::any_of(lower_size.spatial.begin(), lower_size.spatial.end(), [](const tensor::value_type& el) { return el != 0; }) || + std::any_of(upper_size.spatial.begin(), upper_size.spatial.end(), [](const tensor::value_type& el) { return el != 0; }); +} + +void program_node::set_selected_impl(std::unique_ptr impl) { selected_impl = std::move(impl); } diff --git a/src/plugins/intel_gpu/src/graph/random_uniform.cpp b/src/plugins/intel_gpu/src/graph/random_uniform.cpp index 325f180608ca89..6d665caa4890c0 100644 --- a/src/plugins/intel_gpu/src/graph/random_uniform.cpp +++ b/src/plugins/intel_gpu/src/graph/random_uniform.cpp @@ -2,11 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "random_uniform_inst.h" #include "primitive_type_base.h" -#include -#include -#include +#include "json_object.h" + +#include "random_uniform_shape_inference.hpp" namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(random_uniform) @@ -17,9 +17,56 @@ random_uniform_inst::typed_primitive_inst(network& network, random_uniform_node layout random_uniform_inst::calc_output_layout(random_uniform_node const &node, kernel_impl_params const& impl_param) { auto primitive = impl_param.typed_desc(); - return {*primitive->output_data_types[0], primitive->output_format, primitive->output_shape}; + auto format = format::get_default_format(primitive->output_shape.size()); + + return {primitive->output_shape, *primitive->output_data_types[0], format}; } +template +std::vector random_uniform_inst::calc_output_layouts(random_uniform_node const& /*node*/, kernel_impl_params const& impl_param) { + auto desc = 
impl_param.typed_desc(); + auto output_data_type = desc->output_data_types[0].value_or(impl_param.get_input_layout().data_type); + + std::vector output_shapes; + std::vector input_shapes = { impl_param.get_input_layout(0).get_partial_shape(), + impl_param.get_input_layout(1).get_partial_shape(), + impl_param.get_input_layout(2).get_partial_shape() }; + + auto& memory_deps = impl_param.memory_deps; + std::unordered_map const_data; + + auto run_shape_infer = [&]() { + ov::op::v8::RandomUniform op; + if (memory_deps.count(1) > 0 && memory_deps.count(2) > 0) { + auto min_val = memory_deps.at(1); + cldnn::mem_lock min_val_lock(min_val, impl_param.get_stream()); + const_data.emplace(1, make_tensor(min_val->get_layout(), min_val_lock.data())); + + auto max_val = memory_deps.at(2); + cldnn::mem_lock max_val_lock(max_val, impl_param.get_stream()); + const_data.emplace(2, make_tensor(max_val->get_layout(), max_val_lock.data())); + + return ov::op::v8::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); + } else { + return ov::op::v8::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); + } + }; + + if (memory_deps.count(0) > 0) { + auto output_shape = memory_deps.at(0); + cldnn::mem_lock output_shape_lock(output_shape, impl_param.get_stream()); + const_data.emplace(0, make_tensor(output_shape->get_layout(), output_shape_lock.data())); + + output_shapes = run_shape_infer(); + } else { + output_shapes = run_shape_infer(); + } + + return { layout{output_shapes[0], output_data_type, format::get_default_format(output_shapes[0].size())} }; +} + +template std::vector random_uniform_inst::calc_output_layouts(random_uniform_node const& node, const kernel_impl_params& impl_param); + std::string random_uniform_inst::to_string(random_uniform_node const &node) { auto node_info = node.desc_to_json(); json_composite random_uniform_info; diff --git a/src/plugins/intel_gpu/src/graph/range.cpp b/src/plugins/intel_gpu/src/graph/range.cpp index c1dae775f19ee2..0b57793bb6650a 100644 --- a/src/plugins/intel_gpu/src/graph/range.cpp +++ b/src/plugins/intel_gpu/src/graph/range.cpp @@ -34,21 +34,21 @@ std::vector range_inst::calc_output_layouts(range_node const& /*node*/, std::vector output_shapes = {ShapeType::dynamic(1)}; std::vector input_shapes = {ov::Shape(), ov::Shape(), ov::Shape()}; - std::map const_data; + std::unordered_map const_data; auto& memory_deps = impl_param.memory_deps; if (memory_deps.count(0) > 0 && memory_deps.count(1) > 0 && memory_deps.count(2) > 0) { auto start_mem = memory_deps.at(0); cldnn::mem_lock start_mem_lock(start_mem, impl_param.get_stream()); - const_data.emplace(0, make_host_tensor(start_mem->get_layout(), start_mem_lock.data())); + const_data.emplace(0, make_tensor(start_mem->get_layout(), start_mem_lock.data())); auto stop_mem = memory_deps.at(1); cldnn::mem_lock stop_mem_lock(stop_mem, impl_param.get_stream()); - const_data.emplace(1, make_host_tensor(stop_mem->get_layout(), stop_mem_lock.data())); + const_data.emplace(1, make_tensor(stop_mem->get_layout(), stop_mem_lock.data())); auto step_mem = memory_deps.at(2); cldnn::mem_lock step_mem_lock(step_mem, impl_param.get_stream()); - const_data.emplace(2, make_host_tensor(step_mem->get_layout(), step_mem_lock.data())); + const_data.emplace(2, make_tensor(step_mem->get_layout(), step_mem_lock.data())); output_shapes = shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); } diff --git a/src/plugins/intel_gpu/src/graph/reduce.cpp b/src/plugins/intel_gpu/src/graph/reduce.cpp index 
d5a66fb0acb2d0..ee57a7984a4173 100644 --- a/src/plugins/intel_gpu/src/graph/reduce.cpp +++ b/src/plugins/intel_gpu/src/graph/reduce.cpp @@ -107,8 +107,8 @@ std::vector reduce_inst::calc_output_layouts(reduce_node const& /*node*/ std::vector output_shapes = {ShapeType()}; auto axes = desc->axes; - auto axes_tensor = std::make_shared(ov::element::i64, ov::Shape{axes.size()}, axes.data()); - std::map> const_data = {{1, axes_tensor}}; + auto axes_tensor = ov::Tensor(ov::element::i64, ov::Shape{axes.size()}, axes.data()); + std::unordered_map const_data = {{1, axes_tensor}}; auto ta = ov::make_tensor_accessor(const_data); // shape infer by mode diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index 2d04188a3c3a7d..9fc276469fb7c8 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -85,7 +85,7 @@ std::vector reshape_inst::calc_output_layouts(reshape_node const& /*node pattern_shape, }; - std::map const_data; + std::unordered_map const_data; const auto ta = ov::make_tensor_accessor(const_data); auto run_shape_infer = [&](reshape::reshape_mode mode) { @@ -120,13 +120,13 @@ std::vector reshape_inst::calc_output_layouts(reshape_node const& /*node cldnn::mem_lock pattern_lock(pattern_mem, impl_param.get_stream()); auto pattern_ptr = pattern_lock.data(); - auto pattern_tensor = make_host_tensor(pattern_mem->get_layout(), pattern_ptr); + auto pattern_tensor = make_tensor(pattern_mem->get_layout(), pattern_ptr); const_data.emplace(1, pattern_tensor); run_shape_infer(prim->mode); } else { auto pattern_data = prim->output_pattern; - auto pattern_tensor = make_host_tensor({pattern_shape, data_types::i64, format::bfyx}, static_cast(pattern_data.data())); + auto pattern_tensor = make_tensor({pattern_shape, data_types::i64, format::bfyx}, static_cast(pattern_data.data())); const_data.emplace(1, pattern_tensor); run_shape_infer(prim->mode); diff --git a/src/plugins/intel_gpu/src/graph/space_to_batch.cpp b/src/plugins/intel_gpu/src/graph/space_to_batch.cpp index 79104e74e72ee0..159778951d8b81 100644 --- a/src/plugins/intel_gpu/src/graph/space_to_batch.cpp +++ b/src/plugins/intel_gpu/src/graph/space_to_batch.cpp @@ -11,6 +11,8 @@ #include #include +#include "space_to_batch_shape_inference.hpp" + namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(space_to_batch) @@ -58,6 +60,95 @@ layout space_to_batch_inst::calc_output_layout(space_to_batch_node const& node, return layout{output_type, input_format, desc->out_size}; } +static std::vector tensor_to_vec(const tensor& t, const format f) { + std::vector vec(cldnn::format::dimension(f)); + for (size_t i = 0; i < vec.size(); ++i) { + vec[i] = t.sizes()[i]; + } + std::reverse(vec.begin() + 2, vec.end()); + return vec; +} + +template +std::vector space_to_batch_inst::calc_output_layouts(space_to_batch_node const& /*node*/, const kernel_impl_params& impl_param) { + auto desc = impl_param.typed_desc(); + auto input0_layout = impl_param.get_input_layout(0); + auto input0_shape = input0_layout.get(); + auto input0_size = input0_shape.size(); + auto input0_format = input0_layout.format; + + auto& constant_mem = impl_param.memory_deps; + auto block_data = desc->block_shape; + auto begin_data = desc->pads_begin; + auto end_data = desc->pads_end; + + if (desc->shape_constant == 0 && (!constant_mem.count(1) || !constant_mem.count(2) || !constant_mem.count(3))) { + auto out_shape = ov::PartialShape::dynamic(input0_size); + return { layout{out_shape, input0_layout.data_type, 
input0_format } }; + } + + + ShapeType block_shape = desc->shape_constant == 0 ? impl_param.get_input_layout(1).get() : ov::Shape{ input0_size }; + ShapeType begin_shape = desc->shape_constant == 0 ? impl_param.get_input_layout(2).get() : ov::Shape{ input0_size }; + ShapeType end_shape = desc->shape_constant == 0 ? impl_param.get_input_layout(3).get() : ov::Shape{ input0_size }; + + ov::op::v1::SpaceToBatch op; + std::vector output_shapes = {ShapeType{}}; + std::vector input_shapes = { + input0_shape, + block_shape, + begin_shape, + end_shape + }; + + std::unordered_map const_data; + if (desc->shape_constant) { + auto block_sizes = tensor_to_vec(block_data, input0_format); + auto begin_sizes = tensor_to_vec(begin_data, input0_format); + auto end_sizes = tensor_to_vec(end_data, input0_format); + + auto block_values = static_cast(block_sizes.data()); + auto begin_values = static_cast(begin_sizes.data()); + auto end_values = static_cast(end_sizes.data()); + + auto block_tensor = make_tensor({ block_shape, data_types::i32, input0_format }, block_values); + auto begin_tensor = make_tensor({ begin_shape, data_types::i32, input0_format }, begin_values); + auto end_tensor = make_tensor({ end_shape, data_types::i32, input0_format }, end_values); + + const_data.emplace(1, block_tensor); + const_data.emplace(2, begin_tensor); + const_data.emplace(3, end_tensor); + + output_shapes = ov::op::v1::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); + } else { + auto block_mem = constant_mem.at(1); + auto begin_mem = constant_mem.at(2); + auto end_mem = constant_mem.at(3); + + cldnn::mem_lock lock1(block_mem, impl_param.get_stream()); + cldnn::mem_lock lock2(begin_mem, impl_param.get_stream()); + cldnn::mem_lock lock3(end_mem, impl_param.get_stream()); + + auto block_tensor = make_tensor(block_mem->get_layout(), lock1.data()); + auto begin_tensor = make_tensor(begin_mem->get_layout(), lock2.data()); + auto end_tensor = make_tensor(end_mem->get_layout(), lock3.data()); + + const_data.emplace(1, block_tensor); + const_data.emplace(2, begin_tensor); + const_data.emplace(3, end_tensor); + + output_shapes = ov::op::v1::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); + } + + auto output_type = desc->output_data_types[0].value_or(input0_layout.data_type); + if (impl_param.has_fused_primitives()) + output_type = impl_param.get_fused_output_layout().data_type; + + return { layout{output_shapes[0], output_type, input0_layout.format} }; +} + +template std::vector space_to_batch_inst::calc_output_layouts(space_to_batch_node const& node, const kernel_impl_params& impl_param); + std::string space_to_batch_inst::to_string(space_to_batch_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 3f334e58ee8d05..2bd2fcd0c483fb 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -67,12 +67,12 @@ std::vector strided_slice_inst::calc_output_layouts(strided_slice_node c op.set_shrink_axis_mask(desc->shrink_axis_mask); op.set_ellipsis_mask_mask(desc->ellipsis_mask); - std::map const_data; + std::unordered_map const_data; const auto ta = ov::make_tensor_accessor(const_data); if (!begin_data.empty() && !end_data.empty() && !strides_data.empty()) { - auto begin_tensor = make_host_tensor({ begin_shape, data_types::i64, format::bfyx }, static_cast(begin_data.data())); - 
auto end_tensor = make_host_tensor({ end_shape, data_types::i64, format::bfyx }, static_cast(end_data.data())); - auto strides_tensor = make_host_tensor({ strides_shape, data_types::i64, format::bfyx }, static_cast(strides_data.data())); + auto begin_tensor = make_tensor({ begin_shape, data_types::i64, format::bfyx }, static_cast(begin_data.data())); + auto end_tensor = make_tensor({ end_shape, data_types::i64, format::bfyx }, static_cast(end_data.data())); + auto strides_tensor = make_tensor({ strides_shape, data_types::i64, format::bfyx }, static_cast(strides_data.data())); const_data.emplace(1, begin_tensor); const_data.emplace(2, end_tensor); @@ -88,9 +88,9 @@ std::vector strided_slice_inst::calc_output_layouts(strided_slice_node c cldnn::mem_lock lock2(end_mem, impl_param.get_stream()); cldnn::mem_lock lock3(strides_mem, impl_param.get_stream()); - auto begin_tensor = make_host_tensor(begin_mem->get_layout(), lock1.data()); - auto end_tensor = make_host_tensor(end_mem->get_layout(), lock2.data()); - auto strides_tensor = make_host_tensor(strides_mem->get_layout(), lock3.data()); + auto begin_tensor = make_tensor(begin_mem->get_layout(), lock1.data()); + auto end_tensor = make_tensor(end_mem->get_layout(), lock2.data()); + auto strides_tensor = make_tensor(strides_mem->get_layout(), lock3.data()); const_data.emplace(1, begin_tensor); const_data.emplace(2, end_tensor); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/batch_to_space_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/batch_to_space_ref.cl index 92fdb15bfd2baf..30133a5c32d929 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/batch_to_space_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/batch_to_space_ref.cl @@ -5,6 +5,15 @@ #include "include/batch_headers/fetch_data.cl" KERNEL(batch_to_space_ref)(const __global INPUT0_TYPE* input, +#ifdef BLOCK_TYPE + const __global BLOCK_TYPE* block, +#endif +#ifdef BEGIN_TYPE + const __global BEGIN_TYPE* begin, +#endif +#ifdef END_TYPE + const __global END_TYPE* end, +#endif __global OUTPUT_TYPE* output #if HAS_FUSED_OPS_DECLS , FUSED_OPS_DECLS @@ -14,6 +23,48 @@ KERNEL(batch_to_space_ref)(const __global INPUT0_TYPE* input, const uint batch = get_global_id(0); const uint feature = get_global_id(1); +#ifdef BLOCK_TYPE + const uint block_f = block[1]; + #if BLOCK_DIMS == 3 + const uint block_x = 1; + const uint block_y = block[BLOCK_DIMS-1]; + const uint block_z = 1; + const uint block_w = 1; + #else + const uint block_x = BLOCK_DIMS > 2 ? block[BLOCK_DIMS-1] : 1; + const uint block_y = BLOCK_DIMS > 3 ? block[BLOCK_DIMS-2] : 1; + const uint block_z = BLOCK_DIMS > 4 ? block[BLOCK_DIMS-3] : 1; + const uint block_w = BLOCK_DIMS > 5 ? block[BLOCK_DIMS-4] : 1; + #endif +#else + const uint block_f = BLOCK_SHAPE_FEATURE; + const uint block_x = BLOCK_SHAPE_X; + const uint block_y = BLOCK_SHAPE_Y; + const uint block_z = BLOCK_SHAPE_Z; + const uint block_w = BLOCK_SHAPE_W; +#endif + +#ifdef BEGIN_TYPE + const uint begin_f = begin[1]; + #if BEGIN_DIMS == 3 + const uint begin_x = 0; + const uint begin_y = begin[BEGIN_DIMS-1]; + const uint begin_z = 0; + const uint begin_w = 0; + #else + const uint begin_x = BEGIN_DIMS > 2 ? begin[BEGIN_DIMS-1] : 0; + const uint begin_y = BEGIN_DIMS > 3 ? begin[BEGIN_DIMS-2] : 0; + const uint begin_z = BEGIN_DIMS > 4 ? begin[BEGIN_DIMS-3] : 0; + const uint begin_w = BEGIN_DIMS > 5 ? 
begin[BEGIN_DIMS-4] : 0; + #endif +#else + const uint begin_f = CROPS_BEGIN_FEATURE; + const uint begin_x = CROPS_BEGIN_X; + const uint begin_y = CROPS_BEGIN_Y; + const uint begin_z = CROPS_BEGIN_Z; + const uint begin_w = CROPS_BEGIN_W; +#endif + #if OUTPUT_LAYOUT_BFYX || OUTPUT_LAYOUT_B_FS_YX_FSV16 const uint w = 0; const uint z = 0; @@ -30,9 +81,9 @@ KERNEL(batch_to_space_ref)(const __global INPUT0_TYPE* input, const uint y = yx / OUTPUT_SIZE_X; const uint x = yx % OUTPUT_SIZE_X; const uint input_w = 0; - const uint input_z = (z + CROPS_BEGIN_Z) / BLOCK_SHAPE_Z; + const uint input_z = (z + begin_z) / block_z; const uint offset_w = 0; - const uint offset_z = (z + CROPS_BEGIN_Z) % BLOCK_SHAPE_Z; + const uint offset_z = (z + begin_z) % block_z; #elif OUTPUT_LAYOUT_BFWZYX const uint w = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); const uint zyx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); @@ -40,25 +91,25 @@ KERNEL(batch_to_space_ref)(const __global INPUT0_TYPE* input, const uint z = zyx / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint y = yx / OUTPUT_SIZE_X; const uint x = yx % OUTPUT_SIZE_X; - const uint input_w = (w + CROPS_BEGIN_W) / BLOCK_SHAPE_W; - const uint input_z = (z + CROPS_BEGIN_Z) / BLOCK_SHAPE_Z; - const uint offset_w = (w + CROPS_BEGIN_W) % BLOCK_SHAPE_W; - const uint offset_z = (z + CROPS_BEGIN_Z) % BLOCK_SHAPE_Z; + const uint input_w = (w + begin_w) / block_w; + const uint input_z = (z + begin_z) / block_z; + const uint offset_w = (w + begin_w) % block_w; + const uint offset_z = (z + begin_z) % block_z; #endif - const uint input_feature = (feature + CROPS_BEGIN_FEATURE) / BLOCK_SHAPE_FEATURE; - const uint offset_feature = (feature + CROPS_BEGIN_FEATURE) % BLOCK_SHAPE_FEATURE; + const uint input_feature = (feature + begin_f) / block_f; + const uint offset_feature = (feature + begin_f) % block_f; - const uint input_y = (y + CROPS_BEGIN_Y) / BLOCK_SHAPE_Y; - const uint offset_y = (y + CROPS_BEGIN_Y) % BLOCK_SHAPE_Y; + const uint input_y = (y + begin_y) / block_y; + const uint offset_y = (y + begin_y) % block_y; - const uint input_x = (x + CROPS_BEGIN_X) / BLOCK_SHAPE_X; - const uint offset_x = (x + CROPS_BEGIN_X) % BLOCK_SHAPE_X; + const uint input_x = (x + begin_x) / block_x; + const uint offset_x = (x + begin_x) % block_x; - const uint offset_batch = ((offset_feature * BLOCK_SHAPE_W * BLOCK_SHAPE_Z * BLOCK_SHAPE_Y + - offset_w * BLOCK_SHAPE_Z * BLOCK_SHAPE_Y + - offset_z * BLOCK_SHAPE_Y + - offset_y) * BLOCK_SHAPE_X + + const uint offset_batch = ((offset_feature * block_w * block_z * block_y + + offset_w * block_z * block_y + + offset_z * block_y + + offset_y) * block_x + offset_x) * OUTPUT_BATCH_NUM; const uint input_batch = batch + offset_batch; diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl index af3ffa0658af22..86c4b8cfbbf47d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl @@ -56,6 +56,7 @@ if (_kernel_data.leftovers) REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE) __attribute__((reqd_work_group_size(1, 1, SUB_GROUP_SIZE))) KERNEL(convolution_gpu_bfyx_os_iyx_osv16)( + OPTIONAL_SHAPE_INFO_ARG const __global UNIT_TYPE* input, __global UNIT_TYPE* output, const __global UNIT_TYPE* weights @@ -237,7 +238,14 @@ 
KERNEL(convolution_gpu_bfyx_os_iyx_osv16)( for(uint r = 0; r < OUTPUT_BLOCK_HEIGHT; r++) { if(!(or + r >= OUTPUT_SIZE_Y)) { -#if (OUTPUT_SIZE_X % OUTPUT_BLOCK_WIDTH) == 0 // in this case we don't need to check if we're outside of X boundaries + +#if !IS_DYNAMIC +#if (OUTPUT_SIZE_X % OUTPUT_BLOCK_WIDTH) == 0 + #define CAN_SKIP_CHECK +#endif +#endif + +#ifdef CAN_SKIP_CHECK // in this case we don't need to check if we're outside of X boundaries uint out_vstore_offset = 0; #if (OUT_BLOCK_WIDTH % 8) > 3 MAKE_VECTOR_TYPE(UNIT_TYPE, 4) tmp = MAKE_VECTOR_TYPE(UNIT_TYPE, 4)( diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/space_to_batch_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/space_to_batch_ref.cl index e6027aa6bf0a70..8d08480753e913 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/space_to_batch_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/space_to_batch_ref.cl @@ -5,6 +5,15 @@ #include "include/batch_headers/fetch_data.cl" KERNEL(space_to_batch_ref)(const __global INPUT0_TYPE* input, +#ifdef BLOCK_TYPE + const __global BLOCK_TYPE* block, +#endif +#ifdef BEGIN_TYPE + const __global BEGIN_TYPE* begin, +#endif +#ifdef END_TYPE + const __global END_TYPE* end, +#endif __global OUTPUT_TYPE* output #if HAS_FUSED_OPS_DECLS , FUSED_OPS_DECLS @@ -37,20 +46,64 @@ KERNEL(space_to_batch_ref)(const __global INPUT0_TYPE* input, const uint input_batch = batch % INPUT0_BATCH_NUM; const uint offset_batch = batch / INPUT0_BATCH_NUM; - const int input_feature = feature * BLOCK_SHAPE_FEATURE - PADS_BEGIN_FEATURE + - offset_batch / (BLOCK_SHAPE_W * BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X); - const uint offset_feature = offset_batch % (BLOCK_SHAPE_W * BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X); - const int input_w = w * BLOCK_SHAPE_W - PADS_BEGIN_W + offset_feature / (BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X); - const uint offset_w = offset_feature % (BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X); +#ifdef BLOCK_TYPE + const uint block_f = block[1]; + #if BLOCK_DIMS == 3 + const uint block_x = 1; + const uint block_y = block[BLOCK_DIMS-1]; + const uint block_z = 1; + const uint block_w = 1; + #else + const uint block_x = BLOCK_DIMS > 2 ? block[BLOCK_DIMS-1] : 1; + const uint block_y = BLOCK_DIMS > 3 ? block[BLOCK_DIMS-2] : 1; + const uint block_z = BLOCK_DIMS > 4 ? block[BLOCK_DIMS-3] : 1; + const uint block_w = BLOCK_DIMS > 5 ? block[BLOCK_DIMS-4] : 1; + #endif +#else + const uint block_f = BLOCK_SHAPE_FEATURE; + const uint block_x = BLOCK_SHAPE_X; + const uint block_y = BLOCK_SHAPE_Y; + const uint block_z = BLOCK_SHAPE_Z; + const uint block_w = BLOCK_SHAPE_W; +#endif + + +#ifdef BEGIN_TYPE + const uint begin_f = begin[1]; + #if BEGIN_DIMS == 3 + const uint begin_x = 0; + const uint begin_y = begin[BEGIN_DIMS-1]; + const uint begin_z = 0; + const uint begin_w = 0; + #else + const uint begin_x = BEGIN_DIMS > 2 ? begin[BEGIN_DIMS-1] : 0; + const uint begin_y = BEGIN_DIMS > 3 ? begin[BEGIN_DIMS-2] : 0; + const uint begin_z = BEGIN_DIMS > 4 ? begin[BEGIN_DIMS-3] : 0; + const uint begin_w = BEGIN_DIMS > 5 ? 
begin[BEGIN_DIMS-4] : 0; + #endif +#else + const uint begin_f = PADS_BEGIN_FEATURE; + const uint begin_x = PADS_BEGIN_X; + const uint begin_y = PADS_BEGIN_Y; + const uint begin_z = PADS_BEGIN_Z; + const uint begin_w = PADS_BEGIN_W; +#endif + + const int input_feature = feature * block_f - begin_f + + offset_batch / (block_w * block_z * block_y * block_x); + const uint offset_feature = offset_batch % (block_w * block_z * block_y * block_x); + + const int input_w = w * block_w - begin_w + offset_feature / (block_z * block_y * block_x); + const uint offset_w = offset_feature % (block_z * block_y * block_x); - const int input_z = z * BLOCK_SHAPE_Z - PADS_BEGIN_Z + offset_w / (BLOCK_SHAPE_Y * BLOCK_SHAPE_X); - const uint offset_z = offset_w % (BLOCK_SHAPE_Y * BLOCK_SHAPE_X); + const int input_z = z * block_z - begin_z + offset_w / (block_y * block_x); + const uint offset_z = offset_w % (block_y * block_x); - const int input_y = y * BLOCK_SHAPE_Y - PADS_BEGIN_Y + offset_z / BLOCK_SHAPE_X; - const uint offset_y = offset_z % BLOCK_SHAPE_X; + const int input_y = y * block_y - begin_y + offset_z / block_x; + const uint offset_y = offset_z % block_x; - const int input_x = x * BLOCK_SHAPE_X - PADS_BEGIN_X + offset_y; + const int input_x = x * block_x - begin_x + offset_y; #if OUTPUT_DIMS == 4 const int input_index = INPUT0_GET_INDEX(input_batch, input_feature, input_y, input_x); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_base.cpp index 9592d862e9b9f7..270f5707f09a82 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_base.cpp @@ -48,6 +48,10 @@ CommonDispatchData BatchToSpaceKernelBase::SetDefault(const batch_to_space_param return dispatchData; } +inline std::string GetInputTypeStr(size_t idx) { + return "INPUT" + std::to_string(idx) + "_TYPE"; +} + JitConstants BatchToSpaceKernelBase::GetJitConstants(const batch_to_space_params& params) const { JitConstants jit = MakeBaseParamsJitConstants(params); @@ -70,9 +74,26 @@ JitConstants BatchToSpaceKernelBase::GetJitConstants(const batch_to_space_params } }; - makeJitConstForParam(jit, "BLOCK_SHAPE", params.block_shape, 1); - makeJitConstForParam(jit, "CROPS_BEGIN", params.crops_begin, 0); - makeJitConstForParam(jit, "CROPS_END", params.crops_end, 0); + if (params.block_type == base_params::ArgType::Input) { + jit.AddConstant(MakeJitConstant("BLOCK_TYPE", GetInputTypeStr(params.block_input_index))); + jit.AddConstant(MakeJitConstant("BLOCK_DIMS", params.block_dims)); + } else { + makeJitConstForParam(jit, "BLOCK_SHAPE", params.block_shape, 1); + } + + if (params.begin_type == base_params::ArgType::Input) { + jit.AddConstant(MakeJitConstant("BEGIN_TYPE", GetInputTypeStr(params.begin_input_index))); + jit.AddConstant(MakeJitConstant("BEGIN_DIMS", params.begin_dims)); + } else { + makeJitConstForParam(jit, "CROPS_BEGIN", params.crops_begin, 0); + } + + if (params.end_type == base_params::ArgType::Input) { + jit.AddConstant(MakeJitConstant("END_TYPE", GetInputTypeStr(params.end_input_index))); + jit.AddConstant(MakeJitConstant("END_DIMS", params.end_dims)); + } else { + makeJitConstForParam(jit, "CROPS_END", params.crops_end, 0); + } return jit; } @@ -93,7 +114,8 @@ KernelsData BatchToSpaceKernelBase::GetCommonKernelsData(const Params& params, c auto& kernel = 
kd.kernels[0]; FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, - "", false, false, 1, GetFusedPrimitiveInputsCount(params)); + "", false, false, static_cast(newParams.inputs.size()), + GetFusedPrimitiveInputsCount(params), 1, newParams.has_dynamic_tensors()); return { kd }; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_base.h index 3eadba82ce5f38..ed671d2d538551 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_base.h @@ -17,6 +17,18 @@ struct batch_to_space_params : public base_params { DimTensor block_shape; DimTensor crops_begin; DimTensor crops_end; + + base_params::ArgType block_type = base_params::ArgType::Input; + base_params::ArgType begin_type = base_params::ArgType::Input; + base_params::ArgType end_type = base_params::ArgType::Input; + + size_t block_dims = 0; + size_t begin_dims = 0; + size_t end_dims = 0; + + size_t block_input_index = 0; + size_t begin_input_index = 0; + size_t end_input_index = 0; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_ref.cpp index 432d80cba01748..945c4ed4634d1c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/batch_to_space/batch_to_space_kernel_ref.cpp @@ -14,11 +14,13 @@ ParamsKey BatchToSpaceKernelRef::GetSupportedKey() const { k.EnableInputDataType(Datatype::F32); k.EnableInputDataType(Datatype::UINT8); k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::INT32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::UINT8); k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT32); k.EnableInputLayout(DataLayout::bfyx); k.EnableInputLayout(DataLayout::bfzyx); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp index 16f810a3fef466..c7d80a8c8096ce 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp @@ -281,12 +281,12 @@ bool ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::HasPaddedInput(const convoluti + (params.filterSize.z - 1) * params.dilation.z + 1; bool has_pad = true; - has_pad &= params.padding.x <= params.inputs[0].X().pad.before; - has_pad &= params.padding.y <= params.inputs[0].Y().pad.before; - has_pad &= params.padding.z <= params.inputs[0].Z().pad.before; - has_pad &= inputLimitX <= params.padding.x + params.inputs[0].X().v + params.inputs[0].X().pad.after; - has_pad &= inputLimitY <= params.padding.y + params.inputs[0].Y().v + params.inputs[0].Y().pad.after; - has_pad &= inputLimitZ <= params.padding.z + params.inputs[0].Z().v + 
params.inputs[0].Z().pad.after; + has_pad &= params.padding_begin.x <= params.inputs[0].X().pad.before; + has_pad &= params.padding_begin.y <= params.inputs[0].Y().pad.before; + has_pad &= params.padding_begin.z <= params.inputs[0].Z().pad.before; + has_pad &= inputLimitX <= params.padding_begin.x + params.inputs[0].X().v + params.inputs[0].X().pad.after; + has_pad &= inputLimitY <= params.padding_begin.y + params.inputs[0].Y().v + params.inputs[0].Y().pad.after; + has_pad &= inputLimitZ <= params.padding_begin.z + params.inputs[0].Z().v + params.inputs[0].Z().pad.after; return has_pad; } @@ -300,9 +300,9 @@ bool ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::ParamsHavePadding(const convol + (params.filterSize.z - 1) * params.dilation.z + 1; bool needs_pad = false; - needs_pad |= params.padding.x != 0; - needs_pad |= params.padding.y != 0; - needs_pad |= params.padding.z != 0; + needs_pad |= params.padding_begin.x != 0; + needs_pad |= params.padding_begin.y != 0; + needs_pad |= params.padding_begin.z != 0; needs_pad |= inputLimitX > params.inputs[0].X().v; needs_pad |= inputLimitY > params.inputs[0].Y().v; needs_pad |= inputLimitZ > params.inputs[0].Z().v; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.cpp index ec2ac036eb1a33..cabfe7d92880e6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.cpp @@ -28,7 +28,7 @@ bool ConvolutionKernelBase::Validate(const Params& p, const optional_params& o) JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params); mem_consts.Merge(GetFusedPrimitivesJitConstants(params, dispatchData)); - const auto& padding = params.padding; + const auto& padding = params.padding_begin; const auto& input = params.inputs[0]; int64_t input_offset_with_padding = @@ -37,7 +37,7 @@ JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& pa mem_consts.AddConstants({ MakeJitConstant("STRIDE", params.stride), - MakeJitConstant("PADDING", params.padding), + MakeJitConstant("PADDING", padding), MakeJitConstant("DILATION", params.dilation), MakeJitConstant("FILTER_ARRAY_NUM", params.groups), MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding), @@ -183,11 +183,17 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params, } if (NeedPaddedInput()) { - kd.reorderInput = ConvolutionUpdateInputParams(newParams); - - if (kd.reorderInput && !options.allowInputReordering) - return {}; + if (newParams.has_dynamic_inputs()) { + if (!CheckConvolutionExplicitPaddings(newParams)) + return {}; + } else { + kd.reorderInput = ConvolutionUpdateInputParams(newParams); + + if (kd.reorderInput && !options.allowInputReordering) + return {}; + } } + DispatchData dispatchData = SetDefault(newParams, autoTuneIndex); if (!params.is_shape_agnostic && !CheckWorkGroups(dispatchData)) { @@ -264,7 +270,7 @@ bool CheckConvolutionPaddedInputDesc(const convolution_params& params, const Dat reqDesc.Feature().pad.after <= params.inputs[0].Feature().pad.after && reqDesc.Batch().pad.after <= params.inputs[0].Batch().pad.after; - properPadding &= ((params.padding.x == 0 && params.padding.y == 0) || params.inputs[0].GetPaddedVal() == 
0.f); + properPadding &= ((params.padding_begin.x == 0 && params.padding_begin.y == 0) || params.inputs[0].GetPaddedVal() == 0.f); return properPadding; } @@ -276,16 +282,14 @@ static DataTensor GetConvolutionBFYXPaddedTensor(const convolution_params& cp) { DataTensor t = cp.inputs[0]; std::vector pad{{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }; - pad[0].before = cp.padding.x; - pad[1].before = cp.padding.y; - pad[2].before = cp.padding.z; - + pad[0].before = cp.padding_begin.x; + pad[1].before = cp.padding_begin.y; + pad[2].before = cp.padding_begin.z; const auto inputLimitX = (cp.outputs[0].X().v - 1) * cp.stride.x + (cp.filterSize.x - 1) * cp.dilation.x + 1; const auto inputLimitY = (cp.outputs[0].Y().v - 1) * cp.stride.y + (cp.filterSize.y - 1) * cp.dilation.y + 1; const auto inputLimitZ = (cp.outputs[0].Z().v - 1) * cp.stride.z + (cp.filterSize.z - 1) * cp.dilation.z + 1; - pad[0].after = (size_t)std::max(static_cast(inputLimitX) - static_cast(t.X().v) - static_cast(pad[0].before), static_cast(0)); pad[1].after = (size_t)std::max(static_cast(inputLimitY) - static_cast(t.Y().v) - static_cast(pad[1].before), static_cast(0)); pad[2].after = (size_t)std::max(static_cast(inputLimitZ) - static_cast(t.Z().v) - static_cast(pad[2].before), static_cast(0)); @@ -303,10 +307,29 @@ static DataTensor GetConvolutionBFYXPaddedTensor(const convolution_params& cp) { return {dims, t.GetDType(), t.GetLayout()}; } +bool CheckConvolutionExplicitPaddings(const convolution_params& conv_params) { + if (!conv_params.has_explicit_paddings) + return false; + + bool proper_padding = true; + proper_padding &= conv_params.padding_begin.x == conv_params.inputs[0].X().pad.before && + conv_params.padding_begin.y == conv_params.inputs[0].Y().pad.before && + conv_params.padding_begin.z == conv_params.inputs[0].Z().pad.before; + + proper_padding &= conv_params.padding_end.x == conv_params.inputs[0].X().pad.after && + conv_params.padding_end.y == conv_params.inputs[0].Y().pad.after && + conv_params.padding_end.z == conv_params.inputs[0].Z().pad.after; + + return proper_padding; +} + bool ConvolutionCheckInput(const Params& p, const optional_params& o) { const convolution_params& params = static_cast(p); const convolution_optional_params& optParams = static_cast(o); + if (params.has_dynamic_inputs()) + return CheckConvolutionExplicitPaddings(params); + const auto req_input = GetConvolutionBFYXPaddedTensor(params); const bool bProperInputDesc = CheckConvolutionPaddedInputDesc(params, req_input); const bool bInputPadded = optParams.allowInputReordering || bProperInputDesc; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h index f31ce93d7abdda..30fe07c0068892 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h @@ -73,6 +73,7 @@ class ConvolutionKernelBase : public WeightBiasKernelBase { bool ConvolutionCheckInput(const Params& p, const optional_params& o); bool CheckConvolutionPaddedInputDesc(const convolution_params& params, const DataTensor& reqDesc); +bool CheckConvolutionExplicitPaddings(const convolution_params& conv_params); bool ConvolutionUpdateInputParams(convolution_params& params); } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp 
b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp index d701382ee04251..7111ca9ba53d14 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp @@ -111,7 +111,7 @@ bool convolution_kernel_bfyx_1x1_opt::Validate(const Params& p, const optional_p if (cp.outputs[0].Feature().v % 64 != 0) return false; - if (cp.padding.x != 0 || cp.padding.y != 0) + if (cp.padding_begin.x != 0 || cp.padding_begin.y != 0) return false; if (cp.inputs[0].Feature().v % 2 != 0) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp index 21e5e7c988099f..47ce698e27d243 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp @@ -67,7 +67,7 @@ JitConstants ConvolutionKernel_bfyx_depthwise_weights_lwg::GetJitConstants(const const DispatchData& dispatchData) const { auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, dispatchData); - if (params.padding.x != 0 || params.padding.y != 0) + if (params.padding_begin.x != 0 || params.padding_begin.y != 0) mem_consts.AddConstant(MakeJitConstant("BOUNDARY_CHECK", 1)); return mem_consts; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp index e46322345795ae..61e7e86f0bed4b 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp @@ -50,6 +50,7 @@ ParamsKey ConvolutionKernel_bfyx_os_iyx_osv16::GetSupportedKey() const { k.EnableBatching(); k.EnableDilation(); k.EnableGroupedConvolution(); + k.EnableDynamicShapesSupport(); return k; } @@ -128,7 +129,7 @@ ConvolutionKernel_bfyx_os_iyx_osv16::AutoTuneOption ConvolutionKernel_bfyx_os_iy // if less than 16 values is required to compute one single row of output // then each WI shall compute one single row to maximize reuse within SIMD subgroup (this gives very nice // performance results) - } else if (cp.outputs[0].X().v + (cp.filterSize.x - 1) * cp.dilation.x < sub_group_size) { + } else if (!p.is_shape_agnostic && cp.outputs[0].X().v + (cp.filterSize.x - 1) * cp.dilation.x < sub_group_size) { option.blockWidth = cp.outputs[0].X().v; option.blockHeight = 1; option.prefetch = 4; @@ -153,7 +154,7 @@ ConvolutionKernel_bfyx_os_iyx_osv16::AutoTuneOption ConvolutionKernel_bfyx_os_iy // if this is not 1x1 batch1 case then shrink filters, other way we're memory bound and it's best to use 16x1 block // sizes - if (cp.filterSize.x != 1 || cp.filterSize.y != 1 || cp.outputs[0].Batch().v != 1) { + if (!p.is_shape_agnostic && (cp.filterSize.x != 1 || cp.filterSize.y != 1 || cp.outputs[0].Batch().v != 1)) { shrink_blocks_to_output_size(cp.outputs[0].X().v, cp.outputs[0].Y().v, option.blockWidth, option.blockHeight, sub_group_size); } return option; diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp index 10fa9014e80575..c610f7a5754b45 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp @@ -51,8 +51,8 @@ JitConstants ConvolutionKernel_Winograd_2x3_s1_fused::GetJitConstants(const conv auto C4_up16 = ((uint32_t)((idepth + 15) / 16) * 16) / 4; // if there's input padding then input offset should be ignored - const auto inoffset_x = (input_pad_x) ? 0 : params.padding.x; - const auto inoffset_y = (input_pad_y) ? 0 : params.padding.y; + const auto inoffset_x = (input_pad_x) ? 0 : params.padding_begin.x; + const auto inoffset_y = (input_pad_y) ? 0 : params.padding_begin.y; jit.AddConstants({ MakeJitConstant("H", rows), @@ -89,8 +89,8 @@ ConvolutionKernel_Winograd_2x3_s1_fused::Parent::DispatchData ConvolutionKernel_ const auto cols = arg.inputs[0].X().v + input_pad_x; // if there's input padding then input offset should be ignored - const auto inoffset_x = (input_pad_x) ? 0 : arg.padding.x; - const auto inoffset_y = (input_pad_y) ? 0 : arg.padding.y; + const auto inoffset_x = (input_pad_x) ? 0 : arg.padding_begin.x; + const auto inoffset_y = (input_pad_y) ? 0 : arg.padding_begin.y; auto P = rows - 2 + 2 * inoffset_y; auto Q = cols - 2 + 2 * inoffset_x; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp index 368647a49b3aaf..a4f11224d643bc 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp @@ -52,8 +52,8 @@ JitConstants ConvolutionKernel_Winograd_6x3_s1_fused::GetJitConstants(const conv auto C4_up16 = ((uint32_t)((idepth + 15) / 16) * 16) / 4; // if there's input padding then input offset should be ignored - const auto inoffset_x = (input_pad_x) ? 0 : params.padding.x; - const auto inoffset_y = (input_pad_y) ? 0 : params.padding.y; + const auto inoffset_x = (input_pad_x) ? 0 : params.padding_begin.x; + const auto inoffset_y = (input_pad_y) ? 0 : params.padding_begin.y; jit.AddConstants({ MakeJitConstant("H", rows), @@ -101,8 +101,8 @@ ConvolutionKernel_Winograd_6x3_s1_fused::Parent::DispatchData ConvolutionKernel_ const auto cols = arg.inputs[0].X().v + input_pad_x; // if there's input padding then input offset should be ignored - const auto inoffset_x = (input_pad_x) ? 0 : arg.padding.x; - const auto inoffset_y = (input_pad_y) ? 0 : arg.padding.y; + const auto inoffset_x = (input_pad_x) ? 0 : arg.padding_begin.x; + const auto inoffset_y = (input_pad_y) ? 
0 : arg.padding_begin.y; auto P = rows - 2 + 2 * inoffset_y; auto Q = cols - 2 + 2 * inoffset_x; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.cpp index 9a79f63c441b1b..563eb25b7e27af 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.cpp @@ -20,7 +20,7 @@ std::string convolution_params::to_string() const { s << filterSize.x << "_" << filterSize.y << "_"; s << stride.x << "_" << stride.y << "_"; s << dilation.x << "_" << dilation.y << "_"; - s << padding.x << "_" << padding.y << "_"; + s << padding_begin.x << "_" << padding_begin.y << "_"; s << 1; return s.str(); @@ -33,7 +33,7 @@ std::string convolution_params::to_cache_string_v2() const { s << filterSize.x << "_" << filterSize.y << "_" << filterSize.z << ";"; s << stride.x << "_" << stride.y << "_" << stride.z << ";"; s << dilation.x << "_" << dilation.y << "_" << dilation.z << ";"; - s << padding.x << "_" << padding.y << "_" << padding.z << ";"; + s << padding_begin.x << "_" << padding_begin.y << "_" << padding_begin.z << ";"; s << 1 << ";"; s << groups; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.h index 169fd8f4d7f184..f5e567c86b8ef5 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.h @@ -20,7 +20,8 @@ struct convolution_params : public weight_bias_zero_point_params { uSize filterSize; uSize stride; uSize dilation; - uSize padding; + uSize padding_begin; + uSize padding_end; bool transposed = false; QuantizationType quantization = QuantizationType::NONE; bool deformable_mode = false; @@ -29,6 +30,7 @@ struct convolution_params : public weight_bias_zero_point_params { uint32_t deformable_groups = 1; bool bilinear_interpolation_pad {false}; bool deformable_mask_enabled {false}; + bool has_explicit_paddings {false}; std::string to_string() const override; std::string to_cache_string_v2() const override; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp index 5cce5457d11c2d..80a7778986b328 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp @@ -68,7 +68,7 @@ JitConstants DeformableConvolutionKernel_bfyx_interp::GetJitConstants(const conv jit.AddConstant(MakeJitConstant("FILTER_SIZE_X", params.kernelSize.x)); jit.AddConstant(MakeJitConstant("FILTER_SIZE_Y", params.kernelSize.y)); jit.AddConstants({MakeJitConstant("STRIDE", params.stride), - MakeJitConstant("PADDING", params.padding), + MakeJitConstant("PADDING", params.padding_begin), MakeJitConstant("DILATION", params.dilation) }); jit.AddConstants({MakeJitConstant("DEFORMABLE_GROUPS", params.deformable_groups)}); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_base.cpp 
b/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_base.cpp index 520e7f2db94b8e..54a65f534d5b11 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_base.cpp @@ -48,6 +48,10 @@ CommonDispatchData SpaceToBatchKernelBase::SetDefault(const space_to_batch_param return dispatchData; } +inline std::string GetInputTypeStr(size_t idx) { + return "INPUT" + std::to_string(idx) + "_TYPE"; +} + JitConstants SpaceToBatchKernelBase::GetJitConstants(const space_to_batch_params& params) const { JitConstants jit = MakeBaseParamsJitConstants(params); @@ -70,9 +74,26 @@ JitConstants SpaceToBatchKernelBase::GetJitConstants(const space_to_batch_params } }; - makeJitConstForParam(jit, "BLOCK_SHAPE", params.block_shape, 1); - makeJitConstForParam(jit, "PADS_BEGIN", params.pads_begin, 0); - makeJitConstForParam(jit, "PADS_END", params.pads_end, 0); + if (params.block_type == base_params::ArgType::Input) { + jit.AddConstant(MakeJitConstant("BLOCK_TYPE", GetInputTypeStr(params.block_input_index))); + jit.AddConstant(MakeJitConstant("BLOCK_DIMS", params.block_dims)); + } else { + makeJitConstForParam(jit, "BLOCK_SHAPE", params.block_shape, 1); + } + + if (params.begin_type == base_params::ArgType::Input) { + jit.AddConstant(MakeJitConstant("BEGIN_TYPE", GetInputTypeStr(params.begin_input_index))); + jit.AddConstant(MakeJitConstant("BEGIN_DIMS", params.begin_dims)); + } else { + makeJitConstForParam(jit, "PADS_BEGIN", params.pads_begin, 0); + } + + if (params.end_type == base_params::ArgType::Input) { + jit.AddConstant(MakeJitConstant("END_TYPE", GetInputTypeStr(params.end_input_index))); + jit.AddConstant(MakeJitConstant("END_DIMS", params.end_dims)); + } else { + makeJitConstForParam(jit, "PADS_END", params.pads_end, 0); + } return jit; } @@ -93,7 +114,8 @@ KernelsData SpaceToBatchKernelBase::GetCommonKernelsData(const Params& params, c auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, - "", false, false, 1, GetFusedPrimitiveInputsCount(params)); + "", false, false, static_cast(newParams.inputs.size()), + GetFusedPrimitiveInputsCount(params), 1, newParams.has_dynamic_tensors()); return { kd }; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_base.h index 98168158f5903b..05fd4fbea4910e 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_base.h @@ -17,6 +17,18 @@ struct space_to_batch_params : public base_params { DimTensor block_shape; DimTensor pads_begin; DimTensor pads_end; + + base_params::ArgType block_type = base_params::ArgType::Input; + base_params::ArgType begin_type = base_params::ArgType::Input; + base_params::ArgType end_type = base_params::ArgType::Input; + + size_t block_dims = 0; + size_t begin_dims = 0; + size_t end_dims = 0; + + size_t block_input_index = 0; + size_t begin_input_index = 0; + size_t end_input_index = 0; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_ref.cpp 
b/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_ref.cpp index 1fec7175028557..cdb039fddf907c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/space_to_batch/space_to_batch_kernel_ref.cpp @@ -14,11 +14,13 @@ ParamsKey SpaceToBatchKernelRef::GetSupportedKey() const { k.EnableInputDataType(Datatype::F32); k.EnableInputDataType(Datatype::UINT8); k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::INT32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::UINT8); k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT32); k.EnableInputLayout(DataLayout::bfyx); k.EnableInputLayout(DataLayout::bfzyx); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index e20da7f0ca3f17..b215700a046c8f 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -320,6 +320,7 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RO}, ov::PropertyName{ov::intel_gpu::hint::queue_throttle.name(), PropertyMutability::RO}, ov::PropertyName{ov::intel_gpu::enable_loop_unrolling.name(), PropertyMutability::RO}, + ov::PropertyName{ov::intel_gpu::disable_winograd_convolution.name(), PropertyMutability::RO}, ov::PropertyName{ov::cache_dir.name(), PropertyMutability::RO}, ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RO}, ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RO}, diff --git a/src/plugins/intel_gpu/src/plugin/ops/batch_to_space.cpp b/src/plugins/intel_gpu/src/plugin/ops/batch_to_space.cpp index f4a881345b2c6f..029caaac4ae7b5 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/batch_to_space.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/batch_to_space.cpp @@ -18,33 +18,52 @@ static void CreateBatchToSpaceOp(Program& p, const std::shared_ptrget_input_shape(0).size(); + auto rank = op->get_input_partial_shape(0).size(); auto format = cldnn::format::get_default_format(rank); std::vector tensor_inputs; tensor_inputs.reserve(3); + bool non_constant_input = false; for (size_t i = 1; i < 4; ++i) { auto inConst = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(i)); - OPENVINO_ASSERT(inConst != nullptr, "[GPU] Unsupported parameter nodes type in ", op->get_friendly_name(), " (", op->get_type_name(), ")"); - std::vector sizes = inConst->cast_vector(); - int32_t default_size = i == 1 ? 
1 : 0; - for (size_t s = sizes.size(); s < format.dimension(); s++) { - sizes.push_back(default_size); + bool is_const_input = (inConst != nullptr); + OPENVINO_ASSERT((i == 1) || (i >= 2 && non_constant_input != is_const_input), + "[GPU] Unsupported mixed node with constant and parameter in ", op->get_friendly_name(), " (", op->get_type_name(), ")"); + + if (!inConst) { + non_constant_input = true; } - tensor_inputs.emplace_back(format, sizes, default_size); } - auto out_size = tensor_from_dims(op->get_output_shape(0)); - auto batchToSpacePrim = cldnn::batch_to_space(layerName, - inputs[0], // input - tensor_inputs[0], // block_shape - tensor_inputs[1], // crops_begin - tensor_inputs[2], // crops_end - out_size); + auto output_pshape = op->get_output_partial_shape(0); + auto out_size = output_pshape.is_static() ? tensor_from_dims(output_pshape.to_shape()) : cldnn::tensor(); + + if (non_constant_input) { + auto batchToSpacePrim = cldnn::batch_to_space(layerName, inputs, out_size); + p.add_primitive(*op, batchToSpacePrim); + } else { + for (size_t i = 1; i < 4; ++i) { + auto inConst = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(i)); + + std::vector sizes = inConst->cast_vector(); + int32_t default_size = i == 1 ? 1 : 0; + for (size_t s = sizes.size(); s < format.dimension(); s++) { + sizes.push_back(default_size); + } + tensor_inputs.emplace_back(format, sizes, default_size); + } + + auto batchToSpacePrim = cldnn::batch_to_space(layerName, + inputs[0], // input + tensor_inputs[0], // block_shape + tensor_inputs[1], // crops_begin + tensor_inputs[2], // crops_end + out_size); - p.add_primitive(*op, batchToSpacePrim); + p.add_primitive(*op, batchToSpacePrim); + } } REGISTER_FACTORY_IMPL(v1, BatchToSpace); diff --git a/src/plugins/intel_gpu/src/plugin/ops/random_uniform.cpp b/src/plugins/intel_gpu/src/plugin/ops/random_uniform.cpp index 18f7b7cc5089de..916c5520914d53 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/random_uniform.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/random_uniform.cpp @@ -15,17 +15,33 @@ namespace { void CreateRandomUniformOp(Program &p, const std::shared_ptr &op) { auto inputs = p.GetInputInfo(op); - auto output_shape = op->get_output_shape(0); - cldnn::format outputFormat = cldnn::format::get_default_format(output_shape.size()); - - auto random_uniform_prim = cldnn::random_uniform(layer_type_name_ID(op), - inputs, - cldnn::element_type_to_data_type(op->get_out_type()), - op->get_global_seed(), - op->get_op_seed(), - tensor_from_dims(output_shape), - outputFormat); - p.add_primitive(*op, random_uniform_prim); + auto input_pshape = op->get_input_partial_shape(0); + auto output_pshape = op->get_output_partial_shape(0); + + OPENVINO_ASSERT(input_pshape.is_static(), "[GPU] Dynamic input of RandomUniform leads to dynamic output rank, but GPU doesn't support it yet"); + + if (output_pshape.is_static() && !p.use_new_shape_infer()) { + auto output_shape = output_pshape.get_shape(); + // Extend to 4D shape + output_shape.insert(output_shape.end(), 4 - output_shape.size(), 1ul); + + auto random_uniform_prim = cldnn::random_uniform(layer_type_name_ID(op), + inputs, + cldnn::element_type_to_data_type(op->get_out_type()), + op->get_global_seed(), + op->get_op_seed(), + output_shape); + p.add_primitive(*op, random_uniform_prim); + } else { + OPENVINO_ASSERT(input_pshape.size() == 1, "[GPU] RandomUniform expects 1D input, got ", input_pshape.size()); + + auto random_uniform_prim = cldnn::random_uniform(layer_type_name_ID(op), + inputs, + 
cldnn::element_type_to_data_type(op->get_out_type()), + op->get_global_seed(), + op->get_op_seed()); + p.add_primitive(*op, random_uniform_prim); + } } } // namespace diff --git a/src/plugins/intel_gpu/src/plugin/ops/space_to_batch.cpp b/src/plugins/intel_gpu/src/plugin/ops/space_to_batch.cpp index 130f27276b4b80..070860c0d2ba4c 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/space_to_batch.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/space_to_batch.cpp @@ -24,30 +24,48 @@ static void CreateSpaceToBatchOp(Program& p, const std::shared_ptr tensor_inputs; tensor_inputs.reserve(3); + bool non_constant_input = false; for (size_t i = 1; i < 4; ++i) { auto inConst = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(i)); - OPENVINO_ASSERT(inConst != nullptr, "[GPU] Unsupported parameter nodes type in ", op->get_friendly_name(), " (", op->get_type_name(), ")"); - std::vector sizes = inConst->cast_vector(); - int32_t default_size = i == 1 ? 1 : 0; - for (size_t s = sizes.size(); s < format.dimension(); s++) { - sizes.push_back(default_size); + bool is_const_input = (inConst != nullptr); + OPENVINO_ASSERT((i == 1) || (i >= 2 && non_constant_input != is_const_input), + "[GPU] Unsupported mixed node with constant and parameter in ", op->get_friendly_name(), " (", op->get_type_name(), ")"); + + if (!inConst) { + non_constant_input = true; } - tensor_inputs.emplace_back(format, sizes, default_size); } - auto output_pshape = op->get_output_partial_shape(0); + // In case of dynamic shapes pass dummy shape value to space_to_batch primitive // To be removed once we enable internal shape infer for all operations + auto output_pshape = op->get_output_partial_shape(0); auto out_size = output_pshape.is_static() ? tensor_from_dims(output_pshape.to_shape()) : cldnn::tensor(); - auto spaceToBatchPrim = cldnn::space_to_batch(layerName, - inputs[0], // input - tensor_inputs[0], // block_shape - tensor_inputs[1], // crops_begin - tensor_inputs[2], // crops_end - out_size); + if (non_constant_input) { + auto spaceToBatchPrim = cldnn::space_to_batch(layerName, inputs, out_size); + p.add_primitive(*op, spaceToBatchPrim); + } else { + for (size_t i = 1; i < 4; ++i) { + auto inConst = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(i)); - p.add_primitive(*op, spaceToBatchPrim); + std::vector sizes = inConst->cast_vector(); + int32_t default_size = i == 1 ? 
1 : 0; + for (size_t s = sizes.size(); s < format.dimension(); s++) { + sizes.push_back(default_size); + } + tensor_inputs.emplace_back(format, sizes, default_size); + } + + auto spaceToBatchPrim = cldnn::space_to_batch(layerName, + inputs[0], // input data + tensor_inputs[0], // block_shape + tensor_inputs[1], // pads_begin + tensor_inputs[2], // pads_end + out_size); + + p.add_primitive(*op, spaceToBatchPrim); + } } REGISTER_FACTORY_IMPL(v1, SpaceToBatch); diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 4fa41fc4c53df0..a8c15b854607d4 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -746,6 +746,7 @@ std::vector Plugin::get_supported_properties() const { ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RW}, ov::PropertyName{ov::intel_gpu::hint::queue_throttle.name(), PropertyMutability::RW}, ov::PropertyName{ov::intel_gpu::enable_loop_unrolling.name(), PropertyMutability::RW}, + ov::PropertyName{ov::intel_gpu::disable_winograd_convolution.name(), PropertyMutability::RW}, ov::PropertyName{ov::cache_dir.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RW}, diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index c798626f6a7b8c..084d0095dd27c3 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -134,9 +134,13 @@ static void print_help_messages() { " For primitives fc, gemm, do, reduce, concat are supported. Separated by space."); message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels"); message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation"); + message_list.emplace_back("OV_GPU_DisableWinogradConv", "Disable Winograd convolution"); message_list.emplace_back("OV_GPU_DisableDynamicImpl", "Disable dynamic implementation"); message_list.emplace_back("OV_GPU_DisableRuntimeBufferFusing", "Disable runtime buffer fusing"); message_list.emplace_back("OV_GPU_DisableMemoryReuse", "Disable memory reuse"); + message_list.emplace_back("OV_GPU_DisableBuildTimeWeightReorderForDynamicNodes", "Disable build time weight reorder for dynamic nodes."); + message_list.emplace_back("OV_GPU_DisableRuntimeSkipReorder", "Disable runtime skip reorder."); + message_list.emplace_back("OV_GPU_DisablePrimitiveFusing", "Disable primitive fusing"); message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space."); message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. 
Expects 4 values separated by space in" "the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), " @@ -187,11 +191,13 @@ debug_configuration::debug_configuration() , serialize_compile(0) , max_kernels_per_batch(0) , disable_async_compilation(0) + , disable_winograd_conv(0) , disable_dynamic_impl(0) , disable_runtime_buffer_fusing(0) , disable_memory_reuse(0) , disable_build_time_weight_reorder_for_dynamic_nodes(0) - , disable_runtime_skip_reorder(0) { + , disable_runtime_skip_reorder(0) + , disable_primitive_fusing(0) { #ifdef GPU_DEBUG_CONFIG get_gpu_debug_env_var("Help", help); get_common_debug_env_var("Verbose", verbose); @@ -223,11 +229,13 @@ debug_configuration::debug_configuration() get_gpu_debug_env_var("ForceImplTypes", forced_impl_types_str); get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch); get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation); + get_gpu_debug_env_var("DisableWinogradConv", disable_winograd_conv); get_gpu_debug_env_var("DisableDynamicImpl", disable_dynamic_impl); get_gpu_debug_env_var("DisableRuntimeBufferFusing", disable_runtime_buffer_fusing); get_gpu_debug_env_var("DisableMemoryReuse", disable_memory_reuse); get_gpu_debug_env_var("DisableBuildTimeWeightReorderForDynamicNodes", disable_build_time_weight_reorder_for_dynamic_nodes); get_gpu_debug_env_var("DisableRuntimeSkipReorder", disable_runtime_skip_reorder); + get_gpu_debug_env_var("DisablePrimitiveFusing", disable_primitive_fusing); std::string dump_iteration_str; get_gpu_debug_env_var("DumpIteration", dump_iteration_str); std::string mem_preallocation_params_str; diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 49d82e6a7be88b..e1375ef14ddb47 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -53,6 +53,7 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM), std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), + std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), std::make_tuple(ov::internal::exclusive_async_requests, false), // Legacy API properties diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp index 793027dcf5c1d2..fcab83a4be4cbc 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp @@ -436,18 +436,24 @@ event::ptr gpu_usm::fill(stream& stream) { event::ptr gpu_usm::copy_from(stream& stream, const memory& other, bool blocking) { auto& cl_stream = downcast(stream); - auto& casted = downcast(other); - auto dst_ptr = get_buffer().get(); - auto src_ptr = casted.get_buffer().get(); auto ev = blocking ? stream.create_user_event(true) : stream.create_base_event(); cl::Event* ev_ocl = blocking ? 
nullptr : &downcast(ev.get())->get(); - cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(), - dst_ptr, - src_ptr, - _bytes_count, - blocking, - nullptr, - ev_ocl); + if (other.get_allocation_type() == allocation_type::cl_mem) { + // Copy cl_mem to usm_memory by cl::CommandQueue::enqueueReadBuffer() + auto& mem_inst = downcast(other); + cl_stream.get_cl_queue().enqueueReadBuffer(mem_inst.get_buffer(), blocking, 0, size(), this->buffer_ptr(), nullptr, ev_ocl); + } else { + auto& casted = downcast(other); + auto dst_ptr = get_buffer().get(); + auto src_ptr = casted.get_buffer().get(); + cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(), + dst_ptr, + src_ptr, + _bytes_count, + blocking, + nullptr, + ev_ocl); + } return ev; } diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/batch_to_space.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/batch_to_space.cpp new file mode 100644 index 00000000000000..bb2bb3341b0538 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/batch_to_space.cpp @@ -0,0 +1,227 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/batch_to_space.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph_functions/builders.hpp" +#include "common_test_utils/test_constants.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +using namespace InferenceEngine; +using namespace ov::test; + +namespace GPULayerTestsDefinitions { + +struct BatchToSpaceParams { + std::vector block; + std::vector begin; + std::vector end; +}; + +typedef std::tuple< + InputShape, // Input shapes + BatchToSpaceParams, + ElementType, // Element type + ngraph::helpers::InputLayerType, // block/begin/end input type + std::map // Additional network configuration +> BatchToSpaceParamsLayerParamSet; + +class BatchToSpaceLayerGPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + InputShape shapes; + BatchToSpaceParams params; + ElementType elementType; + ngraph::helpers::InputLayerType restInputType; + TargetDevice targetDevice; + std::map additionalConfig; + std::tie(shapes, params, elementType, restInputType, additionalConfig) = obj.param; + + std::ostringstream results; + results << "IS=" << ov::test::utils::partialShape2str({shapes.first}) << "_"; + results << "TS="; + for (const auto& item : shapes.second) { + results << ov::test::utils::vec2str(item) << "_"; + } + results << "netPRC=" << elementType << "_"; + results << "block=" << ov::test::utils::vec2str(params.block) << "_"; + results << "begin=" << ov::test::utils::vec2str(params.begin) << "_"; + results << "end=" << ov::test::utils::vec2str(params.end) << "_"; + results << "restInputType=" << restInputType << "_"; + results << "config=("; + for (const auto& configEntry : additionalConfig) { + results << configEntry.first << ", " << configEntry.second << ":"; + } + results << ")"; + + return results.str(); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + if (i == 1) { + tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < 
block.size(); i++) { + dataPtr[i] = static_cast(block[i]); + } + } else if (i == 2) { + tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < begin.size(); i++) { + dataPtr[i] = static_cast(begin[i]); + } + } else if (i == 3) { + tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < end.size(); i++) { + dataPtr[i] = static_cast(end[i]); + } + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + inferRequestNum++; + } + +protected: + std::vector block; + std::vector begin; + std::vector end; + size_t inferRequestNum = 0; + + void SetUp() override { + InputShape shapes; + BatchToSpaceParams ssParams; + ngraph::helpers::InputLayerType restInputType; + std::map additionalConfig; + std::tie(shapes, ssParams, inType, restInputType, additionalConfig) = this->GetParam(); + + block = ssParams.block; + begin = ssParams.begin; + end = ssParams.end; + + targetDevice = ov::test::utils::DEVICE_GPU; + + std::vector inputShapes; + inputShapes.push_back(shapes); + if (restInputType == ngraph::helpers::InputLayerType::PARAMETER) { + inputShapes.push_back(InputShape({static_cast(block.size())}, std::vector(shapes.second.size(), {block.size()}))); + inputShapes.push_back(InputShape({static_cast(begin.size())}, std::vector(shapes.second.size(), {begin.size()}))); + inputShapes.push_back(InputShape({static_cast(end.size())}, std::vector(shapes.second.size(), {end.size()}))); + } + + init_input_shapes(inputShapes); + + auto params = ngraph::builder::makeDynamicParams(inType, {inputDynamicShapes.front()}); + std::shared_ptr blockInput, beginInput, endInput; + if (restInputType == ngraph::helpers::InputLayerType::PARAMETER) { + auto blockNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{block.size()}); + auto beginNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()}); + auto endNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()}); + + params.push_back(blockNode); + params.push_back(beginNode); + params.push_back(endNode); + + blockInput = blockNode; + beginInput = beginNode; + endInput = endNode; + } else { + blockInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{block.size()}, block); + beginInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()}, begin); + endInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()}, end); + } + auto ss = std::make_shared(params[0], blockInput, beginInput, endInput); + + ngraph::ResultVector results; + for (size_t i = 0; i < ss->get_output_size(); i++) { + results.push_back(std::make_shared(ss->output(i))); + } + + function = std::make_shared(results, params, "BatchToSpaceFuncTest"); + } +}; + +TEST_P(BatchToSpaceLayerGPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + run(); +} + +namespace { + +std::map emptyAdditionalConfig; + +const std::vector inputPrecisions = { + ElementType::f32 +}; + +const std::vector restInputTypes = { + ngraph::helpers::InputLayerType::CONSTANT, + ngraph::helpers::InputLayerType::PARAMETER +}; + +const std::vector inputShapesDynamic3D = { + {{-1, -1, -1}, {{48, 3, 3}, {24, 4, 5}}}, +}; + +const std::vector paramsPlain3D = { + BatchToSpaceParams{ { 1, 2, 4 }, { 0, 0, 1 }, { 0, 0, 1 } }, + BatchToSpaceParams{ { 1, 3, 2 }, 
{ 0, 1, 0 }, { 0, 2, 1 } }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Dynamic_3D, BatchToSpaceLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapesDynamic3D), + ::testing::ValuesIn(paramsPlain3D), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(restInputTypes), + ::testing::Values(emptyAdditionalConfig)), + BatchToSpaceLayerGPUTest::getTestCaseName); + +const std::vector inputShapesDynamic4D = { + {{-1, -1, -1, -1}, {{48, 3, 3, 1}, {24, 4, 5, 6}}}, +}; + +const std::vector paramsPlain4D = { + BatchToSpaceParams{ { 1, 2, 4, 3 }, { 0, 0, 1, 0 }, { 0, 0, 1, 0 } }, + BatchToSpaceParams{ { 1, 3, 2, 4 }, { 0, 1, 0, 1 }, { 0, 2, 1, 3 } }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Dynamic_4D, BatchToSpaceLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapesDynamic4D), + ::testing::ValuesIn(paramsPlain4D), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(restInputTypes), + ::testing::Values(emptyAdditionalConfig)), + BatchToSpaceLayerGPUTest::getTestCaseName); + +const std::vector inputShapesDynamic5D = { + {{-1, -1, -1, -1, -1}, {{48, 3, 3, 1, 5}, {96, 4, 5, 6, 7}}}, +}; + +const std::vector paramsPlain5D = { + BatchToSpaceParams{ { 1, 2, 4, 3, 2 }, { 0, 0, 1, 0, 2 }, { 0, 0, 1, 0, 3 } }, + BatchToSpaceParams{ { 1, 3, 2, 4, 2 }, { 0, 1, 0, 1, 3 }, { 0, 2, 1, 3, 2 } }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Dynamic_5D, BatchToSpaceLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapesDynamic5D), + ::testing::ValuesIn(paramsPlain5D), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(restInputTypes), + ::testing::Values(emptyAdditionalConfig)), + BatchToSpaceLayerGPUTest::getTestCaseName); + +} // namespace +} // namespace GPULayerTestsDefinitions diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp new file mode 100644 index 00000000000000..de0a2321658e5e --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp @@ -0,0 +1,208 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph_functions/builders.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" + +using namespace ngraph; +using namespace ov::test; + +namespace GPULayerTestsDefinitions { + +typedef std::tuple< + std::vector, // Input shapes + std::pair, // Min value, Max value + std::pair, // Global seed, operation seed + ElementType, // Network precision + TargetDevice, // Device name + std::map // Additional network configuration +> RandomUnifromDynamicGPUTestParamsSet; + +class RandomUnifromDynamicGPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + RandomUnifromDynamicGPUTestParamsSet basicParamsSet = obj.param; + std::ostringstream result; + std::vector input_shapes; + std::pair min_max_values; + std::pair seeds; + ElementType precision; + TargetDevice target_device; + std::map additionalConfig; + std::tie(input_shapes, min_max_values, seeds, precision, target_device, additionalConfig) = basicParamsSet; + + result << "shape="; + for (const auto& shape : input_shapes) { + result << ov::test::utils::partialShape2str({shape.first}) << "_"; + for (const auto& actual_shape : shape.second) { + 
result << ov::test::utils::partialShape2str({actual_shape}) << "_"; + } + } + result << "precision=" << precision << "_"; + result << "min_max_values=" << min_max_values.first << "_" << min_max_values.second << "_"; + result << "seeds=" << seeds.first << "_" << seeds.second << "_"; + result << "target_device=" << target_device; + return result.str(); + } + +protected: + void init_input_shapes(const std::vector& shapes) { + if (shapes.empty()) { + targetStaticShapes = {{}}; + return; + } + size_t targetStaticShapeSize = shapes.front().second.size(); + for (size_t i = 1; i < shapes.size(); ++i) { + if (targetStaticShapeSize < shapes[i].second.size()) { + targetStaticShapeSize = shapes[i].second.size(); + } + } + targetStaticShapes.resize(targetStaticShapeSize); + + for (const auto& shape : shapes) { + auto dynShape = shape.first; + inputDynamicShapes.push_back(dynShape); + for (size_t i = 0; i < targetStaticShapeSize; ++i) { + targetStaticShapes[i].push_back(i < shape.second.size() ? shape.second.at(i) : shape.second.back()); + } + } + } + + template + void set_tensor_value(T scalar, ov::Tensor& tensor) { + #define CASE(X) \ + case X: { \ + auto *dataPtr = tensor.data::value_type>(); \ + dataPtr[0] = static_cast::value_type>(scalar); \ + break; \ + } + + switch (tensor.get_element_type()) { + CASE(ElementType::boolean) + CASE(ElementType::i8) + CASE(ElementType::i16) + CASE(ElementType::i32) + CASE(ElementType::i64) + CASE(ElementType::u8) + CASE(ElementType::u16) + CASE(ElementType::u32) + CASE(ElementType::u64) + CASE(ElementType::bf16) + CASE(ElementType::f16) + CASE(ElementType::f32) + CASE(ElementType::f64) + CASE(ElementType::u1) + CASE(ElementType::i4) + CASE(ElementType::u4) + default: OPENVINO_THROW("Unsupported element type: ", tensor.get_element_type()); + } + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + + auto generate_input = [&](size_t index, ElementType element_type) { + ov::Tensor tensor(element_type, targetInputStaticShapes[index]); + if (index != 0) { + auto scalar_val = index == 1 ? 
min_max_values.first : min_max_values.second; + set_tensor_value(scalar_val, tensor); + } + inputs.insert({funcInputs[index].get_node_shared_ptr(), tensor}); + }; + + for (size_t i = 0; i < targetInputStaticShapes.size(); ++i) + generate_input(i, funcInputs[i].get_element_type()); + } + + void SetUp() override { + RandomUnifromDynamicGPUTestParamsSet basicParamsSet = this->GetParam(); + std::vector shapes; + ElementType netType; + std::map additionalConfig; + std::pair seeds; + + ov::ParameterVector params; + std::tie(shapes, min_max_values, seeds, netType, targetDevice, additionalConfig) = basicParamsSet; + + init_input_shapes(shapes); + + params = builder::makeDynamicParams(netType, inputDynamicShapes); + + const auto shape_of = std::make_shared(params[0]); + const auto random_uniform = std::make_shared(shape_of, params[1], params[2], netType, seeds.first, seeds.second); + + ov::ResultVector results = {std::make_shared(random_uniform)}; + function = std::make_shared(results, params, "random_uniform_test"); + } + + precisions_map get_ref_precisions_convert_map() override { + // Do not convert reference function from FP16 to FP32 precision, since in case of RandomUniform operation + // the data type matters + return {}; + } + +private: + std::pair min_max_values; +}; + + +TEST_P(RandomUnifromDynamicGPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); +} + +namespace { +std::map emptyAdditionalConfig; +const std::vector> dynInputShapes = { + { + {{ov::PartialShape::dynamic(4)}, {{1, 2, 3, 4}, {1, 1, 5, 5}, {2, 3, 4, 5}}}, + {{1}, {{1}}}, + {{1}, {{1}}} + }, + { + {{ov::PartialShape::dynamic(3)}, {{1, 2, 3}, {1, 1, 5}, {2, 3, 4}}}, + {{1}, {{1}}}, + {{1}, {{1}}} + }, + { + {{ov::PartialShape::dynamic(2)}, {{1, 2}, {1, 1}, {2, 3}}}, + {{1}, {{1}}}, + {{1}, {{1}}} + }, + { + {{ov::PartialShape::dynamic(1)}, {{1}, {2}, {3}}}, + {{1}, {{1}}}, + {{1}, {{1}}} + }, +}; + +const std::vector> min_max_values = { + {10, 30}, +}; + +const std::vector> seeds = { + {100, 10}, +}; + +const std::vector netPrecisions = { + ElementType::i32, + ElementType::f32, + ElementType::f16, +}; + +const auto testParams_smoke = ::testing::Combine(::testing::ValuesIn(dynInputShapes), + ::testing::ValuesIn(min_max_values), + ::testing::ValuesIn(seeds), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(emptyAdditionalConfig)); + +INSTANTIATE_TEST_SUITE_P(smoke_dynamic_random_uniform, RandomUnifromDynamicGPUTest, + testParams_smoke, RandomUnifromDynamicGPUTest::getTestCaseName); + +} // namespace +} // namespace GPULayerTestsDefinitions diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/space_to_batch.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/space_to_batch.cpp new file mode 100644 index 00000000000000..4a5ac5191e4619 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/space_to_batch.cpp @@ -0,0 +1,228 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/space_to_batch.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph_functions/builders.hpp" +#include "common_test_utils/test_constants.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +using namespace InferenceEngine; +using namespace ov::test; + +namespace GPULayerTestsDefinitions { + +struct SpaceToBatchParams { + std::vector block; + std::vector begin; + std::vector end; +}; + +typedef 
std::tuple< + InputShape, // Input shapes + SpaceToBatchParams, + ElementType, // Element type + ngraph::helpers::InputLayerType, // block/begin/end input type + std::map // Additional network configuration +> SpaceToBatchParamsLayerParamSet; + +class SpaceToBatchLayerGPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + InputShape shapes; + SpaceToBatchParams params; + ElementType elementType; + ngraph::helpers::InputLayerType restInputType; + TargetDevice targetDevice; + std::map additionalConfig; + std::tie(shapes, params, elementType, restInputType, additionalConfig) = obj.param; + + std::ostringstream results; + results << "IS=" << ov::test::utils::partialShape2str({shapes.first}) << "_"; + results << "TS="; + for (const auto& item : shapes.second) { + results << ov::test::utils::vec2str(item) << "_"; + } + results << "netPRC=" << elementType << "_"; + results << "block=" << ov::test::utils::vec2str(params.block) << "_"; + results << "begin=" << ov::test::utils::vec2str(params.begin) << "_"; + results << "end=" << ov::test::utils::vec2str(params.end) << "_"; + results << "restInputType=" << restInputType << "_"; + results << "config=("; + for (const auto& configEntry : additionalConfig) { + results << configEntry.first << ", " << configEntry.second << ":"; + } + results << ")"; + + return results.str(); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + if (i == 1) { + tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < block.size(); i++) { + dataPtr[i] = static_cast(block[i]); + } + } else if (i == 2) { + tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < begin.size(); i++) { + dataPtr[i] = static_cast(begin[i]); + } + } else if (i == 3) { + tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < end.size(); i++) { + dataPtr[i] = static_cast(end[i]); + } + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + inferRequestNum++; + } + +protected: + std::vector block; + std::vector begin; + std::vector end; + size_t inferRequestNum = 0; + + void SetUp() override { + InputShape shapes; + SpaceToBatchParams ssParams; + ngraph::helpers::InputLayerType restInputType; + std::map additionalConfig; + std::tie(shapes, ssParams, inType, restInputType, additionalConfig) = this->GetParam(); + + block = ssParams.block; + begin = ssParams.begin; + end = ssParams.end; + + targetDevice = ov::test::utils::DEVICE_GPU; + + std::vector inputShapes; + inputShapes.push_back(shapes); + if (restInputType == ngraph::helpers::InputLayerType::PARAMETER) { + inputShapes.push_back(InputShape({static_cast(block.size())}, std::vector(shapes.second.size(), {block.size()}))); + inputShapes.push_back(InputShape({static_cast(begin.size())}, std::vector(shapes.second.size(), {begin.size()}))); + inputShapes.push_back(InputShape({static_cast(end.size())}, std::vector(shapes.second.size(), {end.size()}))); + } + + 
init_input_shapes(inputShapes); + + auto params = ngraph::builder::makeDynamicParams(inType, {inputDynamicShapes.front()}); + std::shared_ptr blockInput, beginInput, endInput; + if (restInputType == ngraph::helpers::InputLayerType::PARAMETER) { + auto blockNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{block.size()}); + auto beginNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()}); + auto endNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()}); + + params.push_back(blockNode); + params.push_back(beginNode); + params.push_back(endNode); + + blockInput = blockNode; + beginInput = beginNode; + endInput = endNode; + } else { + blockInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{block.size()}, block); + beginInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()}, begin); + endInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()}, end); + } + auto ss = std::make_shared(params[0], blockInput, beginInput, endInput); + + ngraph::ResultVector results; + for (size_t i = 0; i < ss->get_output_size(); i++) { + results.push_back(std::make_shared(ss->output(i))); + } + + function = std::make_shared(results, params, "SpaceToBatchFuncTest"); + } +}; + +TEST_P(SpaceToBatchLayerGPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + run(); +} + +namespace { + +std::map emptyAdditionalConfig; + +const std::vector inputPrecisions = { + ElementType::f32 +}; + +const std::vector restInputTypes = { + ngraph::helpers::InputLayerType::CONSTANT, + ngraph::helpers::InputLayerType::PARAMETER +}; + +const std::vector inputShapesDynamic3D = { + {{-1, -1, -1}, {{2, 3, 6}}}, +}; + +const std::vector paramsPlain3D = { + SpaceToBatchParams{ { 1, 2, 3 }, { 0, 2, 2 }, { 0, 3, 1 } }, + SpaceToBatchParams{ { 1, 4, 5 }, { 0, 4, 5 }, { 0, 9, 4 } }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Dynamic3D, SpaceToBatchLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapesDynamic3D), + ::testing::ValuesIn(paramsPlain3D), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(restInputTypes), + ::testing::Values(emptyAdditionalConfig)), + SpaceToBatchLayerGPUTest::getTestCaseName); + + +const std::vector inputShapesDynamic4D = { + {{-1, -1, -1, -1}, {{2, 3, 6, 5}}}, +}; + +const std::vector paramsPlain4D = { + SpaceToBatchParams{ { 1, 1, 2, 3 }, { 0, 2, 2, 2 }, { 0, 3, 4, 5 } }, + SpaceToBatchParams{ { 1, 1, 4, 5 }, { 0, 2, 4, 5 }, { 0, 3, 2, 5 } }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Dynamic4D, SpaceToBatchLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapesDynamic4D), + ::testing::ValuesIn(paramsPlain4D), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(restInputTypes), + ::testing::Values(emptyAdditionalConfig)), + SpaceToBatchLayerGPUTest::getTestCaseName); + +const std::vector inputShapesDynamic5D = { + {{-1, -1, -1, -1, -1}, {{2, 3, 6, 5, 7}}}, +}; + +const std::vector paramsPlain5D = { + SpaceToBatchParams{ { 1, 1, 2, 3, 7 }, { 0, 2, 2, 2, 4 }, { 0, 3, 4, 5, 3 } }, + SpaceToBatchParams{ { 1, 1, 4, 5, 8 }, { 0, 2, 4, 5, 5 }, { 0, 3, 2, 5, 4 } }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Dynamic5D, SpaceToBatchLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapesDynamic5D), + ::testing::ValuesIn(paramsPlain5D), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(restInputTypes), + ::testing::Values(emptyAdditionalConfig)), + SpaceToBatchLayerGPUTest::getTestCaseName); + 
+} // namespace +} // namespace GPULayerTestsDefinitions diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/split.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/split.cpp index 67ada972290441..2e6274503c2b21 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/split.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/split.cpp @@ -6,6 +6,7 @@ #include "shared_test_classes/base/ov_subgraph.hpp" #include "ie_precision.hpp" #include "ngraph_functions/builders.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" #include using namespace ngraph; @@ -128,10 +129,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_SplitsCheck6D, SplitLayerGPUDynamicTest, SplitLayerGPUDynamicTest::getTestCaseName); typedef std::tuple< - int64_t, // Axis - std::vector, // SplitLength - ElementType, // Net precision - InputShape // Input shapes + int64_t, // Axis + std::vector, // SplitLength + ElementType, // Net precision + InputShape, // Input shapes + ngraph::helpers::InputLayerType // input type of splitLength > varSplitDynamicGPUTestParams; class VariadicSplitLayerGPUDynamicTest : public testing::WithParamInterface, @@ -143,7 +145,8 @@ class VariadicSplitLayerGPUDynamicTest : public testing::WithParamInterface splitLength; ElementType netPrecision; InputShape inputShape; - std::tie(axis, splitLength, netPrecision, inputShape) = obj.param; + ngraph::helpers::InputLayerType inputType; + std::tie(axis, splitLength, netPrecision, inputShape, inputType) = obj.param; result << "IS="; result << ov::test::utils::partialShape2str({inputShape.first}) << "_"; @@ -154,24 +157,66 @@ class VariadicSplitLayerGPUDynamicTest : public testing::WithParamInterface& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + if (i == 1) { + tensor = ov::Tensor(ov::element::i64, targetInputStaticShapes[i]); + auto *dataPtr = tensor.data::value_type>(); + for (size_t i = 0; i < splitLength_vec.size(); i++) { + dataPtr[i] = splitLength_vec[i]; + } + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + inferRequestNum++; + } + protected: + std::vector splitLength_vec; + size_t inferRequestNum = 0; + ElementType netPrecision; + void SetUp() override { targetDevice = ov::test::utils::DEVICE_GPU; int64_t axis; InputShape inputShape; std::vector splitLength; - ElementType netPrecision; - std::tie(axis, splitLength, netPrecision, inputShape) = this->GetParam(); - init_input_shapes({inputShape}); + ngraph::helpers::InputLayerType inputType; + std::tie(axis, splitLength, netPrecision, inputShape, inputType) = this->GetParam(); + + splitLength_vec = splitLength; + + std::vector inputShapes; + inputShapes.push_back(inputShape); + if (inputType == ngraph::helpers::InputLayerType::PARAMETER) { + inputShapes.push_back(InputShape({static_cast(splitLength.size())}, + std::vector(inputShape.second.size(), {splitLength.size()}))); + } + init_input_shapes(inputShapes); + auto dyn_params = ngraph::builder::makeDynamicParams(netPrecision, {inputDynamicShapes[0]}); - auto paramOuts = - ngraph::helpers::convert2OutputVector(helpers::castOps2Nodes(dyn_params)); + auto paramOuts = ngraph::helpers::convert2OutputVector(helpers::castOps2Nodes(dyn_params)); auto splitAxisOp = 
std::make_shared(ngraph::element::i64, ngraph::Shape{}, std::vector{static_cast(axis)}); - auto splitLengthOp = std::make_shared(ngraph::element::i32, ngraph::Shape{splitLength.size()}, splitLength); + + std::shared_ptr splitLengthOp; + if (inputType == ngraph::helpers::InputLayerType::PARAMETER) { + auto splitLengthNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{splitLength.size()}); + dyn_params.push_back(splitLengthNode); + splitLengthOp = splitLengthNode; + } else { + splitLengthOp = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{splitLength.size()}, splitLength); + } auto varSplit = std::make_shared(paramOuts[0], splitAxisOp, splitLengthOp); ngraph::ResultVector results; @@ -187,12 +232,18 @@ TEST_P(VariadicSplitLayerGPUDynamicTest, CompareWithRefs) { run(); } +const std::vector restInputTypes = { + ngraph::helpers::InputLayerType::CONSTANT, + ngraph::helpers::InputLayerType::PARAMETER +}; + INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplitsCheck4D, VariadicSplitLayerGPUDynamicTest, ::testing::Combine( ::testing::Values(1), // axes ::testing::Values(std::vector{2, 1, -1}), // splitLength ::testing::Values(ElementType::f16), // netPrec - ::testing::ValuesIn(inputShapes4d)), // inShapes + ::testing::ValuesIn(inputShapes4d), // inShapes + ::testing::ValuesIn(restInputTypes)), // input type of splitLength VariadicSplitLayerGPUDynamicTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplitsCheck5D, VariadicSplitLayerGPUDynamicTest, @@ -200,7 +251,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplitsCheck5D, VariadicSplitLayerGPUDynam ::testing::Values(2), // axes ::testing::Values(std::vector{2, -1}), // splitLength ::testing::Values(ElementType::f32), // netPrec - ::testing::ValuesIn(inputShapes5d)), // inShapes + ::testing::ValuesIn(inputShapes5d), // inShapes + ::testing::ValuesIn(restInputTypes)), // input type of splitLength VariadicSplitLayerGPUDynamicTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplitsCheck6D, VariadicSplitLayerGPUDynamicTest, @@ -208,7 +260,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplitsCheck6D, VariadicSplitLayerGPUDynam ::testing::Values(5), // nSplits ::testing::Values(std::vector{2, 3, 2, -1}), // splitLength ::testing::Values(ElementType::i8), // netPrec - ::testing::ValuesIn(inputShapes6d)), // inShapes + ::testing::ValuesIn(inputShapes6d), // inShapes + ::testing::ValuesIn(restInputTypes)), // input type of splitLength VariadicSplitLayerGPUDynamicTest::getTestCaseName); } // namespace GPULayerTestsDefinitions diff --git a/src/plugins/intel_gpu/tests/unit/dynamic_execution/is_valid_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/dynamic_execution/is_valid_fusion_test.cpp index 2f585e1b4f227b..eb7ff40c391762 100644 --- a/src/plugins/intel_gpu/tests/unit/dynamic_execution/is_valid_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/dynamic_execution/is_valid_fusion_test.cpp @@ -21,7 +21,7 @@ TEST(eltwise_activation_fusing_test, basic_dynamic_rank4) { // is_valid_fusion() should work properly when conv->add->prelu case auto& engine = get_test_engine(); - layout weight_layout = layout{ov::PartialShape{1, 3, 3, 3}, data_types::f32, format::bfyx}; + layout weight_layout = layout{ov::PartialShape{1, 3, 3, 3}, data_types::f16, format::bfyx}; auto weights = engine.allocate_memory(weight_layout); set_values(weights, { 1.0f, 1.0f, 1.0f, @@ -42,11 +42,11 @@ TEST(eltwise_activation_fusing_test, basic_dynamic_rank4) { set_values(input_mem, {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 
11.0f, 11.0f}); - std::vector ref = { 33.0625f, 55.09375f, 55.09375f, 33.0625f, - 55.09375f, 99.1875f, 429.75f, 385.75f, - 385.75f, 760.5f, 1091.0f, 716.5f, - 363.75f, 716.5f, 716.5f, 363.75f}; - + std::vector ref = { 77.0f, 143.0f, 143.0f, 77.0f, + 143.0f, 275.0f, 275.0f, 143.0f, + 143.0f, 275.0f, 275.0f, 143.0f, + 77.0f, 143.0f, 143.0f, 77.0f }; + auto const1 = engine.allocate_memory(layout{ov::PartialShape({1, 1, 1, 1}), data_types::f32, format::bfyx}); set_values(const1, {11.0f}); auto const2 = engine.allocate_memory(layout{ov::PartialShape({1, 1, 1, 1}), data_types::f32, format::bfyx}); diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/kernel_impl_params_relevance_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/kernel_impl_params_relevance_test.cpp index abfbabce9c9736..34cc90d791a756 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/kernel_impl_params_relevance_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/kernel_impl_params_relevance_test.cpp @@ -44,7 +44,7 @@ TEST(kernel_impl_params_relevance, weights_layout) { network.set_input_data("input", actual_input_data); // 2. Force reference `fully_connected_gpu_bfyx_ref` kernel impl before execution, - // so during _node->type()->choose_impl(*_node, updated_params); call for static kernel vesrion reference + // so during _node->type()->choose_impl(*_node, updated_params); call for static kernel version reference // impl will be used. Call execute() to trigger desired kernel compilation auto fc_ref_impl = ov::intel_gpu::ImplementationDesc(format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl); auto force_impl_prop = ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc", fc_ref_impl} }); @@ -52,8 +52,8 @@ TEST(kernel_impl_params_relevance, weights_layout) { network.execute(); - // 3. WA: Call cancel() to wait for all queued kernels compilation finish (including above `fully_connected_gpu_bfyx_ref`) - network.get_program()->get_compilation_context().cancel(); + // 3. WA: Call wait_all() to wait for all queued kernels compilation finish (including above `fully_connected_gpu_bfyx_ref`) + network.get_program()->get_compilation_context().wait_all(); // 4. 
Call execute() second time with same input shape to use pre-compiled `fully_connected_gpu_bfyx_ref` kernel network.execute(); diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp index 7f40e9976e5f1e..844e09a0eeb89b 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp @@ -259,3 +259,85 @@ INSTANTIATE_TEST_SUITE_P(cldnn_usm, fill_buffer, ::testing::ValuesIn(std::vector // usm_test_params{ allocation_type::usm_shared }, // Unsupported usm_test_params{ allocation_type::usm_device }, })); + + +class copy_between_gpu_buffer_and_gpu_usm : public BaseUSMTest {}; +TEST_P(copy_between_gpu_buffer_and_gpu_usm, basic) { + auto p = GetParam(); + if (!supports_usm()) { + return; + } + try { + ocl::ocl_stream stream(*_engine, {}); + + size_t values_count = 100; + size_t values_bytes_count = values_count * sizeof(float); + std::vector src_buffer(values_count); + std::iota(src_buffer.begin(), src_buffer.end(), 0.0f); + + cldnn::layout linear_layout = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 1, int32_t(values_count), 1)); + auto usm_host_src = _engine->allocate_memory(linear_layout, allocation_type::usm_host); + + // Fill usm_host_src memory. + cldnn::mem_lock lock(usm_host_src, stream); + std::copy(src_buffer.begin(), src_buffer.end(), lock.data()); + + // Create dst memory + auto mem_dst = _engine->allocate_memory(linear_layout, p.type); + + // Fill dst memory + switch (p.type) { + case allocation_type::usm_host: + case allocation_type::usm_shared: + case allocation_type::usm_device: + { + auto casted = std::dynamic_pointer_cast(mem_dst); + auto ev = casted->copy_from(stream, *usm_host_src, true); + ev->wait(); + break; + } + case allocation_type::cl_mem: { + auto casted = std::dynamic_pointer_cast(mem_dst); + auto ev = casted->copy_from(stream, *usm_host_src, true); + ev->wait(); + break; + } + default: + FAIL() << "Not supported allocation type!"; + } + + // Read from src buffer + std::vector dst_buffer(values_count); + switch (p.type) { + case allocation_type::usm_host: + case allocation_type::usm_shared: { + cldnn::mem_lock lock(usm_host_src, stream); + std::memcpy(dst_buffer.data(), lock.data(), values_bytes_count); + break; + } + case allocation_type::usm_device: + case allocation_type::cl_mem: { + auto host_buf = _engine->allocate_memory(linear_layout, allocation_type::usm_host); + host_buf->copy_from(stream, *mem_dst); + { + cldnn::mem_lock lock(host_buf, stream); + std::memcpy(dst_buffer.data(), lock.data(), values_bytes_count); + } + break; + } + default: + FAIL() << "Not supported allocation type!"; + } + bool are_equal = std::equal(src_buffer.begin(), src_buffer.begin() + 100, dst_buffer.begin()); + ASSERT_EQ(true, are_equal); + } catch (const char* msg) { + FAIL() << msg; + } + +} + +INSTANTIATE_TEST_SUITE_P(cldnn_usm, copy_between_gpu_buffer_and_gpu_usm, ::testing::ValuesIn(std::vector{ + usm_test_params{ allocation_type::cl_mem }, + usm_test_params{ allocation_type::usm_host }, + usm_test_params{ allocation_type::usm_device }, +})); \ No newline at end of file diff --git a/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp index efa32920b4add9..cb65717800d681 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp +++ 
b/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp @@ -17,6 +17,8 @@ #include "permute_inst.h" #include "reshape_inst.h" #include "shape_of_inst.h" +#include "convolution_inst.h" +#include "dft_inst.h" #include "to_string_utils.h" #include "program_wrapper.h" @@ -66,6 +68,85 @@ TEST(add_required_reorders, input_reorder_inside_shape_of_subgraph) { ASSERT_EQ(eltwise_in_layout.data_type, data_types::f32); } +TEST(add_required_reorders, eltwise_input_reorder) { + // Topology: + // (bfyx)input weights + // \ / + // conv1 irdft_input(bfzyx) + // / | \ / + // (bfzyx)rdft | irdft(bfyx) + // | / + // eltwise + // | + // conv2 + // + // Expectation: + // The selected format of eltwise should be selected as bfzyx (reorder_inputs) + // A new reorder that converts from b_fs_yx_fsv16 to bfzyx should be inserted after convolution (reorder_inputs) + // If the input format of eltwise is different from the output format, reorder(bfyx->bfzyx) should be added (add_required_reorders) + + auto& engine = get_test_engine(); + + auto conv1_weight_mem = engine.allocate_memory(layout{ { 192, 384, 1, 1 }, data_types::f16, format::bfyx }); + auto conv2_weight_mem = engine.allocate_memory(layout{ { 384, 192, 1, 1 }, data_types::f16, format::bfyx }); + + topology topology; + topology.add(data("conv1_weights", conv1_weight_mem)); + topology.add(data("conv2_weights", conv2_weight_mem)); + topology.add(input_layout("input", layout{ { 1, 384, 36, 64 }, data_types::f16, format::bfyx })); + topology.add(input_layout("irdft_input", layout{ { 1, 192, 36, 33, 2 }, data_types::f16, format::bfzyx })); + topology.add(convolution("conv1", input_info("input"), "conv1_weights", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + topology.add(dft("rdft", input_info("conv1"), {2, 3}, {36, 64}, {1, 192, 36, 33, 2}, dft_direction::forward, dft_mode::real)); + topology.add(dft("irdft", input_info("irdft_input"), {2, 3}, {36, 64}, {1, 192, 36, 64}, dft_direction::inverse, dft_mode::real)); + topology.add(eltwise("eltwise", input_info("conv1"), input_info("irdft"), eltwise_mode::sum)); + topology.add(convolution("conv2", input_info("eltwise"), "conv2_weights", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + + ExecutionConfig config = get_test_default_config(engine); + ov::intel_gpu::ImplementationDesc conv1_impl = { format::b_fs_yx_fsv16, "" }; + ov::intel_gpu::ImplementationDesc conv2_impl = { format::b_fs_yx_fsv16, "" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv1", conv1_impl }, { "conv2", conv2_impl } })); + config.set_property(ov::intel_gpu::optimize_data(true)); + + program::ptr prog = nullptr; + ASSERT_NO_THROW(prog = program::build_program(engine, topology, config)); + ASSERT_NE(prog, nullptr); + auto prog_impl = prog.get(); + auto& eltwise_node = prog_impl->get_node("eltwise"); + ASSERT_EQ(eltwise_node.get_input_layouts()[1].format, format::bfzyx); + ASSERT_EQ(eltwise_node.get_output_layout().format, format::bfzyx); +} + +TEST(add_required_reorders, prevent_input_dt_changing_for_convs) { + auto& engine = get_test_engine(); + + int input_b = 1, input_f = 16, input_y = 3, input_x = 3; + int output_b = input_b, output_f = 16, output_y = 6, output_x = 6; + + auto input_mem = engine.allocate_memory({ {input_b, input_f, input_y, input_x}, data_types::u8, format::bs_fs_yx_bsv16_fsv32 }); + auto input2_mem = engine.allocate_memory({ {input_b, input_f, input_y, input_x}, data_types::u8, format::bs_fs_yx_bsv16_fsv32 }); + auto weights_mem = engine.allocate_memory({ 
{16, 16, 1, 1}, data_types::i8, format::bfyx }); + + auto input = input_layout("input", input_mem->get_layout()); + auto input_const = data("input_const", input2_mem); + auto weights = data("weights", weights_mem); + auto eltwise1 = eltwise("eltwise1", input_info("input"), input_info("input_const"), eltwise_mode::sum); + auto conv1 = convolution("conv1", input_info("eltwise1"), "weights", "", 1, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 2, 2 }, false); + auto output_reorder = reorder("reorder", input_info("conv1"), { data_types::f32, format::bfyx, { output_b, output_f, output_y, output_x } }); + + topology topology_test(input, input_const, eltwise1, weights, conv1, output_reorder); + + ExecutionConfig config_test = get_test_default_config(engine); + ov::intel_gpu::ImplementationDesc conv1_impl_test = { format::bfyx, "", impl_types::ocl }; + config_test.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv1", conv1_impl_test } })); + + auto prog = program::build_program(engine, topology_test, config_test, false, true); + program_wrapper::apply_opt_pass(*prog); + + ASSERT_NE(prog, nullptr); + ASSERT_TRUE(prog->has_node("conv1")); + ASSERT_EQ(prog->get_node("conv1").get_input_layout(0).data_type, data_types::u8); +} + TEST(add_required_reorders, skip_adding_reorder_batch_axis_padding) { auto& engine = get_test_engine(); diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp index eddfb3961770fc..b5b30083ce792b 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp @@ -3,6 +3,7 @@ // #include "test_utils.h" +#include "random_generator.hpp" #include "intel_gpu/runtime/engine.hpp" @@ -22,6 +23,8 @@ #include "intel_gpu/graph/network.hpp" #include "pass_manager.h" #include "to_string_utils.h" +#include "resample_inst.h" +#include "openvino/op/interpolate.hpp" #include "program_wrapper.h" @@ -852,6 +855,37 @@ TEST(prepare_buffer_fusing, test_implicit_crop_and_outerpadding_deconv) { ASSERT_EQ(crop_prim->can_be_optimized(), true); } +TEST(prepare_buffer_fusing, test_checking_padding_supported) { + auto& engine = get_test_engine(); + auto in_layout1 = layout{ ov::PartialShape{2, 36, 57, 57}, data_types::f16, format::fs_b_yx_fsv32}; + auto in_layout2 = layout{ ov::PartialShape{2, 72, 57, 57}, data_types::f16, format::fs_b_yx_fsv32}; + auto in_layout3 = layout{ ov::PartialShape{2, 144, 57, 57}, data_types::f16, format::fs_b_yx_fsv32}; + + auto padding1 = padding({0,18,1,1}, {0,0,0,0}); + auto padding2 = padding({0,0,0,0}, {0,0,0,0}); + auto padding3 = padding({0,0,0,0}, {0,0,0,0}); + + topology topology( + input_layout("input1", in_layout1), + input_layout("input2", in_layout2), + input_layout("input3", in_layout3), + resample("interp1", input_info("input1"), in_layout1.get_tensor(), 1, ov::op::v4::Interpolate::InterpolateMode::NEAREST, padding1), + resample("interp2", input_info("input2"), in_layout2.get_tensor(), 1, ov::op::v4::Interpolate::InterpolateMode::NEAREST, padding2), + resample("interp3", input_info("input3"), in_layout3.get_tensor(), 1, ov::op::v4::Interpolate::InterpolateMode::NEAREST, padding3), + concatenation("concat", {input_info("interp1"), input_info("interp2"), input_info("interp3")}, 1), + reorder("reorder", input_info("concat"), format::fs_b_yx_fsv32, data_types::f16)); + + ExecutionConfig config = get_test_default_config(engine); + 
config.set_property(ov::intel_gpu::optimize_data(true)); + + auto program = program::build_program(engine, topology, config, false, true); + program_wrapper::apply_opt_pass(*program); + ASSERT_NE(program, nullptr); + + auto& concat = program->get_node("concat"); + ASSERT_EQ(concat.can_be_optimized(), false); +} + #ifdef ENABLE_ONEDNN_FOR_GPU TEST(prepare_buffer_fusing, in_place_onednn_concat_static) { auto& engine = get_test_engine(); diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/pad_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/pad_si_test.cpp index 8a507ee45e1c7e..4d931f6cdc97b0 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/pad_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/pad_si_test.cpp @@ -177,21 +177,28 @@ INSTANTIATE_TEST_SUITE_P(smoke, pad_test_non_constant_input_begin, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {}, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7}, ov::op::PadMode::CONSTANT, 1.f, - layout{ov::PartialShape{3, 4, 36, 48}, data_types::f32, format::bfyx} + layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx} }, { layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {}, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7}, ov::op::PadMode::CONSTANT, 1.f, - layout{ov::PartialShape{{2, -1}, {1, -1}, {4, -1}, {8, -1}}, data_types::f32, format::bfyx} + layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx} + }, + { + layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}, + layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {}, + layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7}, + ov::op::PadMode::EDGE, 1.f, + layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx} }, { layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx}, layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {}, layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {1, 0}, ov::op::PadMode::CONSTANT, 1.f, - layout{ov::PartialShape{{2, -1}, {1, -1}}, data_types::f32, format::bfyx} + layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx} } })); @@ -234,21 +241,21 @@ INSTANTIATE_TEST_SUITE_P(smoke, pad_test_non_constant_input_end, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7}, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {}, ov::op::PadMode::CONSTANT, 1.f, - layout{ov::PartialShape{3, 4, 36, 48}, data_types::f32, format::bfyx} + layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx} }, { layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7}, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {}, ov::op::PadMode::CONSTANT, 1.f, - layout{ov::PartialShape{{2, -1}, {1, -1}, {4, -1}, {8, -1}}, data_types::f32, format::bfyx} + layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx} }, { layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx}, layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {1, 0}, layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {}, ov::op::PadMode::CONSTANT, 1.f, - layout{ov::PartialShape{{2, -1}, {1, -1}}, data_types::f32, format::bfyx} + layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx} } })); @@ -294,21 +301,92 @@ INSTANTIATE_TEST_SUITE_P(smoke, 
pad_test_non_constant_input_begin_end, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {}, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {}, ov::op::PadMode::CONSTANT, 1.f, - layout{ov::PartialShape{3, 5, 34, 42}, data_types::f32, format::bfyx} + layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx} }, { layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {}, layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {}, - ov::op::PadMode::CONSTANT, 1.f, - layout{ov::PartialShape{{2, -1}, {2, -1}, {2, -1}, {2, -1}}, data_types::f32, format::bfyx} + ov::op::PadMode::EDGE, 1.f, + layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx} }, { layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx}, layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {}, layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {}, ov::op::PadMode::CONSTANT, 1.f, - layout{ov::PartialShape{{2, -1}, {2, -1}}, data_types::f32, format::bfyx} + layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx} + } + })); + +class pad_test_non_constant_input_begin_end_with_data : public testing::TestWithParam { }; + +TEST_P(pad_test_non_constant_input_begin_end_with_data, shape_infer) { + auto p = GetParam(); + + auto& engine = get_test_engine(); + + auto input0_prim = std::make_shared("input0", p.in_layout); + auto input1_prim = std::make_shared("input1", p.pads_begin_layout); + auto input2_prim = std::make_shared("input2", p.pads_end_layout); + + auto border_prim = std::make_shared("output", + std::vector({input_info("input0"), input_info("input1"), input_info("input2")}), + border::PAD_NON_CONST_INPUT::BEGIN | border::PAD_NON_CONST_INPUT::END, + p.pads_begin_data, + p.pads_end_data, + p.pad_mode, + p.pad_value); + cldnn::program prog(engine); + + auto& input0_node = prog.get_or_create(input0_prim); + auto& input1_node = prog.get_or_create(input1_prim); + auto& input2_node = prog.get_or_create(input2_prim); + auto& border_node = prog.get_or_create(border_prim); + + program_wrapper::add_connection(prog, input0_node, border_node); + program_wrapper::add_connection(prog, input1_node, border_node); + program_wrapper::add_connection(prog, input2_node, border_node); + + auto begin_mem = engine.allocate_memory(p.pads_begin_layout); + auto end_mem = engine.allocate_memory(p.pads_end_layout); + + set_values(begin_mem, p.pads_begin_data); + set_values(end_mem, p.pads_end_data); + auto impl_params = border_node.get_kernel_impl_params(); + impl_params->memory_deps = { + {1, begin_mem}, + {2, end_mem} + }; + + auto res = border_inst::calc_output_layouts(border_node, *impl_params); + + ASSERT_EQ(res.size(), 1); + ASSERT_EQ(res[0], p.expected_layout); +} + +INSTANTIATE_TEST_SUITE_P(smoke, pad_test_non_constant_input_begin_end_with_data, + testing::ValuesIn(std::vector{ + { + layout{ov::PartialShape{1, 3, 32, 40}, data_types::f32, format::bfyx}, + layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 2, 3, 4}, + layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {3, 2, 1, 0}, + ov::op::PadMode::CONSTANT, 1.f, + layout{ov::PartialShape{5, 7, 36, 44}, data_types::f32, format::bfyx} + }, + { + layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}, + layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 2, 3, 4}, + layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {3, 2, 1, 0}, + ov::op::PadMode::EDGE, 1.f, + 
layout{ov::PartialShape{{4, -1}, {4, -1}, {4, -1}, {4, -1}}, data_types::f32, format::bfyx} + }, + { + layout{ov::PartialShape{10, 20}, data_types::f32, format::bfyx}, + layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {1, 2}, + layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {3, 4}, + ov::op::PadMode::EDGE, 1.f, + layout{ov::PartialShape{14, 26}, data_types::f32, format::bfyx} } })); diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/random_uniform_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/random_uniform_si_test.cpp new file mode 100644 index 00000000000000..36a793b7ba3365 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/random_uniform_si_test.cpp @@ -0,0 +1,143 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" + +#include +#include +#include + +#include "random_uniform_inst.h" + +#include "program_wrapper.h" + +using namespace cldnn; +using namespace ::tests; + +namespace shape_infer_tests { + +struct random_uniform_si_test_params { + ov::PartialShape expected_out_pshape; + data_types out_data_type; + std::pair min_max_vals; +}; + +class random_uniform_si_test : public testing::TestWithParam { }; + +TEST_P(random_uniform_si_test, shape_infer) { + auto p = GetParam(); + auto& engine = get_test_engine(); + + cldnn::program prog(engine); + std::vector> input_prims; + std::vector input_prim_ids; + std::vector input_layouts; + const size_t num_inputs = 3; + + for (size_t idx = 0; idx < num_inputs; idx++) { + auto in_layout = layout{{1}, p.out_data_type, format::bfyx}; + if (idx == 0) { + auto input_pshape = ov::PartialShape{static_cast(p.expected_out_pshape.size())}; + in_layout = layout{input_pshape, data_types::i64, format::bfyx}; + } + input_layouts.push_back(in_layout); + + auto prim_id = "input_" + std::to_string(idx); + auto const_data_prim = std::make_shared(prim_id, in_layout); + input_prims.push_back(const_data_prim); + input_prim_ids.push_back(input_info(prim_id)); + } + + auto random_uniform_prim = std::make_shared("random_uniform", input_prim_ids, p.out_data_type, 0, 0); + auto& random_uniform_node = prog.get_or_create(random_uniform_prim); + + for (auto& iprim : input_prims) { + auto& input_node = prog.get_or_create(iprim); + program_wrapper::add_connection(prog, input_node, random_uniform_node); + } + + auto params = random_uniform_node.get_kernel_impl_params(); + params->memory_deps.clear(); + auto get_mem = [&](size_t idx, float val) -> memory::ptr { + auto in_layout = input_layouts[idx]; + auto allocated_mem = engine.allocate_memory(in_layout); + switch (p.out_data_type) { + case data_types::f16: + set_values(allocated_mem, {float_to_half(val)}); + break; + case data_types::f32: + set_values(allocated_mem, {static_cast::type>(val)}); + break; + case data_types::i32: + set_values(allocated_mem, {static_cast::type>(val)}); + break; + case data_types::i64: + set_values(allocated_mem, {static_cast::type>(val)}); + break; + case data_types::i8: + set_values(allocated_mem, {static_cast::type>(val)}); + break; + case data_types::u8: + set_values(allocated_mem, {static_cast::type>(val)}); + break; + case data_types::bin: + default: + break; + } + return allocated_mem; + }; + + if (p.expected_out_pshape.is_static()) { + auto input_mem = engine.allocate_memory(input_layouts[0]); + set_values(input_mem, p.expected_out_pshape.get_shape()); + params->memory_deps.emplace(0, input_mem); + } + + params->memory_deps.emplace(1, get_mem(1, p.min_max_vals.first)); + 
params->memory_deps.emplace(2, get_mem(2, p.min_max_vals.second)); + + if (p.min_max_vals.first < p.min_max_vals.second) { + auto res = random_uniform_inst::calc_output_layouts(random_uniform_node, *params); + + auto expected_out_layout = layout{p.expected_out_pshape, p.out_data_type, format::get_default_format(p.expected_out_pshape.size())}; + ASSERT_EQ(res.size(), 1); + ASSERT_EQ(res[0], expected_out_layout); + } else { + ASSERT_ANY_THROW(random_uniform_inst::calc_output_layouts(random_uniform_node, *params)); + } +} + +INSTANTIATE_TEST_SUITE_P(smoke, random_uniform_si_test, + testing::ValuesIn(std::vector{ + {ov::PartialShape{2}, data_types::i32, {0, 10}}, + {ov::PartialShape{2}, data_types::i8, {0, 10}}, + {ov::PartialShape{2}, data_types::u8, {0, 10}}, + {ov::PartialShape{2}, data_types::i64, {0, 10}}, + {ov::PartialShape{2}, data_types::i32, {0, 10}}, + {ov::PartialShape{2}, data_types::f32, {0, 10}}, + {ov::PartialShape{2}, data_types::f16, {0, 10}}, + {ov::PartialShape{2,4}, data_types::i32, {0, 10}}, + {ov::PartialShape{2,4}, data_types::f32, {0, 10}}, + {ov::PartialShape{2,4,3}, data_types::i32, {0, 10}}, + {ov::PartialShape{2,4,3}, data_types::f32, {0, 10}}, + {ov::PartialShape{2,4,3,2}, data_types::i32, {0, 10}}, + {ov::PartialShape{2,4,3,2}, data_types::f32, {0, 10}}, + {ov::PartialShape{2,4,3,1,2}, data_types::i32, {0, 10}}, + {ov::PartialShape{2,4,3,1,2}, data_types::f32, {0, 10}}, + + // Dynamic output shape + {ov::PartialShape::dynamic(1), data_types::f32, {0, 10}}, + {ov::PartialShape::dynamic(2), data_types::f32, {0, 10}}, + {ov::PartialShape::dynamic(3), data_types::f32, {0, 10}}, + {ov::PartialShape::dynamic(4), data_types::f32, {0, 10}}, + {ov::PartialShape::dynamic(5), data_types::f32, {0, 10}}, + + // Incorrect min/max values + {ov::PartialShape{2}, data_types::i32, {20, 20}}, + {ov::PartialShape{2,4,3,1,2}, data_types::i32, {20, 10}}, + {ov::PartialShape::dynamic(1), data_types::f32, {20, 20}}, + {ov::PartialShape::dynamic(5), data_types::f32, {20, 10}}, + })); + +}; // shape_infer_tests diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 37fad4e00b0385..7cad29ca94ee01 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -8866,7 +8866,7 @@ class convolution_test : public tests::generic_test { void SetUp() override { rg.set_seed(GET_SUITE_NAME); } - + static void TearDownTestCase() { all_generic_params.clear(); all_layer_params.clear(); @@ -9954,3 +9954,129 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias_swap_xy) { } EXPECT_TRUE(found_define); } + +struct conv_dyn_test_params { + ov::Shape in_shape; + ov::Shape wei_shape; + ov::Strides stride; + ov::Strides dilation; + ov::CoordinateDiff pad_begin; + ov::CoordinateDiff pad_end; +}; + +class conv_dyn_test : public testing::TestWithParam {}; +TEST_P(conv_dyn_test, convolution_gpu_bfyx_os_iyx_osv16_no_bias) { + auto& engine = get_test_engine(); + auto p = GetParam(); + + auto is_grouped = p.wei_shape.size() == 5; + auto groups_num = is_grouped ? 
static_cast(p.wei_shape[0]) : 1; + + auto calculate_ref = [&](memory::ptr input, memory::ptr weights, ExecutionConfig config) { + auto in_layout = input->get_layout(); + + topology topology_ref( + input_layout("input", in_layout), + data("weights", weights), + convolution("conv", input_info("input"), "weights", no_bias, groups_num, p.stride, p.dilation, p.pad_begin, p.pad_end, is_grouped)); + + network network_ref(engine, topology_ref, config); + network_ref.set_input_data("input", input); + + auto outputs_ref = network_ref.execute(); + + return outputs_ref.at("conv").get_memory(); + }; + + auto in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(p.in_shape[1]), ov::Dimension(), ov::Dimension()}, data_types::f32, format::bfyx}; + auto input = engine.allocate_memory({ p.in_shape, data_types::f32, format::bfyx }); + auto weights = engine.allocate_memory({p.wei_shape, data_types::f32, is_grouped ? format::bfzyx : format::bfyx}); + + tests::random_generator rg(GET_SUITE_NAME); + VF input_rnd = rg.generate_random_1d(ov::shape_size(p.in_shape), -10, 10); + VF weights_rnd = rg.generate_random_1d(ov::shape_size(p.wei_shape), -10, 10); + + set_values(input, input_rnd); + set_values(weights, weights_rnd); + + topology topology( + input_layout("input", in_layout), + data("weights", weights), + convolution("conv", input_info("input"), "weights", no_bias, groups_num, p.stride, p.dilation, p.pad_begin, p.pad_end, is_grouped)); + + ExecutionConfig config = get_test_default_config(engine); + ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "convolution_gpu_bfyx_os_iyx_osv16", impl_types::ocl }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } })); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::enable_profiling(true)); + + network network(engine, topology, config); + network.set_input_data("input", input); + + auto inst = network.get_primitive("conv"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + + auto outputs = network.execute(); + + auto output_memory = outputs.at("conv").get_memory(); + auto output_memory_ref = calculate_ref(input, weights, config); + + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock output_ptr_ref(output_memory_ref, get_test_stream()); + + ASSERT_EQ(outputs.at("conv").get_layout(), output_memory_ref->get_layout()); + for (size_t i = 0; i < output_ptr.size(); i++) { + ASSERT_EQ(output_ptr[i], output_ptr_ref[i]); + } + + { + // Change original shape for the second run + auto new_shape = p.in_shape; + new_shape[2] += 4; + new_shape[3] += 8; + + auto input = engine.allocate_memory({ new_shape, data_types::f32, format::bfyx }); + + VF input_rnd = rg.generate_random_1d(ov::shape_size(p.in_shape), -10, 10); + set_values(input, input_rnd); + + network.set_input_data("input", input); + auto outputs = network.execute(); + + auto output_memory = outputs.at("conv").get_memory(); + auto output_memory_ref = calculate_ref(input, weights, config); + + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock output_ptr_ref(output_memory_ref, get_test_stream()); + + ASSERT_EQ(outputs.at("conv").get_layout(), output_memory_ref->get_layout()); + for (size_t i = 0; i < output_ptr.size(); i++) { + ASSERT_EQ(output_ptr[i], output_ptr_ref[i]); + } + } +} + +INSTANTIATE_TEST_SUITE_P(smoke, conv_dyn_test, + testing::ValuesIn(std::vector{ + { ov::Shape{1, 8, 14, 14}, 
ov::Shape{16, 8, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 8, 32, 32}, ov::Shape{16, 8, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 8, 60, 60}, ov::Shape{16, 8, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 8, 64, 64}, ov::Shape{16, 8, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 8, 110, 111}, ov::Shape{16, 8, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 8, 110, 111}, ov::Shape{16, 8, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{1, 1}, ov::CoordinateDiff{1, 1} }, + { ov::Shape{1, 8, 110, 111}, ov::Shape{16, 8, 5, 5}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{1, 1}, ov::CoordinateDiff{1, 1} }, + { ov::Shape{2, 640, 32, 32}, ov::Shape{640, 640, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{1, 1}, ov::CoordinateDiff{1, 1} }, + { ov::Shape{1, 32, 16, 16}, ov::Shape{32, 32, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{1, 1}, ov::CoordinateDiff{1, 1} }, + { ov::Shape{2, 32, 16, 16}, ov::Shape{32, 32, 3, 3}, ov::Strides{2, 2}, ov::Strides{1, 1}, ov::CoordinateDiff{1, 1}, ov::CoordinateDiff{1, 1} }, + { ov::Shape{1, 32, 32, 32}, ov::Shape{64, 32, 1, 1}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 4, 32, 32}, ov::Shape{32, 4, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{1, 1}, ov::CoordinateDiff{1, 1} }, + { ov::Shape{1, 4, 64, 64}, ov::Shape{4, 4, 1, 1}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 32, 28, 28}, ov::Shape{32, 1, 1, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{1, 1}, ov::CoordinateDiff{1, 1} }, + { ov::Shape{1, 48, 16, 16}, ov::Shape{48, 48, 4, 4}, ov::Strides{4, 4}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 16, 28, 28}, ov::Shape{32, 16, 2, 2}, ov::Strides{2, 2}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 3, 32, 32}, ov::Shape{96, 3, 4, 4}, ov::Strides{4, 4}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, + { ov::Shape{1, 768, 7, 7}, ov::Shape{768, 1, 1, 3, 3}, ov::Strides{1, 1}, ov::Strides{1, 1}, ov::CoordinateDiff{1, 1}, ov::CoordinateDiff{1, 1} }, + { ov::Shape{1, 48, 56, 56}, ov::Shape{48, 48, 8, 8}, ov::Strides{8, 8}, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0} }, +})); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index d96182b974bb74..afbc1e30c2f3fe 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1738,8 +1738,8 @@ TEST(fully_connected_onednn, impl_replacement_with_cldnn) { ASSERT_EQ(-2.25f, output_ptr[2]); ASSERT_EQ(3.0f, output_ptr[3]); - // WA: Call cancel() to wait for all queued kernels compilation finish - network.get_program()->get_compilation_context().cancel(); + // WA: Call wait_all() to wait for all queued kernels compilation finish + network.get_program()->get_compilation_context().wait_all(); // Check 
if OneDNN's impl is used for the next execute() call network.execute(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index e82e076a2e6d5c..6b5b76ffd7fecc 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -1559,8 +1559,8 @@ TEST(gemm_onednn, impl_replacement_with_cldnn) { ASSERT_FLOAT_EQ(output_ptr[i], out_data[i]); } - // WA: Call cancel() to wait for all queued kernels compilation finish - network.get_program()->get_compilation_context().cancel(); + // WA: Call wait_all() to wait for all queued kernels compilation finish + network.get_program()->get_compilation_context().wait_all(); // Check if OneDNN's impl is used for the next execute() call network.execute(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/random_uniform_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/random_uniform_gpu_test.cpp index f414c915e30b7e..62950ea1985cf5 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/random_uniform_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/random_uniform_gpu_test.cpp @@ -18,8 +18,7 @@ using namespace ::tests; */ template struct RandomUniformParams { - tensor output_tensor; - format f; + ov::Shape output_shape; T min_val; T max_val; uint64_t global_seed; @@ -36,20 +35,20 @@ struct random_uniform_gpu_test : public ::testing::TestWithParam params = testing::TestWithParam >::GetParam(); auto &engine = get_test_engine(); + auto format = format::get_default_format(params.output_shape.size()); auto shape = engine.allocate_memory( - {data_type, params.f, {1, 1, static_cast(params.output_tensor.sizes().size()), 1}}); + {{1, 1, 1, static_cast(params.output_shape.size())}, data_type, format}); auto min_val = engine.allocate_memory(layout(data_type, format::bfyx, {1, 1, 1, 1})); auto max_val = engine.allocate_memory(layout(data_type, format::bfyx, {1, 1, 1, 1})); - set_values(shape, params.output_tensor.sizes()); + set_values(shape, params.output_shape); set_values(min_val, {params.min_val}); set_values(max_val, {params.max_val}); topology topology; topology.add( random_uniform("random_uniform", { input_info("shape"), input_info("min_val"), input_info("max_val") }, data_type, params.global_seed, - params.op_seed, params.output_tensor, - params.f)); + params.op_seed, params.output_shape)); topology.add(input_layout("shape", shape->get_layout())); topology.add(input_layout("min_val", min_val->get_layout())); topology.add(input_layout("max_val", max_val->get_layout())); @@ -78,11 +77,11 @@ struct PrintToStringParamName { template std::string operator()(const testing::TestParamInfo > ¶m) { std::stringstream buf; - buf << " output tensor" << param.param.output_tensor.to_string() - << " min_value " << param.param.min_val - << " max_value " << param.param.max_val - << " global_seed " << param.param.global_seed - << " op_seed " << param.param.op_seed; + buf << "output_tensor_" << param.param.output_shape + << "_min_value_" << param.param.min_val + << "_max_value_" << param.param.max_val + << "_global_seed_" << param.param.global_seed + << "_op_seed_" << param.param.op_seed; return buf.str(); } @@ -91,11 +90,11 @@ struct PrintToStringParamName { template<> std::string PrintToStringParamName::operator()(const testing::TestParamInfo > ¶m) { std::stringstream buf; - buf << " output tensor" << param.param.output_tensor.to_string() - << " min_value " << 
static_cast(param.param.min_val) - << " max_value " << static_cast(param.param.max_val) - << " global_seed " << param.param.global_seed - << " op_seed " << param.param.op_seed; + buf << "output_tensor_" << param.param.output_shape + << "_min_value_" << static_cast(param.param.min_val) + << "_max_value_" << static_cast(param.param.max_val) + << "_global_seed_" << param.param.global_seed + << "_op_seed_" << param.param.op_seed; return buf.str(); } @@ -124,7 +123,7 @@ TEST_P(random_uniform_gpu_test_f16, random_f16) { INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_int32, random_uniform_gpu_test_i32, ::testing::Values( - RandomUniformParams{tensor(1, 1, 2, 3), format::bfyx, 50, 100, 80, 100, + RandomUniformParams{ov::Shape{1, 1, 3, 2}, 50, 100, 80, 100, std::vector{ 65, 70, 56, 59, 82, 92 @@ -135,7 +134,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_int32, INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_int64, random_uniform_gpu_test_i64, ::testing::Values( - RandomUniformParams{tensor(1, 1, 5, 4, 3), format::bfzyx, -2600, 3700, 755, + RandomUniformParams{ov::Shape{1, 1, 3, 4, 5}, -2600, 3700, 755, 951, { 2116L, -1581L, 2559L, -339L, -1660L, 519L, 90L, @@ -151,11 +150,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_int64, ), PrintToStringParamName()); - INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_f32, random_uniform_gpu_test_f32, ::testing::Values( - RandomUniformParams{tensor(1, 1, 3, 3), format::bfyx, 0.0, 1.0, 150, 10, + RandomUniformParams{ov::Shape{1, 1, 3, 3}, 0.0, 1.0, 150, 10, + { + 0.7011236, 0.30539632, 0.93931055, + 0.9456035, 0.11694777, 0.50770056, + 0.5197197, 0.22727466, 0.991374 + } + }, + RandomUniformParams{ov::Shape{3, 3}, 0.0, 1.0, 150, 10, { 0.7011236, 0.30539632, 0.93931055, 0.9456035, 0.11694777, 0.50770056, @@ -165,11 +170,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_f32, ), PrintToStringParamName()); - INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_f16, random_uniform_gpu_test_f16, ::testing::Values( - RandomUniformParams{tensor(1, 1, 3, 2, 4), format::bfzyx, half_t(-1.5), + RandomUniformParams{ov::Shape{1, 1, 4, 2, 3}, half_t(-1.5), half_t(-1.0), 150, 10, {half_t(-1.19726562), half_t(-1.09667969), half_t(-1.08398438), half_t(-1.30859375), diff --git a/src/plugins/template/src/compiled_model.cpp b/src/plugins/template/src/compiled_model.cpp index 49a61d5c005c25..e550bd1e6571bb 100644 --- a/src/plugins/template/src/compiled_model.cpp +++ b/src/plugins/template/src/compiled_model.cpp @@ -7,7 +7,6 @@ #include #include "async_infer_request.hpp" -#include "ie_plugin_config.hpp" #include "itt.hpp" #include "openvino/op/util/op_types.hpp" #include "openvino/runtime/exec_model_info.hpp" @@ -33,9 +32,6 @@ ov::template_plugin::CompiledModel::CompiledModel(const std::shared_ptr>(); - for (auto&& configKey : streamExecutorConfigKeys) { - configs.emplace_back(configKey); - } - return to_string_vector(configs); - } else if (ov::model_name == name) { + if (ov::model_name == name) { auto model_name = m_model->get_friendly_name(); return decltype(ov::model_name)::value_type(model_name); } else if (ov::loaded_from_cache == name) { diff --git a/src/plugins/template/src/config.cpp b/src/plugins/template/src/config.cpp index 959cc1496c3d0d..155ed29ecd937a 100644 --- a/src/plugins/template/src/config.cpp +++ b/src/plugins/template/src/config.cpp @@ -4,9 +4,6 @@ #include "config.hpp" -#include -#include - #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/properties.hpp" #include "template/properties.hpp" @@ -61,11 +58,11 @@ ov::Any 
Configuration::Get(const std::string& name) const { return {disable_transformations}; } else if (name == ov::num_streams) { return {std::to_string(streams_executor_config._streams)}; - } else if (name == CONFIG_KEY(CPU_BIND_THREAD)) { + } else if (name == ov::internal::cpu_bind_thread) { return streams_executor_config.get_property(name); - } else if (name == CONFIG_KEY(CPU_THREADS_NUM)) { + } else if (name == ov::inference_num_threads) { return {std::to_string(streams_executor_config._threads)}; - } else if (name == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) { + } else if (name == ov::internal::threads_per_stream) { return {std::to_string(streams_executor_config._threadsPerStream)}; } else if (name == ov::hint::performance_mode) { return performance_mode; diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp index 749973b1422b47..bcdd9b5ec3a0b4 100644 --- a/src/plugins/template/src/plugin.cpp +++ b/src/plugins/template/src/plugin.cpp @@ -6,7 +6,6 @@ #include -#include "ie_plugin_config.hpp" #include "itt.hpp" #include "openvino/pass/manager.hpp" #include "openvino/runtime/internal_properties.hpp" @@ -244,25 +243,7 @@ ov::Any ov::template_plugin::Plugin::get_property(const std::string& name, const } return ret; }; - if (METRIC_KEY(SUPPORTED_METRICS) == name) { - auto metrics = default_ro_properties(); - - add_ro_properties(METRIC_KEY(SUPPORTED_METRICS), metrics); - add_ro_properties(METRIC_KEY(SUPPORTED_CONFIG_KEYS), metrics); - add_ro_properties(METRIC_KEY(IMPORT_EXPORT_SUPPORT), metrics); - return to_string_vector(metrics); - } else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) { - auto configs = default_rw_properties(); - auto streamExecutorConfigKeys = ov::threading::IStreamsExecutor::Config{} - .get_property(ov::supported_properties.name()) - .as>(); - for (auto&& configKey : streamExecutorConfigKeys) { - if (configKey != InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) { - configs.emplace_back(configKey); - } - } - return to_string_vector(configs); - } else if (ov::supported_properties == name) { + if (ov::supported_properties == name) { auto ro_properties = default_ro_properties(); auto rw_properties = default_rw_properties(); @@ -282,8 +263,6 @@ ov::Any ov::template_plugin::Plugin::get_property(const std::string& name, const } else if (ov::device::full_name == name) { std::string device_name = "Template Device Full Name"; return decltype(ov::device::full_name)::value_type(device_name); - } else if (METRIC_KEY(IMPORT_EXPORT_SUPPORT) == name) { - return true; } else if (ov::device::architecture == name) { // TODO: return device architecture for device specified by DEVICE_ID config std::string arch = "TEMPLATE"; diff --git a/src/plugins/template/src/variable_state.hpp b/src/plugins/template/src/variable_state.hpp index e635f9b3285276..2eb12e63e0386e 100644 --- a/src/plugins/template/src/variable_state.hpp +++ b/src/plugins/template/src/variable_state.hpp @@ -4,7 +4,9 @@ #pragma once +#include "openvino/runtime/itensor.hpp" #include "openvino/runtime/ivariable_state.hpp" +#include "openvino/runtime/so_ptr.hpp" namespace ov { namespace template_plugin { diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp index 096112f529a64c..198afd52803dff 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp +++ 
b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp @@ -19,7 +19,7 @@ class GraphCache : public ICache { public: void update_cache(const std::shared_ptr& model, const std::string& model_meta_data, - bool extract_body = true) override; + bool extract_body) override; void serialize_cache() override; static std::shared_ptr& get() { @@ -35,6 +35,7 @@ class GraphCache : public ICache { } void reset_cache() override { + m_graph_cache.clear(); reset(); }; @@ -42,17 +43,21 @@ class GraphCache : public ICache { std::map, MetaInfo> m_graph_cache; ExtractorsManager m_manager = ExtractorsManager(); static std::shared_ptr m_cache_instance; + // cache byte size + size_t m_graph_cache_bytesize = 0; GraphCache() { ExtractorsManager::ExtractorsMap matchers = { - { "fused_names", FusedNamesExtractor::Ptr(new FusedNamesExtractor) }, - { "repeat_pattern", RepeatPatternExtractor::Ptr(new RepeatPatternExtractor) }, + // temporary disabling according mem leaks in CI and not using swap mem + // { "fused_names", FusedNamesExtractor::Ptr(new FusedNamesExtractor) }, + // { "repeat_pattern", RepeatPatternExtractor::Ptr(new RepeatPatternExtractor) }, }; m_manager.set_extractors(matchers); } void update_cache(const std::shared_ptr& model, const std::string& model_path, - const std::map& input_info, size_t model_op_cnt); + std::map& input_info, const std::string& extractor_name, + size_t model_op_cnt); }; } // namespace subgraph_dumper diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/input_info.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/input_info.hpp index 9cd79a3f1b1990..2aaa819520ab04 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/input_info.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/input_info.hpp @@ -52,7 +52,8 @@ struct InputInfo { } }; -using ExtractedPattern = std::pair, std::map>; +// ov_model, input_info, extractor_name +using ExtractedPattern = std::tuple, std::map, std::string>; } // namespace subgraph_dumper } // namespace tools diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/meta_info.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/meta_info.hpp index 1ded7e0044d16f..47572db370acd9 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/meta_info.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/meta_info.hpp @@ -13,18 +13,30 @@ namespace subgraph_dumper { class MetaInfo { public: - MetaInfo(const std::string& model_path = "", const std::map& _input_info = {}, size_t total_op_cnt = 1, size_t model_priority = 1); + MetaInfo(const std::string& model_path = "", const std::map& _input_info = {}, + size_t total_op_cnt = 1, size_t this_op_cnt = 1, const std::string& extractor = "", size_t model_priority = 1); + MetaInfo(std::map _in_info, + std::map _model_info, + std::unordered_set _extractors) : + model_info(_model_info), + input_info(_in_info), + extractors(_extractors) {}; void serialize(const std::string& serialization_path); - void update(const std::string& model_path, const std::map& _input_info, - size_t _total_op_cnt = 1, const std::vector& ignored_inputs = {}); - std::map get_input_info(); - std::map get_model_info(); + void update(const std::string& model_path, const std::map& _input_info, size_t _total_op_cnt = 1, + size_t _this_op_cnt = 1, const std::string& 
extractor = "", const std::vector& ignored_inputs = {}); + std::map get_input_info() const; + std::map get_model_info() const; + std::string get_any_extractor() const { return *extractors.begin(); } + + static MetaInfo read_meta_from_file(const std::string& meta_path); protected: // { input_node_name: input_info } std::map input_info; // { model_name: model_paths, this_op/graph_cnt, total_op_cnt, model_priority} std::map model_info; + // { extractors } + std::unordered_set extractors; // to store model priority ranges to normilize graph_priority static unsigned long MAX_MODEL_PRIORITY; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/model_info.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/model_info.hpp index 408c6cdcc603b8..e3fa7fec575354 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/model_info.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/model_info.hpp @@ -15,9 +15,10 @@ struct ModelInfo { std::set model_paths; size_t this_op_cnt, total_op_cnt, model_priority; - ModelInfo(const std::string& model_path = "", size_t total_ops_in_model = 1, size_t _model_priority = 1) : - total_op_cnt(total_ops_in_model), model_paths({model_path}), - this_op_cnt(1), model_priority(_model_priority) {} + ModelInfo(const std::string& model_path = "", size_t total_ops_in_model = 1, size_t this_ops_in_model = 1, size_t _model_priority = 1) : + total_op_cnt(total_ops_in_model), this_op_cnt(this_ops_in_model), model_priority(_model_priority) { + model_paths = model_path.empty() ? std::set() : std::set({ model_path }) ; + } bool operator==(const ModelInfo& model_info_ref) const { if (this->model_priority != model_info_ref.model_priority || this->this_op_cnt != model_info_ref.this_op_cnt || diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/op_cache.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/op_cache.hpp index feca24414d63cb..b7f277526a026b 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/op_cache.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/op_cache.hpp @@ -32,6 +32,7 @@ class OpCache : public ICache { } void reset_cache() override { + m_ops_cache.clear(); reset(); }; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/fused_names.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/fused_names.hpp index 84e5a88bd87066..dfc2a7dc995a08 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/fused_names.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/fused_names.hpp @@ -14,11 +14,16 @@ namespace subgraph_dumper { class FusedNamesExtractor : public SubgraphExtractor { public: + FusedNamesExtractor(); std::list extract(const std::shared_ptr &model, bool is_extract_body = true) override; + void set_target_device(const std::string& _device) { device = _device; } protected: std::unordered_set extract_compiled_model_names(const std::shared_ptr& model); + + std::string device; + std::shared_ptr core; }; } // namespace subgraph_dumper diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/manager.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/manager.hpp index 
e9dad1b9adc0e2..05395b80c15a26 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/manager.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/manager.hpp @@ -16,15 +16,25 @@ class ExtractorsManager { explicit ExtractorsManager(const ExtractorsMap& extractors = {}) : m_extractors(extractors) {} bool match(const std::shared_ptr &model, - const std::shared_ptr &ref); + const std::shared_ptr &ref, + std::map &in_info, + const std::map &in_info_ref); std::list extract(const std::shared_ptr &model, bool is_extract_body = true); void set_extractors(const ExtractorsMap& extractors = {}) { m_extractors = extractors; } ExtractorsMap get_extractors() { return m_extractors; } + std::map align_input_info(const std::shared_ptr& model, + const std::shared_ptr& model_ref, + const std::map &in_info, + const std::map &in_info_ref); + protected: ExtractorsMap m_extractors = {}; + + bool match(const std::shared_ptr &model, + const std::shared_ptr &ref); }; } // namespace subgraph_dumper diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/subgraph.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/subgraph.hpp index b0ab5b79c9e813..44320a8dd34c46 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/subgraph.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/subgraph.hpp @@ -26,7 +26,10 @@ class SubgraphExtractor { return std::list{}; }; + void set_extractor_name(const std::string& _extractor_name) { extractor_name = _extractor_name; } + protected: + std::string extractor_name = ""; FunctionsComparator comparator = FunctionsComparator::no_default() .enable(FunctionsComparator::ATTRIBUTES) .enable(FunctionsComparator::NODES) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp index abaeb3b036e1d6..86f133be09f794 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include "openvino/util/file_util.hpp" @@ -56,7 +58,8 @@ static std::map model_cache_status_to_str = { { ModelCacheStatus::NOT_READ, "not_read_models" }, }; -std::vector find_models(const std::vector &dirs, const std::string& regexp = ".*"); +std::pair, std::pair>> +find_models(const std::vector &dirs, const std::string& regexp = ".*"); // model_cache_status: model_list std::map> cache_models( @@ -67,10 +70,136 @@ std::map> cache_models( void save_model_status_to_file(const std::map>& caching_status, const std::string& output_dir); -std::pair, std::map> +inline ExtractedPattern generate_model(const std::set>& nodes, - const std::shared_ptr& start_node, - std::unordered_set& checked_ops); + std::unordered_set& checked_ops, + const std::string& extractor_name) { + std::unordered_map> model_map; + // to create reults: { out_op_name, out_ports_without_target_inputs } + std::unordered_map> model_output_nodes; + std::map input_info; + ov::ParameterVector params; + { + // prepare map { original_op_name, cloned_op } + size_t functional_op_cnt = 0; + for (const auto& op : nodes) { + auto op_name = op->get_friendly_name(); + checked_ops.insert(op_name); + auto cloned_op = clone_node(op, true, false, 
op->get_friendly_name()); + model_map.insert({ op_name, cloned_op }); + + size_t output_cnt = op->outputs().size(); + std::vector out_ports(output_cnt); + std::iota(out_ports.begin(), out_ports.end(), 0); + std::unordered_set out_ports_set(out_ports.begin(), out_ports.end()); + model_output_nodes.insert({ op_name, out_ports_set }); + if (!ov::op::util::is_output(op) && !ov::op::util::is_constant(op) && !ov::op::util::is_parameter(op)) { + ++functional_op_cnt; + } + } + + if (functional_op_cnt < 2) { + throw std::runtime_error("Incorrect node number to create model"); + } + // replace new inputs by taken from graph if possible + for (const auto& op : nodes) { + int filled_input_idx = -1; + std::vector not_filled_ports; + auto in_cnt = op->inputs().size(); + auto cloned_op = model_map[op->get_friendly_name()]; + std::map this_input_info = get_input_info_by_node(cloned_op); + for (size_t in_idx = 0; in_idx < in_cnt; ++in_idx) { + auto in_node = op->get_input_node_ptr(in_idx)->shared_from_this(); + for (size_t in_out_idx = 0; in_out_idx < in_node->outputs().size(); ++in_out_idx) { + for (const auto& target_input : in_node->output(in_out_idx).get_target_inputs()) { + auto out_in_node = target_input.get_node()->shared_from_this(); + if (out_in_node == op) { + auto in_node_name = in_node->get_friendly_name(); + auto in_cloned_node = cloned_op->get_input_node_shared_ptr(in_idx); + // if op input node is in subgraph + if (model_map.count(in_node_name)) { + auto in_node = model_map[in_node_name]; + auto in_cloned_friendly_name = in_cloned_node->get_friendly_name(); + ov::replace_output_update_name(in_cloned_node->get_default_output(), in_node->output(in_out_idx)); + in_cloned_node->clear_control_dependencies(); + if (ov::op::util::is_parameter(in_node)) { + auto param = std::dynamic_pointer_cast(in_node); + params.push_back(param); + this_input_info.insert({ in_node->get_friendly_name(), this_input_info[in_cloned_friendly_name]}); + } else if (ov::op::util::is_constant(in_node)) { + auto op_to_replace = std::dynamic_pointer_cast(in_node); + auto param = convert_const_to_param(op_to_replace); + if (param != nullptr) { + params.push_back(param); + } + // insert in_info with updated in_name + this_input_info.insert({ in_node->get_friendly_name(), this_input_info[in_cloned_friendly_name]}); + } + // remove in_info with old name from input info + this_input_info.erase(in_cloned_friendly_name); + filled_input_idx++; + model_output_nodes[in_node_name].erase(in_out_idx); + if (model_output_nodes[in_node_name].empty()) { + model_output_nodes.erase(in_node_name); + } + } else if (ov::op::util::is_parameter(in_cloned_node)) { + auto param = std::dynamic_pointer_cast(in_cloned_node); + params.push_back(param); + } else if (ov::op::util::is_constant(in_cloned_node)) { + auto op_to_replace = std::dynamic_pointer_cast(in_cloned_node); + auto param = convert_const_to_param(op_to_replace); + if (param != nullptr) { + params.push_back(param); + } + } + break; + } + } + if (filled_input_idx == in_idx) { + break; + } + } + } + if (!this_input_info.empty()) { + input_info.insert(this_input_info.begin(), this_input_info.end()); + } + } + } + ov::ResultVector results; + for (const auto& out_node_name : model_output_nodes) { + auto out_node = model_map[out_node_name.first]; + if (ov::op::util::is_output(out_node)) { + results.push_back(std::dynamic_pointer_cast(out_node)); + } else { + for (const auto& out_port_id : out_node_name.second) { + results.push_back(std::make_shared(out_node->output(out_port_id))); + } + } + } + 
auto model = std::make_shared(results, params); + std::string string_to_hash; + for (const auto& op : model->get_ordered_ops()) { + std::ostringstream result; + result << op->get_type_info(); + for (const auto& in : op->inputs()) { + result << in.get_element_type(); + result << in.get_partial_shape().rank(); + result << in.get_partial_shape().is_static(); + } + for (const auto& out : op->outputs()) { + result << out.get_element_type(); + result << out.get_partial_shape().rank(); + result << out.get_partial_shape().is_static(); + } + string_to_hash += result.str(); + } + for (const auto& in : input_info) { + string_to_hash += (in.second.is_const ? "1" : "0"); + } + auto h1 = std::hash{}(string_to_hash); + model->set_friendly_name(std::to_string(h1)); + return { model, input_info, extractor_name }; +} } // namespace subgraph_dumper } // namespace tools diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp index 02319ab816030f..cc6eea4809c1f2 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp @@ -36,6 +36,9 @@ std::shared_ptr clone_node(std::shared_ptr node, bool is_copy_const_node = false, std::string node_name = ""); + +std::shared_ptr convert_const_to_param(const std::shared_ptr& constant_node); + // all inputs are defined as parameters and contains detailed info in meta std::shared_ptr generate_model_by_node(const std::shared_ptr& node); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/cache.cpp index 6cb27e749db09d..5dc7017ed9059f 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/cache.cpp @@ -20,31 +20,6 @@ bool ICache::serialize_model(const std::pair, MetaInf const std::string& rel_serialization_dir) { std::shared_ptr model = graph_info.first; MetaInfo meta = graph_info.second; - std::map, std::shared_ptr> nodes; - ov::ParameterVector param_vector; - for (const auto& op : model->get_ordered_ops()) { - std::shared_ptr param = nullptr; - if (ov::op::util::is_parameter(op)) { - param = std::dynamic_pointer_cast(op); - } else if (ov::op::util::is_constant(op)) { - auto op_to_replace = std::dynamic_pointer_cast(op); - if (op_to_replace->get_byte_size() > 1024) { - param = std::make_shared( - op_to_replace->get_output_element_type(0), op_to_replace->get_output_partial_shape(0)); - param->set_friendly_name(op_to_replace->get_friendly_name()); - nodes.insert({ op_to_replace, param }); - } - } - if (param != nullptr) { - param_vector.push_back(param); - } - } - if (!nodes.empty()) { - for (const auto& node : nodes) { - model->replace_node(node.first, node.second); - } - model = std::make_shared(model->get_results(), param_vector); - } std::string model_name = model->get_friendly_name(); std::string abs_searilization_dir = ov::util::path_join({ m_serialization_dir, rel_serialization_dir }); @@ -67,7 +42,8 @@ bool ICache::serialize_model(const std::pair, MetaInf } catch (std::exception &e) { std::cout << "[ ERROR ] Failed to serialize model: " << model_name << ". 
Exception: " << e.what() << std::endl; - ov::test::utils::removeIRFiles(xml_path, bin_path); + ov::test::utils::removeFile(xml_path); + ov::test::utils::removeFile(bin_path); ov::test::utils::removeFile(meta_path); if (std::string(e.what()).find("Can't open") == std::string::npos) { return false; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp index 86bce528fbff94..034e673a1d6738 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp @@ -2,10 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include "openvino/op/util/op_types.hpp" +#include "openvino/util/file_util.hpp" #include "functional_test_utils/ov_plugin_cache.hpp" #include "common_test_utils/graph_comparator.hpp" +#include "common_test_utils/file_utils.hpp" #include "cache/graph_cache.hpp" #include "utils/node.hpp" @@ -26,31 +30,73 @@ void GraphCache::update_cache(const std::shared_ptr& model, return; } while (!extracted_patterns.empty()) { - auto it = extracted_patterns.begin(); - update_cache(it->first, model_meta_data, it->second, model_total_op); + auto it = *extracted_patterns.begin(); + update_cache(std::get<0>(it), model_meta_data, std::get<1>(it), std::get<2>(it), model_total_op); extracted_patterns.pop_front(); } return; } void GraphCache::update_cache(const std::shared_ptr& extracted_model, const std::string& model_path, - const std::map& input_info, size_t model_op_cnt) { - std::shared_ptr model_to_update = nullptr; - for (const auto& cached_model : m_graph_cache) { - if (m_manager.match(cached_model.first, extracted_model)) { - model_to_update = cached_model.first; + std::map& input_info, const std::string& extractor_name, size_t model_op_cnt) { + // todo: check the number 8GB + if (m_graph_cache_bytesize >> 33 > 0) { + std::cout << "[ GRAPH CACHE ][ WARNING ] Cache size > 8 GB. 
Serialize graph cache" << std::endl; + serialize_cache(); + // m_graph_cache.clear(); + m_graph_cache_bytesize = 0; + } + + auto graph_name = extracted_model->get_friendly_name(); + std::string serialized_model_path = ""; + for (const auto& extractor : m_manager.get_extractors()) { + auto tmp_serialized_model_path = ov::util::path_join({ m_serialization_dir, "subgraph", extractor.first, graph_name + ".xml" }); + if (ov::util::file_exists(serialized_model_path)) { + serialized_model_path = tmp_serialized_model_path; break; } } + + std::shared_ptr model_to_update = nullptr; + // if cached model was serialized + if (!serialized_model_path.empty()) { + std::cout << "[ GRAPH CACHE ][ INFO ] Reading cached model: " << serialized_model_path << std::endl; + auto bin_path = ov::test::utils::replaceExt(serialized_model_path, ".bin"); + auto meta_path = ov::test::utils::replaceExt(serialized_model_path, ".meta"); + auto cached_model = ov::test::utils::PluginCache::get().core()->read_model(serialized_model_path); + auto cached_meta = MetaInfo::read_meta_from_file(meta_path); + + ov::test::utils::removeFile(serialized_model_path); + ov::test::utils::removeFile(bin_path); + ov::test::utils::removeFile(meta_path); + m_graph_cache.insert({ cached_model, cached_meta }); + m_graph_cache_bytesize += cached_model->get_graph_size(); + model_to_update = cached_model; + input_info = m_manager.align_input_info(extracted_model, model_to_update, + input_info, cached_meta.get_input_info()); + } else { + for (const auto& cached_model : m_graph_cache) { + if (m_manager.match(extracted_model, cached_model.first, + input_info, cached_model.second.get_input_info())) { + model_to_update = cached_model.first; + break; + } + } + } + + auto this_op_cnt = extracted_model->get_ops().size() - + extracted_model->get_parameters().size() - extracted_model->get_results().size(); if (model_to_update == nullptr) { - auto meta = MetaInfo(model_path, input_info, model_op_cnt); + auto meta = MetaInfo(model_path, input_info, model_op_cnt, this_op_cnt, extractor_name); m_graph_cache.insert({ extracted_model, meta }); + m_graph_cache_bytesize += extracted_model->get_graph_size(); return; } - m_graph_cache[model_to_update].update(model_path, input_info, model_op_cnt); + m_graph_cache[model_to_update].update(model_path, input_info, model_op_cnt, this_op_cnt, extractor_name); auto cached_model_size = model_to_update->get_graph_size(); auto pattern_model_size = extracted_model->get_graph_size(); if (pattern_model_size < cached_model_size) { + m_graph_cache_bytesize -= (cached_model_size - pattern_model_size); auto meta = m_graph_cache[model_to_update]; m_graph_cache.erase(model_to_update); m_graph_cache.insert({extracted_model, meta}); @@ -58,11 +104,18 @@ void GraphCache::update_cache(const std::shared_ptr& extracted_model, } void GraphCache::serialize_cache() { - for (const auto& cache_item : m_graph_cache) { - serialize_model(cache_item, "subgraph"); - } + // for (const auto& cache_item : m_graph_cache) { + auto it = m_graph_cache.begin(); + while (it != m_graph_cache.end()) { + auto rel_dir = ov::util::path_join({ "subgraph", it->second.get_any_extractor() }); + serialize_model(*it, rel_dir); + m_graph_cache.erase(it->first); + it = m_graph_cache.begin(); + } + auto a = 0; + // } } } // namespace subgraph_dumper } // namespace tools -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/meta/meta_info.cpp 
b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/meta/meta_info.cpp index d9cd0127e7f5b0..90211ff6522471 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/meta/meta_info.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/meta/meta_info.cpp @@ -15,16 +15,20 @@ namespace subgraph_dumper { unsigned long MetaInfo::MIN_MODEL_PRIORITY = std::numeric_limits::max(); unsigned long MetaInfo::MAX_MODEL_PRIORITY = std::numeric_limits::min(); -MetaInfo::MetaInfo(const std::string& _model_path, const std::map& _input_info, size_t _total_op_cnt, size_t model_priority) { +MetaInfo::MetaInfo(const std::string& _model_path, const std::map& _input_info, + size_t _total_op_cnt, size_t _this_op_cnt, const std::string& extractor, size_t model_priority) { unsigned long tmp_graph_priority = _total_op_cnt * model_priority; if (tmp_graph_priority < MIN_MODEL_PRIORITY) MIN_MODEL_PRIORITY = tmp_graph_priority; if (tmp_graph_priority > MAX_MODEL_PRIORITY) MAX_MODEL_PRIORITY = tmp_graph_priority; if (_model_path != "") { - model_info.insert({ get_model_name_by_path(_model_path), ModelInfo(_model_path, _total_op_cnt, model_priority) }); + model_info.insert({ get_model_name_by_path(_model_path), ModelInfo(_model_path, _total_op_cnt, _this_op_cnt, model_priority) }); } if (!_input_info.empty()) { input_info = _input_info; } + if (!extractor.empty()) { + extractors = { extractor }; + } } unsigned long MetaInfo::get_abs_graph_priority() { @@ -42,6 +46,53 @@ double MetaInfo::get_graph_priority() { return diff / delta; } +MetaInfo MetaInfo::read_meta_from_file(const std::string& meta_path) { + pugi::xml_document doc; + doc.load_file(meta_path.c_str()); + std::map model_info; + { + auto model_xml_root = doc.child("meta_info").child("models"); + for (const auto& model_child : model_xml_root.children()) { + ModelInfo tmp_model_info; + tmp_model_info.this_op_cnt = model_child.attribute("this_op_count").as_uint(); + tmp_model_info.total_op_cnt = model_child.attribute("total_op_count").as_uint(); + for (const auto& path : model_child.child("path")) { + tmp_model_info.model_paths.insert(std::string(path.attribute("path").value())); + } + model_info.insert({ std::string(model_child.attribute("name").value()), tmp_model_info }); + } + } + std::map input_info; + { + auto input_info_xml = doc.child("meta_info").child("input_info"); + for (const auto &input : input_info_xml.children()) { + auto in_name = std::string(input.attribute("id").value()); + ov::tools::subgraph_dumper::InputInfo in_info; + in_info.is_const = input.attribute("convert_to_const").as_bool(); + if (std::string(input.attribute("min").value()) != "undefined") { + in_info.ranges.min = input.attribute("min").as_double(); + } else { + in_info.ranges.min = DEFAULT_MIN_VALUE; + } + if (std::string(input.attribute("max").value()) != "undefined") { + in_info.ranges.max = input.attribute("max").as_double(); + } else { + in_info.ranges.min = DEFAULT_MAX_VALUE; + } + input_info.insert({in_name, in_info}); + } + } + std::unordered_set extractors; + { + auto extractors_xml = doc.child("meta_info").child("extractors"); + for (const auto& extractor : extractors_xml.children()) { + extractors.insert(std::string(extractor.attribute("name").value())); + } + } + auto new_meta = MetaInfo(input_info, model_info, extractors); + return new_meta; +} + void MetaInfo::serialize(const std::string& serialization_path) { pugi::xml_document doc; pugi::xml_node root = doc.append_child("meta_info"); @@ -56,6 +107,12 @@ void 
MetaInfo::serialize(const std::string& serialization_path) { model_node.append_child("path").append_child("model").append_attribute("path").set_value(model_path.c_str()); } } + if (!extractors.empty()) { + auto extractors_node = root.append_child("extractors"); + for (const auto& extractor : extractors) { + extractors_node.append_child("extractor").append_attribute("name").set_value(extractor.c_str()); + } + } double graph_priority = get_graph_priority(); root.append_child("graph_priority").append_attribute("value").set_value(graph_priority); auto ports_info = root.append_child("input_info"); @@ -80,6 +137,8 @@ void MetaInfo::serialize(const std::string& serialization_path) { void MetaInfo::update(const std::string& _model_path, const std::map& _input_info, size_t _total_op_cnt, + size_t _this_op_cnt, + const std::string& extractor, const std::vector& ignored_inputs) { if (input_info.size() != _input_info.size()) { throw std::runtime_error("Incompatible input info!"); @@ -90,7 +149,7 @@ void MetaInfo::update(const std::string& _model_path, model_info.at(model_name).model_paths.insert(_model_path); model_info.at(model_name).total_op_cnt += _total_op_cnt; } - model_info.at(model_name).this_op_cnt++; + model_info.at(model_name).this_op_cnt += _this_op_cnt; } else { model_info.insert({ model_name, ModelInfo(_model_path, _total_op_cnt) });\ } @@ -112,13 +171,16 @@ void MetaInfo::update(const std::string& _model_path, if (abs_graph_priority > MAX_MODEL_PRIORITY) MAX_MODEL_PRIORITY = abs_graph_priority; if (abs_graph_priority < MIN_MODEL_PRIORITY) MIN_MODEL_PRIORITY = abs_graph_priority; } + if (!extractor.empty()) { + extractors.insert(extractor); + } } -std::map MetaInfo::get_input_info() { +std::map MetaInfo::get_input_info() const { return input_info; } -std::map MetaInfo::get_model_info() { +std::map MetaInfo::get_model_info() const { return model_info; } diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/op_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/op_cache.cpp index 703b5a19588f71..06895fbe37d6bd 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/op_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/op_cache.cpp @@ -83,11 +83,12 @@ void OpCache::update_cache(const std::shared_ptr& node, } size_t priority = get_node_priority_by_version(cloned_node); - auto meta = MetaInfo(model_path, get_input_info_by_node(cloned_node), model_op_cnt, priority); + auto meta = MetaInfo(model_path, get_input_info_by_node(cloned_node), model_op_cnt, 1, "", priority); if (find_op_in_cache != nullptr) { - std::cout << "[ INFO ][ OP CACHE ] Update cache node: " << cloned_node->get_type_info().name << - " " << find_op_in_cache->get_friendly_name() << std::endl; - m_ops_cache[find_op_in_cache].update(model_path, get_input_info_by_node(cloned_node), model_op_cnt, ignored_input_names); + // std::cout << "[ INFO ][ OP CACHE ] Update cache node: " << cloned_node->get_type_info().name << cloned_node->get_friendly_name() << + // " " << find_op_in_cache->get_friendly_name() << std::endl; + m_ops_cache[find_op_in_cache].update( + model_path, get_input_info_by_node(cloned_node), model_op_cnt, 1, "", ignored_input_names); } if (find_op_in_cache > cloned_node) { meta = m_ops_cache[find_op_in_cache]; @@ -95,8 +96,8 @@ void OpCache::update_cache(const std::shared_ptr& node, find_op_in_cache = nullptr; } if (find_op_in_cache == nullptr) { - std::cout << "[ INFO ][ OP CACHE ] Insert node: " << 
cloned_node->get_type_info().name << - " " << cloned_node->get_friendly_name() << " to Cache" << std::endl; + // std::cout << "[ INFO ][ OP CACHE ] Insert node: " << cloned_node->get_type_info().name << + // " " << cloned_node->get_friendly_name() << " to Cache" << std::endl; m_ops_cache.insert({ cloned_node, meta }); } } diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp index 7f723ce5ee204a..5f330f9226442e 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp @@ -20,6 +20,7 @@ int main(int argc, char *argv[]) { std::vector dirs = ov::test::utils::splitStringByDelimiter(FLAGS_input_folders); std::vector models; + std::map> cache_model_status; if (!ov::test::utils::directoryExists(FLAGS_output_folder)) { std::string msg = "Output directory (" + FLAGS_output_folder + ") doesn't not exist! The directory will be created."; @@ -27,7 +28,9 @@ int main(int argc, char *argv[]) { ov::test::utils::createDirectoryRecursive(FLAGS_output_folder); } try { - models = find_models(dirs, FLAGS_path_regex); + auto all_models = find_models(dirs, FLAGS_path_regex); + models = all_models.first; + cache_model_status.insert(all_models.second); } catch (std::runtime_error& e) { std::cout << "[ INFO ] Try 'subgraphsDumper -h' for more information. \nException: " << e.what() << std::endl; return 1; @@ -39,27 +42,39 @@ int main(int argc, char *argv[]) { caches.push_back(OpCache::get()); } if (FLAGS_cache_type == "GRAPH" || FLAGS_cache_type.empty()) { - // todo: iefode: to check and enable it in CI - // std::cout << "[ INFO ] GraphCache is enabled!" << std::endl; - // caches.push_back(GraphCache::get()); + std::cout << "[ INFO ] GraphCache is enabled!" 
<< std::endl; + caches.push_back(GraphCache::get()); } for (auto& cache : caches) { cache->set_serialization_dir(FLAGS_output_folder); } - std::map> cache_model_status; // Upload previously cached graphs to cache if (!FLAGS_local_cache.empty()) { - auto cachedOps = find_models(local_cache_dirs); - cache_model_status = cache_models(caches, cachedOps, FLAGS_extract_body); + auto cached_ops = find_models(local_cache_dirs); + // todo: add normal caching with meta info reading + auto this_cache_model_status = cache_models(caches, cached_ops.first, FLAGS_extract_body); + auto not_read_model = cached_ops.second; + for (auto& model_status : cache_model_status) { + auto& key = model_status.first; + auto& value = model_status.second; + if (not_read_model.first == key) { + value.insert(value.end(), not_read_model.second.begin(), not_read_model.second.end()); + } + if (this_cache_model_status.count(key)) { + value.insert(value.end(), this_cache_model_status[key].begin(), this_cache_model_status[key].end()); + } + } } { - auto tmp_cache_model_status = cache_models(caches, models, FLAGS_extract_body); - cache_model_status.insert(tmp_cache_model_status.begin(), tmp_cache_model_status.end()); - } - for (auto& cache : caches) { - cache->set_serialization_dir(FLAGS_output_folder); - cache->serialize_cache(); + auto this_cache_model_status = cache_models(caches, models, FLAGS_extract_body); + for (auto& model_status : cache_model_status) { + auto& key = model_status.first; + auto& value = model_status.second; + if (this_cache_model_status.count(key)) { + value.insert(value.end(), this_cache_model_status[key].begin(), this_cache_model_status[key].end()); + } + } } save_model_status_to_file(cache_model_status, FLAGS_output_folder); return cache_model_status[ModelCacheStatus::NOT_FULLY_CACHED].empty() && cache_model_status[ModelCacheStatus::NOT_READ].empty(); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp index e9e4f5435a3119..3eef3eb5b5b896 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp @@ -15,8 +15,7 @@ using namespace ov::tools::subgraph_dumper; std::unordered_set FusedNamesExtractor::extract_compiled_model_names(const std::shared_ptr& model) { - auto core = ov::test::utils::PluginCache::get().core(); - auto compiled_model = core->compile_model(model); + auto compiled_model = core->compile_model(model, device); std::unordered_set compiled_op_name; for (const auto& compiled_op : compiled_model.get_runtime_model()->get_ordered_ops()) { const auto& rt_info = compiled_op->get_rt_info(); @@ -27,6 +26,11 @@ FusedNamesExtractor::extract_compiled_model_names(const std::shared_ptrget_available_devices().begin()); +} + std::list FusedNamesExtractor::extract(const std::shared_ptr &model, bool is_extract_body) { @@ -34,16 +38,23 @@ FusedNamesExtractor::extract(const std::shared_ptr &model, std::list matched_patterns; std::unordered_set checked_ops; std::set> nodes; - std::shared_ptr start_node = nullptr; for (const auto& op : model->get_ordered_ops()) { auto op_name = op->get_friendly_name(); if (is_node_to_skip(op) || checked_ops.count(op_name)) { continue; } - if (start_node == nullptr) { - start_node = op; + if (compiled_op_name.count(op_name)) { + try { + 
matched_patterns.push_back(generate_model(nodes, checked_ops, extractor_name)); + } catch(std::exception& e) { + if (std::string(e.what()) != "Incorrect node number to create model") { + std::cout << "[ WARNING ] Impossible to generate network and add to GraphCache: " <(op)) { auto ti = ov::as_type_ptr(op); @@ -65,20 +76,13 @@ FusedNamesExtractor::extract(const std::shared_ptr &model, } } } - if (!compiled_op_name.count(op_name)) { - try { - matched_patterns.push_back(generate_model(nodes, start_node, checked_ops)); - } catch(std::exception& e) { - std::cout << "[ ERROR ] Impossible to generate network and add to GraphCache: " < #include "matchers/subgraph/manager.hpp" using namespace ov::tools::subgraph_dumper; @@ -16,12 +16,71 @@ bool ExtractorsManager::match(const std::shared_ptr &model, return false; } +bool ExtractorsManager::match(const std::shared_ptr &model, + const std::shared_ptr &ref, + std::map &in_info, + const std::map &in_info_ref) { + if (match(model, ref)) { + try { + auto new_input_info = align_input_info(model, ref, in_info, in_info_ref); + in_info = new_input_info; + return true; + } catch (...) { + return false; + } + } + return false; +} + +std::map +ExtractorsManager::align_input_info(const std::shared_ptr& model, + const std::shared_ptr& model_ref, + const std::map& in_info, + const std::map& in_info_ref) { + std::map new_input_info = in_info; + bool is_update_required = false; + for (const auto& in_info_item : in_info_ref) { + if (!in_info.count(in_info_item.first)) { + is_update_required = true; + break; + } + } + if (is_update_required) { + std::map new_ref_input_info = in_info_ref; + // align matched model names + auto ref_model_ops = model_ref->get_ordered_ops(); + auto model_ops = model->get_ordered_ops(); + size_t ordered_ops_size = model_ops.size(); + if (ordered_ops_size != ref_model_ops.size()) { + throw std::runtime_error("Matched models are different!"); + } + for (size_t i = 0; i < ordered_ops_size; ++i) { + auto model_op_name = model_ops[i]->get_friendly_name(); + auto model_ref_op_name = ref_model_ops[i]->get_friendly_name(); + if (in_info.count(model_op_name)) { + auto input_info = new_input_info[model_op_name]; + if (input_info.is_const != new_ref_input_info[model_ref_op_name].is_const) { + throw std::runtime_error("Impossible yo update input info!!!"); + } + new_input_info.erase(model_op_name); + new_input_info.insert({ model_ref_op_name, input_info }); + } + } + } + return new_input_info; +} + std::list ExtractorsManager::extract(const std::shared_ptr &model, bool is_extract_body) { std::list result; for (const auto &it : m_extractors) { + auto start = std::chrono::high_resolution_clock::now(); + it.second->set_extractor_name(it.first); auto extracted_patterns = it.second->extract(model, is_extract_body); result.insert(result.end(), extracted_patterns.begin(), extracted_patterns.end()); + auto end = std::chrono::high_resolution_clock::now(); + auto delta = std::chrono::duration_cast(end - start).count(); + std::cout << "[ INFO ][ EXTRACTOR DURATION ] " << it.first << " " << delta << "ms" << std::endl; } return result; } diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp index d98ab6f0287b52..7b293eae252ffc 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp +++ 
b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp @@ -92,11 +92,12 @@ RepeatPatternExtractor::extract(const std::shared_ptr &model, } for (size_t i = 0; i < start_node_idx.size(); ++i) { try { - to_cache.push_back( - generate_model(nodes[i], ordered_ops[start_node_idx[i]], checked_ops)); + to_cache.push_back(generate_model(nodes[i], checked_ops, extractor_name)); nodes[i].clear(); } catch(std::exception& e) { - std::cout << "[ ERROR ] Impossible to generate network and add to GraphCache: " << e.what() << std::endl; + if (std::string(e.what()) != "Incorrect node number to create model") { + std::cout << "[ WARNING ] Impossible to generate network and add to GraphCache: " < &model, ref_ordered_ops = ref_model->get_ordered_ops(); if (ordered_ops.size() != ref_ordered_ops.size()) return false; - MatchersManager::MatchersMap matchers = { { "generic_single_op", SingleOpMatcher::Ptr(new SingleOpMatcher) }, { "convolutions", ConvolutionsMatcher::Ptr(new ConvolutionsMatcher) }, diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp index eb1d07b709c380..2c9d46bc8877f2 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp @@ -2,96 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/util/file_util.hpp" +#include "functional_test_utils/ov_plugin_cache.hpp" #include "utils/model.hpp" namespace ov { namespace tools { namespace subgraph_dumper { -inline std::unordered_map> -update_nodes(const std::set>& nodes, - const std::shared_ptr& start_node) { - std::unordered_map> model_map; - std::shared_ptr cloned_op = nullptr; - - for (const auto& op : nodes) { - if (ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || - ov::op::util::is_output(op)) { - continue; - } - cloned_op = clone_node(op, true, false, "Op_" + std::to_string(model_map.size())); - model_map.insert({ op->get_friendly_name(), cloned_op }); - } - - for (const auto& op : nodes) { - if (ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || - ov::op::util::is_output(op)) { - continue; - } - auto op_name = op->get_friendly_name(); - cloned_op = model_map[op->get_friendly_name()]; - size_t inputs_size = op->inputs().size(); - ov::OutputVector in_out_vector(inputs_size); - int filled_input_idx = -1; - for (size_t in_idx = 0; in_idx < inputs_size; ++in_idx) { - auto in_node = op->get_input_node_ptr(in_idx)->shared_from_this(); - for (size_t in_out_idx = 0; in_out_idx < in_node->outputs().size(); ++in_out_idx) { - for (const auto& target_input : in_node->output(in_out_idx).get_target_inputs()) { - auto out_in_node = target_input.get_node()->shared_from_this(); - if (out_in_node == op) { - auto in_node_name = in_node->get_friendly_name(); - in_out_vector[in_idx] = model_map.count(in_node_name) ? 
- model_map.at(in_node_name)->output(in_out_idx) : - cloned_op->get_input_node_ptr(in_idx)->output(0); - if (model_map.count(in_node_name)) { - filled_input_idx++; - } - break; - } - } - if (filled_input_idx == in_idx) { - break; - } - } - } - // todo: iefode: check this code - if (filled_input_idx < 0 && op_name != start_node->get_friendly_name()) { - model_map.erase(op_name); - } else if (filled_input_idx >= 0) { - auto name = cloned_op->get_friendly_name(); - model_map[op_name] = cloned_op->clone_with_new_inputs(in_out_vector); - model_map[op_name]->set_friendly_name(name); - } - } - return model_map; -} - -std::pair, std::map> -generate_model(const std::set>& nodes, - const std::shared_ptr& start_node, - std::unordered_set& checked_ops) { - if (nodes.size() < 2) { - throw std::runtime_error("Incorrect node number to create model"); - } - auto model_map = update_nodes(nodes, start_node); - if (model_map.size() < 2) { - throw std::runtime_error("Incorrect node number to create model"); - } - ov::OutputVector results; - std::map input_info; - for (const auto& op : model_map) { - checked_ops.insert(op.first); - auto this_input_info = get_input_info_by_node(op.second); - input_info.insert(this_input_info.begin(), this_input_info.end()); - for (size_t j = 0; j < op.second->outputs().size(); ++j) { - if (op.second->output(j).get_target_inputs().empty()) { - results.push_back(std::make_shared(op.second->output(j))); - } - } - } - return { std::make_shared(results), input_info }; -} - void save_model_status_to_file(const std::map>& caching_status, const std::string& output_dir) { std::string cache_status_path = ov::util::path_join({output_dir, "model_caching_status"}); @@ -104,7 +22,9 @@ void save_model_status_to_file(const std::map find_models(const std::vector &dirs, const std::string& regexp) { +// { models, { not_read_model }} +std::pair, std::pair>> +find_models(const std::vector &dirs, const std::string& regexp) { std::vector models, full_content; for (const auto& dir : dirs) { std::vector dir_content; @@ -120,17 +40,36 @@ std::vector find_models(const std::vector &dirs, const full_content.insert(full_content.end(), dir_content.begin(), dir_content.end()); } } + std::multimap models_sorted_by_size; + std::vector not_read_model; auto in_regex = std::regex(regexp); - for (const auto& file : full_content) { - if (std::regex_match(file, in_regex)) { + for (const auto& model_file : full_content) { + if (std::regex_match(model_file, in_regex)) { try { - models.emplace_back(file); + // models.emplace_back(file); + if (ov::util::file_exists(model_file)) { + auto core = ov::test::utils::PluginCache::get().core(); + auto model_size = core->read_model(model_file)->get_graph_size(); + models_sorted_by_size.insert({ model_size, model_file}); + } else { + continue; + } } catch (std::exception& e) { - std::cout << "[ ERROR ] Impossible to read model: " << file << std::endl << "Exception: " << e.what(); + not_read_model.emplace_back(model_file); + std::cout << "[ ERROR ] Impossible to read model: " << model_file << std::endl << "Exception: " << e.what(); } } } - return models; + // sort model by size with reverse + auto model_cnt = models_sorted_by_size.size(); + models.resize(model_cnt); + auto it = models_sorted_by_size.rbegin(); + for (size_t i = 0; i < model_cnt; ++i) { + models[i] = it->second; + ++it; + } + std::cout << "[ INFO ] Total model number is " << models.size() << std::endl; + return { models, { ModelCacheStatus::NOT_READ, not_read_model } }; } std::map> cache_models( @@ -151,9 +90,7 @@ 
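find_models() above now returns model paths ordered by graph size, largest first; a minimal sketch of that ordering step in isolation:

```cpp
// Sketch only: a std::multimap keeps keys (graph sizes) ascending, so walking it
// with a reverse iterator yields the model paths from largest to smallest.
#include <map>
#include <string>
#include <vector>

std::vector<std::string> sort_largest_first(const std::multimap<size_t, std::string>& by_size) {
    std::vector<std::string> out;
    out.reserve(by_size.size());
    for (auto it = by_size.rbegin(); it != by_size.rend(); ++it) {
        out.push_back(it->second);
    }
    return out;
}
```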
std::map> cache_models( try { std::shared_ptr function = core->read_model(model); try { - for (auto& cache : caches) { - cache->update_cache(function, model, extract_body); - } + cache->update_cache(function, model, extract_body); } catch (std::exception &e) { std::cout << "[ ERROR ] Model processing failed with exception:" << std::endl << e.what() << std::endl; model_status = ModelCacheStatus::NOT_FULLY_CACHED; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/node.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/node.cpp index ac6eb3adf3c1c6..220d1b00de47d0 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/node.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/node.cpp @@ -10,8 +10,11 @@ namespace subgraph_dumper { std::map get_input_info_by_node(const std::shared_ptr& node) { std::map input_info; for (size_t port_id = 0; port_id < node->get_input_size(); ++port_id) { - InputInfo in_info; std::shared_ptr input_node = node->get_input_node_shared_ptr(port_id); + if (!ov::op::util::is_parameter(input_node) && !ov::op::util::is_constant(input_node)) { + continue; + } + InputInfo in_info; std::string input_name = input_node->get_friendly_name(); if (std::dynamic_pointer_cast(input_node)) { if (ov::shape_size(input_node->get_output_shape(0)) == 0) @@ -80,9 +83,7 @@ std::map get_input_info_by_node(const std::shared_ptr clone_node(std::shared_ptr node, const auto constant_input = ov::get_constant_from_source(node->input(i).get_source_output()); OPENVINO_SUPPRESS_DEPRECATED_END if (constant_input) { - if (is_save_const || constant_input->get_byte_size() <= 1024) { + if (is_save_const || constant_input->get_byte_size() < 1024) { auto in_const = std::make_shared(constant_input->get_element_type(), constant_input->get_shape(), constant_input->get_data_ptr()); @@ -124,8 +125,9 @@ std::shared_ptr clone_node(std::shared_ptr node, param->set_friendly_name(input_name); inputs[i] = param; } - if (!has_parameters && !is_copy_const_node) { - auto cloned_node = clone_node(node, true, true); + std::shared_ptr cloned_node = nullptr; + if (!has_parameters && !is_copy_const_node && !inputs.empty()) { + cloned_node = clone_node(node, true, true, node_name); // std::cout << "The operation: " + node->get_friendly_name() + " does not have parameters! Replace first input to parameter!" 
<< std::endl; auto param = std::make_shared(cloned_node->get_input_element_type(0), cloned_node->get_input_partial_shape(0)); @@ -133,13 +135,26 @@ std::shared_ptr clone_node(std::shared_ptr node, param->set_friendly_name(param_name); auto node_to_replace = cloned_node->get_input_node_shared_ptr(0); ov::replace_node(node_to_replace, param); - return cloned_node; + } else { + cloned_node = node->clone_with_new_inputs(inputs); + cloned_node->set_friendly_name(node_name); } - std::shared_ptr cloned_node = node->clone_with_new_inputs(inputs); - cloned_node->set_friendly_name(node_name); return cloned_node; } +std::shared_ptr convert_const_to_param(const std::shared_ptr& op_to_replace) { + if (op_to_replace->get_byte_size() > 1024) { + auto param = std::make_shared( + op_to_replace->get_output_element_type(0), op_to_replace->get_output_partial_shape(0)); + param->set_friendly_name(op_to_replace->get_friendly_name()); + if (param != nullptr) { + ov::replace_node(op_to_replace, param); + } + return param; + } + return nullptr; +} + std::shared_ptr generate_model_by_node(const std::shared_ptr& node) { static size_t model_cnt = 0; auto cloned_node = clone_node(node); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/graph_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/graph_cache.cpp index e09230e04419ef..c7810c5db1af82 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/graph_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/graph_cache.cpp @@ -87,8 +87,6 @@ class GraphCacheUnitTest : public GraphCacheFuncTest, }; TEST_F(GraphCacheUnitTest, update_cache_by_graph) { - // const std::shared_ptr& model, const std::string& model_path, - // const std::map& input_info, size_t model_op_cnt Model_2 test; auto model_to_cache = test.get(); std::map in_info; @@ -97,7 +95,7 @@ TEST_F(GraphCacheUnitTest, update_cache_by_graph) { in_info.insert({ op->get_friendly_name(), InputInfo()}); } } - this->update_cache(model_to_cache, test_model_path, in_info, model_to_cache->get_ordered_ops().size()); + this->update_cache(model_to_cache, test_model_path, in_info, "test_extractor", model_to_cache->get_ordered_ops().size()); ASSERT_EQ(m_graph_cache.size(), 1); } } // namespace diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/meta.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/meta.cpp index e47b38545a1589..42d8379f7223e3 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/meta.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/meta.cpp @@ -64,11 +64,12 @@ class MetaInfoFuncTest : public SubgraphsDumperBaseTest { std::string test_model_path, test_model_name; std::map test_in_info; std::map test_model_info; - std::string test_artifacts_dir; + std::string test_artifacts_dir, test_extractor_name; void SetUp() override { SubgraphsDumperBaseTest::SetUp(); test_model_path = "test_model_path.xml"; + test_extractor_name = "test_extractor"; test_model_name = ov::test::utils::replaceExt(test_model_path, ""); test_in_info = {{ "test_in_0", InputInfo(DEFAULT_MIN_VALUE, 1, true) }}; test_model_info = {{ test_model_name, ModelInfo(test_model_path, 5) }}; @@ -86,7 +87,8 @@ TEST_F(MetaInfoFuncTest, constructor) { ASSERT_NO_THROW(auto meta = MetaInfo(test_model_name)); ASSERT_NO_THROW(auto meta = MetaInfo(test_model_name, test_in_info)); ASSERT_NO_THROW(auto meta = MetaInfo(test_model_name, 
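A self-contained sketch of the convert_const_to_param() idea introduced above: constants over 1 KB are swapped for Parameters so dumped subgraphs stay compact. The helper name and the graph_util.hpp include are assumptions for illustration, not the patch's exact code:

```cpp
// Illustrative helper: constants larger than `limit` bytes are replaced by
// Parameters; small constants are left in place as data.
#include <cstddef>
#include <memory>

#include "openvino/core/graph_util.hpp"   // assumption: declares ov::replace_node
#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"

std::shared_ptr<ov::op::v0::Parameter>
const_to_param_if_large(const std::shared_ptr<ov::op::v0::Constant>& constant, size_t limit = 1024) {
    if (constant->get_byte_size() <= limit) {
        return nullptr;                                   // keep small constants as data
    }
    auto param = std::make_shared<ov::op::v0::Parameter>(constant->get_output_element_type(0),
                                                         constant->get_output_partial_shape(0));
    param->set_friendly_name(constant->get_friendly_name());
    ov::replace_node(constant, param);                    // rewire consumers to the Parameter
    return param;
}
```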
test_in_info, 2)); - ASSERT_NO_THROW(auto meta = MetaInfo(test_model_name, test_in_info, 3)); + ASSERT_NO_THROW(auto meta = MetaInfo(test_model_name, test_in_info, 3, 1, test_extractor_name)); + ASSERT_NO_THROW(auto meta = MetaInfo(test_model_name, test_in_info, 3, 5, test_extractor_name, 5)); } TEST_F(MetaInfoFuncTest, get_input_info) { @@ -101,9 +103,15 @@ TEST_F(MetaInfoFuncTest, get_model_info) { ASSERT_EQ(test_meta.get_model_info(), test_model_info); } +TEST_F(MetaInfoFuncTest, get_any_extractor) { + auto test_meta = MetaInfo(test_model_path, test_in_info, 5, 3, test_extractor_name); + ASSERT_NO_THROW(test_meta.get_any_extractor()); + ASSERT_EQ(test_meta.get_any_extractor(), test_extractor_name); +} + TEST_F(MetaInfoFuncTest, update) { std::map test_in_info = {{ "test_in_0", InputInfo(DEFAULT_MIN_VALUE, 1, true) }}; - auto test_meta = MetaInfo(test_model_name, test_in_info); + auto test_meta = MetaInfo(test_model_name, test_in_info, 1, 1, test_extractor_name); std::map test_input_info_1 = {{ "test_in_0", InputInfo(0, 1, true) }}; std::string test_model_1 = "test_model_1"; std::string test_model_path_1 = ov::util::path_join({ "path", "to", test_model_1 + ".xml"}); @@ -111,6 +119,7 @@ TEST_F(MetaInfoFuncTest, update) { ASSERT_ANY_THROW(test_meta.update(test_model_path_1, {{ "test_in_1", InputInfo() }})); ASSERT_ANY_THROW(test_meta.update(test_model_path_1, {{ "test_in_0", InputInfo(0, 1, false) }})); ASSERT_NO_THROW(test_meta.update(test_model_path_1, test_input_info_1)); + ASSERT_NO_THROW(test_meta.update(test_model_path_1, test_input_info_1, 1, 2, "test_extractor_1")); ASSERT_NO_THROW(test_meta.update(test_model_path_1, test_input_info_1, 2)); } @@ -135,6 +144,7 @@ class MetaInfoUnitTest : public MetaInfoFuncTest, TEST_F(MetaInfoUnitTest, serialize) { std::string seriliazation_path(ov::util::path_join({test_artifacts_dir, "test_meta.meta"})); + this->extractors = { "extractor_0", "extractor_1" }; this->serialize(seriliazation_path); ASSERT_TRUE(ov::util::file_exists(seriliazation_path)); @@ -170,6 +180,24 @@ TEST_F(MetaInfoUnitTest, serialize) { ASSERT_EQ(input_info[in_xml].ranges.max, max_xml); } } + { + auto extractors_node = doc.child("meta_info").child("extractors"); + std::unordered_set xml_extractors; + for (const auto& in_info_xml : extractors_node.children()) { + xml_extractors.insert(std::string(in_info_xml.attribute("name").value())); + } + ASSERT_EQ(xml_extractors, this->extractors); + } +} + +TEST_F(MetaInfoUnitTest, read_meta_from_file) { + std::string seriliazation_path(ov::util::path_join({test_artifacts_dir, "test_meta.meta"})); + this->extractors = { "extractor_0", "extractor_1" }; + this->serialize(seriliazation_path); + auto new_meta = MetaInfo::read_meta_from_file(seriliazation_path); + ASSERT_TRUE(this->extractors.count(new_meta.get_any_extractor())); + ASSERT_EQ(new_meta.get_input_info(), this->input_info); + ASSERT_EQ(new_meta.get_model_info(), this->model_info); } TEST_F(MetaInfoUnitTest, update) { @@ -185,9 +213,11 @@ TEST_F(MetaInfoUnitTest, update) { ASSERT_EQ(this->model_info[test_model_1].model_paths.size(), 1); ASSERT_EQ(this->model_info[test_model_1].this_op_cnt, 2); test_model_path_1 = ov::util::path_join({ "path", "to", "test", test_model_1 + ".xml"}); - this->update(test_model_path_1, test_meta_1); + this->update(test_model_path_1, test_meta_1, 0, 1, "test_extractor"); ASSERT_EQ(this->model_info[test_model_1].model_paths.size(), 2); ASSERT_EQ(this->model_info[test_model_1].this_op_cnt, 3); + ASSERT_EQ(this->model_info[test_model_1].this_op_cnt, 3); + 
ASSERT_EQ(this->extractors, std::unordered_set({"test_extractor"})); } TEST_F(MetaInfoUnitTest, get_model_name_by_path) { @@ -204,4 +234,10 @@ TEST_F(MetaInfoUnitTest, get_graph_priority) { ASSERT_TRUE(this->get_graph_priority() >= 0 && this->get_graph_priority() <= 1); } +TEST_F(MetaInfoUnitTest, get_any_extractor) { + auto meta = MetaInfo(test_model_name, test_in_info, 1, 1, "test_extractor"); + ASSERT_NO_THROW(meta.get_any_extractor()); + ASSERT_EQ(meta.get_any_extractor(), "test_extractor"); +} + } // namespace \ No newline at end of file diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/fused_names.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/fused_names.cpp index 96bbe8443c1182..591f01c66cd52f 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/fused_names.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/fused_names.cpp @@ -4,6 +4,8 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include "matchers/subgraph/fused_names.hpp" #include "utils/model.hpp" @@ -21,50 +23,44 @@ using namespace ov::tools::subgraph_dumper; class FusedNamesExtractorTest : public FusedNamesExtractor, public SubgraphsDumperBaseTest { protected: - bool is_match(const std::shared_ptr& model) { - size_t graph_cnt = 0; - { - auto compiled_names = extract_compiled_model_names(model); - std::vector op_cnt; - for (const auto& op : model->get_ordered_ops()) { - if (this->is_node_to_skip(op)) { - op_cnt.push_back(1); - continue; - } - auto op_name = op->get_friendly_name(); - if (!compiled_names.count(op_name)) { - op_cnt.push_back(1); - } else if (op_cnt.size() > 0) { - ++op_cnt[op_cnt.size() - 1]; - } - } - for (const auto& cnt : op_cnt) { - if (cnt > 1) { - ++graph_cnt; - } + void is_match(const std::shared_ptr& model) { + auto models_1 = this->extract(model); + auto models_2 = this->extract(model); + ASSERT_EQ(models_1.size(), models_2.size()); + auto it_model_1 = models_1.begin(); + auto it_model_2 = models_2.begin(); + while (it_model_1 != models_1.end() || it_model_2 != models_2.end()) { + SubgraphExtractor extractor; + ASSERT_TRUE(extractor.match(std::get<0>(*it_model_1), std::get<0>(*it_model_2))); + auto in_info_1 = std::get<1>(*it_model_1); + auto in_info_2 = std::get<1>(*it_model_2); + for (const auto& in_info : in_info_1) { + ASSERT_TRUE(in_info_2.count(in_info.first)); + ASSERT_EQ(in_info_2[in_info.first], in_info.second); } + ASSERT_EQ(std::get<2>(*it_model_1), std::get<2>(*it_model_2)); + ++it_model_1; + ++it_model_2; } - auto models = this->extract(model); - return models.size() == graph_cnt; } }; TEST_F(FusedNamesExtractorTest, extract_0) { auto test_model = Model_0(); auto model = test_model.get(); - ASSERT_TRUE(is_match(model)); + is_match(model); } TEST_F(FusedNamesExtractorTest, extract_1) { auto test_model = Model_1(); auto model = test_model.get(); - ASSERT_TRUE(is_match(model)); + is_match(model); } TEST_F(FusedNamesExtractorTest, extract_2) { auto test_model = Model_2(); auto model = test_model.get(); - ASSERT_TRUE(is_match(model)); + is_match(model); } } // namespace diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/manager.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/manager.cpp index f94a7e7d332fa2..b43ffe33cfb687 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/manager.cpp +++ 
b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/manager.cpp @@ -27,6 +27,7 @@ class ExtractorsManagerTest : public ExtractorsManager, { std::shared_ptr test_parameter = std::make_shared(ov::element::f32, ov::Shape{1, 2}); + test_parameter->set_friendly_name("test_parameter_0"); std::shared_ptr test_abs = std::make_shared(test_parameter); std::shared_ptr test_res = @@ -37,6 +38,7 @@ class ExtractorsManagerTest : public ExtractorsManager, { std::shared_ptr test_parameter = std::make_shared(ov::element::f32, ov::Shape{2, 5}); + test_parameter->set_friendly_name("test_parameter_1"); std::shared_ptr test_abs = std::make_shared(test_parameter); std::shared_ptr test_res = @@ -86,9 +88,28 @@ TEST_F(ExtractorsManagerTest, match) { ASSERT_FALSE(this->match(test_model_0_1, test_model_1)); } +TEST_F(ExtractorsManagerTest, match_with_in_info) { + this->set_extractors(test_map); + std::map test_in_info({{"test_parameter_0", InputInfo()}}), test_in_info_1({{"test_parameter_1", InputInfo(1, 2, true)}}); + ASSERT_NO_THROW(this->match(test_model_0_0, test_model_0_1, test_in_info, test_in_info)); + ASSERT_TRUE(this->match(test_model_0_0, test_model_0_1, test_in_info, test_in_info)); + ASSERT_NO_THROW(this->match(test_model_0_0, test_model_0_1, test_in_info, test_in_info_1)); + ASSERT_FALSE(this->match(test_model_0_0, test_model_0_1, test_in_info, test_in_info_1)); + ASSERT_NO_THROW(this->match(test_model_0_1, test_model_1, test_in_info, test_in_info)); + ASSERT_FALSE(this->match(test_model_0_1, test_model_1, test_in_info, test_in_info)); +} + TEST_F(ExtractorsManagerTest, extract) { this->set_extractors(test_map); ASSERT_NO_THROW(this->extract(test_model_0_0)); } +TEST_F(ExtractorsManagerTest, align_input_info) { + std::map test_in_info({{"test_parameter_0", InputInfo()}}), test_in_info_ref({{"test_parameter_1", InputInfo()}}); + ASSERT_NE(test_in_info, test_in_info_ref); + ASSERT_NO_THROW(this->align_input_info(test_model_0_0, test_model_0_1, test_in_info, test_in_info_ref)); + auto c = this->align_input_info(test_model_0_0, test_model_0_1, test_in_info, test_in_info_ref); + ASSERT_EQ(c, test_in_info_ref); +} + } // namespace diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/repeat_pattern.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/repeat_pattern.cpp index 192a285a49cfae..a9da7845f1098c 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/repeat_pattern.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/repeat_pattern.cpp @@ -27,7 +27,7 @@ class RepeatPatternExtractorTest : public RepeatPatternExtractor, for (const auto& model : models) { bool is_match = false; for (const auto& ref_model : ref_models) { - if (this->match(model.first, ref_model)) { + if (this->match(std::get<0>(model), ref_model)) { is_match = true; ++match_numbers; break; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/skip_tests_config.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/skip_tests_config.cpp index b98b2a51f1a870..ddd305cd46364f 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/skip_tests_config.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/skip_tests_config.cpp @@ -5,12 +5,6 @@ #include "functional_test_utils/skip_tests_config.hpp" std::vector disabledTestPatterns() { - std::vector retVector{ - // todo: enable these tests with 
graph cache enabling - R"(.*RepeatPatternExtractorTest.*extract_1.*)", - R"(.*ModelUtilsTest.*generate_.*)", - R"(.*GraphCacheFuncTest.*update_cache.*)", - R"(.*FusedNamesExtractorTest.*)", - }; + std::vector retVector {}; return retVector; } diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/test_models/model_1.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/test_models/model_1.hpp index 2af5be4cea1301..5893fb949f774a 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/test_models/model_1.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/test_models/model_1.hpp @@ -11,6 +11,7 @@ #include "openvino/op/relu.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/result.hpp" +#include "openvino/op/subtract.hpp" class Model_1 { public: @@ -23,7 +24,7 @@ class Model_1 { // | | | | // ------------ ------------ // | | - // add add param param + // add Multiply param param // | | | | // ------------------------------- -------------- // | | @@ -33,7 +34,7 @@ class Model_1 { // | | // ------------------------------------------- // | - // Multiply + // subtract // | // result size_t op_idx = 0; @@ -82,16 +83,16 @@ class Model_1 { std::make_shared(test_abs_0_1, 0, 10); test_clamp_0_1->set_friendly_name("Op_" + std::to_string(op_idx++)); - std::shared_ptr test_add_0_0 = - std::make_shared(test_relu_0_0, test_clamp_0_1); - test_add_0_0->set_friendly_name("Op_" + std::to_string(op_idx++)); - std::shared_ptr test_multiply_0_0 = - std::make_shared(test_add_0, test_add_0_0); + std::make_shared(test_relu_0_0, test_clamp_0_1); + test_multiply_0_0->set_friendly_name("Op_" + std::to_string(op_idx++)); + + std::shared_ptr test_multiply_0_1 = + std::make_shared(test_add_0, test_multiply_0_0); test_multiply_0_0->set_friendly_name("Op_" + std::to_string(op_idx++)); std::shared_ptr test_relu_0_1 = - std::make_shared(test_multiply_0_0); + std::make_shared(test_multiply_0_1); test_relu_0_1->set_friendly_name("Op_" + std::to_string(op_idx++)); std::shared_ptr test_parameter_1_0 = @@ -107,8 +108,8 @@ class Model_1 { std::make_shared(test_multiply_1_1); test_relu_1_1->set_friendly_name("Op_" + std::to_string(op_idx++)); - std::shared_ptr test_add = - std::make_shared(test_relu_0_1, test_relu_1_1); + std::shared_ptr test_add = + std::make_shared(test_relu_0_1, test_relu_1_1); test_add->set_friendly_name("Op_" + std::to_string(op_idx++)); std::shared_ptr test_res = @@ -142,67 +143,14 @@ class Model_1 { { std::shared_ptr test_parameter_0 = std::make_shared(ov::element::f32, ov::Shape{1, 2}); - std::shared_ptr test_abs_0 = - std::make_shared(test_parameter_0); - std::shared_ptr test_relu_0 = - std::make_shared(test_abs_0); - std::shared_ptr test_parameter_1 = - std::make_shared(ov::element::f32, ov::Shape{2, 1}); - std::shared_ptr test_add = - std::make_shared(test_relu_0, test_parameter_1); - std::shared_ptr res = - std::make_shared(test_add); - auto ref_model = std::make_shared(ov::ResultVector{res}, - ov::ParameterVector{test_parameter_0, test_parameter_1}); - ref.push_back(ref_model); - } - { - std::shared_ptr test_parameter_0 = - std::make_shared(ov::element::f32, ov::Shape{1, 2}); - std::shared_ptr test_parameter_1 = - std::make_shared(ov::element::f32, ov::Shape{1, 2}); std::shared_ptr test_abs_1 = - std::make_shared(test_parameter_1); + std::make_shared(test_parameter_0); std::shared_ptr test_clamp_1 = std::make_shared(test_abs_1, 0, 10); std::shared_ptr res = std::make_shared(test_clamp_1); auto ref_model = 
std::make_shared(ov::ResultVector{res}, - ov::ParameterVector{test_parameter_0, test_parameter_1}); - ref.push_back(ref_model); - } - { - std::shared_ptr test_parameter_0 = - std::make_shared(ov::element::f32, ov::Shape{1, 2}); - std::shared_ptr test_parameter_1 = - std::make_shared(ov::element::f32, ov::Shape{1, 2}); - std::shared_ptr test_abs_1 = - std::make_shared(test_parameter_1); - std::shared_ptr test_clamp_1 = - std::make_shared(test_abs_1, 0, 10); - std::shared_ptr test_add = - std::make_shared(test_parameter_0, test_clamp_1); - std::shared_ptr res = - std::make_shared(test_add); - auto ref_model = std::make_shared(ov::ResultVector{res}, - ov::ParameterVector{test_parameter_0, test_parameter_1}); - ref.push_back(ref_model); - } - { - std::shared_ptr test_parameter_0 = - std::make_shared(ov::element::f32, ov::Shape{1, 2}); - std::shared_ptr test_parameter_1 = - std::make_shared(ov::element::f32, ov::Shape{1, 2}); - std::shared_ptr test_abs_1 = - std::make_shared(test_parameter_1); - std::shared_ptr test_clamp_1 = - std::make_shared(test_abs_1, 0, 10); - std::shared_ptr test_add = - std::make_shared(test_parameter_0, test_clamp_1); - std::shared_ptr res = - std::make_shared(test_add); - auto ref_model = std::make_shared(ov::ResultVector{res}, - ov::ParameterVector{test_parameter_0, test_parameter_1}); + ov::ParameterVector{test_parameter_0}); ref.push_back(ref_model); } { diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/test_models/model_2.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/test_models/model_2.hpp index 72d28924354595..fd7a24cbe49e3d 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/test_models/model_2.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/test_models/model_2.hpp @@ -35,7 +35,7 @@ class Model_2 { std::shared_ptr test_parameter_0 = std::make_shared(ov::element::f32, ov::Shape{2, 5}); std::shared_ptr test_abs = - std::make_shared(test_parameter_0); + std::make_shared(test_parameter); std::shared_ptr test_clamp = std::make_shared(test_abs, 0, 10); std::shared_ptr test_relu = diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/utils/model.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/utils/model.cpp index 762623963a8cd6..a5bb560f486e1e 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/utils/model.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/utils/model.cpp @@ -16,21 +16,13 @@ using namespace ov::tools::subgraph_dumper; using ModelUtilsTest = SubgraphsDumperBaseTest; -std::pair, std::set>> +std::set> get_functional_ops(const std::shared_ptr& model) { - std::shared_ptr start_node = nullptr; std::set> nodes; - for (const auto& op : model->get_ordered_ops()) { - if (ov::op::util::is_parameter(op) || ov::op::util::is_output(op)) { - continue; - } - if (start_node == nullptr) { - start_node = op; - } nodes.insert(op); } - return { start_node, nodes }; + return nodes; } TEST_F(ModelUtilsTest, generate_0) { @@ -39,13 +31,8 @@ TEST_F(ModelUtilsTest, generate_0) { { std::unordered_set checked_ops; auto func_ops = get_functional_ops(test_model); - auto model_with_in_info = generate_model(func_ops.second, func_ops.first, checked_ops); - recovered_model = model_with_in_info.first; - for (const auto& op : recovered_model->get_ordered_ops()) { - if (ov::op::util::is_parameter(op) || ov::op::util::is_constant(op)) { - 
ASSERT_TRUE(model_with_in_info.second.count(op->get_friendly_name())); - } - } + auto model_with_in_info = generate_model(func_ops, checked_ops, "test_extractor"); + recovered_model = std::get<0>(model_with_in_info); } { SubgraphExtractor extractor; @@ -59,13 +46,8 @@ TEST_F(ModelUtilsTest, generate_1) { { std::unordered_set checked_ops; auto func_ops = get_functional_ops(test_model); - auto model_with_in_info = generate_model(func_ops.second, func_ops.first, checked_ops); - recovered_model = model_with_in_info.first; - for (const auto& op : recovered_model->get_ordered_ops()) { - if (ov::op::util::is_parameter(op) || ov::op::util::is_constant(op)) { - ASSERT_TRUE(model_with_in_info.second.count(op->get_friendly_name())); - } - } + auto model_with_in_info = generate_model(func_ops, checked_ops, "test_extractor"); + recovered_model = std::get<0>(model_with_in_info); } { SubgraphExtractor extractor; @@ -79,13 +61,9 @@ TEST_F(ModelUtilsTest, generate_2) { { std::unordered_set checked_ops; auto func_ops = get_functional_ops(test_model); - auto model_with_in_info = generate_model(func_ops.second, func_ops.first, checked_ops); - recovered_model = model_with_in_info.first; - for (const auto& op : recovered_model->get_ordered_ops()) { - if (ov::op::util::is_parameter(op) || ov::op::util::is_constant(op)) { - ASSERT_TRUE(model_with_in_info.second.count(op->get_friendly_name())); - } - } + auto model_with_in_info = generate_model(func_ops, checked_ops, "extract_model"); + recovered_model = std::get<0>(model_with_in_info); + auto in_info = std::get<1>(model_with_in_info); } { SubgraphExtractor extractor; diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/api_conformance_helpers.hpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/api_conformance_helpers.hpp index bcf27f17fed6ae..bdde3438192bd6 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/api_conformance_helpers.hpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/api_conformance_helpers.hpp @@ -17,7 +17,7 @@ inline const std::string get_plugin_lib_name_by_device(const std::string& device { "HETERO", "openvino_hetero_plugin" }, { "BATCH", "openvino_auto_batch_plugin" }, { "MULTI", "openvino_auto_plugin" }, - { "VPU", "openvino_intel_vpu_plugin" }, + { "NPU", "openvino_intel_npu_plugin" }, { "CPU", "openvino_intel_cpu_plugin" }, { "GNA", "openvino_intel_gna_plugin" }, { "GPU", "openvino_intel_gpu_plugin" }, diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp index e321cec288acf2..81d6a212dad7e7 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp @@ -22,7 +22,7 @@ void RegisterTestCustomQueries(void) { std::map& extTestQueries = *::PostgreSQLLink::get_ext_test_queries(); std::map& extTestNames = *::PostgreSQLLink::get_ext_test_names(); - std::string testName("checkPluginImplementation"); + std::string testName("checkPluginImplementationCompileModel"); extTestQueries[testName + "_ON_START"] = "OpImplCheck_CheckPluginImpl($__test_id, '$opName', '$opSet', " "'$targetDevice', '$targetDeviceArch', '$targetDeviceName', '$config', $__is_temp)"; @@ -31,7 +31,7 @@ void RegisterTestCustomQueries(void) { 
"OpImplCheck_CheckPluginImpl($__test_id)"; // Query expected in case of a refused results extTestNames[testName] = "$opName"; - testName = "ReadIR"; + testName = "Inference"; extTestQueries[testName + "_ON_START"] = "ReadIRTest_ReadIR($__test_id, '$opName', '$opSet', '$Type', " "'$targetDevice', '$targetDeviceArch', '$targetDeviceName', '$hashXml', '$pathXml', '$config', " diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp index b5847562404372..7b2e226425981b 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp @@ -239,6 +239,7 @@ void ReadIRTest::SetUp() { auto it = inputMap.find(next_node->get_type_info()); auto tensor = it->second(next_node, function->get_parameter_index(param), param->get_element_type(), param->get_shape()); auto const_node = std::make_shared(tensor); + const_node->set_friendly_name(param->get_friendly_name()); ov::replace_node(param, const_node); parameter_to_remove.push_back(param); utils::ConstRanges::reset(); @@ -246,6 +247,11 @@ void ReadIRTest::SetUp() { for (const auto& param : parameter_to_remove) { function->remove_parameter(param); } + ov::pass::Manager manager; + manager.register_pass("test.xml", "test.bin"); + manager.run_passes(function); + auto b = function->get_parameters(); + auto c = 0; } bool hasDynamic = false; diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir_tests.cpp b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir_tests.cpp index 062861c9a35e74..0c7fc52e87926b 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir_tests.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir_tests.cpp @@ -17,15 +17,16 @@ using namespace ov::test::subgraph; namespace { -TEST_P(ReadIRTest, ReadIR) { +TEST_P(ReadIRTest, Inference) { run(); } -TEST_P(ReadIRTest, QueryModel) { +// temporarty disable to provide correct numbers for release +TEST_P(ReadIRTest, DISABLED_QueryModel) { query_model(); } -TEST_P(ReadIRTest, ImportExport) { +TEST_P(ReadIRTest, DISABLED_ImportExport) { import_export(); } @@ -38,7 +39,7 @@ TEST_P(ReadIRTest, ImportExport) { ReadIRTest::getTestCaseName); \ // It should point on latest opset which contains biggest list of operations -#include "openvino/opsets/opset10_tbl.hpp" +#include "openvino/opsets/opset12_tbl.hpp" #undef _OPENVINO_OP_REG INSTANTIATE_TEST_SUITE_P(conformance_other, diff --git a/src/tests/functional/plugin/shared/src/behavior/compiled_model/properties.cpp b/src/tests/functional/plugin/shared/src/behavior/compiled_model/properties.cpp index 40e35da2ef858d..14d45908d15b1f 100644 --- a/src/tests/functional/plugin/shared/src/behavior/compiled_model/properties.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/compiled_model/properties.cpp @@ -3,9 +3,11 @@ // #include "behavior/compiled_model/properties.hpp" -#include "openvino/runtime/properties.hpp" + #include +#include "openvino/runtime/properties.hpp" + namespace ov { namespace test { namespace behavior { @@ -94,6 +96,16 @@ TEST_P(OVClassCompiledModelPropertiesTests, canCompileModelWithPropertiesAndChec } } 
+TEST_P(OVClassCompileModelWithCorrectPropertiesTest, IgnoreEnableMMap) { + if (target_device.find("HETERO:") == 0 || target_device.find("MULTI:") == 0 || target_device.find("AUTO:") == 0 || + target_device.find("BATCH:") == 0) + GTEST_SKIP() << "Disabled test due to configuration" << std::endl; + // Load available plugins + core->get_available_devices(); + OV_ASSERT_NO_THROW(core->set_property(ov::enable_mmap(false))); + OV_ASSERT_NO_THROW(core->set_property(target_device, ov::enable_mmap(false))); +} + TEST_P(OVClassCompileModelWithCorrectPropertiesTest, CompileModelWithCorrectPropertiesTest) { OV_ASSERT_NO_THROW(core->compile_model(model, target_device, properties)); } @@ -137,7 +149,8 @@ TEST_P(OVClassCompiledModelPropertiesDefaultTests, CheckDefaultValues) { ASSERT_TRUE(supported) << "default_property=" << default_property.first; Any property; OV_ASSERT_NO_THROW(property = compiled_model.get_property(default_property.first)); - ASSERT_EQ(default_property.second, property) << "For property: " << default_property.first + ASSERT_EQ(default_property.second, property) + << "For property: " << default_property.first << " expected value is: " << default_property.second.as(); } } @@ -216,7 +229,6 @@ TEST_P(OVClassCompiledModelGetPropertyTest, GetMetricNoThrow_SUPPORTED_CONFIG_KE ASSERT_EXEC_METRIC_SUPPORTED(ov::supported_properties); } - TEST_P(OVClassCompiledModelGetPropertyTest, GetMetricNoThrow_NETWORK_NAME) { ov::Core ie = createCoreWithTemplate(); @@ -273,7 +285,6 @@ TEST_P(OVClassCompiledModelSetIncorrectConfigTest, canNotSetConfigToCompiledMode EXPECT_ANY_THROW(compiled_model.set_property(config)); } - // writeble TEST_P(OVClassCompiledModelGetPropertyTest_MODEL_PRIORITY, GetMetricNoThrow) { ov::Core ie = createCoreWithTemplate(); @@ -310,13 +321,15 @@ TEST_P(OVCompileModelGetExecutionDeviceTests, CanGetExecutionDeviceInfo) { std::vector expected_devices = util::split(expectedDeviceName, ','); std::vector updatedExpectDevices; updatedExpectDevices.assign(expected_devices.begin(), expected_devices.end()); - for (auto &iter : compileModelProperties) { - if ((iter.first == ov::hint::performance_mode && iter.second.as() == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT) || + for (auto& iter : compileModelProperties) { + if ((iter.first == ov::hint::performance_mode && + iter.second.as() == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT) || ov::test::behavior::sw_plugin_in_target_device(target_device)) { for (auto& deviceName : expected_devices) { for (auto&& device : deviceList) { if (device.find(deviceName) != std::string::npos) { - auto updatedExpectDevices_iter = std::find(updatedExpectDevices.begin(), updatedExpectDevices.end(), deviceName); + auto updatedExpectDevices_iter = + std::find(updatedExpectDevices.begin(), updatedExpectDevices.end(), deviceName); if (updatedExpectDevices_iter != updatedExpectDevices.end()) updatedExpectDevices.erase(updatedExpectDevices_iter); updatedExpectDevices.push_back(std::move(device)); diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_executable_network/properties.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_executable_network/properties.cpp index d7771151376002..51addb55b739a5 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_executable_network/properties.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_executable_network/properties.cpp @@ -3,9 +3,11 @@ // #include "behavior/ov_executable_network/properties.hpp" -#include "openvino/runtime/properties.hpp" + #include +#include 
"openvino/runtime/properties.hpp" + namespace ov { namespace test { namespace behavior { @@ -63,6 +65,16 @@ TEST_P(OVCompiledModelPropertiesTests, CanUseCache) { ov::test::utils::removeDir("./test_cache"); } +TEST_P(OVCompiledModelPropertiesTests, IgnoreEnableMMap) { + if (target_device.find("HETERO:") == 0 || target_device.find("MULTI:") == 0 || target_device.find("AUTO:") == 0 || + target_device.find("BATCH:") == 0) + GTEST_SKIP() << "Disabled test due to configuration" << std::endl; + // Load available plugins + core->get_available_devices(); + OV_ASSERT_NO_THROW(core->set_property(ov::enable_mmap(false))); + OV_ASSERT_NO_THROW(core->set_property(target_device, ov::enable_mmap(false))); +} // namespace behavior + TEST_P(OVCompiledModelPropertiesTests, canCompileModelWithPropertiesAndCheckGetProperty) { auto compiled_model = core->compile_model(model, target_device, properties); auto supported_properties = compiled_model.get_property(ov::supported_properties); @@ -115,7 +127,8 @@ TEST_P(OVCompiledModelPropertiesDefaultTests, CheckDefaultValues) { ASSERT_TRUE(supported) << "default_property=" << default_property.first; Any property; OV_ASSERT_NO_THROW(property = compiled_model.get_property(default_property.first)); - ASSERT_EQ(default_property.second, property) << "For property: " << default_property.first + ASSERT_EQ(default_property.second, property) + << "For property: " << default_property.first << " expected value is: " << default_property.second.as(); } } diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp index bca975fe636433..9ee6d25fc06c72 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp @@ -5,6 +5,7 @@ #pragma once #include "openvino/core/model.hpp" +#include "transformations/convert_precision.hpp" #include "common_test_utils/test_common.hpp" #include "functional_test_utils/ov_plugin_cache.hpp" @@ -69,6 +70,7 @@ class SubgraphBaseTest : public ov::test::TestsCommon { virtual std::vector calculate_refs(); virtual std::vector get_plugin_outputs(); + virtual precisions_map get_ref_precisions_convert_map(); friend void core_configuration(SubgraphBaseTest* test); }; diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 3e49a6d3b06820..e1ac460de2e586 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -261,14 +261,12 @@ void SubgraphBaseTest::infer() { inferRequest.infer(); } -std::vector SubgraphBaseTest::calculate_refs() { - using InputsMap = std::map, ov::Tensor>; - - auto functionToProcess = functionRefs->clone(); +precisions_map SubgraphBaseTest::get_ref_precisions_convert_map() { //TODO: remove this conversions as soon as function interpreter fully support bf16 and f16 precisions_map precisions = { { ngraph::element::bf16, ngraph::element::f32 } }; + auto convert_added = false; for (const auto ¶m : function->get_parameters()) { for (size_t i = 0; i < param->get_output_size(); i++) { @@ -281,11 +279,21 @@ std::vector SubgraphBaseTest::calculate_refs() { } } } + if (!convert_added) { precisions.insert({ ngraph::element::f16, ngraph::element::f32}); } + + return precisions; +} + +std::vector 
SubgraphBaseTest::calculate_refs() { + using InputsMap = std::map, ov::Tensor>; + + auto functionToProcess = functionRefs->clone(); + precisions_map convert_precisions = get_ref_precisions_convert_map(); pass::Manager manager; - manager.register_pass(precisions); + manager.register_pass(convert_precisions); manager.run_passes(functionToProcess); functionToProcess->validate_nodes_and_infer_types(); diff --git a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp index 760496faecf83b..609acf31bf2ab1 100644 --- a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp +++ b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp @@ -958,6 +958,7 @@ InputsMap getInputMap() { #include "openvino/opsets/opset9_tbl.hpp" #include "openvino/opsets/opset10_tbl.hpp" #include "openvino/opsets/opset11_tbl.hpp" +#include "openvino/opsets/opset12_tbl.hpp" #include "ov_ops/opset_private_tbl.hpp" #undef _OPENVINO_OP_REG diff --git a/src/tests/functional/shared_test_classes/src/single_layer/random_uniform.cpp b/src/tests/functional/shared_test_classes/src/single_layer/random_uniform.cpp index 122bb7d438d1bc..f4f97a53e3c8c0 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/random_uniform.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/random_uniform.cpp @@ -63,13 +63,15 @@ void RandomUniformLayerTest::SetUp() { std::string targetName; std::tie(output_shape, randomUniformParams, global_seed, op_seed, targetDevice) = this->GetParam(); const auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(randomUniformParams.precision); - auto out_shape_ = std::make_shared(ov::element::i64, - ov::Shape{output_shape.size()}, - output_shape); + + // Use Parameter as input with desired precision to properly configure execution configuration + // in CoreConfiguration() function + auto input = std::make_shared(precision, output_shape); + auto shape_of = std::make_shared(input); auto min_value = createConstant(randomUniformParams.precision, randomUniformParams.min_value); auto max_value = createConstant(randomUniformParams.precision, randomUniformParams.max_value); - auto random_uniform = std::make_shared(out_shape_, + auto random_uniform = std::make_shared(shape_of, min_value, max_value, precision, @@ -77,7 +79,7 @@ void RandomUniformLayerTest::SetUp() { op_seed); ngraph::ResultVector results{std::make_shared(random_uniform)}; - function = std::make_shared(results, ngraph::ParameterVector{}, "random_uniform"); + function = std::make_shared(results, ngraph::ParameterVector{input}, "random_uniform"); } void RandomUniformLayerTest::ConvertRefsParams() { diff --git a/src/tests/test_utils/common_test_utils/CMakeLists.txt b/src/tests/test_utils/common_test_utils/CMakeLists.txt index 5fd7af8ce5d28e..a03a69b6120e32 100644 --- a/src/tests/test_utils/common_test_utils/CMakeLists.txt +++ b/src/tests/test_utils/common_test_utils/CMakeLists.txt @@ -85,7 +85,7 @@ function(add_common_utils ADD_TARGET_NAME) endfunction() -# Keep old name so that library can be used from VPU repo +# Keep old name so that library can be used from NPU repo set(TARGET_NAME commonTestUtils) set(NEW_TARGET_NAME common_test_utils) diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp index 88ba8f0f031ea7..f639a1b2192633 100644 --- 
a/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp @@ -188,11 +188,15 @@ inline std::ostream& operator<<(std::ostream& os, const std::map + +# include "psapi.h" +#endif + namespace ov { namespace test { namespace utils { @@ -34,12 +45,70 @@ std::string generateTestFilePrefix() { testName += testInfo->name(); testName = std::to_string(std::hash()(testName)); std::stringstream ss; - auto ts = std::chrono::duration_cast(std::chrono::high_resolution_clock::now().time_since_epoch()); + auto ts = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now().time_since_epoch()); ss << testName << "_" << std::this_thread::get_id() << "_" << ts.count(); testName = ss.str(); return testName; } +#ifdef _WIN32 +static PROCESS_MEMORY_COUNTERS getMemoryInfo() { + static PROCESS_MEMORY_COUNTERS pmc; + pmc.cb = sizeof(PROCESS_MEMORY_COUNTERS); + GetProcessMemoryInfo(GetCurrentProcess(), &pmc, pmc.cb); + return pmc; +} + +size_t getVmSizeInKB() { + return getMemoryInfo().PagefileUsage / 1024; +} + +size_t getVmRSSInKB() { + return getMemoryInfo().WorkingSetSize / 1024; +} + +#else + +/// Parses number from provided string +static int parseLine(std::string line) { + std::string res = ""; + for (auto c : line) + if (isdigit(c)) + res += c; + if (res.empty()) + // If number wasn't found return -1 + return -1; + return std::stoi(res); +} + +size_t getSystemDataByName(char* name) { + FILE* file = fopen("/proc/self/status", "r"); + size_t result = 0; + if (file != nullptr) { + char line[128]; + + while (fgets(line, 128, file) != NULL) { + if (strncmp(line, name, strlen(name)) == 0) { + result = parseLine(line); + break; + } + } + fclose(file); + } + return result; +} + +size_t getVmSizeInKB() { + return getSystemDataByName(const_cast("VmSize:")); +} + +size_t getVmRSSInKB() { + return getSystemDataByName(const_cast("VmRSS:")); +} + +#endif + } // namespace utils } // namespace test } // namespace ov diff --git a/src/tests/test_utils/common_test_utils/src/test_common.cpp b/src/tests/test_utils/common_test_utils/src/test_common.cpp index fc1534ac984446..6ac9052bd5cd9d 100644 --- a/src/tests/test_utils/common_test_utils/src/test_common.cpp +++ b/src/tests/test_utils/common_test_utils/src/test_common.cpp @@ -9,16 +9,6 @@ #include "openvino/runtime/threading/executor_manager.hpp" #include "precomp.hpp" -#ifdef _WIN32 -#ifndef NOMINMAX -#define NOMINMAX -#endif -#define _WINSOCKAPI_ - -#include -#include "psapi.h" -#endif - #ifdef ENABLE_CONFORMANCE_PGQL # include "common_test_utils/postgres_link.hpp" #endif @@ -26,41 +16,6 @@ namespace ov { namespace test { -inline size_t getVmSizeInKB() { -#ifdef _WIN32 - PROCESS_MEMORY_COUNTERS pmc; - pmc.cb = sizeof(PROCESS_MEMORY_COUNTERS); - GetProcessMemoryInfo(GetCurrentProcess(), &pmc, pmc.cb); - return pmc.WorkingSetSize; -#else - auto parseLine = [](char *line) { - // This assumes that a digit will be found and the line ends in " Kb". 
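The new getSystemDataByName() helper above reads VmSize/VmRSS from /proc/self/status on Linux; a standalone, compilable version of the same lookup for reference:

```cpp
// Sketch: read a "VmSize:" or "VmRSS:" value (reported in kB) from /proc/self/status.
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <string>

static size_t read_proc_status_kb(const char* key) {
    FILE* file = std::fopen("/proc/self/status", "r");
    if (!file) {
        return 0;
    }
    char line[128];
    size_t result = 0;
    while (std::fgets(line, sizeof(line), file) != nullptr) {
        if (std::strncmp(line, key, std::strlen(key)) == 0) {
            std::string digits;
            for (const char* p = line; *p; ++p) {
                if (std::isdigit(static_cast<unsigned char>(*p))) {
                    digits += *p;
                }
            }
            if (!digits.empty()) {
                result = std::stoul(digits);   // value is already in kB
            }
            break;
        }
    }
    std::fclose(file);
    return result;
}
```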
- size_t i = strlen(line); - const char *p = line; - while (*p < '0' || *p > '9') - p++; - line[i - 3] = '\0'; - i = (size_t) atoi(p); - return i; - }; - - FILE *file = fopen("/proc/self/status", "r"); - size_t result = 0; - if (file != nullptr) { - char line[128]; - - while (fgets(line, 128, file) != NULL) { - if (strncmp(line, "VmSize:", 7) == 0) { - result = parseLine(line); - break; - } - } - fclose(file); - } - return result; -#endif -} - TestsCommon::~TestsCommon() { ov::threading::executor_manager()->clear(); @@ -75,7 +30,7 @@ TestsCommon::TestsCommon() : PGLink(new utils::PostgreSQLLink(this)) #endif { - auto memsize = getVmSizeInKB(); + auto memsize = ov::test::utils::getVmSizeInKB(); if (memsize != 0) { std::cout << "\nMEM_USAGE=" << memsize << "KB\n"; } diff --git a/src/tests/test_utils/common_test_utils/src/test_constants.cpp b/src/tests/test_utils/common_test_utils/src/test_constants.cpp index c9338438075467..7018d0283f1548 100644 --- a/src/tests/test_utils/common_test_utils/src/test_constants.cpp +++ b/src/tests/test_utils/common_test_utils/src/test_constants.cpp @@ -12,8 +12,8 @@ const char *DEVICE_AUTO = "AUTO"; const char *DEVICE_CPU = "CPU"; const char *DEVICE_GNA = "GNA"; const char *DEVICE_GPU = "GPU"; +const char *DEVICE_KEEMBAY = "NPU"; const char *DEVICE_BATCH = "BATCH"; -const char *DEVICE_KEEMBAY = "VPU"; const char *DEVICE_MULTI = "MULTI"; const char *DEVICE_TEMPLATE = "TEMPLATE"; const char *DEVICE_HETERO = "HETERO"; diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp index b59d4b8646f4fd..f8cc7858452797 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp @@ -36,7 +36,7 @@ class CrashHandler { } // namespace test } // namespace ov -// openvino_contrib and vpu repo use CommonTestUtils:: +// openvino_contrib and NPU repo use CommonTestUtils:: // so we need to add these names to CommonTestUtils namespace namespace CommonTestUtils { using ov::test::utils::env; diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/models.lst b/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/models.lst index 21b24521160334..99c5151ca2f0d4 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/models.lst +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/models.lst @@ -1 +1 @@ -https://storage.openvinotoolkit.org/test_data/conformance_ir/2023.0.0-9637-dda19231c89.tar \ No newline at end of file +https://storage.openvinotoolkit.org/test_data/conformance_ir/conformance_ir.tar \ No newline at end of file diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py index 1a7a32b114b28f..c75a1e5607dbe8 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py @@ -67,7 +67,7 @@ def aggregate_test_results(aggregated_results: SubElement, xml_reports: list, continue xml_results = xml_root.find("results") xml_timestamp = xml_root.get("timestamp") - if aggregated_timestamp is None or xml_timestamp < aggregated_timestamp: + if aggregated_timestamp is None or xml_timestamp > aggregated_timestamp: 
aggregated_timestamp = xml_timestamp for xml_device_entry in xml_results: if merge_device_suffix and "." in xml_device_entry.tag: diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/rename_conformance_ir.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/rename_conformance_ir.py index 9347617afee3ae..0cf63c5ce1e216 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/rename_conformance_ir.py +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/rename_conformance_ir.py @@ -24,7 +24,6 @@ if PY_OPENVINO in os.listdir(ov_bin_path): env = os.environ py_ov = os.path.join(ov_bin_path, PY_OPENVINO) - py_ov = os.path.join(py_ov, find_latest_dir(py_ov)) env = set_env_variable(env, "PYTHONPATH", py_ov) env = set_env_variable(env, LD_LIB_PATH_NAME, ov_bin_path) diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_parallel.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_parallel.py index 466ab3646efb0a..060e37ba5fc4a7 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_parallel.py +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_parallel.py @@ -10,6 +10,7 @@ from pathlib import Path from shutil import rmtree, copyfile from tarfile import open as tar_open +import defusedxml.ElementTree as ET if not constants.IS_WIN: from signal import SIGKILL @@ -657,10 +658,17 @@ def __save_log(logs_dir, dir, test_name): for priority, name in fix_priority: csv_writer.writerow([name, priority]) if "IR=" in name: - ir_hashes.append(name[name.find('IR=')+3:name.find('_Device=')]) + ir_hash = name[name.find('IR=')+3:name.find('_Device=')] + if os.path.isfile(ir_hash): + _, tail = os.path.split(ir_hash) + ir_hash, _ = os.path.splitext(tail) + ir_hashes.append(ir_hash) + logger.info(f"Fix priorities list is saved to: {fix_priority_path}") # Find all irs for failed tests failed_ir_dir = os.path.join(self._working_dir, f'{self._device}_failed_ir') + failed_models_file_path = os.path.join(self._working_dir, f'failed_models.lst') + failed_models = set() for conformance_ir_filelist in self._conformance_ir_filelists: with open(conformance_ir_filelist, 'r') as file: for conformance_ir in file.readlines(): @@ -684,6 +692,12 @@ def __save_log(logs_dir, dir, test_name): copyfile(xml_file, failed_ir_xml) copyfile(bin_file, failed_ir_bin) copyfile(meta_file, failed_ir_meta) + + meta_root = ET.parse(failed_ir_meta).getroot() + for unique_model in meta_root.find("models"): + for path in unique_model: + for unique_path in path: + failed_models.add(unique_path.attrib["path"]) # api conformance has no failed irs if os.path.exists(failed_ir_dir): output_file_name = failed_ir_dir + '.tar' @@ -691,7 +705,13 @@ def __save_log(logs_dir, dir, test_name): tar.add(failed_ir_dir, arcname=os.path.basename(failed_ir_dir)) logger.info(f"All Conformance IRs for failed tests are saved to: {output_file_name}") rmtree(failed_ir_dir) - + if len(failed_models) > 0: + with open(failed_models_file_path, "w") as failed_models_file: + failed_models_list = list() + for item in failed_models: + failed_models_list.append(f"{item}\n") + failed_models_file.writelines(failed_models_list) + failed_models_file.close() disabled_tests_path = os.path.join(logs_dir, "disabled_tests.log") with open(disabled_tests_path, "w") as disabled_tests_file: for i in range(len(self._disabled_tests)): diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/template/report_template.html 
b/src/tests/test_utils/functional_test_utils/layer_tests_summary/template/report_template.html index 80efaabbdb250f..e440458b2704ed 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/template/report_template.html +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/template/report_template.html @@ -32,7 +32,7 @@

Operations coverage summary: Tag: {{report_tag}} | Version: {{report_version Passrates are based on relative weights each subgraphs! You can check absolute value in `General passrate` row!
- Relative Passrate could be from 0 to 200% now! Relative weight of operation is calculated once for current set of models, but there are more then one tests per model. + Relative Passrate could be from 0 to 100% (Inference only)! Relative weight of an operation is calculated once for the current set of models, but there is more than one test per model.
Status: diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/constants.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/constants.py index a5cef0ce3ebf9f..825e599d52ea07 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/constants.py +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/constants.py @@ -28,7 +28,7 @@ LD_LIB_PATH_NAME = "PATH" if IS_WIN else "LD_LIBRARY_PATH" OPENVINO_NAME = 'openvino' -PY_OPENVINO = "python_api" +PY_OPENVINO = "python" DEBUG_DIR = "Debug" RELEASE_DIR = "Release" diff --git a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py index cef5c0da193385..307a1dcde3f271 100644 --- a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py +++ b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py @@ -74,6 +74,7 @@ def __init__(self): ) def forward(self, x, y=None, z=None): + logits = None if y is None: logits = self.linear_relu_stack(x + z) if z is None: @@ -87,7 +88,7 @@ def make_ref_pt_model_one_input(shape, dtype=np.float32): shape = PartialShape(shape) param1 = ov.opset8.parameter(shape, name="input_0", dtype=dtype) relu = ov.opset8.relu(param1) - if dtype != np.float32: + if dtype not in [np.float32, Type.dynamic]: relu = ov.opset8.convert(relu, np.float32) sigm = ov.opset8.sigmoid(relu) @@ -106,9 +107,13 @@ def make_ref_pt_model_two_inputs(shape, dtype=np.float32): shape = PartialShape(shape) param1 = ov.opset8.parameter(shape, name="input_0", dtype=dtype) param2 = ov.opset8.parameter(shape, name="input_1", dtype=dtype) - mul = ov.opset8.multiply(param1, param2) + if dtype == Type.dynamic: + cl = ov.opset8.convert_like(param2, param1) + mul = ov.opset8.multiply(param1, cl) + else: + mul = ov.opset8.multiply(param1, param2) relu = ov.opset8.relu(mul) - if dtype != np.float32: + if dtype not in [np.float32, Type.dynamic]: relu = ov.opset8.convert(relu, np.float32) sigm = ov.opset8.sigmoid(relu) @@ -277,7 +282,7 @@ def scripted_fn(x: torch.Tensor, y: torch.Tensor): return torch.sigmoid(torch.relu(x * y)) inp_shape = PartialShape([Dimension(1, -1), Dimension(-1, 5), 10]) - ref_model = make_ref_pt_model_two_inputs(inp_shape) + ref_model = make_ref_pt_model_two_inputs(inp_shape, dtype=Type.dynamic) return scripted_fn, ref_model, {'input': [(inp_shape), (inp_shape)]} @@ -292,7 +297,7 @@ def create_pytorch_nn_module_layout_list(tmp_dir): ref_model.inputs[1].node.layout = Layout('nhwc') return pt_model, ref_model, { - 'input_shape': [shape, shape], 'layout': ['nchw', Layout('nhwc')], 'use_convert_model_from_mo': True + 'input': [(shape, np.float32), (shape, np.float32)], 'layout': ['nchw', Layout('nhwc')], 'use_convert_model_from_mo': True } @@ -307,30 +312,7 @@ def create_pytorch_nn_module_layout_list_case2(tmp_dir): ref_model.inputs[1].node.layout = Layout('nhwc') return pt_model, ref_model, { - 'input_shape': [shape, shape], 'layout': ('nchw', Layout('nhwc')), 'use_convert_model_from_mo': True} - - -def create_pytorch_nn_module_mean_list(tmp_dir): - pt_model = make_pt_model_two_inputs() - shape = [1, 10, 10, 3] - - shape = PartialShape(shape) - param1 = ov.opset8.parameter(shape) - param2 = ov.opset8.parameter(shape) - const1 = ov.opset8.constant([[[[-0.0, -0.0, -0.0]]]], dtype=np.float32) - const2 = ov.opset8.constant([[[[-0.0, -0.0, -0.0]]]], dtype=np.float32) - add1 = ov.opset8.add(param1, const1) - add2 = ov.opset8.add(param2, const2) - mul = 
ov.opset8.multiply(add1, add2) - relu = ov.opset8.relu(mul) - sigm = ov.opset8.sigmoid(relu) - - parameter_list = [param1, param2] - ref_model = Model([sigm], parameter_list, "test") - - return pt_model, ref_model, { - 'input_shape': [shape, shape], 'mean_values': [[0, 0, 0], [0, 0, 0]], 'compress_to_fp16': False, - 'use_convert_model_from_mo': True} + 'input': [(shape, np.float32), (shape, np.float32)], 'layout': ('nchw', Layout('nhwc')), 'use_convert_model_from_mo': True} def create_pytorch_nn_module_mean_list_compression_disabled(tmp_dir): @@ -351,7 +333,7 @@ def create_pytorch_nn_module_mean_list_compression_disabled(tmp_dir): parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input_shape': [shape, shape], 'mean_values': [[0, 0, 0], [0, 0, 0]], + return pt_model, ref_model, {'input': [(shape, np.float32), (shape, np.float32)], 'mean_values': [[0, 0, 0], [0, 0, 0]], 'compress_to_fp16': False, 'use_convert_model_from_mo': True} @@ -375,7 +357,7 @@ def create_pytorch_nn_module_mean_list_compression_default(tmp_dir): parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input_shape': [shape, shape], 'mean_values': [[0, 0, 0], [0, 0, 0]], + return pt_model, ref_model, {'input': [(shape, np.float32), (shape, np.float32)], 'mean_values': [[0, 0, 0], [0, 0, 0]], 'use_convert_model_from_mo': True} @@ -403,32 +385,10 @@ def create_pytorch_nn_module_mean_list_compression_enabled(tmp_dir): ref_model = Model([sigm], parameter_list, "test") return pt_model, ref_model, { - 'input_shape': [shape, shape], 'mean_values': [[0, 0, 0], [0, 0, 0]], + 'input': [(shape, np.float32), (shape, np.float32)], 'mean_values': [[0, 0, 0], [0, 0, 0]], 'compress_to_fp16': True, 'use_convert_model_from_mo': True} -def create_pytorch_nn_module_scale_list(tmp_dir): - pt_model = make_pt_model_two_inputs() - shape = [1, 10, 10, 3] - - shape = PartialShape(shape) - param1 = ov.opset8.parameter(shape) - param2 = ov.opset8.parameter(shape) - const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32) - const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32) - sub1 = ov.opset8.multiply(param1, const1) - sub2 = ov.opset8.multiply(param2, const2) - mul = ov.opset8.multiply(sub1, sub2) - relu = ov.opset8.relu(mul) - sigm = ov.opset8.sigmoid(relu) - - parameter_list = [param1, param2] - ref_model = Model([sigm], parameter_list, "test") - - return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], 'compress_to_fp16': False, - 'use_convert_model_from_mo': True} - - def create_pytorch_nn_module_scale_list_compression_disabled(tmp_dir): pt_model = make_pt_model_two_inputs() shape = [1, 10, 10, 3] @@ -447,7 +407,8 @@ def create_pytorch_nn_module_scale_list_compression_disabled(tmp_dir): parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], + return pt_model, ref_model, {'input': [(shape, np.float32), (shape, np.float32)], + 'scale_values': [[1, 1, 1], [1, 1, 1]], 'compress_to_fp16': False, 'use_convert_model_from_mo': True} @@ -471,7 +432,8 @@ def create_pytorch_nn_module_scale_list_compression_default(tmp_dir): parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], + return pt_model, ref_model, 
{'input': [(shape, np.float32), (shape, np.float32)], + 'scale_values': [[1, 1, 1], [1, 1, 1]], 'use_convert_model_from_mo': True} @@ -497,13 +459,14 @@ def create_pytorch_nn_module_scale_list_compression_enabled(tmp_dir): parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], + return pt_model, ref_model, {'input': [(shape, np.float32), (shape, np.float32)], + 'scale_values': [[1, 1, 1], [1, 1, 1]], 'compress_to_fp16': True, 'use_convert_model_from_mo': True} def create_pytorch_nn_module_shapes_list_static(tmp_dir): pt_model = make_pt_model_two_inputs() - ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20]) + ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20], dtype=Type.dynamic) return pt_model, ref_model, {'input': [[1, 3, 20, 20], [1, 3, 20, 20]]} @@ -521,10 +484,11 @@ def create_pytorch_nn_module_shapes_list_dynamic(tmp_dir): [-1, 3, 20, Dimension(-1, 20)]] param1 = ov.opset8.parameter(PartialShape( - inp_shapes[0]), name="x", dtype=np.float32) + inp_shapes[0]), name="x", dtype=Type.dynamic) param2 = ov.opset8.parameter(PartialShape( - inp_shapes[1]), name="y", dtype=np.float32) - mul = ov.opset8.multiply(param1, param2) + inp_shapes[1]), name="y", dtype=Type.dynamic) + cl = ov.opset8.convert_like(param2, param1) + mul = ov.opset8.multiply(param1, cl) relu = ov.opset8.relu(mul) sigm = ov.opset8.sigmoid(relu) @@ -548,13 +512,13 @@ def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input': [(inp_shapes[0],), (inp_shapes[1],)]} + return pt_model, ref_model, {'input': [(inp_shapes[0], Type.f32), (inp_shapes[1], Type.f32)]} def create_pytorch_nn_module_shapes_list_dynamic_single_input(tmp_dir): pt_model = make_pt_model_one_input() inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0]) + ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) return pt_model, ref_model, {'input': inp_shapes} @@ -568,7 +532,7 @@ def create_pytorch_nn_module_shapes_list_dynamic_single_input_via_input(tmp_dir) def create_pytorch_nn_module_shapes_list_static_single_input(tmp_dir): pt_model = make_pt_model_one_input() inp_shapes = [[1, 3, 20, 20]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0]) + ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) return pt_model, ref_model, {'input': inp_shapes} @@ -735,20 +699,6 @@ def create_pytorch_module_with_optional_inputs_case3(tmp_dir): return net, ref_model, {"example_input": example_input, "input": [[3, 3, 3, 3], [3, 3, 3, 3]]} -def create_pytorch_module_with_optional_inputs_case4(tmp_dir): - net = make_pt_model_with_optional_input() - ref_model = make_ref_pt_model_with_optional_inputs( - [3, 3, 3, 3], z_exist=True) - return net, ref_model, {"input": [("x", [3, 3, 3, 3]), ("z", [3, 3, 3, 3])]} - - -def create_pytorch_module_with_optional_inputs_case5(tmp_dir): - net = make_pt_model_with_optional_input() - ref_model = make_ref_pt_model_with_optional_inputs( - [1, 3, -1, -1], z_exist=True) - return net, ref_model, {"input": [("x",[1, 3, -1, -1]), ("z", [1, 3, -1, -1])]} - - def create_pytorch_module_with_compressed_int8_constant_compress_to_fp16_default(tmp_dir): import torch import torch.nn.functional as F @@ -1013,11 +963,9 @@ class TestMoConvertPyTorch(CommonMOConvertTest): 
create_pytorch_jit_script_function, create_pytorch_nn_module_layout_list, create_pytorch_nn_module_layout_list_case2, - create_pytorch_nn_module_mean_list, create_pytorch_nn_module_mean_list_compression_default, create_pytorch_nn_module_mean_list_compression_disabled, create_pytorch_nn_module_mean_list_compression_enabled, - create_pytorch_nn_module_scale_list, create_pytorch_nn_module_scale_list_compression_default, create_pytorch_nn_module_scale_list_compression_disabled, create_pytorch_nn_module_scale_list_compression_enabled, @@ -1039,8 +987,6 @@ class TestMoConvertPyTorch(CommonMOConvertTest): create_pytorch_module_with_optional_inputs_case1, create_pytorch_module_with_optional_inputs_case2, create_pytorch_module_with_optional_inputs_case3, - create_pytorch_module_with_optional_inputs_case4, - create_pytorch_module_with_optional_inputs_case5, create_pytorch_nn_module_with_scalar_input, create_pytorch_module_with_compressed_int8_constant, create_pytorch_module_with_compressed_int8_constant_compress_to_fp16_default, diff --git a/tests/layer_tests/pytorch_tests/test_flatten.py b/tests/layer_tests/pytorch_tests/test_flatten.py index 1702d3bf525038..31345dc749df52 100644 --- a/tests/layer_tests/pytorch_tests/test_flatten.py +++ b/tests/layer_tests/pytorch_tests/test_flatten.py @@ -27,7 +27,9 @@ def forward(self, x): return aten_flatten(dim0, dim1), ref_net, "aten::flatten" - @pytest.mark.parametrize("dim0,dim1", [[0, 1], + @pytest.mark.parametrize("dim0,dim1", [[0, -1], + [-2, -1], + [0, 1], [0, 2], [0, 3], [1, 2], diff --git a/tests/layer_tests/pytorch_tests/test_linspace.py b/tests/layer_tests/pytorch_tests/test_linspace.py new file mode 100644 index 00000000000000..aa6f70d3d71c89 --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_linspace.py @@ -0,0 +1,89 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch +from pytorch_layer_test_class import PytorchLayerTest + + +class TestLinspace(PytorchLayerTest): + def _prepare_input(self, start, end, steps, dtype=None, ref_dtype=None): + inputs = [np.array(start).astype(dtype), np.array(end).astype(dtype), np.array(steps).astype("int32")] + if ref_dtype: + inputs.append(np.zeros(1).astype(ref_dtype)) + return inputs + + def create_model(self, dtype=None, use_out=False, ref_dtype=False): + dtype_map = { + "float32": torch.float32, + "float64": torch.float64, + "int64": torch.int64, + "int32": torch.int32, + "uint8": torch.uint8, + "int8": torch.int8, + } + + class aten_linspace_dtype(torch.nn.Module): + def __init__(self, dtype) -> None: + super().__init__() + self.dtype = dtype + + def forward(self, start, end, steps): + return torch.linspace(start=start, end=end, steps=steps, dtype=self.dtype) + + class aten_linspace_out(torch.nn.Module): + def __init__(self, out) -> None: + super().__init__() + # Size of empty tensor needs to be of equal or larger size than linspace steps + self.out = torch.empty(25, dtype=out) + + def forward(self, start, end, steps): + return torch.linspace(start=start, end=end, steps=steps, out=self.out) + + class aten_linspace_prim_dtype(torch.nn.Module): + def forward(self, start, end, steps, d): + return torch.linspace(start=start, end=end, steps=steps, dtype=d.dtype) + + dtype = dtype_map.get(dtype) + if ref_dtype: + model_class = aten_linspace_prim_dtype() + elif not use_out: + model_class = aten_linspace_dtype(dtype) + else: + model_class = aten_linspace_out(dtype) + + ref_net = None + + return model_class, ref_net, 
"aten::linspace" + + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "int64", "int8"]) + @pytest.mark.parametrize( + "start,end,steps", [(0, 1, 5), (-2, 1, 5), (1, -5, 7), (1, 10, 2), (-1, -5, 2), (-1, -5, 1), (1.25, -5.5, 5)] + ) + def test_linspace_with_prim_dtype(self, dtype, end, start, steps, ie_device, precision, ir_version): + self._test( + *self.create_model(dtype, ref_dtype=True), + ie_device, + precision, + ir_version, + kwargs_to_prepare_input={"end": end, "start": start, "steps": steps, "ref_dtype": dtype} + ) + + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("dtype", [None, "float32", "float64", "int32", "int64", "int8", "uin8"]) + @pytest.mark.parametrize( + "start,end,steps", [(0, 1, 5), (-2, 1, 5), (1, -5, 7), (1, 10, 2), (-1, -5, 2), (-1, -5, 1), (1.25, -5.5, 5)] + ) + @pytest.mark.parametrize("use_out", [False, True]) + def test_linspace_with_out(self, dtype, use_out, end, start, steps, ie_device, precision, ir_version): + self._test( + *self.create_model(dtype=dtype, use_out=use_out), + ie_device, + precision, + ir_version, + kwargs_to_prepare_input={"end": end, "start": start, "steps": steps} + ) diff --git a/tests/layer_tests/pytorch_tests/test_masked_scatter.py b/tests/layer_tests/pytorch_tests/test_masked_scatter.py new file mode 100644 index 00000000000000..81aab9774d7b58 --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_masked_scatter.py @@ -0,0 +1,61 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestMaskedScatter(PytorchLayerTest): + def _prepare_input(self, shape, x_dtype="float32", mask_dtype="bool", out=False): + import numpy as np + x = np.random.randn(*shape).astype(x_dtype) + mask = (x > 0.5).astype(mask_dtype) + source = np.arange(np.size(x)).reshape(shape).astype(x_dtype) + if not out: + return (x, mask, source) + y = np.zeros_like(x).astype(x_dtype) + return (x, mask, source, y) + + def create_model(self, out=False, inplace=False): + import torch + + class aten_masked_scatter(torch.nn.Module): + def __init__(self, out, inplace): + super(aten_masked_scatter, self).__init__() + if inplace: + self.forward = self.forward_inplace + if out: + self.forward = self.forward_out + + def forward(self, x, mask, source): + return torch.masked_scatter(x, mask, source) + + def forward_out(self, x, mask, source, out): + return torch.masked_scatter(x, mask, source, out=out), out + + def forward_inplace(self, x, mask, source): + return x.masked_scatter_(mask, source), x + + ref_net = None + + return aten_masked_scatter(out, inplace), ref_net, "aten::masked_scatter" if not inplace else "aten::masked_scatter_" + + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("shape", [[2, 5], [10, 10], [2, 3, 4], [10, 5, 10, 3], [2, 6, 4, 1]]) + @pytest.mark.parametrize("input_dtype", ["float32", "int32", "float", "int", "uint8"]) + @pytest.mark.parametrize("mask_dtype", ["bool", "uint8"]) + @pytest.mark.parametrize("out", [True, False]) + def test_masked_scatter(self, shape, input_dtype, mask_dtype, out, ie_device, precision, ir_version): + self._test(*self.create_model(out), ie_device, precision, ir_version, + kwargs_to_prepare_input={"shape": shape, "x_dtype": input_dtype, "mask_dtype": mask_dtype, "out": out}) + + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("shape", [[2, 5], [10, 10], [2, 3, 4], [10, 5, 10, 3], 
[2, 6, 4, 1]]) + @pytest.mark.parametrize("input_dtype", ["float32", "int32", "float", "int", "uint8"]) + @pytest.mark.parametrize("mask_dtype", ["bool", "uint8"]) + def test_masked_scatter_inplace(self, shape, input_dtype, mask_dtype, ie_device, precision, ir_version): + self._test(*self.create_model(inplace=True), ie_device, precision, ir_version, + kwargs_to_prepare_input={"shape": shape, "x_dtype": input_dtype, "mask_dtype": mask_dtype}) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py b/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py index 95304109604b97..41e737dba6221d 100644 --- a/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py +++ b/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py @@ -31,13 +31,13 @@ def __init__(self, mask, need_weights, average_attn_weights) -> None: # Float masks raise a warning in PyTorch and are (incorrectly) converted to bool, # which later returns NaNs as MHA's output if mask == 0: - self.mask = torch.from_numpy(np.random.randint(0, 2, (SEQ_LENGTH, SEQ_LENGTH)).astype(np.bool)) + self.mask = torch.from_numpy(np.random.randint(0, 2, (SEQ_LENGTH, SEQ_LENGTH)).astype("bool")) self.mask_type = 0 elif mask == 1: - self.mask = torch.from_numpy(np.random.randint(0, 2, (BATCH_SIZE, SEQ_LENGTH)).astype(np.bool)) + self.mask = torch.from_numpy(np.random.randint(0, 2, (BATCH_SIZE, SEQ_LENGTH)).astype("bool")) self.mask_type = 1 elif mask == 2: - self.mask = torch.from_numpy(np.random.randint(0, 2, (BATCH_SIZE, NUM_HEADS, SEQ_LENGTH, SEQ_LENGTH)).astype(np.bool)) + self.mask = torch.from_numpy(np.random.randint(0, 2, (BATCH_SIZE, NUM_HEADS, SEQ_LENGTH, SEQ_LENGTH)).astype("bool")) self.mask_type = 2 else: self.mask = None diff --git a/tests/layer_tests/pytorch_tests/test_round.py b/tests/layer_tests/pytorch_tests/test_round.py index be310e7e7abc2e..c185461310dfcd 100644 --- a/tests/layer_tests/pytorch_tests/test_round.py +++ b/tests/layer_tests/pytorch_tests/test_round.py @@ -10,6 +10,10 @@ class TestRound(PytorchLayerTest): def _prepare_input(self, out=False, dtype="float32"): import numpy as np input = np.random.randn(1, 3, 224, 224).astype(dtype) + if dtype == "float64": + # fp64 can fail by accuracy, because pytorch rounds fp64 value and ov will round fp32 value. + # To remove sporadic accuracy fails we will round the number to 6 decimal places. 
+ input = np.round(input, 6) if not out: return (input, ) return (input, np.zeros_like(input)) diff --git a/tests/layer_tests/pytorch_tests/test_transpose.py b/tests/layer_tests/pytorch_tests/test_transpose.py index 024cad110ef480..6cd7fe5d4cda4f 100644 --- a/tests/layer_tests/pytorch_tests/test_transpose.py +++ b/tests/layer_tests/pytorch_tests/test_transpose.py @@ -48,31 +48,10 @@ def create_model(self, num_dims=2, inplace=False): import torch class aten_transpose(torch.nn.Module): - def __init__(self, num_dims, inplace): + def __init__(self, inplace): super(aten_transpose, self).__init__() - if num_dims == 2: - self.forward = self.forward_2d if not inplace else self.forward_2d_inplace - elif num_dims == 1: - self.forward = self.forward_1d if not inplace else self.forward_1d_inplace - else: - if inplace: - self.forward = self.forward_inplace - - def forward_2d(self, x): - x = torch.reshape(x, (2, -1)) - return x.t(), x - - def forward_2d_inplace(self, x): - x = torch.reshape(x, (2, -1)) - return x.t_(), x - - def forward_1d(self, x): - x = torch.reshape(x, (-1, )) - return x.t(), x - - def forward_1d_inplace(self, x): - x = torch.reshape(x, (-1, )) - return x.t_(), x + if inplace: + self.forward = self.forward_inplace def forward(self, x): return x.t(), x @@ -82,7 +61,7 @@ def forward_inplace(self, x): ref_net = None - return aten_transpose(num_dims, inplace), ref_net, "aten::t" if not inplace else "aten::t_" + return aten_transpose(inplace), ref_net, "aten::t" if not inplace else "aten::t_" @pytest.mark.parametrize("num_dims", [0, 1, 2]) @pytest.mark.parametrize("input_dtype", ["float32", "int32"]) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py b/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py new file mode 100644 index 00000000000000..88944c50a38091 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py @@ -0,0 +1,45 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestAdjustContrastv2(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'images' in inputs_info + images_shape = inputs_info['images'] + inputs_data = {} + inputs_data['images'] = np.random.rand(*images_shape).astype(self.input_type) + inputs_data['contrast_factor'] = np.random.rand() + return inputs_data + + def create_adjust_contrast_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + images = tf.compat.v1.placeholder(input_type, input_shape, 'images') + contrast_factor = tf.compat.v1.placeholder(input_type, [], 'contrast_factor') + tf.raw_ops.AdjustContrastv2(images=images, contrast_factor=contrast_factor) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20, 3], input_type=np.float32), + dict(input_shape=[5, 25, 15, 2], input_type=np.float32), + dict(input_shape=[3, 4, 8, 10, 4], input_type=np.float32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_adjust_contrast_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_adjust_contrast_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + 
use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_CheckNumerics.py b/tests/layer_tests/tensorflow_tests/test_tf_CheckNumerics.py new file mode 100644 index 00000000000000..0034beac23394a --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_CheckNumerics.py @@ -0,0 +1,47 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestCheckNumerics(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + x_shape = inputs_info['x'] + assert 'y' in inputs_info + y_shape = inputs_info['y'] + inputs_data = {} + inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(self.input_type) + inputs_data['y'] = np.random.randint(-10, 10, y_shape).astype(self.input_type) + return inputs_data + + def create_check_numerics_net(self, input_shape, input_type, op): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + y = tf.compat.v1.placeholder(input_type, input_shape, 'y') + z = tf.raw_ops.AddV2(x=x, y=y) + op(tensor=z, message="z is invalid") + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[2, 6], input_type=np.float32, op=tf.raw_ops.CheckNumerics), + dict(input_shape=[3, 4, 5], input_type=np.float32, op=tf.raw_ops.CheckNumericsV2), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_check_numerics_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_check_numerics_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py b/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py new file mode 100644 index 00000000000000..58db73ece154e1 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py @@ -0,0 +1,48 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestDivNoNan(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + assert 'y' in inputs_info + x_shape = inputs_info['x'] + y_shape = inputs_info['y'] + inputs_data = {} + inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(self.input_type) + # generate y in way to have zeros + inputs_data['y'] = np.random.randint(-10, 10, y_shape).astype(self.input_type) * \ + np.random.randint(0, 2, y_shape).astype(self.input_type) + return inputs_data + + def create_div_no_nan_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + y = tf.compat.v1.placeholder(input_type, input_shape, 'y') + tf.raw_ops.DivNoNan(x=x, y=y) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], 
input_type=np.float32), + dict(input_shape=[2, 3, 4], input_type=np.float32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_div_no_nan_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_div_no_nan_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_EnsureShape.py b/tests/layer_tests/tensorflow_tests/test_tf_EnsureShape.py new file mode 100644 index 00000000000000..d51de4fdada431 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_EnsureShape.py @@ -0,0 +1,44 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestEnsureShape(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'tensor' in inputs_info + tensor_shape = inputs_info['tensor'] + inputs_data = {} + inputs_data['tensor'] = np.random.randint(-10, 10, tensor_shape).astype(self.input_type) + return inputs_data + + def create_ensure_shape_net(self, input_shape, input_type, target_shape): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + tensor = tf.compat.v1.placeholder(input_type, input_shape, 'tensor') + shape = tf.constant(target_shape, dtype=tf.int32) + reshape = tf.raw_ops.Reshape(tensor=tensor, shape=shape) + tf.raw_ops.EnsureShape(input=reshape, shape=target_shape) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[2, 6], input_type=np.float32, target_shape=[2, 3, 2]), + dict(input_shape=[1], input_type=np.float32, target_shape=[]), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_ensure_shape_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_ensure_shape_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_InvertPermutation.py b/tests/layer_tests/tensorflow_tests/test_tf_InvertPermutation.py new file mode 100644 index 00000000000000..8b0a11f825aab6 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_InvertPermutation.py @@ -0,0 +1,44 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestInvertPermutation(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + x_shape = inputs_info['x'] + rng = np.random.default_rng() + inputs_data = {} + inputs_data['x'] = rng.permutation(x_shape[0]).astype(self.input_type) + return inputs_data + + def create_invert_permutation_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + tf.raw_ops.InvertPermutation(x=x) + tf.compat.v1.global_variables_initializer() + tf_net = 
sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[1], input_type=np.int32), + dict(input_shape=[10], input_type=np.int32), + dict(input_shape=[20], input_type=np.int64), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_invert_permutation_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_invert_permutation_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py b/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py new file mode 100644 index 00000000000000..bb39a94594b91a --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py @@ -0,0 +1,75 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestMaxPoolWithArgmax(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'input' in inputs_info + input_shape = inputs_info['input'] + inputs_data = {} + inputs_data['input'] = np.random.randint(-5, 5, input_shape).astype(self.input_type) + return inputs_data + + def create_max_pool_with_argmax_net(self, input_shape, ksize, strides, input_type, padding, targmax, + include_batch_in_index, with_second_output): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + input = tf.compat.v1.placeholder(input_type, input_shape, 'input') + max_pool_with_argmax = tf.raw_ops.MaxPoolWithArgmax(input=input, ksize=ksize, strides=strides, + padding=padding, Targmax=targmax, + include_batch_in_index=include_batch_in_index + ) + tf.identity(max_pool_with_argmax[0], name='max_pool') + if with_second_output: + tf.identity(max_pool_with_argmax[1], name='output_indices') + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[1, 25, 24, 3], + ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1]), + dict(input_shape=[1, 10, 20, 3], + ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1]), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.parametrize("input_type", [ + np.float32, np.int32 + ]) + @pytest.mark.parametrize("padding", [ + 'VALID', 'SAME' + ]) + @pytest.mark.parametrize("targmax", [ + tf.int32, tf.int64 + ]) + @pytest.mark.parametrize("include_batch_in_index", [ + True, False + ]) + @pytest.mark.parametrize("with_second_output", [ + pytest.param( + True, + marks=pytest.mark.skip(reason="117415: TransposeSinking crash") + ), + False + ]) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_max_pool_with_argmax_basic(self, params, input_type, padding, targmax, + include_batch_in_index, with_second_output, + ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test( + *self.create_max_pool_with_argmax_net(**params, input_type=input_type, padding=padding, targmax=targmax, + include_batch_in_index=include_batch_in_index, + with_second_output=with_second_output), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_UnravelIndex.py 
b/tests/layer_tests/tensorflow_tests/test_tf_UnravelIndex.py new file mode 100644 index 00000000000000..da2b79b9b8c93c --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_UnravelIndex.py @@ -0,0 +1,44 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestUnravelIndex(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'indices' in inputs_info + indices_shape = inputs_info['indices'] + inputs_data = {} + inputs_data['indices'] = np.random.randint(0, self.num_elements, indices_shape).astype(self.input_type) + return inputs_data + + def create_unravel_index_net(self, input_shape, input_type, dims_value): + self.input_type = input_type + self.num_elements = np.prod(dims_value) + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + indices = tf.compat.v1.placeholder(input_type, input_shape, 'indices') + dims = tf.constant(dims_value, dtype=input_type) + tf.raw_ops.UnravelIndex(indices=indices, dims=dims) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10], input_type=np.int32, dims_value=[2, 3, 4]), + dict(input_shape=[20], input_type=np.int64, dims_value=[5, 5]), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_unravel_index_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_unravel_index_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/time_tests/.automation/auto_plugin_test_config.yml b/tests/time_tests/.automation/auto_plugin_test_config.yml index 27cb4f05c74ed5..0a884da23815fa 100644 --- a/tests/time_tests/.automation/auto_plugin_test_config.yml +++ b/tests/time_tests/.automation/auto_plugin_test_config.yml @@ -1,392 +1,392 @@ - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16 framework: onnx - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16 framework: onnx - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16 framework: onnx - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16-INT8 framework: onnx - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16-INT8 framework: onnx - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml + path: 
${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16-INT8 framework: onnx - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml name: mobilenet-v2 precision: FP16 framework: caffe - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml name: mobilenet-v2 precision: FP16 framework: caffe - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml name: mobilenet-v2 precision: FP16 framework: caffe - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml name: mobilenet-v2 precision: FP16-INT8 framework: caffe - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml name: mobilenet-v2 precision: FP16-INT8 framework: caffe - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml name: mobilenet-v2 precision: FP16-INT8 framework: caffe - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: 
${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml name: googlenet-v1 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml name: googlenet-v1 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml name: googlenet-v1 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml name: googlenet-v1 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml name: googlenet-v1 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml name: googlenet-v1 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml name: googlenet-v3 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml name: googlenet-v3 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml name: googlenet-v3 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml name: googlenet-v3 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml name: googlenet-v3 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml name: googlenet-v3 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml name: ssd512 precision: FP16 framework: caffe - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml name: ssd512 precision: FP16 framework: caffe - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml name: ssd512 precision: FP16 framework: caffe - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml 
name: ssd512 precision: FP16-INT8 framework: caffe - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml name: ssd512 precision: FP16-INT8 framework: caffe - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml name: ssd512 precision: FP16-INT8 framework: caffe - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml + path: 
${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16 framework: tf @@ -394,280 +394,280 @@ name: AUTO name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: 
${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml + path: 
${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml name: squeezenet1.1 precision: FP16 framework: caffe - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml name: squeezenet1.1 precision: FP16 framework: caffe - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml name: squeezenet1.1 precision: FP16 framework: caffe - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml name: squeezenet1.1 precision: FP16-INT8 framework: caffe - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml name: squeezenet1.1 precision: FP16-INT8 framework: caffe - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml name: squeezenet1.1 precision: FP16-INT8 framework: caffe - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: 
${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: AUTO:CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: AUTO:GPU,CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16-INT8 framework: tf diff --git a/tests/time_tests/.automation/desktop_test_config.yml b/tests/time_tests/.automation/desktop_test_config.yml index 
30149dd929d0c5..a21ebe0d201a8d 100644 --- a/tests/time_tests/.automation/desktop_test_config.yml +++ b/tests/time_tests/.automation/desktop_test_config.yml @@ -1,448 +1,448 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16 framework: onnx - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16 framework: onnx - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16-INT8 framework: onnx - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16-INT8 framework: onnx - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml name: mobilenet-v2 precision: FP16 framework: caffe - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml name: mobilenet-v2 precision: FP16 framework: caffe - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml name: mobilenet-v2 precision: FP16-INT8 framework: caffe - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml name: mobilenet-v2 precision: FP16-INT8 framework: caffe - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: 
${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml name: googlenet-v1 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml name: googlenet-v1 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml name: googlenet-v1 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml name: googlenet-v1 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml name: googlenet-v3 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml name: googlenet-v3 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml name: googlenet-v3 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml name: googlenet-v3 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml name: ssd512 precision: FP16 framework: caffe - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml name: ssd512 precision: FP16 framework: caffe - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml name: ssd512 precision: FP16-INT8 framework: caffe - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml name: ssd512 precision: FP16-INT8 framework: caffe - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml + path: 
${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: GPU model: - path: 
${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml name: squeezenet1.1 precision: FP16 framework: caffe - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml name: squeezenet1.1 precision: FP16 framework: caffe - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml name: squeezenet1.1 precision: FP16-INT8 framework: caffe - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml name: squeezenet1.1 precision: FP16-INT8 framework: caffe - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16 framework: tf - device: name: CPU 
model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16-INT8 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16 framework: tf - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16-INT8 framework: tf - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16-INT8 framework: tf \ No newline at end of file diff --git a/tests/time_tests/.automation/desktop_test_config_cache.yml b/tests/time_tests/.automation/desktop_test_config_cache.yml index 94a2c8bedc9453..a24be9412896ce 100644 --- a/tests/time_tests/.automation/desktop_test_config_cache.yml +++ b/tests/time_tests/.automation/desktop_test_config_cache.yml @@ -1,7 +1,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16 framework: 
onnx @@ -9,7 +9,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16 framework: onnx @@ -17,7 +17,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16-INT8 framework: onnx @@ -25,7 +25,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml + path: ${NPU_MODELS_PKG}/resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml name: resnet-50-pytorch precision: FP16-INT8 framework: onnx @@ -33,7 +33,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml name: mobilenet-v2 precision: FP16 framework: caffe @@ -41,7 +41,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16/mobilenet-v2.xml name: mobilenet-v2 precision: FP16 framework: caffe @@ -49,7 +49,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml name: mobilenet-v2 precision: FP16-INT8 framework: caffe @@ -57,7 +57,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml + path: ${NPU_MODELS_PKG}/mobilenet-v2/caffe/FP16-INT8/mobilenet-v2.xml name: mobilenet-v2 precision: FP16-INT8 framework: caffe @@ -65,7 +65,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16 framework: tf @@ -73,7 +73,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16 framework: tf @@ -81,7 +81,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16-INT8 framework: tf @@ -89,7 +89,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml name: faster-rcnn-resnet101-coco-sparse-60-0001 precision: FP16-INT8 framework: tf @@ -97,7 +97,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml name: googlenet-v1 precision: FP16 framework: tf @@ -105,7 +105,7 @@ - device: name: GPU model: - path: 
${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml name: googlenet-v1 precision: FP16 framework: tf @@ -113,7 +113,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml name: googlenet-v1 precision: FP16-INT8 framework: tf @@ -121,7 +121,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml + path: ${NPU_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml name: googlenet-v1 precision: FP16-INT8 framework: tf @@ -129,7 +129,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml name: googlenet-v3 precision: FP16 framework: tf @@ -137,7 +137,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml name: googlenet-v3 precision: FP16 framework: tf @@ -145,7 +145,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml name: googlenet-v3 precision: FP16-INT8 framework: tf @@ -153,7 +153,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml + path: ${NPU_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml name: googlenet-v3 precision: FP16-INT8 framework: tf @@ -161,7 +161,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml name: ssd512 precision: FP16 framework: caffe @@ -169,7 +169,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml name: ssd512 precision: FP16 framework: caffe @@ -177,7 +177,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml name: ssd512 precision: FP16-INT8 framework: caffe @@ -185,7 +185,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml + path: ${NPU_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml name: ssd512 precision: FP16-INT8 framework: caffe @@ -193,7 +193,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16 framework: tf @@ -201,7 +201,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16 framework: tf @@ -209,7 +209,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16-INT8 framework: tf @@ -217,7 +217,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml name: yolo-v2-ava-0001 precision: FP16-INT8 framework: tf @@ -225,7 +225,7 @@ - device: name: CPU model: - path: 
${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16 framework: tf @@ -233,7 +233,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16 framework: tf @@ -241,7 +241,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16-INT8 framework: tf @@ -249,7 +249,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml name: yolo-v2-ava-sparse-35-0001 precision: FP16-INT8 framework: tf @@ -257,7 +257,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16 framework: tf @@ -265,7 +265,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16 framework: tf @@ -273,7 +273,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16-INT8 framework: tf @@ -281,7 +281,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml name: yolo-v2-ava-sparse-70-0001 precision: FP16-INT8 framework: tf @@ -289,7 +289,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16 framework: tf @@ -297,7 +297,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16 framework: tf @@ -305,7 +305,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16-INT8 framework: tf @@ -313,7 +313,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml name: yolo-v2-tiny-ava-0001 precision: FP16-INT8 framework: tf @@ -321,7 +321,7 @@ - device: name: CPU model: - path: 
${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16 framework: tf @@ -329,7 +329,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16 framework: tf @@ -337,7 +337,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16-INT8 framework: tf @@ -345,7 +345,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml name: yolo-v2-tiny-ava-sparse-30-0001 precision: FP16-INT8 framework: tf @@ -353,7 +353,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16 framework: tf @@ -361,7 +361,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16 framework: tf @@ -369,7 +369,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16-INT8 framework: tf @@ -377,7 +377,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml name: yolo-v2-tiny-ava-sparse-60-0001 precision: FP16-INT8 framework: tf @@ -385,7 +385,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml name: squeezenet1.1 precision: FP16 framework: caffe @@ -393,7 +393,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16/squeezenet1.1.xml name: squeezenet1.1 precision: FP16 framework: caffe @@ -401,7 +401,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml name: squeezenet1.1 precision: FP16-INT8 framework: caffe @@ -409,7 +409,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml + path: ${NPU_MODELS_PKG}/squeezenet1.1/caffe/FP16-INT8/squeezenet1.1.xml name: squeezenet1.1 precision: FP16-INT8 framework: caffe @@ 
-417,7 +417,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16 framework: tf @@ -425,7 +425,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16 framework: tf @@ -433,7 +433,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16-INT8 framework: tf @@ -441,7 +441,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml name: icnet-camvid-ava-0001 precision: FP16-INT8 framework: tf @@ -449,7 +449,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16 framework: tf @@ -457,7 +457,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16 framework: tf @@ -465,7 +465,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16-INT8 framework: tf @@ -473,7 +473,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml name: icnet-camvid-ava-sparse-30-0001 precision: FP16-INT8 framework: tf @@ -481,7 +481,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16 framework: tf @@ -489,7 +489,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16 framework: tf @@ -497,7 +497,7 @@ - device: name: CPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml + path: ${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16-INT8 framework: tf @@ -505,7 +505,7 @@ - device: name: GPU model: - path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml + path: 
${NPU_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml name: icnet-camvid-ava-sparse-60-0001 precision: FP16-INT8 framework: tf diff --git a/tests/time_tests/test_runner/test_timetest.py b/tests/time_tests/test_runner/test_timetest.py index a090161394fe9b..f7d7600b28115b 100644 --- a/tests/time_tests/test_runner/test_timetest.py +++ b/tests/time_tests/test_runner/test_timetest.py @@ -43,7 +43,7 @@ def test_timetest(instance, executable, niter, cl_cache_dir, model_cache, model_ :param niter: number of times to run executable :param cl_cache_dir: directory to store OpenCL cache :param cpu_cache: flag to enable model CPU cache - :param vpu_compiler: flag to change VPU compiler type + :param npu_compiler: flag to change NPU compiler type :param perf_hint: performance hint (optimize device for latency or throughput settings) :param model_cache_dir: directory to store IE model cache :param test_info: custom `test_info` field of built-in `request` pytest fixture diff --git a/thirdparty/dependencies.cmake b/thirdparty/dependencies.cmake index 3b9d9dc5b0fe4f..47b23885e95efe 100644 --- a/thirdparty/dependencies.cmake +++ b/thirdparty/dependencies.cmake @@ -572,14 +572,14 @@ endif() # if(ENABLE_SAMPLES) - # Note: VPU requires 3.9.0 version, because it contains 'nlohmann::ordered_json' + # Note: NPU requires 3.9.0 version, because it contains 'nlohmann::ordered_json' find_package(nlohmann_json 3.9.0 QUIET) if(nlohmann_json_FOUND) # conan and vcpkg create imported target nlohmann_json::nlohmann_json else() add_subdirectory(thirdparty/json EXCLUDE_FROM_ALL) - # this is required only because of VPU plugin reused this + # this is required only because of NPU plugin reused this openvino_developer_export_targets(COMPONENT openvino_common TARGETS nlohmann_json) # for nlohmann library versions older than v3.0.0 diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo index 4035972e8a3dbb..7353ea15fc0ba5 160000 --- a/thirdparty/open_model_zoo +++ b/thirdparty/open_model_zoo @@ -1 +1 @@ -Subproject commit 4035972e8a3dbbac90d22df5cf741f6cbbca1ea2 +Subproject commit 7353ea15fc0ba57d4bcdd7aaf9c4cde454ca447b diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/constants.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/constants.py index d9e3ca2ae71802..1fb45e1f4a2ae3 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/utils/constants.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/constants.py @@ -1,7 +1,7 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -VPU_DEVICE_NAME = 'VPU' +NPU_DEVICE_NAME = 'NPU' CPU_DEVICE_NAME = 'CPU' GPU_DEVICE_NAME = 'GPU' HETERO_DEVICE_NAME = 'HETERO' @@ -22,7 +22,7 @@ DEVICE_DURATION_IN_SECS = { CPU_DEVICE_NAME: 60, GPU_DEVICE_NAME: 60, - VPU_DEVICE_NAME: 60, + NPU_DEVICE_NAME: 60, GNA_DEVICE_NAME: 60, UNKNOWN_DEVICE_TYPE: 120 } diff --git a/tools/mo/openvino/tools/mo/moc_frontend/pytorch_frontend_utils.py b/tools/mo/openvino/tools/mo/moc_frontend/pytorch_frontend_utils.py index f3d2a5e1bc9607..8c489dc412770c 100644 --- a/tools/mo/openvino/tools/mo/moc_frontend/pytorch_frontend_utils.py +++ b/tools/mo/openvino/tools/mo/moc_frontend/pytorch_frontend_utils.py @@ -30,7 +30,7 @@ def get_pytorch_decoder(model, input_shape, example_inputs, args): "NNCF models produced by nncf<2.6 are not supported directly. 
Please export to ONNX first.") except: pass - inputs = prepare_torch_inputs(example_inputs, input_shape, args.get("input"), allow_none=True) + inputs = prepare_torch_inputs(example_inputs) decoder = TorchScriptPythonDecoder(model, example_input=inputs, shared_memory=args.get("share_weights", True)) args['input_model'] = decoder args["framework"] = "pytorch" @@ -151,36 +151,7 @@ def to_torch_tensor(tensor): "Got {}".format(type(tensor))) -def get_torch_dtype(dtype): - import torch - ov_str_to_torch = { - "boolean": torch.bool, - "f16": torch.float16, - "f32": torch.float32, - "f64": torch.float64, - "i8": torch.int8, - "i16": torch.int16, - "i32": torch.int32, - "i64": torch.int64, - "u8": torch.uint8, - } - if dtype is None: - return torch.float - if isinstance(dtype, torch.dtype): - return dtype - if isinstance(dtype, (type, np.dtype)): - dtype = get_element_type_str(dtype) - if isinstance(dtype, Type): - dtype = dtype.get_type_name() - if isinstance(dtype, str): - str_dtype = ov_str_to_torch.get(dtype) - if str_dtype is None: - raise Error(f"Unexpected data type '{dtype}' for input") - return str_dtype - raise Error(f"Unexpected data type for input. Supported torch.dtype, numpy.dtype, ov.Type and str. Got {type(dtype)}") - - -def prepare_torch_inputs(example_inputs, input_shape, input_info=None, allow_none=False): +def prepare_torch_inputs(example_inputs): import torch inputs = None if example_inputs is not None: @@ -201,29 +172,7 @@ def prepare_torch_inputs(example_inputs, input_shape, input_info=None, allow_non inputs[name] = to_torch_tensor(tensor) else: inputs = to_torch_tensor(inputs) - elif input_info is not None or input_shape is not None: - input_info = input_to_input_cut_info(input_info) or [] - input_shape_to_input_cut_info(input_shape, input_info) - inputs = [] - inputs_with_names = {} - for inp in input_info: - shape = inp.shape - if shape is None: - if not allow_none: - raise Error("Please provide input_shape or example_input for all inputs converting PyTorch model.") - inputs = None - break - dtype = get_torch_dtype(inp.type) - static_shape = get_static_shape(shape, dynamic_value=1) - input_tensor = torch.zeros(static_shape, dtype=dtype) # pylint: disable=no-member - if inp.name is not None: - inputs_with_names[inp.name] = input_tensor - inputs.append(input_tensor) - if isinstance(inputs, list): - inputs = tuple(inputs) - if inputs is not None and len(inputs) == len(inputs_with_names): - inputs = inputs_with_names else: - if not allow_none: - raise Error("Please provide input_shape or example_input for converting PyTorch model.") + # No example_input were provided, decoder will use scripting + return None return inputs diff --git a/tools/ovc/openvino/tools/ovc/convert_impl.py b/tools/ovc/openvino/tools/ovc/convert_impl.py index 68228093654801..7fd0e683f638a8 100644 --- a/tools/ovc/openvino/tools/ovc/convert_impl.py +++ b/tools/ovc/openvino/tools/ovc/convert_impl.py @@ -140,8 +140,8 @@ def prepare_ir(argv: argparse.Namespace): argv.placeholder_data_types, getattr(argv, "example_input", None), argv.share_weights) - t.send_event("mo", "conversion_method", moc_front_end.get_name() + "_frontend") - moc_front_end.add_extension(TelemetryExtension("mo", t.send_event, t.send_error, t.send_stack_trace)) + t.send_event("ovc", "conversion_method", moc_front_end.get_name() + "_frontend") + moc_front_end.add_extension(TelemetryExtension("ovc", t.send_event, t.send_error, t.send_stack_trace)) if new_extensions_used(argv): for extension in argv.extension: 
moc_front_end.add_extension(extension) @@ -407,8 +407,8 @@ def _convert(cli_parser: argparse.ArgumentParser, args, python_api_used): return None, None simplified_ie_version = VersionChecker().get_ie_simplified_version() telemetry = init_mo_telemetry() - telemetry.start_session('mo') - telemetry.send_event('mo', 'version', simplified_ie_version) + telemetry.start_session('ovc') + telemetry.send_event('ovc', 'version', simplified_ie_version) # Initialize logger with 'ERROR' as default level to be able to form nice messages # before arg parser deliver log_level requested by user init_logger('ERROR', False) diff --git a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py index effe5e438241b0..da2abdb21f3b55 100644 --- a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +++ b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py @@ -30,7 +30,7 @@ def get_pytorch_decoder(model, example_inputs, args): "NNCF models produced by nncf<2.6 are not supported directly. Please export to ONNX first.") except: pass - inputs = prepare_torch_inputs(example_inputs, args.get("input"), allow_none=True) + inputs = prepare_torch_inputs(example_inputs) decoder = TorchScriptPythonDecoder(model, example_input=inputs, shared_memory=args.get("share_weights", True)) args['input_model'] = decoder args["example_input"] = inputs @@ -150,36 +150,7 @@ def to_torch_tensor(tensor): "Got {}".format(type(tensor))) -def get_torch_dtype(dtype): - import torch - ov_str_to_torch = { - "boolean": torch.bool, - "f16": torch.float16, - "f32": torch.float32, - "f64": torch.float64, - "i8": torch.int8, - "i16": torch.int16, - "i32": torch.int32, - "i64": torch.int64, - "u8": torch.uint8, - } - if dtype is None: - return torch.float - if isinstance(dtype, torch.dtype): - return dtype - if isinstance(dtype, (type, np.dtype)): - dtype = get_element_type_str(dtype) - if isinstance(dtype, Type): - dtype = dtype.get_type_name() - if isinstance(dtype, str): - str_dtype = ov_str_to_torch.get(dtype) - if str_dtype is None: - raise Error(f"Unexpected data type '{dtype}' for input") - return str_dtype - raise Error(f"Unexpected data type for input. Supported torch.dtype, numpy.dtype, ov.Type and str. 
Got {type(dtype)}") - - -def prepare_torch_inputs(example_inputs, input_info=None, allow_none=False): +def prepare_torch_inputs(example_inputs): import torch inputs = None if example_inputs is not None: @@ -200,28 +171,7 @@ def prepare_torch_inputs(example_inputs, input_info=None, allow_none=False): inputs[name] = to_torch_tensor(tensor) else: inputs = to_torch_tensor(inputs) - elif input_info is not None: - input_info = input_to_input_cut_info(input_info) or [] - inputs = [] - inputs_with_names = {} - for inp in input_info: - shape = inp.shape - if shape is None: - if not allow_none: - raise Error("Please provide shape in `input` or `example_input` for all inputs converting PyTorch model.") - inputs = None - break - dtype = get_torch_dtype(inp.type) - static_shape = get_static_shape(shape, dynamic_value=1) - input_tensor = torch.zeros(static_shape, dtype=dtype) # pylint: disable=no-member - if inp.name is not None: - inputs_with_names[inp.name] = input_tensor - inputs.append(input_tensor) - if isinstance(inputs, list): - inputs = tuple(inputs) - if inputs is not None and len(inputs) == len(inputs_with_names): - inputs = inputs_with_names else: - if not allow_none: - raise Error("Please provide shapes `input` or `example_input` for converting PyTorch model.") + # No example_input were provided, decoder will use scripting + return None return inputs diff --git a/tools/ovc/openvino/tools/ovc/telemetry_utils.py b/tools/ovc/openvino/tools/ovc/telemetry_utils.py index c0f1c7b8ec37a3..87e0132ccd17a6 100644 --- a/tools/ovc/openvino/tools/ovc/telemetry_utils.py +++ b/tools/ovc/openvino/tools/ovc/telemetry_utils.py @@ -25,7 +25,7 @@ def send_framework_info(framework: str): :param framework: framework name. """ t = tm.Telemetry() - t.send_event('mo', 'framework', framework) + t.send_event('ovc', 'framework', framework) def get_tid(): @@ -37,8 +37,8 @@ def get_tid(): def send_conversion_result(conversion_result: str, need_shutdown=False): t = tm.Telemetry() - t.send_event('mo', 'conversion_result', conversion_result) - t.end_session('mo') + t.send_event('ovc', 'conversion_result', conversion_result) + t.end_session('ovc') if need_shutdown: t.force_shutdown(1.0) @@ -71,4 +71,4 @@ def send_params_info(argv: argparse.Namespace, cli_parser: argparse.ArgumentPars else: param_str = arg + ":" + arg_to_str(arg_value) - t.send_event('mo', 'cli_parameters', param_str) + t.send_event('ovc', 'cli_parameters', param_str) diff --git a/tools/ovc/openvino/tools/ovc/utils.py b/tools/ovc/openvino/tools/ovc/utils.py index 29d4deaf3ef983..0f9915d3960e63 100644 --- a/tools/ovc/openvino/tools/ovc/utils.py +++ b/tools/ovc/openvino/tools/ovc/utils.py @@ -21,7 +21,7 @@ def refer_to_faq_msg(question_num: int): try: t = tm.Telemetry() - t.send_event('mo', 'error_info', "faq:" + str(question_num)) + t.send_event('ovc', 'error_info', "faq:" + str(question_num)) except Exception: # Telemetry can be not initialized if it is used in MO IR Reader pass diff --git a/tools/pot/README_dev.md b/tools/pot/README_dev.md index 785e409edd416f..d524ec5bb59d89 100644 --- a/tools/pot/README_dev.md +++ b/tools/pot/README_dev.md @@ -13,7 +13,7 @@ Post-Training Optimization Tool includes standalone command-line tool and Python * Per-channel quantization for Convolutional and Fully-Connected layers. * Multiple domains: Computer Vision, Recommendation Systems. * Ability to implement custom calibration pipeline via supported [API](openvino/tools/pot/api/README.md). -* Compression for different HW targets such as CPU, GPU, VPU. 
+* Compression for different HW targets such as CPU, GPU, NPU. * Post-training sparsity. ## Usage diff --git a/tools/pot/docs/BestPractices.md b/tools/pot/docs/BestPractices.md index ab15576a06d8b8..bee4d8fbcadca5 100644 --- a/tools/pot/docs/BestPractices.md +++ b/tools/pot/docs/BestPractices.md @@ -14,7 +14,7 @@ the fastest and easiest way to get a quantized model. It requires only some unan .. note:: - POT uses inference on the CPU during model optimization. It means that ability to infer the original floating-point model is essential for model optimization. In case of the 8-bit quantization, it is recommended to run POT on the same CPU architecture when optimizing for CPU or VNNI-based CPU when quantizing for a non-CPU device, such as GPU, VPU, or GNA. It should help to avoid the impact of the :doc:`saturation issue ` that occurs on AVX and SSE-based CPU devices. + POT uses inference on the CPU during model optimization. It means that ability to infer the original floating-point model is essential for model optimization. In case of the 8-bit quantization, it is recommended to run POT on the same CPU architecture when optimizing for CPU or VNNI-based CPU when quantizing for a non-CPU device, such as GPU, NPU, or GNA. It should help to avoid the impact of the :doc:`saturation issue ` that occurs on AVX and SSE-based CPU devices. Improving accuracy after the Default Quantization @@ -32,7 +32,7 @@ Parameters of the Default Quantization algorithm with basic settings are present # the quantization scheme. For the CPU: # performance - symmetric quantization of weights and activations. # mixed - symmetric weights and asymmetric activations. - # accuracy - the same as "mixed" for CPU, GPU, and GNA devices; asymmetric weights and activations for VPU device. + # accuracy - the same as "mixed" for CPU, GPU, and GNA devices; asymmetric weights and activations for NPU device. "stat_subset_size": 300 # Size of the subset to calculate activations statistics that can be used # for quantization parameters calculation. } diff --git a/tools/pot/docs/DefaultQuantizationUsage.md b/tools/pot/docs/DefaultQuantizationUsage.md index 674d08ba815fc3..ec2edbacca6742 100644 --- a/tools/pot/docs/DefaultQuantizationUsage.md +++ b/tools/pot/docs/DefaultQuantizationUsage.md @@ -90,7 +90,7 @@ Default Quantization algorithm has mandatory and optional parameters which are d * ``"target_device"`` - the following options are available: - * ``"ANY"`` (or ``"CPU"``) - default option to quantize models for CPU, GPU, or VPU + * ``"ANY"`` (or ``"CPU"``) - default option to quantize models for CPU, GPU, or NPU * ``"CPU_SPR"`` - to quantize models for CPU SPR (4th Generation Intel® Xeon® Scalable processor family) * ``"GNA"``, ``"GNA3"``, ``"GNA3.5"`` - to quantize models for GNA devices respectively. diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/README.md b/tools/pot/openvino/tools/pot/algorithms/quantization/README.md index 0444889bf4494e..3b8b9f352e5761 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/README.md +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/README.md @@ -2,9 +2,9 @@ ## Introduction -The primary optimization feature of the Post-training Optimization Tool (POT) is the uniform integer quantization which allows substantially increasing inference performance and reducing the model size. Different HW platforms can support different integer precisions and POT is designed to support all of them, for example, 8-bit for CPU, GPU, VPU, 16-bit for GNA. 
Moreover, POT makes the specification of HW settings transparent for the user by introducing a concept of the `target_device` parameter. +The primary optimization feature of the Post-training Optimization Tool (POT) is the uniform integer quantization which allows substantially increasing inference performance and reducing the model size. Different HW platforms can support different integer precisions and POT is designed to support all of them, for example, 8-bit for CPU, GPU, NPU, 16-bit for GNA. Moreover, POT makes the specification of HW settings transparent for the user by introducing a concept of the `target_device` parameter. -> **NOTE**: There is a special `target_device: "ANY"` which leads to portable quantized models compatible with CPU, GPU, and VPU devices. GNA-quantized models are compatible only with CPU. +> **NOTE**: There is a special `target_device: "ANY"` which leads to portable quantized models compatible with CPU, GPU, and NPU devices. GNA-quantized models are compatible only with CPU. During the quantization process, the POT tool runs inference of the optimizing model to estimate quantization parameters for input activations of the quantizable operation. It means that a calibration dataset is required to perform quantization. This dataset may have or not have annotation depending on the quantization algorithm that is used. diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/algorithm.py b/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/algorithm.py index d08c970f1af7b5..b1c29d88872bc7 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/algorithm.py +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/algorithm.py @@ -17,7 +17,7 @@ def __init__(self, config, engine): algos_by_devices = { 'ANY': 'AccuracyAwareCommon', 'CPU': 'AccuracyAwareCommon', - 'VPU': 'AccuracyAwareCommon', + 'NPU': 'AccuracyAwareCommon', 'GPU': 'AccuracyAwareCommon', 'GNA': 'AccuracyAwareGNA', 'GNA3': 'AccuracyAwareGNA', diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware_common/mixed_precision.py b/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware_common/mixed_precision.py index 70011eb9cccc56..c99e9733fae6db 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware_common/mixed_precision.py +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware_common/mixed_precision.py @@ -31,11 +31,11 @@ def __init__(self, config, engine): self.original_quantization_config.weights.bits = 8 self.original_quantization_config.activations.bits = 8 self._config.convert_to_mixed_preset = False - self._restrict_for_vpu = True + self._restrict_for_npu = True self._engine.calculate_metrics = True def _can_set_fq_to_low_bitwidth(self, node): - if self._restrict_for_vpu: + if self._restrict_for_npu: return (nu.get_node_output(node, 0)[0].type == 'Convolution') and \ ('group' not in nu.get_node_output(node, 0)[0]) return nu.get_node_output(node, 0)[0].type in OPERATIONS_WITH_WEIGHTS diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize_configuration.py b/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize_configuration.py index 4642201b558d94..35733a07b84163 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize_configuration.py +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize_configuration.py @@ -528,7 +528,7 @@ def 
 def change_configurations_by_model_type(model, config, fq_configuration, hardware_config):
-    if config['model_type'] == 'transformer' and config['target_device'] in ['ANY', 'CPU', 'CPU_SPR', 'GPU', 'VPU']:
+    if config['model_type'] == 'transformer' and config['target_device'] in ['ANY', 'CPU', 'CPU_SPR', 'GPU', 'NPU']:
         change_configurations_by_model_type_transformer(model, fq_configuration, hardware_config)
diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py
index 9ad333a72bfcf5..be69ad8bc637a2 100644
--- a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py
+++ b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py
@@ -21,7 +21,7 @@
     'GNA3': 'gna3.json',
     'GNA3.5': 'gna3.json',
     'GPU': 'gpu.json', # Same as cpu.json but without LSTM/GRUSequence quantization
-    'VPU': 'vpu.json',
+    'NPU': 'npu.json',
     'CPU_SPR': 'cpu.json'}
diff --git a/tools/pot/openvino/tools/pot/configs/config.py b/tools/pot/openvino/tools/pot/configs/config.py
index 9c55cbd8e4901a..0b5b41adc30bbf 100644
--- a/tools/pot/openvino/tools/pot/configs/config.py
+++ b/tools/pot/openvino/tools/pot/configs/config.py
@@ -347,7 +347,7 @@ def _configure_algo_params(self):
         aliases = {'symmetric': 'performance', 'asymmetric': 'accuracy'}
         preset = aliases.get(preset, preset)
         presets_aliases_by_device = {
-            'VPU': {'accuracy': 'accuracy'},
+            'NPU': {'accuracy': 'accuracy'},
             'GNA': {'accuracy': 'accuracy', 'mixed': 'accuracy'},
             'GNA3': {'accuracy': 'accuracy', 'mixed': 'accuracy'},
             'GNA3.5': {'accuracy': 'accuracy', 'mixed': 'accuracy'},
diff --git a/tools/pot/openvino/tools/pot/graph/vpu_patterns.py b/tools/pot/openvino/tools/pot/graph/npu_patterns.py
similarity index 92%
rename from tools/pot/openvino/tools/pot/graph/vpu_patterns.py
rename to tools/pot/openvino/tools/pot/graph/npu_patterns.py
index 5da9861a34ecbc..44a7012f19286d 100644
--- a/tools/pot/openvino/tools/pot/graph/vpu_patterns.py
+++ b/tools/pot/openvino/tools/pot/graph/npu_patterns.py
@@ -4,7 +4,7 @@
 from openvino.tools.pot.graph.pattern_utils import get_clamp_mult_const_pattern, \
     get_softmax_reshape_transpose_gather_matmul_pattern
 
-def get_vpu_ignored_patterns():
+def get_npu_ignored_patterns():
     return {
         'blocks': [get_softmax_reshape_transpose_gather_matmul_pattern()],
         'activations': [get_clamp_mult_const_pattern()],
diff --git a/tools/pot/openvino/tools/pot/graph/utils.py b/tools/pot/openvino/tools/pot/graph/utils.py
index 30844760d58348..1358c74ae9f2fc 100644
--- a/tools/pot/openvino/tools/pot/graph/utils.py
+++ b/tools/pot/openvino/tools/pot/graph/utils.py
@@ -10,7 +10,7 @@
 from openvino.tools.pot.version import get_version
 from .cpu_patterns import get_cpu_ignored_patterns, get_cpu_spr_ignored_patterns
 from .gpu_patterns import get_gpu_ignored_patterns
-from .vpu_patterns import get_vpu_ignored_patterns
+from .npu_patterns import get_npu_ignored_patterns
 from .gna_patterns import get_gna_ignored_patterns, get_gna3_ignored_patterns
 from .special_operations import QUANTIZE_AGNOSTIC_OPERATIONS
 from .node_utils import get_all_node_outputs, get_input_shape
@@ -19,7 +19,7 @@
     'ANY': get_cpu_ignored_patterns(),
     'CPU': get_cpu_ignored_patterns(),
     'GPU': get_gpu_ignored_patterns(),
-    'VPU': get_vpu_ignored_patterns(),
+    'NPU': get_npu_ignored_patterns(),
     'GNA': get_gna_ignored_patterns(),
     'GNA3': get_gna3_ignored_patterns(),
     'GNA3.5': get_gna3_ignored_patterns(),
diff --git a/tools/pot/tests/test_target_device.py b/tools/pot/tests/test_target_device.py
index d42d294c8aedbc..2e6a8e429b1aa1 100644
--- a/tools/pot/tests/test_target_device.py
+++ b/tools/pot/tests/test_target_device.py
@@ -7,7 +7,7 @@
 DEVICE = [
     'CPU',
     'GPU',
-    'VPU'
+    'NPU'
 ]
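For readers following the documentation changes above, the renamed device identifier is ultimately just a value passed to the Default Quantization algorithm through the POT Python API. The sketch below is illustrative only and is not part of the patch: it assumes the public `openvino.tools.pot` entry points (`DataLoader`, `IEEngine`, `load_model`, `create_pipeline`, `save_model`), placeholder IR file paths, and a made-up random-data calibration loader that a real application would replace with actual samples.

```python
import numpy as np
from openvino.tools.pot import DataLoader, IEEngine, load_model, save_model, create_pipeline


class RandomCalibrationLoader(DataLoader):
    """Hypothetical stand-in for a real calibration dataset."""

    def __init__(self, shape, count):
        super().__init__(config={})
        self._shape, self._count = shape, count

    def __len__(self):
        return self._count

    def __getitem__(self, index):
        # Default Quantization needs no annotations, so the second element is None.
        return np.random.rand(*self._shape).astype(np.float32), None


algorithms = [{
    "name": "DefaultQuantization",
    "params": {
        "target_device": "NPU",   # renamed from "VPU"; "ANY" also covers CPU, GPU, and NPU
        "preset": "performance",  # symmetric quantization of weights and activations
        "stat_subset_size": 300,  # calibration samples used for activation statistics
    },
}]

# Placeholder IR paths; POT itself runs inference on the CPU during optimization.
model = load_model({"model_name": "model", "model": "model.xml", "weights": "model.bin"})
engine = IEEngine(config={"device": "CPU"}, data_loader=RandomCalibrationLoader((1, 3, 224, 224), 300))
pipeline = create_pipeline(algorithms, engine)
quantized_model = pipeline.run(model)
save_model(quantized_model, save_path="./quantized_model")
```

With `target_device` set to `"ANY"`, the same flow would produce a portable quantized model for CPU, GPU, and NPU, matching the note in the quantization README changed above.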