From 9ab75685c0958aacaf28fedb3fc290b7600fe66d Mon Sep 17 00:00:00 2001 From: mingmingtasd Date: Wed, 3 Aug 2022 11:15:03 +0800 Subject: [PATCH] Delete the implementation of DML backend and disable CI --- .github/workflows/build_test_dml.yml | 123 - .github/workflows/build_test_node_dml.yml | 168 - .github/workflows/memory_leak_check_dml.yml | 60 - src/webnn/native/BUILD.gn | 15 +- src/webnn/native/dml/BackendDML.cpp | 51 - src/webnn/native/dml/BackendDML.h | 45 - src/webnn/native/dml/ContextDML.cpp | 34 - src/webnn/native/dml/ContextDML.h | 41 - src/webnn/native/dml/DMLUtils.cpp | 93 - src/webnn/native/dml/DMLUtils.h | 175 -- src/webnn/native/dml/GraphDML.cpp | 3035 ------------------- src/webnn/native/dml/GraphDML.h | 246 -- 12 files changed, 1 insertion(+), 4085 deletions(-) delete mode 100644 .github/workflows/build_test_dml.yml delete mode 100644 .github/workflows/build_test_node_dml.yml delete mode 100644 .github/workflows/memory_leak_check_dml.yml delete mode 100644 src/webnn/native/dml/BackendDML.cpp delete mode 100644 src/webnn/native/dml/BackendDML.h delete mode 100644 src/webnn/native/dml/ContextDML.cpp delete mode 100644 src/webnn/native/dml/ContextDML.h delete mode 100644 src/webnn/native/dml/DMLUtils.cpp delete mode 100644 src/webnn/native/dml/DMLUtils.h delete mode 100644 src/webnn/native/dml/GraphDML.cpp delete mode 100644 src/webnn/native/dml/GraphDML.h diff --git a/.github/workflows/build_test_dml.yml b/.github/workflows/build_test_dml.yml deleted file mode 100644 index 81da4670d..000000000 --- a/.github/workflows/build_test_dml.yml +++ /dev/null @@ -1,123 +0,0 @@ -name: DirectML backend (Windows) - -on: [push, pull_request] - -jobs: - - job: - - runs-on: windows-2019 - - steps: - - name: Git config - run: | - git config --global core.autocrlf false - git config --global core.eol lf - - - name: Install depot_tools - shell: cmd - run: | - git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git ..\depot_tools - set "PATH=%CD%\..\depot_tools;%PATH%" - gclient - - - name: Set up Python 3.x - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - - uses: actions/checkout@v2 - with: - ref: main - path: baseline - fetch-depth: 0 - - - name: Update DEPS for main branch - shell: pwsh - run: | - cd baseline - (Get-Content -path .\DEPS -Raw) -replace "'checkout_onnxruntime': True", "'checkout_onnxruntime': False" | Set-Content -path .\DEPS - (Get-Content -path .\DEPS -Raw) -replace "'checkout_samples': True", "'checkout_samples': False" | Set-Content -path .\DEPS - - - name: Sync code for main branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd baseline - copy scripts\standalone.gclient .gclient - gclient sync - - - name: Generate project for main branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd baseline - gn gen out\Release --args="webnn_enable_dml=true is_debug=false gpgmm_enable_device_checks=true" - - - name: Build for main branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd baseline - ninja -C out\Release - - - name: Test for main branch - shell: cmd - run: | - cd baseline - echo "Run End2End Tests..." - out\Release\webnn_end2end_tests.exe --gtest_output=json:${{ github.workspace }}\..\baseline_end2endtests.json - cd .. - rmdir /s /q baseline - - - uses: actions/checkout@v2 - with: - path: update - fetch-depth: 0 - - - name: Update DEPS for update branch - shell: pwsh - run: | - cd update - (Get-Content -path .\DEPS -Raw) -replace "'checkout_onnxruntime': True", "'checkout_onnxruntime': False" | Set-Content -path .\DEPS - (Get-Content -path .\DEPS -Raw) -replace "'checkout_samples': True", "'checkout_samples': False" | Set-Content -path .\DEPS - - - name: Sync latest code - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd update - copy scripts\standalone.gclient .gclient - gclient sync - - - name: Generate project for update branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd update - gn gen out\Release --args="webnn_enable_dml=true webnn_enable_wire=true is_debug=false gpgmm_enable_device_checks=true" - - - name: Build for update branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd update - ninja -C out\Release - - - name: Test for update branch - shell: cmd - run: | - cd update - echo "Run End2End Tests..." - out\Release\webnn_end2end_tests.exe --gtest_output=json:${{ github.workspace }}\..\update_end2endtests.json || true - - - name: Regression check - run: | - echo "Regression checking..." - python update\workflow_scripts\regression_check.py ${{ github.workspace }}\..\baseline_end2endtests.json ${{ github.workspace }}\..\update_end2endtests.json diff --git a/.github/workflows/build_test_node_dml.yml b/.github/workflows/build_test_node_dml.yml deleted file mode 100644 index f776879c1..000000000 --- a/.github/workflows/build_test_node_dml.yml +++ /dev/null @@ -1,168 +0,0 @@ -name: Node Binding (DirectML backend / Windows) - -on: [push, pull_request] - -jobs: - - job: - - runs-on: windows-2019 - - steps: - - name: Git config - run: | - git config --global core.autocrlf false - git config --global core.eol lf - - - name: Install depot_tools - shell: cmd - run: | - git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git ..\depot_tools - set "PATH=%CD%\..\depot_tools;%PATH%" - gclient - - - name: Set up Python 3.x - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - - uses: actions/setup-node@v2 - with: - node-version: '14' - - - uses: actions/checkout@v2 - with: - ref: main - path: baseline - fetch-depth: 0 - - - name: Update DEPS for main branch - shell: pwsh - run: | - cd baseline - (Get-Content -path .\DEPS -Raw) -replace "'checkout_onnxruntime': True", "'checkout_onnxruntime': False" | Set-Content -path .\DEPS - (Get-Content -path .\DEPS -Raw) -replace "'checkout_samples': True", "'checkout_samples': False" | Set-Content -path .\DEPS - - - name: Sync code for main branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd baseline - copy scripts\standalone.gclient .gclient - gclient sync - - - name: Generate project for main branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd baseline - gn gen out\Release --args="webnn_enable_dml=true is_debug=false gpgmm_enable_device_checks=true" - - - name: Build for main branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd baseline - ninja -C out\Release - - - name: Run 'npm install' command under node folder of main branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%CD%\baseline\out\Release;%PATH%" - cd baseline\node - npm install --webnn_native_lib_path="../out/Release" - - - name: Run 'npm run build' command under node folder of main branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%CD%\baseline\out\Release;%PATH%" - cd baseline\node - npm run build --webnn_native_lib_path="../out/Release" - - - name: Run 'npm run report' command under node folder of main branch - shell: cmd - run: | - set "PATH=%CD%\baseline\out\Release;%PATH%" - cd baseline\node - npm run report || true - - - name: Prepare baseline result file for regression checking - shell: cmd - run: | - echo "Baseline node test result:" - type baseline\node\result.xml - copy baseline\node\result.xml ${{ github.workspace }}\..\baseline.xml - rmdir /s /q baseline - - - uses: actions/checkout@v2 - with: - path: update - fetch-depth: 0 - - - name: Update DEPS for update branch - shell: pwsh - run: | - cd update - (Get-Content -path .\DEPS -Raw) -replace "'checkout_onnxruntime': True", "'checkout_onnxruntime': False" | Set-Content -path .\DEPS - (Get-Content -path .\DEPS -Raw) -replace "'checkout_samples': True", "'checkout_samples': False" | Set-Content -path .\DEPS - - - name: Sync latest code - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd update - copy scripts\standalone.gclient .gclient - gclient sync - - - name: Generate project for update branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd update - gn gen out\Release --args="webnn_enable_dml=true is_debug=false gpgmm_enable_device_checks=true" - - - name: Build for update branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd update - ninja -C out\Release - - - name: Run 'npm install' command under node folder of update branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%CD%\update\out\Release;%PATH%" - cd update\node - npm install --webnn_native_lib_path="../out/Release" - - - name: Run 'npm run build' command under node folder of update branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%CD%\update\out\Release;%PATH%" - cd update\node - npm run build --webnn_native_lib_path="../out/Release" - - - name: Run 'npm run report' command under node folder of update branch - shell: cmd - run: | - set "PATH=%CD%\update\out\Release;%PATH%" - cd update\node - npm run report || true - - - name: Prepare latest result file for regression checking - shell: cmd - run: | - echo "Latest node test result:" - type update\node\result.xml - copy update\node\result.xml ${{ github.workspace }}\..\update.xml - - - name: Regression check - run: | - echo "Regression checking..." - python update\workflow_scripts\regression_check.py ${{ github.workspace }}\..\baseline.xml ${{ github.workspace }}\..\update.xml \ No newline at end of file diff --git a/.github/workflows/memory_leak_check_dml.yml b/.github/workflows/memory_leak_check_dml.yml deleted file mode 100644 index 6ae2db549..000000000 --- a/.github/workflows/memory_leak_check_dml.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: Check memory leak for DirectML backend (Windows) - -on: [push, pull_request] - -jobs: - - job: - - runs-on: windows-2019 - - steps: - - name: Git config - run: | - git config --global core.autocrlf false - git config --global core.eol lf - - name: Install depot_tools - shell: cmd - run: | - git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git ..\depot_tools - set "PATH=%CD%\..\depot_tools;%PATH%" - gclient - - name: Set up Python 3.x - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - - uses: actions/checkout@v2 - with: - path: update - fetch-depth: 0 - - - name: Sync latest code - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd update - copy scripts\standalone.gclient .gclient - gclient sync - - name: Generate project for update branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd update - gn gen out\Debug --args="webnn_enable_dml=true is_debug=true" - - name: Build for update branch - shell: cmd - run: | - set "PATH=%CD%\..\depot_tools;%PATH%" - set "DEPOT_TOOLS_WIN_TOOLCHAIN=0" - cd update - ninja -C out\Debug - - name: Check memory leak for update branch - shell: cmd - run: | - cd update - echo "Run End2End Tests..." - out\Debug\webnn_end2end_tests.exe --gtest_filter=-GemmTests.ScalarBias:Pool2dTests.MaxPool2dDilationsDefault:Pool2dTests.MaxPool2dDilationsNhwc:Pool2dTests.*Pool2dAutoPadExplicitOutputSizes4x4Nhwc:Pool2dTests.*Pool2dAutoPadExplicitRoundingTypeCeilNhwc > update_end2endtests.txt || true - python workflow_scripts\memory_leak_check.py update_end2endtests.txt \ No newline at end of file diff --git a/src/webnn/native/BUILD.gn b/src/webnn/native/BUILD.gn index c24d5bae7..5e682b374 100644 --- a/src/webnn/native/BUILD.gn +++ b/src/webnn/native/BUILD.gn @@ -210,19 +210,6 @@ source_set("sources") { } } - if (webnn_enable_dml) { - sources += [ - "dml/BackendDML.cpp", - "dml/BackendDML.h", - "dml/ContextDML.cpp", - "dml/ContextDML.h", - "dml/GraphDML.cpp", - "dml/GraphDML.h", - "dml/DMLUtils.cpp", - "dml/DMLUtils.h", - ] - } - if (webnn_enable_dmlx) { if (webnn_enable_gpu_buffer == false) { sources += [ @@ -488,7 +475,7 @@ if (webnn_enable_dml || webnn_enable_dmlx) { dml_dll_path = "${webnn_root}/third_party/microsoft.ai.directml.1.8.2/bin/${os_folder}" copy("copy_dml_dll") { - sources = [ "${dml_dll_path}/DirectML.dll"] + sources = [ "${dml_dll_path}/DirectML.dll" ] if (is_debug) { sources += [ "${dml_dll_path}/DirectML.Debug.dll" ] } diff --git a/src/webnn/native/dml/BackendDML.cpp b/src/webnn/native/dml/BackendDML.cpp deleted file mode 100644 index ec4bee493..000000000 --- a/src/webnn/native/dml/BackendDML.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2019 The Dawn Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "webnn/native/dml/BackendDML.h" - -#include "webnn/native/Instance.h" -#include "webnn/native/dml/ContextDML.h" - -namespace webnn::native::dml { - - Backend::Backend(InstanceBase* instance) - : BackendConnection(instance, wnn::BackendType::DirectML) { - } - - MaybeError Backend::Initialize() { - return {}; - } - - ContextBase* Backend::CreateContext(ContextOptions const* options) { - return new Context(options); - } - -#if defined(WEBNN_ENABLE_GPU_BUFFER) - ContextBase* Backend::CreateContextWithGpuDevice(WGPUDevice device) { - return new Context(device); - } -#endif - - BackendConnection* Connect(InstanceBase* instance) { - Backend* backend = new Backend(instance); - - if (instance->ConsumedError(backend->Initialize())) { - delete backend; - return nullptr; - } - - return backend; - } - -} // namespace webnn::native::dml diff --git a/src/webnn/native/dml/BackendDML.h b/src/webnn/native/dml/BackendDML.h deleted file mode 100644 index 79804da85..000000000 --- a/src/webnn/native/dml/BackendDML.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2019 The Dawn Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef WEBNN_NATIVE_DML_BACKENDDML_H_ -#define WEBNN_NATIVE_DML_BACKENDDML_H_ - -#include "webnn/native/BackendConnection.h" -#include "webnn/native/Context.h" -#include "webnn/native/Error.h" - -#if defined(WEBNN_ENABLE_GPU_BUFFER) -# include -#endif -#include - -namespace webnn::native::dml { - - class Backend : public BackendConnection { - public: - Backend(InstanceBase* instance); - - MaybeError Initialize(); - ContextBase* CreateContext(ContextOptions const* options = nullptr) override; - -#if defined(WEBNN_ENABLE_GPU_BUFFER) - ContextBase* CreateContextWithGpuDevice(WGPUDevice device) override; -#endif - - private: - }; - -} // namespace webnn::native::dml - -#endif // WEBNN_NATIVE_DML_BACKENDDML_H_ diff --git a/src/webnn/native/dml/ContextDML.cpp b/src/webnn/native/dml/ContextDML.cpp deleted file mode 100644 index ca070627c..000000000 --- a/src/webnn/native/dml/ContextDML.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2021 The WebNN-native Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "webnn/native/dml/ContextDML.h" - -#include "common/RefCounted.h" -#include "webnn/native/dml/GraphDML.h" - -namespace webnn::native::dml { - - Context::Context(ContextOptions const* options) : ContextBase(options) { - } - -#if defined(WEBNN_ENABLE_GPU_BUFFER) - Context::Context(WGPUDevice device) : ContextBase(device) { - } -#endif - - GraphBase* Context::CreateGraphImpl() { - return new Graph(this); - } - -} // namespace webnn::native::dml diff --git a/src/webnn/native/dml/ContextDML.h b/src/webnn/native/dml/ContextDML.h deleted file mode 100644 index 05578b5dd..000000000 --- a/src/webnn/native/dml/ContextDML.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2021 The WebNN-native Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef WEBNN_NATIVE_DML_CONTEXT_DML_H_ -#define WEBNN_NATIVE_DML_CONTEXT_DML_H_ - -#include "webnn/native/Context.h" -#include "webnn/native/Graph.h" - -#if defined(WEBNN_ENABLE_GPU_BUFFER) -# include -#endif - -namespace webnn::native::dml { - - class Context : public ContextBase { - public: - explicit Context(ContextOptions const* options); -#if defined(WEBNN_ENABLE_GPU_BUFFER) - explicit Context(WGPUDevice device); -#endif - ~Context() override = default; - - private: - GraphBase* CreateGraphImpl() override; - }; - -} // namespace webnn::native::dml - -#endif // WEBNN_NATIVE_DML_CONTEXT_DML_H_ diff --git a/src/webnn/native/dml/DMLUtils.cpp b/src/webnn/native/dml/DMLUtils.cpp deleted file mode 100644 index 29dfa0ce3..000000000 --- a/src/webnn/native/dml/DMLUtils.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2021 The WebNN-native Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "DMLUtils.h" - -namespace webnn::native::dml { - - bool IsWarpAdapter(IDXGIAdapter1* pAdapter) { - DXGI_ADAPTER_DESC1 pDesc; - WEBNN_CHECK(pAdapter->GetDesc1(&pDesc)); - // See here for documentation on filtering WARP adapter: - // https://docs.microsoft.com/en-us/windows/desktop/direct3ddxgi/d3d10-graphics-programming-guide-dxgi#new-info-about-enumerating-adapters-for-windows-8 - auto isBasicRenderDriverVendorId = pDesc.VendorId == 0x1414; - auto isBasicRenderDriverDeviceId = pDesc.DeviceId == 0x8c; - auto isSoftwareAdapter = pDesc.Flags == DXGI_ADAPTER_FLAG_SOFTWARE; - return isSoftwareAdapter || (isBasicRenderDriverVendorId && isBasicRenderDriverDeviceId); - } - - void InitD3D12(ComPtr& commandList, - ComPtr& commandQueue, - ComPtr& commandAllocator, - ComPtr& D3D12Device, - DXGI_GPU_PREFERENCE gpuPreference, - bool useGpu) { -#if defined(_DEBUG) - ComPtr debug; - if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debug)))) { - debug->EnableDebugLayer(); - } -#endif - ComPtr dxgiAdapter; - if (useGpu) { - ComPtr dxgiFactory; - WEBNN_CHECK(CreateDXGIFactory1(IID_PPV_ARGS(&dxgiFactory))); - UINT i = 0; - while (dxgiFactory->EnumAdapterByGpuPreference( - i++, gpuPreference, IID_PPV_ARGS(&dxgiAdapter)) != DXGI_ERROR_NOT_FOUND) { - if (!IsWarpAdapter(dxgiAdapter.Get())) { - break; - } - } - } - if (!useGpu || FAILED(D3D12CreateDevice(dxgiAdapter.Get(), D3D_FEATURE_LEVEL_11_0, - IID_PPV_ARGS(&D3D12Device)))) { - // If a computer's display driver is not functioning or is disabled, the computer's - // primary (NULL) adapter might also be called "Microsoft Basic Render Driver." - ComPtr dxgiFactory; - WEBNN_CHECK(CreateDXGIFactory1(IID_PPV_ARGS(&dxgiFactory))); - WEBNN_CHECK(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&dxgiAdapter))); - WEBNN_CHECK(D3D12CreateDevice(dxgiAdapter.Get(), D3D_FEATURE_LEVEL_11_0, - IID_PPV_ARGS(&D3D12Device))); - } - - D3D12_COMMAND_QUEUE_DESC commandQueueDesc{}; - commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - commandQueueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; - WEBNN_CHECK( - D3D12Device->CreateCommandQueue(&commandQueueDesc, IID_PPV_ARGS(&commandQueue))); - WEBNN_CHECK(D3D12Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, - IID_PPV_ARGS(&commandAllocator))); - WEBNN_CHECK(D3D12Device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, - commandAllocator.Get(), nullptr, - IID_PPV_ARGS(&commandList))); - } - - void CloseExecuteResetWait(ComPtr commandList, - ComPtr commandQueue, - ComPtr commandAllocator, - ComPtr D3D12Device) { - WEBNN_CHECK(commandList->Close()); - ID3D12CommandList* commandLists[] = {commandList.Get()}; - commandQueue->ExecuteCommandLists(ARRAYSIZE(commandLists), commandLists); - WEBNN_CHECK(commandQueue.Get()->GetDevice(IID_PPV_ARGS(D3D12Device.GetAddressOf()))); - ComPtr fence; - WEBNN_CHECK( - D3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf()))); - WEBNN_CHECK(commandQueue.Get()->Signal(fence.Get(), 1)); - WEBNN_CHECK(fence->SetEventOnCompletion(1, nullptr)); - WEBNN_CHECK(commandAllocator->Reset()); - WEBNN_CHECK(commandList->Reset(commandAllocator.Get(), nullptr)); - } -} // namespace webnn::native::dml \ No newline at end of file diff --git a/src/webnn/native/dml/DMLUtils.h b/src/webnn/native/dml/DMLUtils.h deleted file mode 100644 index 6c24ac9f3..000000000 --- a/src/webnn/native/dml/DMLUtils.h +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright 2021 The WebNN-native Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef WEBNN_NATIVE_DMLUTILS_H_ -#define WEBNN_NATIVE_DMLUTILS_H_ - -#define DML_TARGET_VERSION_USE_LATEST 1 - -#include -#include -#include -#include -#include - -#include "DirectML.h" -#include "common/Assert.h" -#include "common/Log.h" - -namespace webnn::native::dml { -#define WEBNN_CHECK(hr) \ - if (((HRESULT)(hr)) < 0) { \ - dawn::ErrorLog() << "Failed to do " << #hr; \ - DAWN_ASSERT(0); \ - } - - using namespace Microsoft::WRL; - - // Represent the DirectML tensor description. - struct DmlTensorDesc { - std::vector dimensions = {}; - std::vector strides = {}; - // Describes a tensor that will be stored in a Direct3D 12 buffer resource. - DML_BUFFER_TENSOR_DESC bufferDesc = {}; - }; - - // Represent the information of the graph's edges. - struct EdgeInfoBase { - virtual ~EdgeInfoBase() = default; - DML_TENSOR_DESC outputTensorDESC = {}; - std::string name = ""; - bool isInputEdge = false; - }; - - // Only represent the information of the input edges. - struct InputEdgeInfo final : public EdgeInfoBase { - ~InputEdgeInfo() override = default; - // Indicate the index of the graph's input. - size_t inputIndex = 0; - void const* buffer = nullptr; - size_t byteLength = 0; - // Indicate if the input is from constant buffer which need to be - // uploaded in the stage of initialization. - bool isConstantInput = false; - }; - - // Represent the information of the intermediate edges and output edges. - struct EdgeInfo final : public EdgeInfoBase { - ~EdgeInfo() override = default; - // Indicate the index of the intermediate node from which this edge was produced. - uint32_t nodeIndex = 0; - // Indicate the index of the intermediate node' output from which this edge was produced. - uint32_t outputNodeIndex = 0; - }; - - // Describe a graph of DirectML operators used to compile a combined, optimized operator. - class DmlGraphDesc { - public: - void AddInputEdge(std::unique_ptr& inputEdgeDesc) { - mInputEdges.push_back({DML_GRAPH_EDGE_TYPE_INPUT, inputEdgeDesc.get()}); - mInputEdgesDesc.push_back(std::move(inputEdgeDesc)); - }; - void AddIntermediateEdge( - std::unique_ptr& intermediateEdgeDesc) { - mIntermediateEdges.push_back( - {DML_GRAPH_EDGE_TYPE_INTERMEDIATE, intermediateEdgeDesc.get()}); - mIntermediateEdgesDesc.push_back(std::move(intermediateEdgeDesc)); - }; - void AddOutputEdge(std::unique_ptr& outputEdgeDesc) { - mOutputEdges.push_back({DML_GRAPH_EDGE_TYPE_OUTPUT, outputEdgeDesc.get()}); - mOutputEdgesDesc.push_back(std::move(outputEdgeDesc)); - }; - void AddIntermediateNode(ComPtr dmlOperator) { - mIntermediateNodesMap[mIntermediateNodes.size()] = dmlOperator; - std::unique_ptr nodeDesc( - new DML_OPERATOR_GRAPH_NODE_DESC); - nodeDesc->Operator = mIntermediateNodesMap[mIntermediateNodes.size()].Get(); - mIntermediateNodes.push_back({DML_GRAPH_NODE_TYPE_OPERATOR, nodeDesc.get()}); - mIntermediateNodesDesc.push_back(std::move(nodeDesc)); - } - size_t NodeCount() { - return mIntermediateNodes.size(); - }; - - DML_GRAPH_DESC ConvertDmlGraphDesc(size_t inputCount, size_t outputCount) { - DML_GRAPH_DESC graphDesc = {}; - graphDesc.NodeCount = static_cast(mIntermediateNodes.size()); - graphDesc.Nodes = mIntermediateNodes.data(); - graphDesc.InputEdgeCount = static_cast(mInputEdges.size()); - graphDesc.InputEdges = mInputEdges.data(); - graphDesc.OutputEdgeCount = static_cast(mOutputEdges.size()); - graphDesc.OutputEdges = mOutputEdges.data(); - graphDesc.IntermediateEdgeCount = static_cast(mIntermediateEdges.size()); - graphDesc.IntermediateEdges = mIntermediateEdges.data(); - graphDesc.InputCount = static_cast(inputCount); - graphDesc.OutputCount = static_cast(outputCount); - return graphDesc; - }; - - private: - std::vector mIntermediateNodes; - std::vector mInputEdges; - std::vector mOutputEdges; - std::vector mIntermediateEdges; - - // Keep intermediate nodes here to avoid releasing too early. - std::map> mIntermediateNodesMap; - // Keep the descriptions of nodes and edges here to avoid releasing too early. - std::vector> mIntermediateNodesDesc; - std::vector> mInputEdgesDesc; - std::vector> mOutputEdgesDesc; - std::vector> mIntermediateEdgesDesc; - }; - - inline D3D12_HEAP_PROPERTIES CreateHeapProperties( - D3D12_HEAP_TYPE type = D3D12_HEAP_TYPE_DEFAULT) { - return {type, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 1, 1}; - }; - - inline D3D12_RESOURCE_DESC CreateResourceDesc( - UINT64 width, - D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE) { - return {D3D12_RESOURCE_DIMENSION_BUFFER, 0, width, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, - D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags}; - }; - - template - T RoundUpToMultiple(T value, T multiple) { - static_assert(std::is_integral_v); - - T remainder = value % multiple; - if (remainder != 0) { - value += multiple - remainder; - } - - return value; - } - - // An adapter called the "Microsoft Basic Render Driver" is always present. This adapter is a - // render-only device that has no display outputs. - HRESULT IsWarpAdapter(IDXGIAdapter1* pAdapter, bool* isWarpAdapter); - - void InitD3D12(ComPtr& commandList, - ComPtr& commandQueue, - ComPtr& commandAllocator, - ComPtr& D3D12Device, - DXGI_GPU_PREFERENCE gpuPreference, - bool useGpu); - - void CloseExecuteResetWait(ComPtr commandList, - ComPtr commandQueue, - ComPtr commandAllocator, - ComPtr D3D12Device); -} // namespace webnn::native::dml -#endif // WEBNN_NATIVE_DML_UTILS_H_ \ No newline at end of file diff --git a/src/webnn/native/dml/GraphDML.cpp b/src/webnn/native/dml/GraphDML.cpp deleted file mode 100644 index 1ed0a9a92..000000000 --- a/src/webnn/native/dml/GraphDML.cpp +++ /dev/null @@ -1,3035 +0,0 @@ -// Copyright 2021 The WebNN-native Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "webnn/native/dml/GraphDML.h" - -#include - -#include "webnn/native/NamedInputs.h" -#include "webnn/native/NamedOutputs.h" -#include "webnn/native/Utils.h" - -namespace webnn::native ::dml { - -#define CREATE_OPERATOR(type, dmlSpecificOperatorDesc) \ - DML_OPERATOR_DESC dmlOperatorDesc = {}; \ - dmlOperatorDesc.Type = DML_OPERATOR_##type; \ - dmlOperatorDesc.Desc = &dmlSpecificOperatorDesc; \ - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - -#define CREATE_BINARY_OPERATOR(type, aTensorDesc, bTensorDesc, outputTensorDesc, dmlOperator) \ - DML_ELEMENT_WISE_##type##_OPERATOR_DESC dmlSpecificOperatorDesc{}; \ - dmlSpecificOperatorDesc.ATensor = &aTensorDesc; \ - dmlSpecificOperatorDesc.BTensor = &bTensorDesc; \ - dmlSpecificOperatorDesc.OutputTensor = &outputTensorDesc; \ - CREATE_OPERATOR(ELEMENT_WISE_##type, dmlSpecificOperatorDesc) - -#define CREATE_UNARY_OPERATOR(type, inputTensorDesc, dmlOperator) \ - DML_##type##_OPERATOR_DESC dmlSpecificOperatorDesc{}; \ - dmlSpecificOperatorDesc.InputTensor = &inputTensorDesc; \ - dmlSpecificOperatorDesc.OutputTensor = &inputTensorDesc; \ - CREATE_OPERATOR(type, dmlSpecificOperatorDesc) - - // Append IDENTITY to remove the strides of input tensor. Use this to implement Reshape, - // Squeeze, Transpose and avoid creating an invaild graph with input = output. - void Graph::AppendIdentity(const DML_TENSOR_DESC& inputTensorDesc, - DML_TENSOR_DESC& outputTensorDesc, - ComPtr& dmlOperator) { - DML_ACTIVATION_IDENTITY_OPERATOR_DESC dmlSpecificOperatorDesc{}; - dmlSpecificOperatorDesc.InputTensor = &inputTensorDesc; - dmlSpecificOperatorDesc.OutputTensor = &outputTensorDesc; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_ACTIVATION_IDENTITY; - dmlOperatorDesc.Desc = &dmlSpecificOperatorDesc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - } - - void CopyBufferRegion(ComPtr commandList, - ComPtr srcResource, - ComPtr destResource, - UINT64 resourceSize, - D3D12_RESOURCE_STATES state, - bool needBarrierEnd = true) { - D3D12_RESOURCE_BARRIER resourceBarrier; - if (state == D3D12_RESOURCE_STATE_COPY_DEST) { - resourceBarrier.Transition.pResource = destResource.Get(); - } else if (state == D3D12_RESOURCE_STATE_COPY_SOURCE) { - resourceBarrier.Transition.pResource = srcResource.Get(); - } else { - dawn::ErrorLog() << "Unsupported D3D12_RESOURCE_STATES."; - DAWN_ASSERT(0); - } - resourceBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - resourceBarrier.Transition.StateAfter = state; - resourceBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - resourceBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - resourceBarrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - commandList->ResourceBarrier(1, &resourceBarrier); - commandList->CopyBufferRegion(destResource.Get(), 0, srcResource.Get(), 0, resourceSize); - if (needBarrierEnd) { - resourceBarrier.Transition.StateBefore = state; - resourceBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - commandList->ResourceBarrier(1, &resourceBarrier); - } - } - - // Strides are used to express broadcasting (by specifying a stride of 0) as well as - // padding. If Strides is not specified, each dimension in the tensor is considered to - // be contiguously packed, with no additional padding. The calculated strides refer to - // https://docs.microsoft.com/en-us/windows/win32/direct3d12/dml-helper-functions#calculatestrides - std::vector CalculateStridesForBroadcast(std::vector originDims, - std::vector broadcastedDims, - const DML_TENSOR_DESC& inputTensorDesc, - size_t skipAxes = 0) { - auto originRank = originDims.size(), broadcastedRank = broadcastedDims.size(); - if (originRank < skipAxes || originRank > broadcastedRank) { - dawn::ErrorLog() << "Shapes are incompatible, broadcasting failed."; - DAWN_ASSERT(0); - } - std::vector broadcastFlags(broadcastedRank, false); - auto rankGap = broadcastedRank - originRank; - for (size_t i = 0; i < rankGap; ++i) { - broadcastFlags[i] = true; - } - for (size_t i = 0; i < originRank - skipAxes; ++i) { - if (originDims[i] == 1 && broadcastedDims[rankGap + i] != 1) { - broadcastFlags[rankGap + i] = true; - } - } - - for (size_t i = 0; i < broadcastedRank; ++i) { - if (broadcastFlags[i]) { - broadcastedDims[i] = 1; - } - } - std::vector strides(broadcastedRank); - - const DML_BUFFER_TENSOR_DESC* bufferDesc = - reinterpret_cast(inputTensorDesc.Desc); - DAWN_ASSERT(bufferDesc != nullptr && broadcastedRank >= bufferDesc->DimensionCount); - auto existedStrides = bufferDesc->Strides; - if (existedStrides != nullptr) { - auto indexBegin = broadcastedRank - bufferDesc->DimensionCount; - for (size_t i = 0, j = 0; i < broadcastedRank; ++i) { - if (i < indexBegin) { - strides[i] = 0; - } else { - strides[i] = broadcastFlags[i] ? 0 : existedStrides[j]; - ++j; - } - } - } else { - strides[broadcastedRank - 1] = broadcastFlags[broadcastedRank - 1] ? 0 : 1; - size_t elements = 1; - for (size_t i = 1; i < broadcastedRank; i++) { - size_t j = broadcastedRank - i - 1; - elements *= broadcastedDims[j + 1]; - strides[j] = broadcastFlags[j] ? 0 : elements; - } - } - return strides; - } - - uint32_t SizeOfShape(const std::vector& dims) { - uint32_t prod = 1; - for (size_t i = 0; i < dims.size(); ++i) - prod *= dims[i]; - return prod; - } - - std::vector ConvertDimensions(const std::vector& dimensions) { - std::vector convertedDimensions; - for (auto dim : dimensions) { - if (dim < 0) { - dawn::ErrorLog() << "DML doesn't support the negative dimension value"; - DAWN_ASSERT(0); - } - convertedDimensions.push_back(dim); - } - return convertedDimensions; - } - - std::vector ExpandDimensions(const std::vector& dims, size_t rank) { - DAWN_ASSERT(rank >= dims.size()); - std::vector newDims(rank, 1); - for (size_t i = 0; i < dims.size(); ++i) { - newDims[newDims.size() - i - 1] = dims[dims.size() - i - 1]; - } - return newDims; - } - - enum TransposeType { NhwcToNchw, NchwToNhwc }; - - std::vector transposeStrides(TransposeType transposeType, - const std::vector& inputDims) { - UINT nStride = 0, cStride = 0, hStride = 0, wStride = 0; - switch (transposeType) { - case NhwcToNchw: - nStride = inputDims[1] * inputDims[2] * inputDims[3]; - hStride = inputDims[2] * inputDims[3]; - wStride = inputDims[3]; - cStride = 1; - return {nStride, cStride, hStride, wStride}; - case NchwToNhwc: - nStride = inputDims[1] * inputDims[2] * inputDims[3]; - cStride = inputDims[2] * inputDims[3]; - hStride = inputDims[3]; - wStride = 1; - return {nStride, hStride, wStride, cStride}; - default: - DAWN_ASSERT(0); - break; - } - } - - std::vector transposeDimensions(TransposeType transposeType, - const std::vector& inputDims) { - std::vector newInputDims(4); - switch (transposeType) { - case NhwcToNchw: - newInputDims[0] = inputDims[0]; - newInputDims[1] = inputDims[3]; - newInputDims[2] = inputDims[1]; - newInputDims[3] = inputDims[2]; - break; - case NchwToNhwc: - newInputDims[0] = inputDims[0]; - newInputDims[1] = inputDims[2]; - newInputDims[2] = inputDims[3]; - newInputDims[3] = inputDims[1]; - break; - default: - DAWN_ASSERT(0); - break; - } - return newInputDims; - } - - std::vector transposeFilterDimensionsAsOihw(wnn::Conv2dFilterOperandLayout filterLayout, - const std::vector& filterDims) { - std::vector newFilterDims(4); - switch (filterLayout) { - case wnn::Conv2dFilterOperandLayout::Ohwi: - newFilterDims[0] = filterDims[0]; - newFilterDims[1] = filterDims[3]; - newFilterDims[2] = filterDims[1]; - newFilterDims[3] = filterDims[2]; - break; - case wnn::Conv2dFilterOperandLayout::Hwio: - newFilterDims[0] = filterDims[3]; - newFilterDims[1] = filterDims[2]; - newFilterDims[2] = filterDims[0]; - newFilterDims[3] = filterDims[1]; - break; - case wnn::Conv2dFilterOperandLayout::Ihwo: - newFilterDims[0] = filterDims[3]; - newFilterDims[1] = filterDims[0]; - newFilterDims[2] = filterDims[1]; - newFilterDims[3] = filterDims[2]; - break; - default: - DAWN_ASSERT(0); - break; - } - return newFilterDims; - } - - std::vector transposeFilterDimensionsAsIohw( - wnn::ConvTranspose2dFilterOperandLayout filterLayout, - const std::vector& filterDims) { - std::vector newFilterDims(4); - switch (filterLayout) { - case wnn::ConvTranspose2dFilterOperandLayout::Hwoi: - newFilterDims[0] = filterDims[3]; - newFilterDims[1] = filterDims[2]; - newFilterDims[2] = filterDims[0]; - newFilterDims[3] = filterDims[1]; - break; - case wnn::ConvTranspose2dFilterOperandLayout::Ohwi: - newFilterDims[0] = filterDims[3]; - newFilterDims[1] = filterDims[0]; - newFilterDims[2] = filterDims[1]; - newFilterDims[3] = filterDims[2]; - break; - default: - DAWN_ASSERT(0); - break; - } - return newFilterDims; - } - - std::vector transposeFilterStridesAsOihw(wnn::Conv2dFilterOperandLayout filterLayout, - const std::vector& filterDims) { - UINT hStride = 0, wStride = 0, iStride = 0, oStride = 0; - switch (filterLayout) { - case wnn::Conv2dFilterOperandLayout::Hwio: - hStride = filterDims[1] * filterDims[2] * filterDims[3]; - wStride = filterDims[2] * filterDims[3]; - iStride = filterDims[3]; - oStride = 1; - break; - case wnn::Conv2dFilterOperandLayout::Ohwi: - oStride = filterDims[1] * filterDims[2] * filterDims[3]; - hStride = filterDims[2] * filterDims[3]; - wStride = filterDims[3]; - iStride = 1; - break; - case wnn::Conv2dFilterOperandLayout::Ihwo: - iStride = filterDims[1] * filterDims[2] * filterDims[3]; - hStride = filterDims[2] * filterDims[3]; - wStride = filterDims[3]; - oStride = 1; - break; - default: - DAWN_ASSERT(0); - break; - } - return {oStride, iStride, hStride, wStride}; - } - - std::vector transposeFilterStridesAsIohw( - wnn::ConvTranspose2dFilterOperandLayout filterLayout, - const std::vector& filterDims) { - UINT hStride = 0, wStride = 0, iStride = 0, oStride = 0; - switch (filterLayout) { - case wnn::ConvTranspose2dFilterOperandLayout::Hwoi: - hStride = filterDims[1] * filterDims[2] * filterDims[3]; - wStride = filterDims[2] * filterDims[3]; - oStride = filterDims[3]; - iStride = 1; - break; - case wnn::ConvTranspose2dFilterOperandLayout::Ohwi: - oStride = filterDims[1] * filterDims[2] * filterDims[3]; - hStride = filterDims[2] * filterDims[3]; - wStride = filterDims[3]; - iStride = 1; - break; - default: - DAWN_ASSERT(0); - break; - } - return {iStride, oStride, hStride, wStride}; - } - - template - std::vector ImplicitPadding(const T* options, - const std::vector& inputDims, - const std::vector& filterDims) { - return webnn::native::utils::ComputeImplicitPaddingForAutoPad( - options, {inputDims[2], inputDims[3]}, - {filterDims[filterDims.size() - 2], filterDims[filterDims.size() - 1]}); - } - - template - std::vector ExplicitPadding(const T* options) { - UINT paddingTop = static_cast(options->padding[0]); - UINT paddingBottom = static_cast(options->padding[1]); - UINT paddingLeft = static_cast(options->padding[2]); - UINT paddingRight = static_cast(options->padding[3]); - - return {paddingTop, paddingBottom, paddingLeft, paddingRight}; - } - - DML_RECURRENT_NETWORK_DIRECTION getRecurrentSequenceDirection( - wnn::RecurrentNetworkDirection direction) { - switch (direction) { - case wnn::RecurrentNetworkDirection::Forward: - return DML_RECURRENT_NETWORK_DIRECTION_FORWARD; - case wnn::RecurrentNetworkDirection::Backward: - return DML_RECURRENT_NETWORK_DIRECTION_BACKWARD; - case wnn::RecurrentNetworkDirection::Both: - return DML_RECURRENT_NETWORK_DIRECTION_BIDIRECTIONAL; - default: - dawn::ErrorLog() << "This direction type is not supported"; - DAWN_ASSERT(0); - } - } - - bool CreateDmlTensorDesc(std::vector>& dmlTensorsDesc, - const std::shared_ptr& dmlTensorDesc, - const std::vector& dimensions, - const std::vector& strides = {}, - DML_TENSOR_DATA_TYPE dataType = DML_TENSOR_DATA_TYPE_FLOAT32, - DML_TENSOR_FLAGS tensorFlag = DML_TENSOR_FLAG_NONE) { - dmlTensorDesc->dimensions = dimensions; - dmlTensorDesc->strides = strides; - if (!strides.empty() && dimensions.size() != strides.size()) { - dawn::ErrorLog() << "Dimension size should be equal to strides size."; - return false; - } - - size_t typeLength = 4; - switch (dataType) { - case DML_TENSOR_DATA_TYPE_FLOAT32: - case DML_TENSOR_DATA_TYPE_INT32: - case DML_TENSOR_DATA_TYPE_UINT32: - break; - case DML_TENSOR_DATA_TYPE_FLOAT16: - typeLength = 2; - break; - default: - dawn::ErrorLog() << "This data type is not supported"; - return false; - } - - size_t elementsCount = 1; - if (dmlTensorDesc->dimensions.size() > DML_TENSOR_DIMENSION_COUNT_MAX) { - dawn::ErrorLog() << "Tensor dimension count " << dmlTensorDesc->dimensions.size() - << " is greater than DML_TENSOR_DIMENSION_COUNT_MAX " - << DML_TENSOR_DIMENSION_COUNT_MAX; - return false; - } - if (dmlTensorDesc->dimensions.size() == 0) { - dmlTensorDesc->dimensions.resize(1); - dmlTensorDesc->dimensions[0] = 1; - } else { - for (uint32_t i = 0; i < dmlTensorDesc->dimensions.size(); ++i) { - auto dim = dmlTensorDesc->dimensions[i]; - if (strides.empty()) { - elementsCount *= dim; - } else { - // The specific dim from broadcasting shouldn't increase the count of - // elements. - if (strides[i] == 0) { - dim = 1; - } - elementsCount *= dim; - } - } - } - auto TotalTensorSizeInBytes = elementsCount * typeLength; - dmlTensorDesc->bufferDesc.DimensionCount = dmlTensorDesc->dimensions.size(); - dmlTensorDesc->bufferDesc.Sizes = dmlTensorDesc->dimensions.data(); - dmlTensorDesc->bufferDesc.Strides = dmlTensorDesc->strides.data(); - dmlTensorDesc->bufferDesc.TotalTensorSizeInBytes = TotalTensorSizeInBytes; - dmlTensorDesc->bufferDesc.GuaranteedBaseOffsetAlignment = 0; - dmlTensorDesc->bufferDesc.DataType = dataType; - dmlTensorDesc->bufferDesc.Flags = tensorFlag; - - dmlTensorsDesc.push_back(dmlTensorDesc); - return true; - } - - bool CreateDmlTensorDesc(std::vector>& dmlTensorsDesc, - const std::shared_ptr& dmlTensorDesc, - OperandDescriptor const* desc, - DML_TENSOR_FLAGS tensorFlag = DML_TENSOR_FLAGS::DML_TENSOR_FLAG_NONE) { - DAWN_ASSERT(desc != nullptr); - std::vector dimensions; - DML_TENSOR_DATA_TYPE dataType; - for (uint32_t i = 0; i < desc->dimensionsCount; ++i) { - if (desc->dimensions[i] < 0) { - dawn::ErrorLog() << "DML doesn't support the negative dimension value"; - return false; - } - } - dimensions.assign(desc->dimensions, desc->dimensions + desc->dimensionsCount); - if (desc->type == wnn::OperandType::Float32) { - dataType = DML_TENSOR_DATA_TYPE_FLOAT32; - } else if (desc->type == wnn::OperandType::Float16) { - dataType = DML_TENSOR_DATA_TYPE_FLOAT16; - } else if (desc->type == wnn::OperandType::Int32) { - dataType = DML_TENSOR_DATA_TYPE_INT32; - } else if (desc->type == wnn::OperandType::Uint32) { - dataType = DML_TENSOR_DATA_TYPE_UINT32; - } else { - dawn::ErrorLog() << "This data type is not supported"; - return false; - } - - return CreateDmlTensorDesc(dmlTensorsDesc, dmlTensorDesc, dimensions, {}, dataType, - tensorFlag); - } - - bool CreateDmlTensorDesc(std::vector>& dmlTensorsDesc, - const std::shared_ptr& dmlTensorDesc, - DML_TENSOR_DESC* tensorDESC, - std::vector dimensions = {}, - std::vector strides = {}, - bool useDefaultFlags = false) { - DAWN_ASSERT(tensorDESC != nullptr); - const DML_BUFFER_TENSOR_DESC* desc = - reinterpret_cast(tensorDESC->Desc); - - if (dimensions.empty()) { - dimensions.assign(desc->Sizes, desc->Sizes + desc->DimensionCount); - } - DML_TENSOR_FLAGS tensorFlags = useDefaultFlags ? DML_TENSOR_FLAG_NONE : desc->Flags; - return CreateDmlTensorDesc(dmlTensorsDesc, dmlTensorDesc, dimensions, strides, - desc->DataType, tensorFlags); - } - - // Only used to create the output edge from a node. - std::shared_ptr CreateEdgeFromThisNode(const DML_TENSOR_DESC& outputTensorDesc, - const uint32_t nodeIndex, - const uint32_t outputNodeIndex = 0, - bool isDefault = true) { - std::shared_ptr edgeInfo(new EdgeInfo()); - edgeInfo->outputTensorDESC = outputTensorDesc; - edgeInfo->nodeIndex = nodeIndex; - edgeInfo->outputNodeIndex = outputNodeIndex; - edgeInfo->isInputEdge = false; - std::shared_ptr edge(edgeInfo); - return edge; - } - - std::shared_ptr updateEdge(std::shared_ptr edge, - const DML_TENSOR_DESC& tensorDesc) { - if (edge->isInputEdge) { - std::shared_ptr newEdgeInfo(new InputEdgeInfo()); - memcpy(static_cast(newEdgeInfo.get()), static_cast(edge.get()), - sizeof(InputEdgeInfo)); - newEdgeInfo->outputTensorDESC = tensorDesc; - std::shared_ptr newEdge(newEdgeInfo); - return newEdge; - } else { - std::shared_ptr newEdgeInfo(new EdgeInfo()); - memcpy(static_cast(newEdgeInfo.get()), static_cast(edge.get()), - sizeof(EdgeInfo)); - newEdgeInfo->outputTensorDESC = tensorDesc; - std::shared_ptr newEdge(newEdgeInfo); - return newEdge; - } - } - - // Add an intermediate node and the related edges which point to this node to the graph - // description. - void AddNodeAndEdgesToGraphDesc(DmlGraphDesc& graphDesc, - std::vector> edges, - ComPtr dmlOperator) { - for (size_t i = 0; i < edges.size(); ++i) { - if (edges[i]->isInputEdge) { - auto edge = reinterpret_cast(edges[i].get()); - std::unique_ptr inputEdgeDesc( - new DML_INPUT_GRAPH_EDGE_DESC); - inputEdgeDesc->GraphInputIndex = edge->inputIndex; - inputEdgeDesc->ToNodeIndex = graphDesc.NodeCount(); - inputEdgeDesc->ToNodeInputIndex = i; - graphDesc.AddInputEdge(inputEdgeDesc); - } else { - auto edge = reinterpret_cast(edges[i].get()); - std::unique_ptr intermediateEdgeDesc( - new DML_INTERMEDIATE_GRAPH_EDGE_DESC); - intermediateEdgeDesc->FromNodeIndex = edge->nodeIndex; - intermediateEdgeDesc->FromNodeOutputIndex = edge->outputNodeIndex; - intermediateEdgeDesc->ToNodeIndex = graphDesc.NodeCount(); - intermediateEdgeDesc->ToNodeInputIndex = i; - graphDesc.AddIntermediateEdge(intermediateEdgeDesc); - } - } - graphDesc.AddIntermediateNode(dmlOperator); - } - - MaybeError Graph::TransposeOutputToNhwc(std::shared_ptr& inputEdge, - const std::vector& nchwOutputDims) { - auto nhwcOutputStrides = transposeStrides(NchwToNhwc, nchwOutputDims); - auto nhwcOutputDims = transposeDimensions(NchwToNhwc, nchwOutputDims); - std::shared_ptr nhwcOutputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, nhwcOutputDmlTensorDesc, - &inputEdge->outputTensorDESC, nhwcOutputDims, nhwcOutputStrides, - true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC nhwcOutputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &nhwcOutputDmlTensorDesc->bufferDesc}; - - inputEdge = updateEdge(inputEdge, nhwcOutputTensorDesc); - return {}; - } - - Graph::Graph(Context* context) : GraphBase(context) { - wnn::DevicePreference devicePreference = GetContext()->GetContextOptions().devicePreference; - bool useGpu = devicePreference == wnn::DevicePreference::Cpu ? false : true; - - wnn::PowerPreference powerPreference = GetContext()->GetContextOptions().powerPreference; - DXGI_GPU_PREFERENCE gpuPreference; - switch (powerPreference) { - case wnn::PowerPreference::High_performance: - gpuPreference = DXGI_GPU_PREFERENCE::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE; - break; - case wnn::PowerPreference::Low_power: - gpuPreference = DXGI_GPU_PREFERENCE::DXGI_GPU_PREFERENCE_MINIMUM_POWER; - break; - default: - gpuPreference = DXGI_GPU_PREFERENCE::DXGI_GPU_PREFERENCE_UNSPECIFIED; - } - // Set up Direct3D 12. - InitD3D12(mCommandList, mCommandQueue, mCommandAllocator, mD3D12Device, gpuPreference, - useGpu); - - // Create the DirectML device. - DML_CREATE_DEVICE_FLAGS dmlCreateDeviceFlags = DML_CREATE_DEVICE_FLAG_NONE; -#if defined(_DEBUG) - dmlCreateDeviceFlags = DML_CREATE_DEVICE_FLAG_DEBUG; -#endif - if (dmlCreateDeviceFlags == DML_CREATE_DEVICE_FLAG_DEBUG) { - if (DMLCreateDevice(mD3D12Device.Get(), DML_CREATE_DEVICE_FLAG_DEBUG, - IID_PPV_ARGS(&mDevice)) < 0) { - dawn::WarningLog() << "Failed to create a DirectML device with debug flag, " - "will fall back to use none flag."; - WEBNN_CHECK(DMLCreateDevice(mD3D12Device.Get(), DML_CREATE_DEVICE_FLAG_NONE, - IID_PPV_ARGS(&mDevice))); - } - } else { - WEBNN_CHECK(DMLCreateDevice(mD3D12Device.Get(), DML_CREATE_DEVICE_FLAG_NONE, - IID_PPV_ARGS(&mDevice))); - } - } - - MaybeError Graph::AddConstant(const op::Constant* constant) { - const OperandDescriptor* desc = constant->GetOperandDescriptor(); - std::shared_ptr dmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, dmlTensorDesc, desc, - DML_TENSOR_FLAG_OWNED_BY_DML)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDESC = {DML_TENSOR_TYPE_BUFFER, &(dmlTensorDesc->bufferDesc)}; - - std::shared_ptr inputEdgeInfo(new InputEdgeInfo()); - inputEdgeInfo->outputTensorDESC = outputTensorDESC; - inputEdgeInfo->name = "Input_Constant_" + std::to_string(mInputs.size()); - inputEdgeInfo->isInputEdge = true; - inputEdgeInfo->inputIndex = mInputs.size(); - inputEdgeInfo->buffer = constant->GetBuffer(); - inputEdgeInfo->byteLength = constant->GetByteLength(); - inputEdgeInfo->isConstantInput = true; - std::shared_ptr edge(inputEdgeInfo); - - mGraphEdgesMap[constant->PrimaryOutput()] = edge; - mInputs.push_back(inputEdgeInfo); - mConstantSet.insert(constant->PrimaryOutput()); - return {}; - } - - MaybeError Graph::CreateConstantInput(DML_TENSOR_DESC& tensorDESC, - void const* value, - size_t size, - const std::vector& dmlTensorDims, - const std::vector& strides, - DML_TENSOR_DATA_TYPE dataType, - DML_TENSOR_FLAGS tensorFlag) { - std::unique_ptr buffer(new char[size]); - memcpy(buffer.get(), value, size); - - std::shared_ptr dmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, dmlTensorDesc, dmlTensorDims, strides, dataType, - tensorFlag)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - tensorDESC = {DML_TENSOR_TYPE_BUFFER, &(dmlTensorDesc->bufferDesc)}; - - std::shared_ptr inputEdgeInfo(new InputEdgeInfo()); - inputEdgeInfo->outputTensorDESC = tensorDESC; - inputEdgeInfo->name = "Input_Constant_" + std::to_string(mInputs.size()); - inputEdgeInfo->isInputEdge = true; - inputEdgeInfo->inputIndex = mInputs.size(); - inputEdgeInfo->buffer = static_cast(buffer.get()); - inputEdgeInfo->byteLength = size; - inputEdgeInfo->isConstantInput = true; - - mInputs.push_back(inputEdgeInfo); - mConstantsBuffer.push_back(std::move(buffer)); - return {}; - } - - MaybeError Graph::AddInput(const op::Input* input) { - const OperandDescriptor* desc = input->GetOperandDescriptor(); - std::shared_ptr dmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, dmlTensorDesc, desc)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDESC = {DML_TENSOR_TYPE_BUFFER, &(dmlTensorDesc->bufferDesc)}; - std::shared_ptr inputEdgeInfo(new InputEdgeInfo()); - inputEdgeInfo->outputTensorDESC = outputTensorDESC; - inputEdgeInfo->name = input->GetName(); - inputEdgeInfo->isInputEdge = true; - inputEdgeInfo->inputIndex = mInputs.size(); - inputEdgeInfo->byteLength = dmlTensorDesc->bufferDesc.TotalTensorSizeInBytes; - std::shared_ptr edge(inputEdgeInfo); - - mGraphEdgesMap[input->PrimaryOutput()] = edge; - mInputs.push_back(inputEdgeInfo); - return {}; - } - - MaybeError Graph::AddBinary(const op::Binary* binary) { - DAWN_ASSERT(binary->Inputs().size() == 2); - DAWN_ASSERT(mGraphEdgesMap.find(binary->Inputs()[0].Get()) != mGraphEdgesMap.end()); - DAWN_ASSERT(mGraphEdgesMap.find(binary->Inputs()[1].Get()) != mGraphEdgesMap.end()); - - auto aEdge = mGraphEdgesMap[binary->Inputs()[0].Get()]; - auto bEdge = mGraphEdgesMap[binary->Inputs()[1].Get()]; - auto aDims = ConvertDimensions(binary->Inputs()[0].Get()->Shape()); - auto bDims = ConvertDimensions(binary->Inputs()[1].Get()->Shape()); - auto outputDims = ConvertDimensions(binary->Outputs()[0].Get()->Shape()); - size_t aRank = aDims.size(), bRank = bDims.size(), outputRank = outputDims.size(); - size_t broadcastSkipAxis = 0; - std::vector aNewDims, bNewDims, outputNewDims = outputDims; - - if (binary->GetType() == op::BinaryOpType::kMatMul) { - // DML GEMM requires 4D input tensors. - if (aRank > 4 || bRank > 4) { - return DAWN_INTERNAL_ERROR("The size of input dimensions is greater than 4."); - } - if (aRank < 4) { - aDims = ExpandDimensions(aDims, 4); - } - - if (bRank < 4) { - if (bRank == 1) { - // If b is 1-D, it is converted to a 2-D tensor by by appending a 1 to - // its dimensions. - bDims.push_back(1); - } - bDims = ExpandDimensions(bDims, 4); - } - - if (outputRank < 4) { - outputNewDims = ExpandDimensions(outputDims, 4); - } - - if (aRank > 2 || bRank > 2) { - // If either a or b is N-D, N > 2, it is treated as a stack of matrices - // with dimensions corresponding to the last two indices. The matrix - // multiplication will be broadcasted accordingly by following - // [numpy-broadcasting-rule]. - broadcastSkipAxis = 2; - } - aNewDims = bNewDims = outputNewDims; - aNewDims[2] = aDims[2]; - aNewDims[3] = aDims[3]; - bNewDims[2] = bDims[2]; - bNewDims[3] = bDims[3]; - } else { - aNewDims = bNewDims = outputNewDims; - } - - auto aNewStrides = CalculateStridesForBroadcast(aDims, aNewDims, aEdge->outputTensorDESC, - broadcastSkipAxis); - std::shared_ptr aDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, aDmlTensorDesc, &aEdge->outputTensorDESC, - aNewDims, aNewStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC aTensorDesc = {DML_TENSOR_TYPE_BUFFER, &aDmlTensorDesc->bufferDesc}; - - auto bNewStrides = CalculateStridesForBroadcast(bDims, bNewDims, bEdge->outputTensorDESC, - broadcastSkipAxis); - std::shared_ptr bDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, bDmlTensorDesc, &bEdge->outputTensorDESC, - bNewDims, bNewStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC bTensorDesc = {DML_TENSOR_TYPE_BUFFER, &bDmlTensorDesc->bufferDesc}; - - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &aEdge->outputTensorDESC, - outputNewDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - ComPtr dmlOperator; - switch (binary->GetType()) { - case op::BinaryOpType::kAdd: { - CREATE_BINARY_OPERATOR(ADD, aTensorDesc, bTensorDesc, outputTensorDesc, - dmlOperator); - } break; - case op::BinaryOpType::kDiv: { - CREATE_BINARY_OPERATOR(DIVIDE, aTensorDesc, bTensorDesc, outputTensorDesc, - dmlOperator); - } break; - case op::BinaryOpType::kMul: { - CREATE_BINARY_OPERATOR(MULTIPLY, aTensorDesc, bTensorDesc, outputTensorDesc, - dmlOperator); - } break; - case op::BinaryOpType::kSub: { - CREATE_BINARY_OPERATOR(SUBTRACT, aTensorDesc, bTensorDesc, outputTensorDesc, - dmlOperator); - } break; - case op::BinaryOpType::kMax: { - CREATE_BINARY_OPERATOR(MAX, aTensorDesc, bTensorDesc, outputTensorDesc, - dmlOperator); - } break; - case op::BinaryOpType::kMin: { - CREATE_BINARY_OPERATOR(MIN, aTensorDesc, bTensorDesc, outputTensorDesc, - dmlOperator); - } break; - case op::BinaryOpType::kPower: { - DML_ELEMENT_WISE_POW_OPERATOR_DESC dmlSpecificOperatorDesc{}; - dmlSpecificOperatorDesc.InputTensor = &aTensorDesc; - dmlSpecificOperatorDesc.ExponentTensor = &bTensorDesc; - dmlSpecificOperatorDesc.OutputTensor = &outputTensorDesc; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_ELEMENT_WISE_POW; - dmlOperatorDesc.Desc = &dmlSpecificOperatorDesc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - } break; - case op::BinaryOpType::kMatMul: { - DML_GEMM_OPERATOR_DESC dmlSpecificOperatorDesc{}; - dmlSpecificOperatorDesc.ATensor = &aTensorDesc; - dmlSpecificOperatorDesc.BTensor = &bTensorDesc; - dmlSpecificOperatorDesc.OutputTensor = &outputTensorDesc; - dmlSpecificOperatorDesc.Alpha = 1.0; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_GEMM; - dmlOperatorDesc.Desc = &dmlSpecificOperatorDesc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - } break; - default: - return DAWN_UNIMPLEMENTED_ERROR(" Binary op is not implemented."); - } - if (outputDims != outputNewDims) { - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &aEdge->outputTensorDESC, - outputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - } - - mGraphEdgesMap[binary->PrimaryOutput()] = - CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {aEdge, bEdge}, dmlOperator); - return {}; - } - - MaybeError Graph::HardSwish(std::shared_ptr& inputEdge, - const std::vector& inputDims) { - dawn::WarningLog() << "The hardSwish is emulated from other operations, maybe the " - "performance isn't best"; - std::shared_ptr intermediateEdge, outputEdge; - uint32_t length = SizeOfShape(inputDims); - DML_TENSOR_DESC constantInputTensorDesc, constantSixInputTensorDesc, intermediateTensorDesc, - inputTensorDesc = inputEdge->outputTensorDESC; - - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - inputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - std::vector constant(length, 3); - size_t initialInputIndex = mInputs.size() - 1; - ComPtr dmlOperator; - // x+3 - { - // Create the first constant input. - if (CreateConstantInput(constantInputTensorDesc, constant.data(), - length * sizeof(float), inputDims, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant input tensor."); - }; - CREATE_BINARY_OPERATOR(ADD, inputTensorDesc, constantInputTensorDesc, outputTensorDesc, - dmlOperator); - - outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge, mInputs.back()}, dmlOperator); - } - - // min(6, (x + 3)) - { - intermediateTensorDesc = outputEdge->outputTensorDESC; - intermediateEdge = outputEdge; - constant = std::vector(length, 6); - if (CreateConstantInput(constantSixInputTensorDesc, constant.data(), - length * sizeof(float), inputDims, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant input tensor."); - }; - CREATE_BINARY_OPERATOR(MIN, intermediateTensorDesc, constantSixInputTensorDesc, - outputTensorDesc, dmlOperator); - - outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {intermediateEdge, mInputs.back()}, dmlOperator); - } - - // max(0, min(6, (x + 3))) - { - intermediateTensorDesc = outputEdge->outputTensorDESC; - intermediateEdge = outputEdge; - constant = std::vector(length, 0); - // Create the third constant input. - if (CreateConstantInput(constantInputTensorDesc, constant.data(), - length * sizeof(float), inputDims, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant input tensor."); - }; - CREATE_BINARY_OPERATOR(MAX, intermediateTensorDesc, constantInputTensorDesc, - outputTensorDesc, dmlOperator); - - outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {intermediateEdge, mInputs.back()}, dmlOperator); - } - - // x * max(0, min(6, (x + 3))) - { - intermediateTensorDesc = outputEdge->outputTensorDESC; - intermediateEdge = outputEdge; - CREATE_BINARY_OPERATOR(MULTIPLY, inputTensorDesc, intermediateTensorDesc, - outputTensorDesc, dmlOperator); - - outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge, intermediateEdge}, dmlOperator); - } - - // x * max(0, min(6, (x + 3))) / 6 - { - intermediateTensorDesc = outputEdge->outputTensorDESC; - intermediateEdge = outputEdge; - CREATE_BINARY_OPERATOR(DIVIDE, intermediateTensorDesc, constantSixInputTensorDesc, - outputTensorDesc, dmlOperator); - - inputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - // Reuse the second constant input we created above. - AddNodeAndEdgesToGraphDesc( - mGraphDesc, {intermediateEdge, mInputs[initialInputIndex + 2]}, dmlOperator); - return {}; - } - } - - MaybeError Graph::AddUnary(const op::Unary* unary) { - DAWN_ASSERT(unary->Inputs().size() == 1); - const OperandBase* inputOperand = unary->Inputs()[0].Get(); - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputOperand]; - auto inputDims = ConvertDimensions(inputOperand->Shape()); - std::vector> inputEdges = {inputEdge}; - DML_TENSOR_DESC inputTensorDesc = inputEdge->outputTensorDESC; - ComPtr dmlOperator; - switch (unary->GetType()) { - case op::UnaryOpType::kAbs: { - CREATE_UNARY_OPERATOR(ELEMENT_WISE_ABS, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kCeil: { - CREATE_UNARY_OPERATOR(ELEMENT_WISE_CEIL, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kCos: { - CREATE_UNARY_OPERATOR(ELEMENT_WISE_COS, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kExp: { - CREATE_UNARY_OPERATOR(ELEMENT_WISE_EXP, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kFloor: { - CREATE_UNARY_OPERATOR(ELEMENT_WISE_FLOOR, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kHardSwish: { - if (HardSwish(inputEdge, inputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create the HardSwish."); - }; - mGraphEdgesMap[unary->PrimaryOutput()] = inputEdge; - return {}; - } - case op::UnaryOpType::kLog: { - CREATE_UNARY_OPERATOR(ELEMENT_WISE_LOG, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kLeakyRelu: { - DML_ACTIVATION_LEAKY_RELU_OPERATOR_DESC dmlSpecificOperatorDesc{}; - dmlSpecificOperatorDesc.InputTensor = &inputTensorDesc; - dmlSpecificOperatorDesc.OutputTensor = &inputTensorDesc; - dmlSpecificOperatorDesc.Alpha = - reinterpret_cast(unary)->GetAlpha(); - CREATE_OPERATOR(ACTIVATION_LEAKY_RELU, dmlSpecificOperatorDesc) - } break; - // DML doesn't support element-wise negative, emulated it from multiplying input by - // -1. - case op::UnaryOpType::kNeg: { - uint32_t length = SizeOfShape(inputDims); - DML_TENSOR_DESC constantInputTensorDesc; - if (inputOperand->Type() == wnn::OperandType::Float32) { - std::vector constant(length, -1); - if (CreateConstantInput(constantInputTensorDesc, constant.data(), - length * sizeof(float), inputDims, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant input tensor."); - }; - } else if (inputOperand->Type() == wnn::OperandType::Int32) { - std::vector constant(length, -1); - if (CreateConstantInput(constantInputTensorDesc, constant.data(), - length * sizeof(int32_t), inputDims, {}, - DML_TENSOR_DATA_TYPE_INT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant input tensor."); - }; - } else { - return DAWN_UNIMPLEMENTED_ERROR("This data type is not supported for neg."); - } - - CREATE_BINARY_OPERATOR(MULTIPLY, inputTensorDesc, constantInputTensorDesc, - inputTensorDesc, dmlOperator); - inputEdges.push_back(mInputs.back()); - } break; - case op::UnaryOpType::kRelu: { - CREATE_UNARY_OPERATOR(ACTIVATION_RELU, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kSigmoid: { - CREATE_UNARY_OPERATOR(ACTIVATION_SIGMOID, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kSin: { - CREATE_UNARY_OPERATOR(ELEMENT_WISE_SIN, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kSoftmax: { - CREATE_UNARY_OPERATOR(ACTIVATION_SOFTMAX, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kTan: { - CREATE_UNARY_OPERATOR(ELEMENT_WISE_TAN, inputTensorDesc, dmlOperator); - } break; - case op::UnaryOpType::kTanh: { - CREATE_UNARY_OPERATOR(ACTIVATION_TANH, inputTensorDesc, dmlOperator); - } break; - default: - return DAWN_UNIMPLEMENTED_ERROR("This Unary op is not implemented."); - } - - mGraphEdgesMap[unary->PrimaryOutput()] = - CreateEdgeFromThisNode(inputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, inputEdges, dmlOperator); - return {}; - } - - MaybeError Graph::AddSplit(const op::Split* split) { - DAWN_ASSERT(split->Inputs().size() == 1); - auto inputOperand = split->Inputs()[0].Get(); - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand) != mGraphEdgesMap.end()); - - auto inputDims = inputOperand->Shape(); - int32_t axis = split->GetAxis(); - // This value must be in the range [0, InputTensor.DimensionCount - 1]. Negative values - // address dimensions from the end. - if (axis < 0) { - axis = axis + inputDims.size(); - } - - size_t outputNum = split->Outputs().size(); - - auto inputEdge = mGraphEdgesMap[inputOperand]; - DML_TENSOR_DESC inputTensorDesc = inputEdge->outputTensorDESC; - std::vector outputTensorsDesc; - outputTensorsDesc.reserve(outputNum); - for (size_t i = 0; i < outputNum; ++i) { - std::shared_ptr dmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, dmlTensorDesc, &inputEdge->outputTensorDESC, - ConvertDimensions(split->Outputs()[i].Get()->Shape()), {}, - true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, &dmlTensorDesc->bufferDesc}; - outputTensorsDesc.push_back(outputTensorDesc); - } - - DML_SPLIT_OPERATOR_DESC dmlSplitOperatorDesc{}; - dmlSplitOperatorDesc.Axis = axis; - dmlSplitOperatorDesc.InputTensor = &inputTensorDesc; - dmlSplitOperatorDesc.OutputCount = outputTensorsDesc.size(); - dmlSplitOperatorDesc.OutputTensors = outputTensorsDesc.data(); - - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_SPLIT; - dmlOperatorDesc.Desc = &dmlSplitOperatorDesc; - - ComPtr dmlOperator; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - for (size_t i = 0; i < outputNum; ++i) { - mGraphEdgesMap[split->Outputs()[i].Get()] = - CreateEdgeFromThisNode(outputTensorsDesc[i], mGraphDesc.NodeCount(), i); - } - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge}, dmlOperator); - return {}; - } - - MaybeError Graph::AddReshape(const op::Reshape* reshape) { - DAWN_ASSERT(reshape->Inputs().size() == 1); - const OperandBase* inputOperand = reshape->Inputs()[0].Get(); - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputOperand]; - auto outputDims = ConvertDimensions(reshape->Outputs()[0].Get()->Shape()); - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - // Reshape needn't new strides, because the layout has not been changed. - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - outputDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - // Reshape is not a real node in DML, just need to update the edge created from it. - mGraphEdgesMap[reshape->PrimaryOutput()] = updateEdge(inputEdge, outputTensorDesc); - return {}; - } - - MaybeError Graph::AddTranspose(const op::Transpose* transpose) { - DAWN_ASSERT(transpose->Inputs().size() == 1); - const OperandBase* inputOperand = transpose->Inputs()[0].Get(); - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand) != mGraphEdgesMap.end()); - - auto inputDims = ConvertDimensions(transpose->Inputs()[0].Get()->Shape()); - auto outputDims = ConvertDimensions(transpose->Outputs()[0].Get()->Shape()); - std::vector permutation = transpose->GetPermutation(); - - // Transpose need new strides, because the layout has been changed. - std::vector strides(outputDims.size()), transposedStrides; - uint32_t stride = 1; - for (size_t i = strides.size(); i-- > 0;) { - strides[i] = stride; - stride *= inputDims[i]; - } - // Permute the strides. - for (auto dimPermuted : permutation) { - transposedStrides.push_back(strides[dimPermuted]); - } - - auto inputEdge = mGraphEdgesMap[inputOperand]; - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - outputDims, transposedStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - // Transpose is not a real node in DML, just need to update the edge. - mGraphEdgesMap[transpose->PrimaryOutput()] = updateEdge(inputEdge, outputTensorDesc); - return {}; - } - - DML_OPERATOR_DESC* CreateFusedOperator( - FusionType fusionType, - DML_ACTIVATION_LINEAR_OPERATOR_DESC& dmlActicationOperatorDesc, - DML_OPERATOR_DESC& dmlFusedOperatorDesc, - float alpha = 0.0, - float beta = 0.0) { - dmlActicationOperatorDesc.InputTensor = nullptr; - dmlActicationOperatorDesc.OutputTensor = nullptr; - dmlActicationOperatorDesc.Alpha = alpha; - dmlActicationOperatorDesc.Beta = beta; - switch (fusionType) { - case FusionType::Relu: { - dmlFusedOperatorDesc.Type = DML_OPERATOR_ACTIVATION_RELU; - } break; - case FusionType::Sigmoid: { - dmlFusedOperatorDesc.Type = DML_OPERATOR_ACTIVATION_SIGMOID; - } break; - case FusionType::Tanh: { - dmlFusedOperatorDesc.Type = DML_OPERATOR_ACTIVATION_TANH; - } break; - case FusionType::LeakyRelu: { - dmlFusedOperatorDesc.Type = DML_OPERATOR_ACTIVATION_LEAKY_RELU; - } break; - case FusionType::Clamp: - case FusionType::HardSwish: - return nullptr; - default: - dawn::ErrorLog() << "This fusion type is not supported."; - DAWN_ASSERT(0); - } - dmlFusedOperatorDesc.Desc = &dmlActicationOperatorDesc; - return &dmlFusedOperatorDesc; - } - - DML_OPERATOR_DESC* CreateFusedOperator( - FusionOperatorBase* activation, - DML_ACTIVATION_LINEAR_OPERATOR_DESC& dmlActicationOperatorDesc, - DML_OPERATOR_DESC& dmlFusedOperatorDesc) { - if (activation == nullptr) { - return nullptr; - } - float alpha = activation->GetFusionType() == FusionType::LeakyRelu - ? reinterpret_cast(activation)->GetAlpha() - : 0.0; - return CreateFusedOperator(activation->GetFusionType(), dmlActicationOperatorDesc, - dmlFusedOperatorDesc, alpha); - } - - MaybeError Graph::EmulateFusedOperator(FusionOperatorBase* activation, - std::shared_ptr& inputEdge, - const std::vector& inputDims) { - // HardSwish and Clamp are not supported for fusion, so we add them directly to - // emulate. Currently we implement Relu6 operator by Clamp. - if (activation == nullptr) { - return {}; - } - - auto fusionType = activation->GetFusionType(); - if (fusionType == FusionType::Clamp) { - auto clamp = reinterpret_cast(activation); - inputEdge = Clamp(clamp, inputEdge); - } else if (fusionType == FusionType::HardSwish) { - if (HardSwish(inputEdge, inputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create the HardSwish."); - }; - } - return {}; - } - - std::shared_ptr Graph::Clamp(const op::ClampBase* clamp, - std::shared_ptr inputEdge) { - DML_TENSOR_DESC inputTensorDesc = inputEdge->outputTensorDESC; - - // Set OutputTensor = InputTensor with the same strides to optimize performance. - DML_ELEMENT_WISE_CLIP_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.OutputTensor = &inputTensorDesc; - desc.ScaleBias = nullptr; - desc.Min = clamp->GetMinValue(); - desc.Max = clamp->GetMaxValue(); - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_ELEMENT_WISE_CLIP; - dmlOperatorDesc.Desc = &desc; - - ComPtr dmlOperator; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - std::shared_ptr outputEdge = - CreateEdgeFromThisNode(inputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge}, dmlOperator); - return outputEdge; - } - - MaybeError Graph::AddClamp(const op::Clamp* clamp) { - auto inputsOperand = clamp->Inputs(); - DAWN_ASSERT(inputsOperand.size() == 1); - auto inputEdge = mGraphEdgesMap[inputsOperand[0].Get()]; - mGraphEdgesMap[clamp->PrimaryOutput()] = Clamp(clamp, inputEdge); - return {}; - } - - std::vector transposeStridesToNchw(const std::vector& inputDims, - const DML_TENSOR_DESC& inputTensorDesc) { - const DML_BUFFER_TENSOR_DESC* bufferDesc = - reinterpret_cast(inputTensorDesc.Desc); - DAWN_ASSERT(bufferDesc != nullptr && bufferDesc->DimensionCount == 4); - auto strides = bufferDesc->Strides; - if (strides != nullptr) { - return {strides[0], strides[3], strides[1], strides[2]}; - } else { - return transposeStrides(NhwcToNchw, inputDims); - } - } - - MaybeError Graph::AddConv2d(const op::Conv2d* conv2d) { - auto inputsOperand = conv2d->Inputs(); - DAWN_ASSERT(inputsOperand.size() == 2 || inputsOperand.size() == 3); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[0].Get()) != mGraphEdgesMap.end()); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[1].Get()) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputsOperand[0].Get()]; - auto filterEdge = mGraphEdgesMap[inputsOperand[1].Get()]; - - auto inputDims = ConvertDimensions(inputsOperand[0].Get()->Shape()); - auto filterDims = ConvertDimensions(inputsOperand[1].Get()->Shape()); - auto outputDims = ConvertDimensions(conv2d->Outputs()[0].Get()->Shape()); - std::vector newInputDims = inputDims, newFilterDims = filterDims, - newOutputDims = outputDims, newInputStrides, newFilterStrides; - - const Conv2dOptions* options = conv2d->GetOptions(); - - DML_TENSOR_DESC inputTensorDesc = inputEdge->outputTensorDESC; - if (options->inputLayout == wnn::InputOperandLayout::Nhwc) { - newInputDims = transposeDimensions(NhwcToNchw, inputDims); - newOutputDims = transposeDimensions(NhwcToNchw, outputDims); - newInputStrides = transposeStridesToNchw(inputDims, inputTensorDesc); - - std::shared_ptr inputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, inputDmlTensorDesc, - &inputEdge->outputTensorDESC, newInputDims, newInputStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - inputTensorDesc = {DML_TENSOR_TYPE_BUFFER, &inputDmlTensorDesc->bufferDesc}; - } - - DML_TENSOR_DESC filterTensorDesc = filterEdge->outputTensorDESC; - if (options->filterLayout != wnn::Conv2dFilterOperandLayout::Oihw) { - newFilterDims = transposeFilterDimensionsAsOihw(options->filterLayout, filterDims); - newFilterStrides = transposeFilterStridesAsOihw(options->filterLayout, filterDims); - - std::shared_ptr filterDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, filterDmlTensorDesc, - &filterEdge->outputTensorDESC, newFilterDims, - newFilterStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - filterTensorDesc = {DML_TENSOR_TYPE_BUFFER, &filterDmlTensorDesc->bufferDesc}; - } - - std::vector> inputEdges = {inputEdge, filterEdge}; - - const DML_TENSOR_DESC* biasTensorDescPtr = nullptr; - DML_TENSOR_DESC newBiasTensorDesc = {}; - if (options->bias != nullptr) { - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[2].Get()) != mGraphEdgesMap.end()); - auto biasEdge = mGraphEdgesMap[inputsOperand[2].Get()]; - auto biasDims = ConvertDimensions(conv2d->Inputs()[2].Get()->Shape()); - if (biasDims[0] != newFilterDims[0] || biasDims.size() != 1) { - return DAWN_INTERNAL_ERROR( - "The bias should be 1-D tensor with the shape of [output_channels]."); - } - - // Reshape bias from 1-D to 4-D for NCHW layout. - std::vector newBiasDims = {1, biasDims[0], 1, 1}; - std::shared_ptr biasDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, biasDmlTensorDesc, - &biasEdge->outputTensorDESC, newBiasDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - newBiasTensorDesc = {DML_TENSOR_TYPE_BUFFER, &biasDmlTensorDesc->bufferDesc}; - biasTensorDescPtr = &newBiasTensorDesc; - inputEdges.push_back(biasEdge); - } - - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - newOutputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - // FIXME(nhu): strides, dilations, padding should be uint32_t - // need to fix the spec. - std::vector strides, dilations; - strides.assign(options->strides, options->strides + options->stridesCount); - dilations.assign(options->dilations, options->dilations + options->dilationsCount); - - std::vector padding = - options->autoPad == wnn::AutoPad::Explicit - ? ExplicitPadding(options) - : ImplicitPadding(options, newInputDims, newFilterDims); - std::vector startPadding = {padding[0], padding[2]}; - std::vector endPadding = {padding[1], padding[3]}; - std::vector defaultOutPadding = {0, 0}; - - DML_ACTIVATION_LINEAR_OPERATOR_DESC dmlActicationOperatorDesc{}; - DML_OPERATOR_DESC dmlFusedOperatorDesc = {}; - DML_OPERATOR_DESC* fusedActivation = CreateFusedOperator( - options->activation, dmlActicationOperatorDesc, dmlFusedOperatorDesc); - - ComPtr dmlOperator; - DML_CONVOLUTION_OPERATOR_DESC dmlSpecificOperatorDesc{}; - dmlSpecificOperatorDesc.InputTensor = &inputTensorDesc; - dmlSpecificOperatorDesc.FilterTensor = &filterTensorDesc; - dmlSpecificOperatorDesc.BiasTensor = biasTensorDescPtr; - dmlSpecificOperatorDesc.OutputTensor = &outputTensorDesc; - - dmlSpecificOperatorDesc.Mode = DML_CONVOLUTION_MODE_CROSS_CORRELATION; - dmlSpecificOperatorDesc.Direction = DML_CONVOLUTION_DIRECTION_FORWARD; - dmlSpecificOperatorDesc.DimensionCount = inputDims.size() - 2; - dmlSpecificOperatorDesc.Strides = strides.data(); - dmlSpecificOperatorDesc.Dilations = dilations.data(); - dmlSpecificOperatorDesc.StartPadding = startPadding.data(); - dmlSpecificOperatorDesc.EndPadding = endPadding.data(); - dmlSpecificOperatorDesc.OutputPadding = defaultOutPadding.data(); - dmlSpecificOperatorDesc.GroupCount = static_cast(options->groups); - dmlSpecificOperatorDesc.FusedActivation = fusedActivation; - - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_CONVOLUTION; - dmlOperatorDesc.Desc = &dmlSpecificOperatorDesc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - auto outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, inputEdges, dmlOperator); - - // Transpose output from nchw->nhwc. - if (options->inputLayout == wnn::InputOperandLayout::Nhwc) { - if (TransposeOutputToNhwc(outputEdge, newOutputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to transpose output from Nchw to Nhwc."); - }; - } - - if (EmulateFusedOperator(options->activation, outputEdge, outputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to emulate fused operator."); - } - mGraphEdgesMap[conv2d->PrimaryOutput()] = outputEdge; - return {}; - } - - MaybeError Graph::AddPool2d(const op::Pool2d* pool2d) { - DAWN_ASSERT(pool2d->Inputs().size() == 1); - const OperandBase* inputOperand = pool2d->Inputs()[0].Get(); - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputOperand]; - auto inputDims = ConvertDimensions(inputOperand->Shape()); - auto outputDims = ConvertDimensions(pool2d->Outputs()[0].Get()->Shape()); - std::vector newInputDims = inputDims, newOutputDims = outputDims, newInputStrides; - const Pool2dOptions* options = pool2d->GetOptions(); - - DML_TENSOR_DESC inputTensorDesc = inputEdge->outputTensorDESC; - if (options->layout == wnn::InputOperandLayout::Nhwc) { - newInputDims = transposeDimensions(NhwcToNchw, inputDims); - newOutputDims = transposeDimensions(NhwcToNchw, outputDims); - newInputStrides = transposeStridesToNchw(inputDims, inputTensorDesc); - - std::shared_ptr inputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, inputDmlTensorDesc, - &inputEdge->outputTensorDESC, newInputDims, newInputStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - inputTensorDesc = {DML_TENSOR_TYPE_BUFFER, &inputDmlTensorDesc->bufferDesc}; - } - - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - newOutputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - std::vector strides, dilations; - strides.assign(reinterpret_cast(options->strides), - reinterpret_cast(options->strides) + options->stridesCount); - dilations.assign(reinterpret_cast(options->dilations), - reinterpret_cast(options->dilations) + options->stridesCount); - - std::vector windowSizes; - if (options->windowDimensions != nullptr) { - const UINT* windowDimensions = reinterpret_cast(options->windowDimensions); - windowSizes.assign(windowDimensions, windowDimensions + options->windowDimensionsCount); - } else { - windowSizes = {newInputDims[2], newInputDims[3]}; - } - - auto padding = options->autoPad == wnn::AutoPad::Explicit - ? ExplicitPadding(options) - : ImplicitPadding(options, newInputDims, windowSizes); - std::vector startPadding = {padding[0], padding[2]}; - std::vector endPadding = {padding[1], padding[3]}; - - ComPtr dmlOperator; - if (pool2d->GetType() == op::Pool2dType::kAveragePool2d) { - if (dilations[0] != 1 || dilations[1] != 1) { - return DAWN_INTERNAL_ERROR("The dilations of average pool2d are not supported."); - } - DML_AVERAGE_POOLING_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.OutputTensor = &outputTensorDesc; - desc.DimensionCount = static_cast(windowSizes.size()); - desc.Strides = strides.data(); - desc.WindowSize = windowSizes.data(); - desc.StartPadding = startPadding.data(); - desc.EndPadding = endPadding.data(); - desc.IncludePadding = false; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_AVERAGE_POOLING; - dmlOperatorDesc.Desc = &desc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - } else if (pool2d->GetType() == op::Pool2dType::kL2Pool2d) { - if (dilations[0] != 1 || dilations[1] != 1) { - return DAWN_INTERNAL_ERROR("The dilations of L2 pool2d are not supported."); - } - DML_LP_POOLING_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.OutputTensor = &outputTensorDesc; - desc.DimensionCount = static_cast(windowSizes.size()); - desc.Strides = strides.data(); - desc.WindowSize = windowSizes.data(); - desc.StartPadding = startPadding.data(); - desc.EndPadding = endPadding.data(); - desc.P = 2; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_LP_POOLING; - dmlOperatorDesc.Desc = &desc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - } else if (pool2d->GetType() == op::Pool2dType::kMaxPool2d) { - if (dilations[0] != 1 || dilations[1] != 1) { - for (size_t i = 0; i < windowSizes.size(); ++i) { - uint32_t paddedInputSize = - newInputDims[2 + i] + startPadding[i] + endPadding[i]; - uint32_t dilatedWindowSize = 1 + (windowSizes[i] - 1) * dilations[i]; - newOutputDims[2 + i] = - (dilatedWindowSize >= paddedInputSize) - ? 1 - : (paddedInputSize - dilatedWindowSize) / strides[i] + 1; - } - outputDims = transposeDimensions(NchwToNhwc, newOutputDims); - // Update output tensor. - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, newOutputDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - } - - DML_MAX_POOLING2_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.OutputTensor = &outputTensorDesc; - desc.OutputIndicesTensor = nullptr; - desc.DimensionCount = static_cast(windowSizes.size()); - desc.Strides = strides.data(); - desc.WindowSize = windowSizes.data(); - desc.StartPadding = startPadding.data(); - desc.EndPadding = endPadding.data(); - desc.Dilations = dilations.data(); - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_MAX_POOLING2; - dmlOperatorDesc.Desc = &desc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - } else { - return DAWN_INTERNAL_ERROR("This pool2d type is not supported."); - } - - auto outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge}, dmlOperator); - - // Transpose output from nchw->nhwc. - if (options->layout == wnn::InputOperandLayout::Nhwc) { - if (TransposeOutputToNhwc(outputEdge, newOutputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to transpose output from Nchw to Nhwc."); - }; - } - - mGraphEdgesMap[pool2d->PrimaryOutput()] = outputEdge; - return {}; - } - - MaybeError Graph::AddPad(const op::Pad* pad) { - auto inputsOperand = pad->Inputs(); - DAWN_ASSERT(inputsOperand.size() == 2); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[0].Get()) != mGraphEdgesMap.end()); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[1].Get()) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputsOperand[0].Get()]; - auto paddingEdge = mGraphEdgesMap[inputsOperand[1].Get()]; - auto inputDims = ConvertDimensions(inputsOperand[0].Get()->Shape()); - auto paddingDims = ConvertDimensions(inputsOperand[1].Get()->Shape()); - auto outputDims = ConvertDimensions(pad->Outputs()[0].Get()->Shape()); - size_t inputRank = inputDims.size(); - - // Workaround(mingming): If padding was added in mGraph, it must be used. - // Use "Pad_"+std::to_string(mGraphEdgesMap.size()) to generate a unique name for the - // output node. This may be a dml issue: - // https://github.com/microsoft/DirectML/issues/133. - std::string name = "Pad_" + std::to_string(mGraphEdgesMap.size()); - auto paddingTensorDesc = paddingEdge->outputTensorDESC; - - // Ensure that the DML_TENSOR_FLAGS of output tensor is DML_TENSOR_FLAG_NONE. - std::shared_ptr outputPaddingTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputPaddingTensorDesc, &paddingTensorDesc, - paddingDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputPaddingTensorDesc->bufferDesc}; - - ComPtr dmlOperator; - { - DML_ELEMENT_WISE_IDENTITY_OPERATOR_DESC dmlSpecificOperatorDesc{}; - dmlSpecificOperatorDesc.InputTensor = &paddingTensorDesc; - dmlSpecificOperatorDesc.OutputTensor = &outputTensorDesc; - dmlSpecificOperatorDesc.ScaleBias = nullptr; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_ELEMENT_WISE_IDENTITY; - dmlOperatorDesc.Desc = &dmlSpecificOperatorDesc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - } - - auto outputEdge = CreateEdgeFromThisNode(paddingTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {paddingEdge}, dmlOperator); - - outputEdge->name = name; - std::unique_ptr outputEdgeDesc(new DML_OUTPUT_GRAPH_EDGE_DESC); - auto outputEdgeInfo = reinterpret_cast(outputEdge.get()); - outputEdgeDesc->FromNodeIndex = outputEdgeInfo->nodeIndex; - outputEdgeDesc->FromNodeOutputIndex = outputEdgeInfo->outputNodeIndex; - outputEdgeDesc->GraphOutputIndex = mOutputs.size(); - mGraphDesc.AddOutputEdge(outputEdgeDesc); - mOutputs.push_back(*outputEdgeInfo); - - if (mConstantSet.find(inputsOperand[1].Get()) == mConstantSet.end()) { - return DAWN_INTERNAL_ERROR("The padding constant is not found."); - } - - const op::Constant* paddingConstant = - reinterpret_cast(inputsOperand[1]->Operator()); - const uint32_t* paddingData = static_cast(paddingConstant->GetBuffer()); - std::vector startPadding, endPadding; - for (size_t i = 0; i < inputRank; ++i) { - startPadding.push_back(paddingData[2 * i]); - endPadding.push_back(paddingData[2 * i + 1]); - } - const PadOptions* options = pad->GetOptions(); - DML_PADDING_MODE paddingMode; - switch (options->mode) { - case wnn::PaddingMode::Edge: - paddingMode = DML_PADDING_MODE_EDGE; - break; - case wnn::PaddingMode::Reflection: - paddingMode = DML_PADDING_MODE_REFLECTION; - break; - case wnn::PaddingMode::Symmetric: - paddingMode = DML_PADDING_MODE_SYMMETRIC; - break; - case wnn::PaddingMode::Constant: - paddingMode = DML_PADDING_MODE_CONSTANT; - break; - default: - DAWN_ASSERT(0); - } - auto inputTensorDesc = inputEdge->outputTensorDESC; - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - outputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, &outputDmlTensorDesc->bufferDesc}; - - DML_PADDING_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.OutputTensor = &outputTensorDesc; - desc.PaddingMode = paddingMode; - desc.PaddingValue = options->value; - desc.DimensionCount = static_cast(startPadding.size()); - desc.StartPadding = startPadding.data(); - desc.EndPadding = endPadding.data(); - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_PADDING; - dmlOperatorDesc.Desc = &desc; - - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - mGraphEdgesMap[pad->PrimaryOutput()] = - CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge}, dmlOperator); - return {}; - } - - MaybeError Graph::AddBatchNorm(const op::BatchNorm* batchNorm) { - auto inputs = batchNorm->Inputs(); - DAWN_ASSERT(inputs.size() == 3 || inputs.size() == 4 || inputs.size() == 5); - DAWN_ASSERT(mGraphEdgesMap.find(batchNorm->Inputs()[0].Get()) != mGraphEdgesMap.end()); - auto inputEdge = mGraphEdgesMap[batchNorm->Inputs()[0].Get()]; - auto inputDims = ConvertDimensions(inputs[0].Get()->Shape()); - auto outputDims = ConvertDimensions(batchNorm->Outputs()[0].Get()->Shape()); - std::vector newInputDims = inputDims, newOutputDims = outputDims, newInputStrides; - const BatchNormOptions* options = batchNorm->GetOptions(); - - // When input is a 4-D tensor of the "nchw" or "nhwc" layout, options.axis should be set - // to 1 or 3 respectively. - uint32_t axis = options->axis; - DML_TENSOR_DESC inputTensorDesc = inputEdge->outputTensorDESC; - if (options->axis == 3) { - axis = 1; - newInputDims = transposeDimensions(NhwcToNchw, inputDims); - newOutputDims = transposeDimensions(NhwcToNchw, outputDims); - newInputStrides = transposeStridesToNchw(inputDims, inputTensorDesc); - - std::shared_ptr inputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, inputDmlTensorDesc, - &inputEdge->outputTensorDESC, newInputDims, newInputStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - inputTensorDesc = {DML_TENSOR_TYPE_BUFFER, &inputDmlTensorDesc->bufferDesc}; - } - - // Reshape 1D mean, variance, scale, bias to 4D with setting 1 to automatically - // broadcast. - std::vector tensorsDesc; - std::vector> edges; - for (size_t i = 1; i < inputs.size(); ++i) { - DAWN_ASSERT(mGraphEdgesMap.find(batchNorm->Inputs()[i].Get()) != mGraphEdgesMap.end()); - auto edge = mGraphEdgesMap[batchNorm->Inputs()[i].Get()]; - auto dims = ConvertDimensions(inputs[i].Get()->Shape()); - DAWN_ASSERT(dims.size() == 1); - if (dims[0] != newInputDims[axis]) { - return DAWN_INTERNAL_ERROR( - "The 1-D tensor of the values whose length size is not equal to the size " - "of " - "the input dimension denoted by options.axis."); - } - // This tensor's dimensions should be { BatchCount, ChannelCount, Height,Width}. - // Set 1 to automatically broadcast those dimensions across the input. - std::vector expandDims(4, 1); - expandDims[axis] = dims[0]; - std::shared_ptr dmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, dmlTensorDesc, &edge->outputTensorDESC, - expandDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC tensorDesc = {DML_TENSOR_TYPE_BUFFER, &dmlTensorDesc->bufferDesc}; - tensorsDesc.push_back(tensorDesc); - edges.push_back(updateEdge(edge, tensorDesc)); - } - - DML_TENSOR_DESC constantTensorDesc; - if (options->scale == nullptr) { - float scale = 1.0; - std::vector scaleDims = {1, newInputDims[1], 1, 1}; - auto index = mInputs.size() - 1; - // Create a constant scale. - if (CreateConstantInput(constantTensorDesc, &scale, sizeof(float), {1, 1, 1, 1}, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant input tensor."); - }; - tensorsDesc.insert( - options->bias == nullptr ? tensorsDesc.end() : tensorsDesc.begin() + 2, - constantTensorDesc); - edges.insert(options->bias == nullptr ? edges.end() : edges.begin() + 2, - mInputs[index + 1]); - } - - if (options->bias == nullptr) { - float bias = 0; - std::vector biasDims = {1, newInputDims[1], 1, 1}; - // Create a constant scale. - if (CreateConstantInput(constantTensorDesc, &bias, sizeof(float), {1, 1, 1, 1}, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant input tensor."); - }; - tensorsDesc.push_back(constantTensorDesc); - edges.push_back(mInputs.back()); - } - - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - newOutputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - DML_ACTIVATION_LINEAR_OPERATOR_DESC dmlActicationOperatorDesc{}; - DML_OPERATOR_DESC dmlFusedOperatorDesc = {}; - DML_OPERATOR_DESC* fusedActivation = CreateFusedOperator( - options->activation, dmlActicationOperatorDesc, dmlFusedOperatorDesc); - - DML_BATCH_NORMALIZATION_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.MeanTensor = &tensorsDesc[0]; - desc.VarianceTensor = &tensorsDesc[1]; - desc.ScaleTensor = &tensorsDesc[2]; - desc.BiasTensor = &tensorsDesc[3]; - desc.OutputTensor = &outputTensorDesc; - desc.Spatial = true; - desc.Epsilon = options->epsilon; - desc.FusedActivation = fusedActivation; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_BATCH_NORMALIZATION; - dmlOperatorDesc.Desc = &desc; - - ComPtr dmlOperator; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - auto outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge, edges[0], edges[1], edges[2], edges[3]}, - dmlOperator); - - // Transpose output from nchw->nhwc. - if (options->axis == 3) { - if (TransposeOutputToNhwc(outputEdge, newOutputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to transpose output from Nchw to Nhwc."); - }; - } - - if (EmulateFusedOperator(options->activation, outputEdge, outputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to emulate fused operator."); - }; - mGraphEdgesMap[batchNorm->PrimaryOutput()] = outputEdge; - return {}; - } - - MaybeError Graph::AddConvTranspose2d(const op::ConvTranspose2d* convTranspose2d) { - auto inputsOperand = convTranspose2d->Inputs(); - DAWN_ASSERT(inputsOperand.size() == 2 || inputsOperand.size() == 3); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[0].Get()) != mGraphEdgesMap.end()); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[1].Get()) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputsOperand[0].Get()]; - auto filterEdge = mGraphEdgesMap[inputsOperand[1].Get()]; - - auto inputDims = ConvertDimensions(inputsOperand[0].Get()->Shape()); - auto filterDims = ConvertDimensions(inputsOperand[1].Get()->Shape()); - std::vector newInputDims = inputDims, newFilterDims = filterDims, newInputStrides, - newFilterStrides; - - const ConvTranspose2dOptions* options = convTranspose2d->GetOptions(); - - DML_TENSOR_DESC inputTensorDesc = inputEdge->outputTensorDESC; - if (options->inputLayout == wnn::InputOperandLayout::Nhwc) { - newInputDims = transposeDimensions(NhwcToNchw, inputDims); - newInputStrides = transposeStridesToNchw(inputDims, inputTensorDesc); - - std::shared_ptr inputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, inputDmlTensorDesc, - &inputEdge->outputTensorDESC, newInputDims, newInputStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - inputTensorDesc = {DML_TENSOR_TYPE_BUFFER, &inputDmlTensorDesc->bufferDesc}; - } - - DML_TENSOR_DESC filterTensorDesc = filterEdge->outputTensorDESC; - if (options->filterLayout != wnn::ConvTranspose2dFilterOperandLayout::Iohw) { - newFilterDims = transposeFilterDimensionsAsIohw(options->filterLayout, filterDims); - newFilterStrides = transposeFilterStridesAsIohw(options->filterLayout, filterDims); - - std::shared_ptr filterDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, filterDmlTensorDesc, - &filterEdge->outputTensorDESC, newFilterDims, - newFilterStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - filterTensorDesc = {DML_TENSOR_TYPE_BUFFER, &filterDmlTensorDesc->bufferDesc}; - } - - std::vector> inputEdges = {inputEdge, filterEdge}; - - const DML_TENSOR_DESC* biasTensorDescPtr = nullptr; - DML_TENSOR_DESC newBiasTensorDesc = {}; - if (options->bias != nullptr) { - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[2].Get()) != mGraphEdgesMap.end()); - auto biasEdge = mGraphEdgesMap[inputsOperand[2].Get()]; - auto biasDims = ConvertDimensions(convTranspose2d->Inputs()[2].Get()->Shape()); - if (biasDims[0] != newFilterDims[0] || biasDims.size() != 1) { - return DAWN_INTERNAL_ERROR( - "The bias should be 1-D tensor with the shape of [output_channels]."); - } - - // Reshape bias from 1-D to 4-D for NCHW layout. - std::vector newBiasDims = {1, biasDims[0], 1, 1}; - std::shared_ptr biasDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, biasDmlTensorDesc, - &biasEdge->outputTensorDESC, newBiasDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - newBiasTensorDesc = {DML_TENSOR_TYPE_BUFFER, &biasDmlTensorDesc->bufferDesc}; - biasTensorDescPtr = &newBiasTensorDesc; - inputEdges.push_back(biasEdge); - } - - std::vector outputDims(4); - if (options->outputSizes != nullptr) { - std::vector outputSizes; - outputSizes.assign(options->outputSizes, - options->outputSizes + options->outputSizesCount); - if (options->inputLayout == wnn::InputOperandLayout::Nchw) { - outputDims = {inputDims[0], newFilterDims[1], outputSizes[0], outputSizes[1]}; - } else { - outputDims = {inputDims[0], outputSizes[0], outputSizes[1], newFilterDims[1]}; - } - } else { - outputDims = ConvertDimensions(convTranspose2d->Outputs()[0]->Shape()); - } - std::vector newOutputDims = outputDims; - if (options->inputLayout == wnn::InputOperandLayout::Nhwc) { - newOutputDims = transposeDimensions(NhwcToNchw, outputDims); - } - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - newOutputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - // FIXME(nhu): strides, dilations, padding should be uint32_t - // need to fix the spec. - std::vector strides, dilations, outputPadding; - strides.assign(options->strides, options->strides + options->stridesCount); - dilations.assign(options->dilations, options->dilations + options->dilationsCount); - outputPadding.assign(options->outputPadding, - options->outputPadding + options->outputPaddingCount); - - std::vector padding(4); - if (options->autoPad == wnn::AutoPad::Explicit) { - padding = ExplicitPadding(options); - } else { - std::vector inputSize = {inputDims[2], inputDims[3]}; - std::vector filterSize = {filterDims[2], filterDims[3]}; - padding = webnn::native::utils::ComputeImplicitPaddingForConvTranspose2dAutoPad( - options, inputSize, filterSize); - } - std::vector startPadding = {padding[0], padding[2]}; - std::vector endPadding = {padding[1], padding[3]}; - - DML_ACTIVATION_LINEAR_OPERATOR_DESC dmlActicationOperatorDesc{}; - DML_OPERATOR_DESC dmlFusedOperatorDesc = {}; - DML_OPERATOR_DESC* fusedActivation = CreateFusedOperator( - options->activation, dmlActicationOperatorDesc, dmlFusedOperatorDesc); - - ComPtr dmlOperator; - DML_CONVOLUTION_OPERATOR_DESC dmlSpecificOperatorDesc{}; - dmlSpecificOperatorDesc.InputTensor = &inputTensorDesc; - dmlSpecificOperatorDesc.FilterTensor = &filterTensorDesc; - dmlSpecificOperatorDesc.BiasTensor = biasTensorDescPtr; - dmlSpecificOperatorDesc.OutputTensor = &outputTensorDesc; - - dmlSpecificOperatorDesc.Mode = DML_CONVOLUTION_MODE_CONVOLUTION; - dmlSpecificOperatorDesc.Direction = DML_CONVOLUTION_DIRECTION_BACKWARD; - dmlSpecificOperatorDesc.DimensionCount = inputDims.size() - 2; - dmlSpecificOperatorDesc.Strides = strides.data(); - dmlSpecificOperatorDesc.Dilations = dilations.data(); - dmlSpecificOperatorDesc.StartPadding = startPadding.data(); - dmlSpecificOperatorDesc.EndPadding = endPadding.data(); - dmlSpecificOperatorDesc.OutputPadding = outputPadding.data(); - dmlSpecificOperatorDesc.GroupCount = static_cast(options->groups); - dmlSpecificOperatorDesc.FusedActivation = fusedActivation; - - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_CONVOLUTION; - dmlOperatorDesc.Desc = &dmlSpecificOperatorDesc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - auto outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, inputEdges, dmlOperator); - - // Transpose output from nchw->nhwc. - if (options->inputLayout == wnn::InputOperandLayout::Nhwc) { - if (TransposeOutputToNhwc(outputEdge, newOutputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to transpose output from Nchw to Nhwc."); - }; - } - - if (EmulateFusedOperator(options->activation, outputEdge, outputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to emulate fused operator."); - } - mGraphEdgesMap[convTranspose2d->PrimaryOutput()] = outputEdge; - return {}; - } - - MaybeError Graph::AddGru(const op::Gru* gru) { - const auto inputsOperand = gru->Inputs(); - DAWN_ASSERT(inputsOperand.size() >= 3 && inputsOperand.size() <= 6); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[0].Get()) != mGraphEdgesMap.end()); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[1].Get()) != mGraphEdgesMap.end()); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[2].Get()) != mGraphEdgesMap.end()); - std::vector> inputEdges; - - // Input: 4D tensor with the Sizes of { 1, seq_length, batch_size, input_size }. - // Need to reshape input from WebNN 3-D to DML 4-D. - auto inputEdge = mGraphEdgesMap[inputsOperand[0].Get()]; - auto webnnInputDims = ConvertDimensions(inputsOperand[0].Get()->Shape()); - std::vector inputDims = {1, webnnInputDims[0], webnnInputDims[1], webnnInputDims[2]}; - std::shared_ptr inputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, inputDmlTensorDesc, &inputEdge->outputTensorDESC, - inputDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC inputTensorDesc = {DML_TENSOR_TYPE_BUFFER, &inputDmlTensorDesc->bufferDesc}; - inputEdges.push_back(inputEdge); - - // Weight: 4D tensor with the Sizes of { 1, num_directions, 3 * hidden_size, input_size }. - // Need to reshape weight from WebNN 3-D to DML 4-D. - // The TENSOR_FLAGS of weight, bias and hiddenInit in gru must be DML_TENSOR_FLAG_NONE. - ComPtr dmlOperator; - auto constantWeightEdge = mGraphEdgesMap[inputsOperand[1].Get()]; - auto webnnWeightDims = ConvertDimensions(inputsOperand[1].Get()->Shape()); - std::vector weightDims = {1, webnnWeightDims[0], webnnWeightDims[1], - webnnWeightDims[2]}; - std::shared_ptr constantWeightDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, constantWeightDmlTensorDesc, - &constantWeightEdge->outputTensorDESC, weightDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - // Workaround: append identity to convert constant input tensor with - // DML_TENSOR_FLAG_OWNED_BY_DML falg to input tenor with DML_TENSOR_FLAG_NONE flag. - DML_TENSOR_DESC constantWeightTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &constantWeightDmlTensorDesc->bufferDesc}; - std::shared_ptr weightDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, weightDmlTensorDesc, &constantWeightTensorDesc, - {}, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC weightTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &weightDmlTensorDesc->bufferDesc}; - AppendIdentity(constantWeightTensorDesc, weightTensorDesc, dmlOperator); - auto weightEdge = CreateEdgeFromThisNode(weightTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {constantWeightEdge}, dmlOperator); - inputEdges.push_back(weightEdge); - - // Recurrence: 4D tensor with the Sizes { 1, num_directions, 3 * hidden_size, hidden_size }. - // Need to reshape recurrence from WebNN 3-D to DML 4-D. - // Need to convert tensor flag to NONE. - auto constantRecurrenceEdge = mGraphEdgesMap[inputsOperand[2].Get()]; - auto webnnRecurrenceDims = ConvertDimensions(inputsOperand[2].Get()->Shape()); - std::vector recurrenceDims = {1, webnnRecurrenceDims[0], webnnRecurrenceDims[1], - webnnRecurrenceDims[2]}; - std::shared_ptr constantRecurrenceDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, constantRecurrenceDmlTensorDesc, - &constantRecurrenceEdge->outputTensorDESC, recurrenceDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC constantRecurrenceTensorDesc = { - DML_TENSOR_TYPE_BUFFER, &constantRecurrenceDmlTensorDesc->bufferDesc}; - std::shared_ptr recurrenceDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, recurrenceDmlTensorDesc, - &constantRecurrenceTensorDesc, {}, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC recurrenceTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &recurrenceDmlTensorDesc->bufferDesc}; - AppendIdentity(constantRecurrenceTensorDesc, recurrenceTensorDesc, dmlOperator); - auto recurrenceEdge = CreateEdgeFromThisNode(recurrenceTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {constantRecurrenceEdge}, dmlOperator); - inputEdges.push_back(recurrenceEdge); - - const GruOptions* options = gru->GetOptions(); - UINT operandIndex = 3; - - // Bias: 4D tensor with the Sizes of { 1, 1, num_directions, 6 * hidden_size }. - // Need to concat bias tensor and recurrentBias tensor. - // Need to reshape bias from WebNN 2-D to DML 4-D. - std::vector webnnBiasDims = {weightDims[1], - weightDims[2]}; // { num_directions, 3 * hidden_size } - uint32_t webnnBiasLength = SizeOfShape(webnnBiasDims); - std::vector biasConstantData(webnnBiasLength, 0); - std::shared_ptr webnnBiasEdge; - DML_TENSOR_DESC webnnBiasTensorDesc = {}; - if (options->bias != nullptr) { - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[operandIndex].Get()) != - mGraphEdgesMap.end()); - webnnBiasEdge = mGraphEdgesMap[inputsOperand[operandIndex].Get()]; - webnnBiasTensorDesc = webnnBiasEdge->outputTensorDESC; - operandIndex++; - } else { - if (CreateConstantInput(webnnBiasTensorDesc, biasConstantData.data(), - webnnBiasLength * sizeof(float), webnnBiasDims, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant bias tensor."); - }; - webnnBiasEdge = mInputs.back(); - } - std::shared_ptr webnnRecurrentBiasEdge; - DML_TENSOR_DESC webnnRecurrentBiasTensorDesc = {}; - if (options->recurrentBias != nullptr) { - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[operandIndex].Get()) != - mGraphEdgesMap.end()); - webnnRecurrentBiasEdge = mGraphEdgesMap[inputsOperand[operandIndex].Get()]; - webnnRecurrentBiasTensorDesc = webnnRecurrentBiasEdge->outputTensorDESC; - operandIndex++; - } else { - if (CreateConstantInput(webnnRecurrentBiasTensorDesc, biasConstantData.data(), - webnnBiasLength * sizeof(float), webnnBiasDims, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant bias tensor."); - }; - webnnRecurrentBiasEdge = mInputs.back(); - } - // Concat - std::vector joinInputTensorDescs = {webnnBiasTensorDesc, - webnnRecurrentBiasTensorDesc}; - std::shared_ptr joinOutputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, joinOutputDmlTensorDesc, - &webnnBiasEdge->outputTensorDESC, - {webnnBiasDims[0], webnnBiasDims[1] * 2}, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC joinOutputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &joinOutputDmlTensorDesc->bufferDesc}; - DML_JOIN_OPERATOR_DESC joinDesc = {}; - joinDesc.Axis = 1; - joinDesc.InputCount = static_cast(joinInputTensorDescs.size()); - joinDesc.InputTensors = joinInputTensorDescs.data(); - joinDesc.OutputTensor = &joinOutputTensorDesc; - DML_OPERATOR_DESC dmlJoinDesc = {}; - dmlJoinDesc.Type = DML_OPERATOR_JOIN; - dmlJoinDesc.Desc = &joinDesc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlJoinDesc, IID_PPV_ARGS(&dmlOperator))); - auto biasEdge = CreateEdgeFromThisNode(joinOutputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {webnnBiasEdge, webnnRecurrentBiasEdge}, - dmlOperator); - // Reshape - std::vector biasDims = {1, 1, webnnBiasDims[0], - webnnBiasDims[1] * 2}; // { num_directions, 6 * hidden_size } - std::shared_ptr biasDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, biasDmlTensorDesc, &biasEdge->outputTensorDESC, - biasDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC biasTensorDesc = {DML_TENSOR_TYPE_BUFFER, &biasDmlTensorDesc->bufferDesc}; - inputEdges.push_back(biasEdge); - - // HiddenInit: 4D tensor with the Sizes of { 1, num_directions, batch_size, hidden_size }. - // Need to reshape hiddenInit from WebNN 3-D to DML 4-D. - // Need to convert tensor flag to NONE. - DML_TENSOR_DESC hiddenInitTensorDesc = {}; - DML_TENSOR_DESC* hiddenInitTensorDescPtr = nullptr; - if (options->initialHiddenState != nullptr) { - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[operandIndex].Get()) != - mGraphEdgesMap.end()); - auto constantHiddenInitEdge = mGraphEdgesMap[inputsOperand[operandIndex].Get()]; - auto webnnHiddenInitDims = - ConvertDimensions(inputsOperand[operandIndex].Get()->Shape()); - std::vector hiddenInitDims = {1, webnnHiddenInitDims[0], webnnHiddenInitDims[1], - webnnHiddenInitDims[2]}; - std::shared_ptr constantHiddenInitDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, constantHiddenInitDmlTensorDesc, - &constantHiddenInitEdge->outputTensorDESC, hiddenInitDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC constantHiddenInitTensorDesc = { - DML_TENSOR_TYPE_BUFFER, &constantHiddenInitDmlTensorDesc->bufferDesc}; - - std::shared_ptr hiddenInitDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, hiddenInitDmlTensorDesc, - &constantHiddenInitTensorDesc, {}, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - hiddenInitTensorDesc = {DML_TENSOR_TYPE_BUFFER, &hiddenInitDmlTensorDesc->bufferDesc}; - AppendIdentity(constantHiddenInitTensorDesc, hiddenInitTensorDesc, dmlOperator); - hiddenInitTensorDescPtr = &hiddenInitTensorDesc; - auto hiddenInitEdge = - CreateEdgeFromThisNode(hiddenInitTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {constantHiddenInitEdge}, dmlOperator); - inputEdges.push_back(hiddenInitEdge); - } - - // Outputs Tensor - DML_TENSOR_DESC outputSequenceTensorDesc = {}; - DML_TENSOR_DESC* outputSequenceTensorDescPtr = nullptr; - if (options->returnSequence) { - std::vector outputSequenceSizes(4); - outputSequenceSizes[0] = inputDims[1]; // SequenceLength - outputSequenceSizes[1] = recurrenceDims[1]; // NumDirections - outputSequenceSizes[2] = inputDims[2]; // BatchSize - outputSequenceSizes[3] = recurrenceDims[3]; // HiddenSize - std::shared_ptr outputSequenceDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputSequenceDmlTensorDesc, - outputSequenceSizes)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - outputSequenceTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputSequenceDmlTensorDesc->bufferDesc}; - outputSequenceTensorDescPtr = &outputSequenceTensorDesc; - } - std::shared_ptr outputSingleDmlTensorDesc(new DmlTensorDesc); - std::vector outputSingleSizes(4); - outputSingleSizes[0] = 1; - outputSingleSizes[1] = recurrenceDims[1]; - outputSingleSizes[2] = inputDims[2]; - outputSingleSizes[3] = recurrenceDims[3]; - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputSingleDmlTensorDesc, outputSingleSizes)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputSingleTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputSingleDmlTensorDesc->bufferDesc}; - - // Attributes - DML_RECURRENT_NETWORK_DIRECTION direction = - getRecurrentSequenceDirection(options->direction); - DML_ACTIVATION_LINEAR_OPERATOR_DESC fActicationOperatorDesc{}, gActicationOperatorDesc{}; - DML_OPERATOR_DESC fFusedOperatorDesc = {}, gFusedOperatorDesc = {}, *fActivation, - *gActivation; - if (options->activations == nullptr) { - fActivation = CreateFusedOperator(FusionType::Sigmoid, fActicationOperatorDesc, - fFusedOperatorDesc); - gActivation = - CreateFusedOperator(FusionType::Tanh, gActicationOperatorDesc, gFusedOperatorDesc); - } else { - fActivation = CreateFusedOperator(options->activations->Get(0), fActicationOperatorDesc, - fFusedOperatorDesc); - gActivation = CreateFusedOperator(options->activations->Get(1), gActicationOperatorDesc, - gFusedOperatorDesc); - } - UINT activationDescCount; - std::vector activations; - if (direction == DML_RECURRENT_NETWORK_DIRECTION_BIDIRECTIONAL) { - activationDescCount = 4; - activations = {*fActivation, *gActivation, *fActivation, *gActivation}; - } else { - activationDescCount = 2; - activations = {*fActivation, *gActivation}; - } - bool linearBeforeReset = options->resetAfter; - - DML_GRU_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.WeightTensor = &weightTensorDesc; - desc.RecurrenceTensor = &recurrenceTensorDesc; - desc.BiasTensor = &biasTensorDesc; - desc.HiddenInitTensor = hiddenInitTensorDescPtr; - desc.SequenceLengthsTensor = nullptr; - desc.OutputSequenceTensor = outputSequenceTensorDescPtr; - desc.OutputSingleTensor = &outputSingleTensorDesc; - desc.ActivationDescCount = activationDescCount; - desc.ActivationDescs = activations.data(); - desc.Direction = direction; - desc.LinearBeforeReset = linearBeforeReset; - - DML_OPERATOR_DESC dmlGruDesc = {}; - dmlGruDesc.Type = DML_OPERATOR_GRU; - dmlGruDesc.Desc = &desc; - WEBNN_CHECK(mDevice->CreateOperator(&dmlGruDesc, IID_PPV_ARGS(&dmlOperator))); - auto outputSingleEdge = - CreateEdgeFromThisNode(outputSingleTensorDesc, mGraphDesc.NodeCount(), 1); - auto webnnOutputSingleDims = ConvertDimensions(gru->Outputs()[0].Get()->Shape()); - std::shared_ptr webnnOutputSingleDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, webnnOutputSingleDmlTensorDesc, - &outputSingleEdge->outputTensorDESC, webnnOutputSingleDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC webnnOutputSingleTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &webnnOutputSingleDmlTensorDesc->bufferDesc}; - mGraphEdgesMap[gru->PrimaryOutput()] = - updateEdge(outputSingleEdge, webnnOutputSingleTensorDesc); - if (options->returnSequence) { - auto outputSequenceEdge = - CreateEdgeFromThisNode(outputSequenceTensorDesc, mGraphDesc.NodeCount(), 0); - mGraphEdgesMap[gru->Outputs()[1].Get()] = outputSequenceEdge; - } - AddNodeAndEdgesToGraphDesc(mGraphDesc, inputEdges, dmlOperator); - return {}; - } - -#define CREATE_REDUCE_OPERATOR(type, inputTensorDesc, outputTensorDesc, axes, dmlOperator) \ - DML_REDUCE_OPERATOR_DESC desc = {}; \ - desc.Function = DML_REDUCE_FUNCTION_##type; \ - desc.InputTensor = &inputTensorDesc; \ - desc.OutputTensor = &outputTensorDesc; \ - desc.AxisCount = static_cast(axes.size()); \ - desc.Axes = axes.data(); \ - DML_OPERATOR_DESC dmlOperatorDesc = {}; \ - dmlOperatorDesc.Type = DML_OPERATOR_REDUCE; \ - dmlOperatorDesc.Desc = &desc; \ - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - MaybeError Graph::AddReduce(const op::Reduce* reduce) { - DAWN_ASSERT(reduce->Inputs().size() == 1); - const OperandBase* inputOperand = reduce->Inputs()[0].Get(); - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputOperand]; - const ReduceOptions* options = reduce->GetOptions(); - std::vector axes; - auto inputDims = ConvertDimensions(inputOperand->Shape()); - auto outputDims = ConvertDimensions(reduce->Outputs()[0].Get()->Shape()); - - auto inputTensorDesc = inputEdge->outputTensorDESC; - auto reducedDims = inputDims; - for (size_t i = 0; i < options->axesCount; ++i) { - // Axes values must be in the range [0, InputTensor.DimensionCount - 1]. - // The dimensions to reduce where -1 means the last dimension. - uint32_t axis = options->axes[i] == -1 ? inputDims.size() - 1 : options->axes[i]; - axes.push_back(axis); - reducedDims[axis] = 1; - } - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputTensorDesc, - reducedDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - ComPtr dmlOperator; - switch (reduce->GetType()) { - case op::ReduceType::kReduceL1: { - CREATE_REDUCE_OPERATOR(L1, inputTensorDesc, outputTensorDesc, axes, dmlOperator) - } break; - case op::ReduceType::kReduceL2: { - CREATE_REDUCE_OPERATOR(L2, inputTensorDesc, outputTensorDesc, axes, dmlOperator) - } break; - case op::ReduceType::kReduceMax: { - CREATE_REDUCE_OPERATOR(MAX, inputTensorDesc, outputTensorDesc, axes, dmlOperator) - } break; - case op::ReduceType::kReduceMean: { - CREATE_REDUCE_OPERATOR(AVERAGE, inputTensorDesc, outputTensorDesc, axes, - dmlOperator) - } break; - case op::ReduceType::kReduceMin: { - CREATE_REDUCE_OPERATOR(MIN, inputTensorDesc, outputTensorDesc, axes, dmlOperator) - } break; - case op::ReduceType::kReduceProduct: { - CREATE_REDUCE_OPERATOR(MULTIPLY, inputTensorDesc, outputTensorDesc, axes, - dmlOperator) - } break; - case op::ReduceType::kReduceSum: { - CREATE_REDUCE_OPERATOR(SUM, inputTensorDesc, outputTensorDesc, axes, dmlOperator) - } break; - default: - return DAWN_INTERNAL_ERROR("The reduce op type isn't supported."); - } - - auto outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge}, dmlOperator); - - // Reshape if dimensions needn't be kept. Output edge has been updated with new output - // dims. - if (!options->keepDimensions) { - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, - &outputEdge->outputTensorDESC, outputDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - } - - mGraphEdgesMap[reduce->PrimaryOutput()] = outputEdge; - return {}; - } - - MaybeError Graph::AddResample2d(const op::Resample2d* resample2d) { - DAWN_ASSERT(resample2d->Inputs().size() == 1); - const OperandBase* inputOperand = resample2d->Inputs()[0].Get(); - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputOperand]; - auto inputDims = ConvertDimensions(inputOperand->Shape()); - auto outputDims = ConvertDimensions(resample2d->Outputs()[0].Get()->Shape()); - - auto inputTensorDesc = inputEdge->outputTensorDESC; - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputTensorDesc, - outputDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - const Resample2dOptions* options = resample2d->GetOptions(); - DML_INTERPOLATION_MODE mode; - switch (options->mode) { - case wnn::InterpolationMode::NearestNeighbor: - mode = DML_INTERPOLATION_MODE_NEAREST_NEIGHBOR; - break; - case wnn::InterpolationMode::Linear: - mode = DML_INTERPOLATION_MODE_LINEAR; - break; - default: - DAWN_ASSERT(0); - break; - } - - // Scales is computed by dividing the output sizes by the input sizes. - // InputPixelOffsets = 0.5f for each dimension. - // OutputPixelOffsets = -0.5f for each dimension. - std::vector scales; - for (size_t i = 0; i < inputDims.size(); ++i) { - scales.push_back(outputDims[i] / inputDims[i]); - } - std::vector inputPixelOffsets(4, 0.5), outputPixelOffsets(4, -0.5); - - DML_RESAMPLE1_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.OutputTensor = &outputTensorDesc; - desc.InterpolationMode = mode; - desc.DimensionCount = 4; - desc.Scales = scales.data(); - desc.InputPixelOffsets = inputPixelOffsets.data(); - desc.OutputPixelOffsets = outputPixelOffsets.data(); - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_RESAMPLE1; - dmlOperatorDesc.Desc = &desc; - - ComPtr dmlOperator; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - mGraphEdgesMap[resample2d->PrimaryOutput()] = - CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge}, dmlOperator); - return {}; - } - -#define SLICE_ONE_AXIS(axis, index) \ - inputWindowOffsets[axis] = \ - starts[index] < 0 ? (starts[index] + inputDims[axis]) : starts[index]; \ - inputWindowSizes[axis] = \ - sizes[index] == -1 ? (inputDims[axis] - inputWindowOffsets[axis]) : sizes[index]; \ - do { \ - } while (0) - - MaybeError Graph::AddSlice(const op::Slice* slice) { - DAWN_ASSERT(slice->Inputs().size() == 1); - const OperandBase* inputOperand = slice->Inputs()[0].Get(); - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputOperand]; - auto inputDims = ConvertDimensions(inputOperand->Shape()); - auto outputDims = ConvertDimensions(slice->Outputs()[0].Get()->Shape()); - - auto inputTensorDesc = inputEdge->outputTensorDESC; - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputTensorDesc, outputDims, - {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - std::vector inputWindowOffsets(inputDims.size(), 0); - std::vector inputWindowSizes(inputDims); - auto starts = slice->GetStarts(); - auto axes = slice->GetAxes(); - auto sizes = slice->GetSizes(); - if (axes.empty()) { - for (size_t i = 0; i < inputDims.size(); ++i) { - SLICE_ONE_AXIS(i, i); - } - } else { - for (size_t i = 0; i < axes.size(); ++i) { - if (axes[i] < 0) { - axes[i] = inputDims.size() + axes[i]; - } - SLICE_ONE_AXIS(axes[i], i); - } - } - std::vector inputWindowStrides(inputDims.size(), 1); - - DML_SLICE1_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.OutputTensor = &outputTensorDesc; - desc.DimensionCount = static_cast(inputDims.size()); - desc.InputWindowOffsets = inputWindowOffsets.data(); - desc.InputWindowSizes = inputWindowSizes.data(); - desc.InputWindowStrides = inputWindowStrides.data(); - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_SLICE1; - dmlOperatorDesc.Desc = &desc; - - ComPtr dmlOperator; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - mGraphEdgesMap[slice->PrimaryOutput()] = - CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge}, dmlOperator); - return {}; - } - - MaybeError Graph::AddSqueeze(const op::Squeeze* squeeze) { - DAWN_ASSERT(squeeze->Inputs().size() == 1); - const OperandBase* inputOperand = squeeze->Inputs()[0].Get(); - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand) != mGraphEdgesMap.end()); - - auto inputEdge = mGraphEdgesMap[inputOperand]; - auto outputDims = ConvertDimensions(squeeze->Outputs()[0].Get()->Shape()); - // Squeeze perform like reshape which needn't new strides, because the layout has not - // been changed. - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - outputDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - // Squeeze is not a real node in DML, just need to update the edge created from it. - mGraphEdgesMap[squeeze->PrimaryOutput()] = updateEdge(inputEdge, outputTensorDesc); - return {}; - } - - MaybeError Graph::AddInstanceNorm(const op::InstanceNorm* instanceNorm) { - auto inputs = instanceNorm->Inputs(); - DAWN_ASSERT(inputs.size() == 1 || inputs.size() == 2 || inputs.size() == 3); - DAWN_ASSERT(mGraphEdgesMap.find(instanceNorm->Inputs()[0].Get()) != mGraphEdgesMap.end()); - auto inputEdge = mGraphEdgesMap[instanceNorm->Inputs()[0].Get()]; - auto inputDims = ConvertDimensions(inputs[0].Get()->Shape()); - auto outputDims = ConvertDimensions(instanceNorm->Outputs()[0].Get()->Shape()); - std::vector newInputDims = inputDims, newOutputDims = outputDims, newInputStrides; - const InstanceNormOptions* options = instanceNorm->GetOptions(); - - DML_TENSOR_DESC inputTensorDesc = inputEdge->outputTensorDESC; - if (options->layout == wnn::InputOperandLayout::Nhwc) { - newInputDims = transposeDimensions(NhwcToNchw, inputDims); - newOutputDims = transposeDimensions(NhwcToNchw, outputDims); - newInputStrides = transposeStridesToNchw(inputDims, inputTensorDesc); - - std::shared_ptr inputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, inputDmlTensorDesc, - &inputEdge->outputTensorDESC, newInputDims, newInputStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - inputTensorDesc = {DML_TENSOR_TYPE_BUFFER, &inputDmlTensorDesc->bufferDesc}; - } - - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputEdge->outputTensorDESC, - newOutputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - std::vector tensorsDesc; - std::vector> edges; - // Reshape 1D scale, bias to 4D with setting 1 to automatically broadcast. - for (size_t i = 1; i < inputs.size(); ++i) { - DAWN_ASSERT(mGraphEdgesMap.find(instanceNorm->Inputs()[i].Get()) != - mGraphEdgesMap.end()); - auto edge = mGraphEdgesMap[inputs[i].Get()]; - auto dims = ConvertDimensions(inputs[i].Get()->Shape()); - DAWN_ASSERT(dims.size() == 1); - if (dims[0] != newInputDims[1]) { - return DAWN_INTERNAL_ERROR( - "The 1-D tensor of the values whose length size is not equal to the size " - "of " - "feature dimension of the input "); - } - // This tensor's dimensions should be {BatchCount, ChannelCount, Height, Width}. - // Set 1 to automatically broadcast those dimensions across the input. - std::vector expandDims(4, 1); - expandDims[1] = dims[0]; - std::shared_ptr dmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, dmlTensorDesc, &edge->outputTensorDESC, - expandDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC tensorDesc = {DML_TENSOR_TYPE_BUFFER, &dmlTensorDesc->bufferDesc}; - tensorsDesc.push_back(tensorDesc); - edges.push_back(updateEdge(edge, tensorDesc)); - } - - // Set tensor's dimensions to {1, channel, 1, 1} if scale or bias is null. - DML_TENSOR_DESC constantTensorDesc; - if (options->scale == nullptr) { - std::vector scale(newInputDims[1], 1.0); - std::vector scaleDims = {1, newInputDims[1], 1, 1}; - auto index = mInputs.size() - 1; - // Create a constant scale. - if (CreateConstantInput(constantTensorDesc, scale.data(), - newInputDims[1] * sizeof(float), scaleDims, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant input tensor."); - }; - tensorsDesc.insert(tensorsDesc.begin(), constantTensorDesc); - edges.insert(edges.begin(), mInputs[index + 1]); - } - - if (options->bias == nullptr) { - std::vector bias(newInputDims[1], 0.0); - std::vector biasDims = {1, newInputDims[1], 1, 1}; - // Create a constant scale. - if (CreateConstantInput(constantTensorDesc, bias.data(), - newInputDims[1] * sizeof(float), biasDims, {}, - DML_TENSOR_DATA_TYPE_FLOAT32) - .IsError()) { - return DAWN_INTERNAL_ERROR("Failed to create a constant input tensor."); - }; - tensorsDesc.push_back(constantTensorDesc); - edges.push_back(mInputs.back()); - } - - std::vector axes({2, 3}); - - DML_MEAN_VARIANCE_NORMALIZATION1_OPERATOR_DESC desc = {}; - desc.InputTensor = &inputTensorDesc; - desc.ScaleTensor = &tensorsDesc[0]; - desc.BiasTensor = &tensorsDesc[1]; - desc.OutputTensor = &outputTensorDesc; - desc.AxisCount = static_cast(axes.size()); - desc.Axes = axes.data(); - desc.NormalizeVariance = true; - desc.Epsilon = options->epsilon; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_MEAN_VARIANCE_NORMALIZATION1; - dmlOperatorDesc.Desc = &desc; - - ComPtr dmlOperator; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - auto outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdge, edges[0], edges[1]}, dmlOperator); - - // Transpose output from nchw->nhwc. - if (options->layout == wnn::InputOperandLayout::Nhwc) { - if (TransposeOutputToNhwc(outputEdge, newOutputDims).IsError()) { - return DAWN_INTERNAL_ERROR("Failed to transpose output from Nchw to Nhwc."); - }; - } - - mGraphEdgesMap[instanceNorm->PrimaryOutput()] = outputEdge; - // The input edges order should match the operator's inputs order. - return {}; - } - - MaybeError Graph::AddConcat(const op::Concat* concat) { - DAWN_ASSERT(concat->Inputs().size() >= 1); - auto inputsOperand = concat->Inputs(); - std::vector> inputEdges; - std::shared_ptr primaryEdge = mGraphEdgesMap[inputsOperand[0].Get()]; - auto primaryDims = ConvertDimensions(inputsOperand[0].Get()->Shape()); - - std::vector inputTensorsDesc; - for (auto& inputOperand : inputsOperand) { - DAWN_ASSERT(mGraphEdgesMap.find(inputOperand.Get()) != mGraphEdgesMap.end()); - auto inputEdge = mGraphEdgesMap[inputOperand.Get()]; - auto inputDims = ConvertDimensions(inputOperand.Get()->Shape()); - inputEdges.push_back(inputEdge); - - // Expand dimensions to DML_TENSOR_DIMENSION_COUNT_MAX if needed. - if (inputDims.size() < DML_TENSOR_DIMENSION_COUNT_MAX) { - auto newInputDims = ExpandDimensions(inputDims, DML_TENSOR_DIMENSION_COUNT_MAX); - auto newInputStrides = CalculateStridesForBroadcast(inputDims, newInputDims, - inputEdge->outputTensorDESC); - std::shared_ptr inputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, inputDmlTensorDesc, - &inputEdge->outputTensorDESC, newInputDims, - newInputStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - inputTensorsDesc.push_back( - {DML_TENSOR_TYPE_BUFFER, &inputDmlTensorDesc->bufferDesc}); - } else if (inputDims.size() == DML_TENSOR_DIMENSION_COUNT_MAX) { - inputTensorsDesc.push_back(inputEdge->outputTensorDESC); - } else { - return DAWN_INTERNAL_ERROR("The size of input dimensions is greater than max"); - } - } - - auto outputDims = ConvertDimensions(concat->Outputs()[0].Get()->Shape()); - auto newOutputDims = outputDims; - if (outputDims.size() < DML_TENSOR_DIMENSION_COUNT_MAX) { - newOutputDims = ExpandDimensions(outputDims, DML_TENSOR_DIMENSION_COUNT_MAX); - } - - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, - &primaryEdge->outputTensorDESC, newOutputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - // Update the axis to align with the DML_TENSOR_DIMENSION_COUNT_MAX. - uint32_t axis = concat->GetAxis(); - axis += DML_TENSOR_DIMENSION_COUNT_MAX - primaryDims.size(); - - DML_JOIN_OPERATOR_DESC desc = {}; - desc.Axis = axis; - desc.InputCount = static_cast(inputTensorsDesc.size()); - desc.InputTensors = inputTensorsDesc.data(); - desc.OutputTensor = &outputTensorDesc; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_JOIN; - dmlOperatorDesc.Desc = &desc; - - ComPtr dmlOperator; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - mGraphEdgesMap[concat->PrimaryOutput()] = - CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdges}, dmlOperator); - - // Reshape back according to output rank if needed to update the output edge. - if (outputDims.size() < newOutputDims.size()) { - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, - &primaryEdge->outputTensorDESC, outputDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - } - return {}; - } - - MaybeError Graph::AddGemm(const op::Gemm* gemm) { - auto inputsOperand = gemm->Inputs(); - DAWN_ASSERT(inputsOperand.size() == 2 || inputsOperand.size() == 3); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[0].Get()) != mGraphEdgesMap.end()); - auto aEdge = mGraphEdgesMap[inputsOperand[0].Get()]; - auto aDims = ConvertDimensions(inputsOperand[0].Get()->Shape()); - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[1].Get()) != mGraphEdgesMap.end()); - auto bEdge = mGraphEdgesMap[inputsOperand[1].Get()]; - auto bDims = ConvertDimensions(inputsOperand[1].Get()->Shape()); - auto outputDims = ConvertDimensions(gemm->Outputs()[0].Get()->Shape()); - std::vector> inputEdges = {aEdge, bEdge}; - - // The shape of a tensor is 2D definited in WebNN Spec, but DML only support 4D, - // so expand dimensions to 4D. - DAWN_ASSERT(aDims.size() == 2); - aDims = ExpandDimensions(aDims, 4); - std::shared_ptr aDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, aDmlTensorDesc, &aEdge->outputTensorDESC, - aDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC aTensorDesc = {DML_TENSOR_TYPE_BUFFER, &aDmlTensorDesc->bufferDesc}; - - DAWN_ASSERT(bDims.size() == 2); - bDims = ExpandDimensions(bDims, 4); - std::shared_ptr bDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, bDmlTensorDesc, &bEdge->outputTensorDESC, - bDims)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC bTensorDesc = {DML_TENSOR_TYPE_BUFFER, &bDmlTensorDesc->bufferDesc}; - - DAWN_ASSERT(outputDims.size() == 2); - auto expandedOutputDims = ExpandDimensions(outputDims, 4); - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &aEdge->outputTensorDESC, - expandedOutputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - // The operand c is optional. - DML_TENSOR_DESC* cTensorDescPtr = nullptr; - DML_TENSOR_DESC cTensorDesc; - if (inputsOperand.size() == 3) { - DAWN_ASSERT(mGraphEdgesMap.find(inputsOperand[2].Get()) != mGraphEdgesMap.end()); - auto cEdge = mGraphEdgesMap[inputsOperand[2].Get()]; - auto cDims = ConvertDimensions(inputsOperand[2].Get()->Shape()); - // It is either a scalar, or of the shape that is unidirectionally broadcastable to - // the shape [M, N] definited in WebNN Spec, DML only support 4D, so broadCast the - // Shape of optional C to {1, 1, M, N } supported in DML. - auto cNewDims = expandedOutputDims; - auto cNewStrides = - CalculateStridesForBroadcast(cDims, cNewDims, cEdge->outputTensorDESC); - std::shared_ptr cDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, cDmlTensorDesc, &cEdge->outputTensorDESC, - cNewDims, cNewStrides)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - cTensorDesc = {DML_TENSOR_TYPE_BUFFER, &cDmlTensorDesc->bufferDesc}; - cTensorDescPtr = &cTensorDesc; - inputEdges.push_back(cEdge); - } - - const GemmOptions* options = gemm->GetOptions(); - DML_MATRIX_TRANSFORM aTranspose = gemm->GetOptions()->aTranspose - ? DML_MATRIX_TRANSFORM_TRANSPOSE - : DML_MATRIX_TRANSFORM_NONE; - DML_MATRIX_TRANSFORM bTranspose = gemm->GetOptions()->bTranspose - ? DML_MATRIX_TRANSFORM_TRANSPOSE - : DML_MATRIX_TRANSFORM_NONE; - DML_GEMM_OPERATOR_DESC desc{}; - desc.ATensor = &aTensorDesc; - desc.BTensor = &bTensorDesc; - desc.CTensor = cTensorDescPtr; - desc.OutputTensor = &outputTensorDesc; - desc.TransA = aTranspose; - desc.TransB = bTranspose; - desc.Alpha = options->alpha; - desc.Beta = options->beta; - DML_OPERATOR_DESC dmlOperatorDesc = {}; - dmlOperatorDesc.Type = DML_OPERATOR_GEMM; - dmlOperatorDesc.Desc = &desc; - - ComPtr dmlOperator; - WEBNN_CHECK(mDevice->CreateOperator(&dmlOperatorDesc, IID_PPV_ARGS(&dmlOperator))); - - mGraphEdgesMap[gemm->PrimaryOutput()] = - CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {inputEdges}, dmlOperator); - - // Reshape back according to output rank if needed to update the output edge. - if (outputDims.size() < expandedOutputDims.size()) { - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &aEdge->outputTensorDESC, - outputDims, {}, true)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - } - return {}; - } - - MaybeError Graph::AddOutput(std::string_view name, const OperandBase* output) { - DAWN_ASSERT(mGraphEdgesMap.find(output) != mGraphEdgesMap.end()); - auto outputEdge = mGraphEdgesMap[output]; - DAWN_ASSERT(outputEdge != nullptr); - - const DML_BUFFER_TENSOR_DESC* bufferDesc = - reinterpret_cast(outputEdge->outputTensorDESC.Desc); - DAWN_ASSERT(bufferDesc != nullptr); - auto strides = bufferDesc->Strides; - - // Append identity to avoid directly using graph input as output, and avoid lack of - // considering the impacts of strides if there are. - if (outputEdge->isInputEdge || strides != nullptr) { - auto edge = outputEdge; - auto inputTensorDesc = outputEdge->outputTensorDESC; - - std::shared_ptr outputDmlTensorDesc(new DmlTensorDesc); - if (!CreateDmlTensorDesc(mDmlTensorsDesc, outputDmlTensorDesc, &inputTensorDesc)) { - return DAWN_INTERNAL_ERROR("Failed to create DML tensor description."); - } - DML_TENSOR_DESC outputTensorDesc = {DML_TENSOR_TYPE_BUFFER, - &outputDmlTensorDesc->bufferDesc}; - - ComPtr dmlOperator; - AppendIdentity(inputTensorDesc, outputTensorDesc, dmlOperator); - outputEdge = CreateEdgeFromThisNode(outputTensorDesc, mGraphDesc.NodeCount()); - AddNodeAndEdgesToGraphDesc(mGraphDesc, {edge}, dmlOperator); - } - outputEdge->name = name; - std::unique_ptr outputEdgeDesc(new DML_OUTPUT_GRAPH_EDGE_DESC); - auto outputEdgeInfo = reinterpret_cast(outputEdge.get()); - outputEdgeDesc->FromNodeIndex = outputEdgeInfo->nodeIndex; - outputEdgeDesc->FromNodeOutputIndex = outputEdgeInfo->outputNodeIndex; - outputEdgeDesc->GraphOutputIndex = mOutputs.size(); - mGraphDesc.AddOutputEdge(outputEdgeDesc); - mOutputs.push_back(*outputEdgeInfo); - return {}; - } - - MaybeError Graph::Finish() { - if (mInputs.empty()) { - return DAWN_VALIDATION_ERROR("Model inputs must be set."); - } - return {}; - } - - void Graph::FillUploadResourceAndInputBindings( - uint64_t uploadResourceSize, - std::vector& inputBufferBinding, - std::unordered_map namedInputs) { - D3D12_RANGE uploadBufferRange{0, uploadResourceSize}; - int8_t* uploadBuffer; - WEBNN_CHECK(mCompiledGraph->uploadResource->Map(0, &uploadBufferRange, - reinterpret_cast(&uploadBuffer))); - uint64_t offset = 0; - for (size_t i = 0; i < mInputs.size(); ++i) { - auto input = mInputs[i]; - if (namedInputs.empty()) { - if (input->isConstantInput) { - offset = - RoundUpToMultiple(offset, (uint64_t)DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); - inputBufferBinding[i].Buffer = mCompiledGraph->inputResource.Get(); - inputBufferBinding[i].Offset = offset; - inputBufferBinding[i].SizeInBytes = input->byteLength; - memcpy(uploadBuffer + offset, input->buffer, - static_cast(input->byteLength)); - offset = offset + input->byteLength; - } - } else { - if (!input->isConstantInput) { - offset = - RoundUpToMultiple(offset, (uint64_t)DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); - auto arrayBufferView = namedInputs[input->name].resource.arrayBufferView; - inputBufferBinding[i].Buffer = mCompiledGraph->inputResource.Get(); - inputBufferBinding[i].Offset = offset; - inputBufferBinding[i].SizeInBytes = arrayBufferView.byteLength; - memcpy( - uploadBuffer + offset, - static_cast(arrayBufferView.buffer) + arrayBufferView.byteOffset, - arrayBufferView.byteLength); - offset = offset + arrayBufferView.byteLength; - } - } - } - mCompiledGraph->uploadResource->Unmap(0, nullptr); - } - - MaybeError Graph::CompileImpl() { - DML_GRAPH_DESC graphDesc = mGraphDesc.ConvertDmlGraphDesc(mInputs.size(), mOutputs.size()); - // Compiles a graph of DirectML operators into an object that can be dispatched to the - // GPU. - mCompiledGraph.reset( - new CompiledGraph(mD3D12Device, mDevice, mDevice1, graphDesc, DML_EXECUTION_FLAG_NONE)); - // Set the descriptor heap(s). - ID3D12DescriptorHeap* descriptorHeaps[] = {mCompiledGraph->descriptorHeap.Get()}; - mCommandList->SetDescriptorHeaps(ARRAYSIZE(descriptorHeaps), descriptorHeaps); - - mCompiledGraph->BindTemporaryResource(); - mCompiledGraph->BindPersistentResource(); - - // Initialize constant inputs. - uint64_t constantInputsResourceSize = 0; - for (auto& input : mInputs) { - if (input->isConstantInput) { - uint64_t offset = RoundUpToMultiple(constantInputsResourceSize, - (uint64_t)DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); - constantInputsResourceSize = offset + input->byteLength; - } else { - uint64_t offset = RoundUpToMultiple(mCompiledGraph->commonInputsResourceSize, - (uint64_t)DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); - mCompiledGraph->commonInputsResourceSize = offset + input->byteLength; - } - } - - if (constantInputsResourceSize) { - WEBNN_CHECK(mD3D12Device->CreateCommittedResource( - &CreateHeapProperties(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE, - &CreateResourceDesc(constantInputsResourceSize), D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, IID_PPV_ARGS(&mCompiledGraph->uploadResource))); - - WEBNN_CHECK(mD3D12Device->CreateCommittedResource( - &CreateHeapProperties(), D3D12_HEAP_FLAG_NONE, - &CreateResourceDesc(constantInputsResourceSize, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, - IID_PPV_ARGS(&mCompiledGraph->inputResource))); - - std::vector inputBufferBinding(mInputs.size()); - FillUploadResourceAndInputBindings(constantInputsResourceSize, inputBufferBinding); - // Copy buffer from uploadResource to inputResource. - CopyBufferRegion(mCommandList, mCompiledGraph->uploadResource, - mCompiledGraph->inputResource, constantInputsResourceSize, - D3D12_RESOURCE_STATE_COPY_DEST); - - DML_BUFFER_ARRAY_BINDING inputBufferArrayBinding = {}; - inputBufferArrayBinding.BindingCount = inputBufferBinding.size(); - inputBufferArrayBinding.Bindings = inputBufferBinding.data(); - DML_BINDING_DESC inputBindingDesc{DML_BINDING_TYPE_BUFFER_ARRAY, - &inputBufferArrayBinding}; - mCompiledGraph->bindingTable->BindInputs(1, &inputBindingDesc); - } - - // Record execution of the operator initializer. - // The command recorder is a stateless object that records Dispatches into an existing - // Direct3D 12 command list. - WEBNN_CHECK(mDevice->CreateCommandRecorder(IID_PPV_ARGS(&mCommandRecorder))); - mCommandRecorder->RecordDispatch(mCommandList.Get(), - mCompiledGraph->compiledOperatorInitializer.Get(), - mCompiledGraph->bindingTable.Get()); - CloseExecuteResetWait(mCommandList, mCommandQueue, mCommandAllocator, mD3D12Device); - - if (mCompiledGraph->commonInputsResourceSize) { - // Release the upload resource and input resource which has been allocated for - // initializing constant inputs and then re-allocate them with new size to prepare - // for initializing common inputs. - mCompiledGraph->uploadResource = nullptr; - mCompiledGraph->inputResource = nullptr; - WEBNN_CHECK(mD3D12Device->CreateCommittedResource( - &CreateHeapProperties(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE, - &CreateResourceDesc(mCompiledGraph->commonInputsResourceSize), - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS(&mCompiledGraph->uploadResource))); - - WEBNN_CHECK(mD3D12Device->CreateCommittedResource( - &CreateHeapProperties(), D3D12_HEAP_FLAG_NONE, - &CreateResourceDesc(mCompiledGraph->commonInputsResourceSize, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, - IID_PPV_ARGS(&mCompiledGraph->inputResource))); - } - - for (size_t i = 0; i < mOutputs.size(); ++i) { - uint64_t byteLength = - reinterpret_cast(mOutputs[i].outputTensorDESC.Desc) - ->TotalTensorSizeInBytes; - uint64_t offset = RoundUpToMultiple(mCompiledGraph->outputResourceSize, - (uint64_t)DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); - mCompiledGraph->outputResourceSize = offset + byteLength; - } - - if (mCompiledGraph->outputResourceSize) { - WEBNN_CHECK(mD3D12Device->CreateCommittedResource( - &CreateHeapProperties(), D3D12_HEAP_FLAG_NONE, - &CreateResourceDesc(mCompiledGraph->outputResourceSize, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, - IID_PPV_ARGS(&mCompiledGraph->outputResource))); - - mD3D12Device->CreateCommittedResource( - &CreateHeapProperties(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, - &CreateResourceDesc(mCompiledGraph->outputResourceSize), - D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(&mCompiledGraph->readBackResource)); - } - - // Bind and execute the operator on the GPU. - // Reset the binding table to bind for the operator we want to execute (it was - // previously used to bind for the initializer). - mCompiledGraph->bindingTableDesc.Dispatchable = mCompiledGraph->compiledOperator.Get(); - mCompiledGraph->bindingTable->Reset(&mCompiledGraph->bindingTableDesc); - mCompiledGraph->BindTemporaryResource(false); - mCompiledGraph->BindPersistentResource(false); - - return {}; - } - - MaybeError Graph::ComputeImpl(NamedInputsBase* inputs, NamedOutputsBase* outputs) { - auto namedInputs = inputs->GetRecords(); - for (auto& input : mInputs) { - // All the inputs must be set. - if (!input->isConstantInput && namedInputs.find(input->name) == namedInputs.end()) { - return DAWN_INTERNAL_ERROR("The input must be set."); - } - } - - // Initialize common inputs. - if (mCompiledGraph->commonInputsResourceSize) { - std::vector inputBufferBinding(mInputs.size()); - FillUploadResourceAndInputBindings(mCompiledGraph->commonInputsResourceSize, - inputBufferBinding, namedInputs); - // Copy buffer from uploadResource to inputResource. - CopyBufferRegion( - mCommandList, mCompiledGraph->uploadResource, mCompiledGraph->inputResource, - mCompiledGraph->commonInputsResourceSize, D3D12_RESOURCE_STATE_COPY_DEST); - - std::vector inputBindingDesc(mInputs.size()); - for (size_t i = 0; i < inputBufferBinding.size(); ++i) { - if (inputBufferBinding[i].Buffer != nullptr) { - inputBindingDesc[i] = {DML_BINDING_TYPE_BUFFER, &inputBufferBinding[i]}; - } - } - mCompiledGraph->bindingTable->BindInputs(inputBindingDesc.size(), - inputBindingDesc.data()); - } - - // Prepare for outputs and read back buffer from Gpu. - std::vector outputArrayBufferViews; - ArrayBufferView outputArrayBufferView; - for (size_t i = 0; i < mOutputs.size(); ++i) { - std::string name = mOutputs[i].name; - auto namedOutputs = outputs->GetRecords(); - if (namedOutputs.find(name) != namedOutputs.end()) { - outputArrayBufferView = namedOutputs[name].arrayBufferView; - outputArrayBufferViews.push_back(outputArrayBufferView); - DAWN_ASSERT(outputArrayBufferView.buffer != nullptr && - outputArrayBufferView.byteLength != 0); - } else { - size_t byteLength = reinterpret_cast( - mOutputs[i].outputTensorDESC.Desc) - ->TotalTensorSizeInBytes; - // It is an unuseful output of dml graph. We need not read back and copy buffer - // to it, just reserve it as a placeholder. - outputArrayBufferView = {nullptr, byteLength, 0}; - outputArrayBufferViews.push_back(outputArrayBufferView); - } - } - - std::vector outputBindingDesc(mOutputs.size()); - std::vector outputBufferBinding(mOutputs.size()); - - uint64_t outputOffset = 0; - for (size_t i = 0; i < mOutputs.size(); ++i) { - auto output = outputArrayBufferViews[i]; - outputOffset = - RoundUpToMultiple(outputOffset, (uint64_t)DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); - outputBufferBinding[i].Buffer = mCompiledGraph->outputResource.Get(); - outputBufferBinding[i].Offset = outputOffset; - outputBufferBinding[i].SizeInBytes = output.byteLength; - outputBindingDesc[i] = {DML_BINDING_TYPE_BUFFER, &outputBufferBinding[i]}; - outputOffset = outputOffset + output.byteLength; - } - mCompiledGraph->bindingTable->BindOutputs(outputBindingDesc.size(), - outputBindingDesc.data()); - - // Record execution of the compiled operator. - ID3D12DescriptorHeap* descriptorHeaps[] = {mCompiledGraph->descriptorHeap.Get()}; - mCommandList->SetDescriptorHeaps(ARRAYSIZE(descriptorHeaps), descriptorHeaps); - mCommandRecorder->RecordDispatch(mCommandList.Get(), mCompiledGraph->compiledOperator.Get(), - mCompiledGraph->bindingTable.Get()); - - // Copy buffer from outputResource to readBackResource. - CopyBufferRegion(mCommandList, mCompiledGraph->outputResource, - mCompiledGraph->readBackResource, mCompiledGraph->outputResourceSize, - D3D12_RESOURCE_STATE_COPY_SOURCE, false); - CloseExecuteResetWait(mCommandList, mCommandQueue, mCommandAllocator, mD3D12Device); - - D3D12_RANGE tensorBufferRange{0, mCompiledGraph->outputResourceSize}; - int8_t* readBackBuffer; - WEBNN_CHECK(mCompiledGraph->readBackResource->Map( - 0, &tensorBufferRange, reinterpret_cast(&readBackBuffer))); - - uint64_t offset = 0; - for (size_t i = 0; i < mOutputs.size(); ++i) { - offset = RoundUpToMultiple(offset, (uint64_t)DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); - ArrayBufferView output = outputArrayBufferViews[i]; - if (output.buffer) { - memcpy(static_cast(output.buffer) + output.byteOffset, - readBackBuffer + offset, output.byteLength); - } - offset += output.byteLength; - } - - mCompiledGraph->readBackResource->Unmap(0, nullptr); - return {}; - } -} // namespace webnn::native::dml diff --git a/src/webnn/native/dml/GraphDML.h b/src/webnn/native/dml/GraphDML.h deleted file mode 100644 index 6064dd8a2..000000000 --- a/src/webnn/native/dml/GraphDML.h +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2021 The WebNN-native Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef WEBNN_NATIVE_DML_MODEL_DML_H_ -#define WEBNN_NATIVE_DML_MODEL_DML_H_ - -#define DML_TARGET_VERSION_USE_LATEST 1 - -#include -#include - -#include "DMLUtils.h" -#include "DirectML.h" -#include "webnn/native/Graph.h" -#include "webnn/native/Operand.h" -#include "webnn/native/Operator.h" -#include "webnn/native/dml/ContextDML.h" -#include "webnn/native/ops/BatchNorm.h" -#include "webnn/native/ops/Binary.h" -#include "webnn/native/ops/Clamp.h" -#include "webnn/native/ops/Concat.h" -#include "webnn/native/ops/Constant.h" -#include "webnn/native/ops/Conv2d.h" -#include "webnn/native/ops/Gemm.h" -#include "webnn/native/ops/Gru.h" -#include "webnn/native/ops/Input.h" -#include "webnn/native/ops/InstanceNorm.h" -#include "webnn/native/ops/LeakyRelu.h" -#include "webnn/native/ops/Pad.h" -#include "webnn/native/ops/Pool2d.h" -#include "webnn/native/ops/Reduce.h" -#include "webnn/native/ops/Resample2d.h" -#include "webnn/native/ops/Reshape.h" -#include "webnn/native/ops/Slice.h" -#include "webnn/native/ops/Split.h" -#include "webnn/native/ops/Squeeze.h" -#include "webnn/native/ops/Transpose.h" -#include "webnn/native/ops/Unary.h" -namespace webnn::native::dml { - - using namespace Microsoft::WRL; - - struct CompiledGraph { - CompiledGraph(ComPtr d3d12Device, - ComPtr device, - ComPtr device1, - const DML_GRAPH_DESC& graphDesc, - DML_EXECUTION_FLAGS flag = DML_EXECUTION_FLAG_NONE) - : D3D12Device(d3d12Device) { - WEBNN_CHECK(device.Get()->QueryInterface(IID_PPV_ARGS(&device1))); - WEBNN_CHECK(device1->CompileGraph(&graphDesc, flag, IID_PPV_ARGS(&compiledOperator))); - IDMLCompiledOperator* compiledOperators[] = {compiledOperator.Get()}; - WEBNN_CHECK( - device->CreateOperatorInitializer(ARRAYSIZE(compiledOperators), compiledOperators, - IID_PPV_ARGS(&compiledOperatorInitializer))); - DML_BINDING_PROPERTIES initializeBindingProperties = - compiledOperatorInitializer->GetBindingProperties(); - DML_BINDING_PROPERTIES executeBindingProperties = - compiledOperator->GetBindingProperties(); - UINT descriptorCount = std::max(initializeBindingProperties.RequiredDescriptorCount, - executeBindingProperties.RequiredDescriptorCount); - initializedTemporaryResourceSize = initializeBindingProperties.TemporaryResourceSize; - temporaryResourceSize = std::max(initializedTemporaryResourceSize, - executeBindingProperties.TemporaryResourceSize); - persistentResourceSize = executeBindingProperties.PersistentResourceSize; - - // Describe and create a constant buffer view (CBV), Shader resource view (SRV), and - // unordered access view (UAV) descriptor heap. - D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc{}; - descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - descriptorHeapDesc.NumDescriptors = descriptorCount; - descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - WEBNN_CHECK(D3D12Device->CreateDescriptorHeap(&descriptorHeapDesc, - IID_PPV_ARGS(&descriptorHeap))); - - // Create a binding table over the descriptor heap we just created. - bindingTableDesc.Dispatchable = compiledOperatorInitializer.Get(); - bindingTableDesc.CPUDescriptorHandle = - descriptorHeap->GetCPUDescriptorHandleForHeapStart(); - bindingTableDesc.GPUDescriptorHandle = - descriptorHeap->GetGPUDescriptorHandleForHeapStart(); - // The size of the binding table, in descriptors. This is the maximum number of - // descriptors that DirectML is permitted to write, from the start of both the supplied - // CPU and GPU descriptor handles. - bindingTableDesc.SizeInDescriptors = descriptorCount; - WEBNN_CHECK(device->CreateBindingTable(&bindingTableDesc, IID_PPV_ARGS(&bindingTable))); - }; - - void BindTemporaryResource(bool bindForInitialization = true) { - if (temporaryResourceSize != 0) { - if (temporaryResource == nullptr) { - D3D12Device->CreateCommittedResource( - &CreateHeapProperties(), D3D12_HEAP_FLAG_NONE, - &CreateResourceDesc(temporaryResourceSize, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, - IID_PPV_ARGS(&temporaryResource)); - } - - if ((bindForInitialization && initializedTemporaryResourceSize != 0) || - (!bindForInitialization && temporaryResourceSize != 0)) { - DML_BUFFER_BINDING bufferBinding{temporaryResource.Get(), 0, - temporaryResourceSize}; - DML_BINDING_DESC bindingDesc{DML_BINDING_TYPE_BUFFER, &bufferBinding}; - bindingTable->BindTemporaryResource(&bindingDesc); - } - } - }; - - void BindPersistentResource(bool bindForInitialization = true) { - if (persistentResourceSize != 0) { - if (persistentResource == nullptr) { - D3D12Device->CreateCommittedResource( - &CreateHeapProperties(), D3D12_HEAP_FLAG_NONE, - &CreateResourceDesc(persistentResourceSize, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, - IID_PPV_ARGS(&persistentResource)); - } - - DML_BUFFER_BINDING bufferBinding{persistentResource.Get(), 0, - persistentResourceSize}; - DML_BINDING_DESC bindingDesc{DML_BINDING_TYPE_BUFFER, &bufferBinding}; - if (bindForInitialization) { - bindingTable->BindOutputs(1, &bindingDesc); - } else { - bindingTable->BindPersistentResource(&bindingDesc); - } - } - }; - - ComPtr D3D12Device; - // IDMLCompiledOperator represents the DirectML graph's output which need to be initialized - // by IDMLOperatorInitializer. - ComPtr compiledOperator; - ComPtr compiledOperatorInitializer; - - ComPtr descriptorHeap; - ComPtr bindingTable; - DML_BINDING_TABLE_DESC bindingTableDesc; - - ComPtr uploadResource; - ComPtr inputResource; - ComPtr outputResource; - ComPtr readBackResource; - ComPtr temporaryResource; - ComPtr persistentResource; - uint64_t commonInputsResourceSize = 0; - uint64_t outputResourceSize = 0; - UINT64 temporaryResourceSize = 0; - UINT64 initializedTemporaryResourceSize = 0; - UINT64 persistentResourceSize = 0; - }; - - class Graph : public GraphBase { - public: - explicit Graph(Context* context); - ~Graph() override = default; - - virtual MaybeError AddConstant(const op::Constant* constant) override; - virtual MaybeError AddInput(const op::Input* input) override; - virtual MaybeError AddOutput(std::string_view name, const OperandBase* output) override; - virtual MaybeError AddBatchNorm(const op::BatchNorm* batchNorm) override; - virtual MaybeError AddBinary(const op::Binary* binary) override; - virtual MaybeError AddConv2d(const op::Conv2d* conv2d) override; - virtual MaybeError AddConvTranspose2d(const op::ConvTranspose2d* convTranspose2d) override; - virtual MaybeError AddPad(const op::Pad* pad) override; - virtual MaybeError AddPool2d(const op::Pool2d* pool2d) override; - virtual MaybeError AddReduce(const op::Reduce* reduce) override; - virtual MaybeError AddResample2d(const op::Resample2d* resample2d) override; - virtual MaybeError AddReshape(const op::Reshape* reshape) override; - virtual MaybeError AddSlice(const op::Slice* slice) override; - virtual MaybeError AddSplit(const op::Split* split) override; - virtual MaybeError AddSqueeze(const op::Squeeze* squeeze) override; - virtual MaybeError AddTranspose(const op::Transpose* transpose) override; - virtual MaybeError AddUnary(const op::Unary* unary) override; - virtual MaybeError AddGemm(const op::Gemm* Gemm) override; - virtual MaybeError AddGru(const op::Gru* Gru) override; - virtual MaybeError AddConcat(const op::Concat* concat) override; - virtual MaybeError AddClamp(const op::Clamp* clamp) override; - virtual MaybeError AddInstanceNorm(const op::InstanceNorm* instanceNorm) override; - virtual MaybeError Finish() override; - - void FillUploadResourceAndInputBindings( - uint64_t uploadResourceSize, - std::vector& inputBufferBinding, - std::unordered_map namedInputs = {}); - MaybeError CreateConstantInput(DML_TENSOR_DESC& inputTensorDESC, - void const* value, - size_t size, - const std::vector& dmlTensorDims, - const std::vector& strides = {}, - DML_TENSOR_DATA_TYPE dataType = DML_TENSOR_DATA_TYPE_FLOAT32, - DML_TENSOR_FLAGS tensorFlag = DML_TENSOR_FLAG_OWNED_BY_DML); - std::shared_ptr Clamp(const op::ClampBase* clamp, - std::shared_ptr inputEdge); - void AppendIdentity(const DML_TENSOR_DESC& inputTensorDesc, - DML_TENSOR_DESC& outputTensorDesc, - ComPtr& dmlOperator); - MaybeError HardSwish(std::shared_ptr& inputEdge, - const std::vector& inputDims); - MaybeError EmulateFusedOperator(FusionOperatorBase* activation, - std::shared_ptr& inputEdge, - const std::vector& inputDims); - MaybeError TransposeOutputToNhwc(std::shared_ptr& inputEdge, - const std::vector& nchwOutputDims); - - private: - MaybeError CompileImpl() override; - MaybeError ComputeImpl(NamedInputsBase* inputs, NamedOutputsBase* outputs) override; - - ComPtr mDevice; - ComPtr mDevice1; - ComPtr mD3D12Device; - ComPtr mCommandRecorder; - ComPtr mCommandQueue; - ComPtr mCommandAllocator; - ComPtr mCommandList; - - // Describe a graph of DirectML operators used to compile a combined, optimized operator. - std::vector> mInputs; - std::vector mOutputs; - DmlGraphDesc mGraphDesc; - std::unique_ptr mCompiledGraph; - - std::map> mGraphEdgesMap; - // Keep the input tensors description here to avoid releasing too early. - std::vector> mDmlTensorsDesc; - std::unordered_set mConstantSet; - std::vector> mConstantsBuffer; - }; - -} // namespace webnn::native::dml - -#endif // WEBNN_NATIVE_DML_MODEL_DML_H_ \ No newline at end of file