From bd738081b8fcea4599a06f01d5c07979f3b0fcb3 Mon Sep 17 00:00:00 2001 From: David Monahan Date: Fri, 8 Dec 2023 12:50:02 +0000 Subject: IVGCVSW-7165 - PreCompiledWorkload and Conv2d Integration work for GpuFsa * Add PreCompiledWorkload implementation for GpuFsa * Add ConstantWorkload implementation for GpuFsa * Add Input/Output workloads for GpuFsa * Added CopyMemGeneric workload for GpuFsa * Separate creation and validation of sketch tensors into seperate functions Signed-off-by: Kevin May Signed-off-by: David Monahan Change-Id: Ie7299a4c61073b5ca03d9f8681458869ef7ce743 --- src/backends/gpuFsa/CMakeLists.txt | 2 +- src/backends/gpuFsa/GpuFsaBackend.cpp | 32 ++-- src/backends/gpuFsa/GpuFsaBackend.hpp | 24 ++- src/backends/gpuFsa/GpuFsaLayerSupport.cpp | 4 +- src/backends/gpuFsa/GpuFsaWorkloadFactory.cpp | 58 ++++++- src/backends/gpuFsa/GpuFsaWorkloadFactory.hpp | 6 +- src/backends/gpuFsa/backend.mk | 24 ++- src/backends/gpuFsa/layerValidators/CMakeLists.txt | 14 -- .../GpuFsaConvolution2dValidate.cpp | 126 --------------- .../GpuFsaConvolution2dValidate.hpp | 28 ---- src/backends/gpuFsa/layers/CMakeLists.txt | 14 ++ src/backends/gpuFsa/layers/GpuFsaConvolution2d.cpp | 180 +++++++++++++++++++++ src/backends/gpuFsa/layers/GpuFsaConvolution2d.hpp | 30 ++++ src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp | 23 ++- src/backends/gpuFsa/workloads/CMakeLists.txt | 7 +- .../gpuFsa/workloads/GpuFsaConstantWorkload.cpp | 114 +++++++++++++ .../gpuFsa/workloads/GpuFsaConstantWorkload.hpp | 30 ++++ .../gpuFsa/workloads/GpuFsaPreCompiledWorkload.cpp | 106 ++++++++++++ .../gpuFsa/workloads/GpuFsaPreCompiledWorkload.hpp | 56 +++++++ .../gpuFsa/workloads/GpuFsaWorkloadUtils.hpp | 163 +++++++++++++++++++ 20 files changed, 830 insertions(+), 211 deletions(-) delete mode 100644 src/backends/gpuFsa/layerValidators/CMakeLists.txt delete mode 100644 src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp delete mode 100644 src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp create mode 100644 src/backends/gpuFsa/layers/CMakeLists.txt create mode 100644 src/backends/gpuFsa/layers/GpuFsaConvolution2d.cpp create mode 100644 src/backends/gpuFsa/layers/GpuFsaConvolution2d.hpp create mode 100644 src/backends/gpuFsa/workloads/GpuFsaConstantWorkload.cpp create mode 100644 src/backends/gpuFsa/workloads/GpuFsaConstantWorkload.hpp create mode 100644 src/backends/gpuFsa/workloads/GpuFsaPreCompiledWorkload.cpp create mode 100644 src/backends/gpuFsa/workloads/GpuFsaPreCompiledWorkload.hpp create mode 100644 src/backends/gpuFsa/workloads/GpuFsaWorkloadUtils.hpp diff --git a/src/backends/gpuFsa/CMakeLists.txt b/src/backends/gpuFsa/CMakeLists.txt index 8d1a58ee27..5181f2288e 100644 --- a/src/backends/gpuFsa/CMakeLists.txt +++ b/src/backends/gpuFsa/CMakeLists.txt @@ -23,7 +23,7 @@ if(ARMCOMPUTEGPUFSA) GpuFsaWorkloadFactory.hpp ) - add_subdirectory(layerValidators) + add_subdirectory(layers) add_subdirectory(workloads) if(BUILD_UNIT_TESTS) diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp index 8ea9e8e7d3..9886a6e187 100644 --- a/src/backends/gpuFsa/GpuFsaBackend.cpp +++ b/src/backends/gpuFsa/GpuFsaBackend.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -20,10 +20,7 @@ #include #include -#include -#include - -#include "layerValidators/GpuFsaConvolution2dValidate.hpp" +#include "layers/GpuFsaConvolution2d.hpp" namespace armnn { @@ -218,9 +215,6 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra OptimizationViews optimizationViews(modelOptions); using namespace arm_compute::experimental::dynamic_fusion; - // Create a new workload sketch, for validation purposes - auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context(); - auto gpuCtx = GpuWorkloadContext(&compileCtx); auto it = subgraph.end(); std::map untouched; @@ -233,32 +227,41 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra GpuFsaLayerSupport supportChecker; it = subgraph.end(); + arm_compute::CLCompileContext* compileCtx = &(arm_compute::CLKernelLibrary::get().get_compile_context()); + + // Setup the GpuWokloadContext which will exist for the lifetime of the Graph. This contains the TensorInfos + std::shared_ptr workloadContext = std::make_shared(compileCtx); while (it != subgraph.begin()) { --it; Layer& base = *(PolymorphicDowncast(*it)); + // Create a GpuFsaPreCompiledBlob, this contains all of the information needed to execute an operator + GpuFsaPreCompiledBlob* preCompiledBlobPtr = new GpuFsaPreCompiledBlob(); + preCompiledBlobPtr->workloadContext = workloadContext; + preCompiledBlobPtr->sketch = std::make_unique(workloadContext.get()); - std::unique_ptr sketch = std::make_unique(&gpuCtx); + // Configure and setup the sketch for each supported op. Their data will be wrapped into a PreCompiled layer switch (base.GetType()) { case (LayerType::Convolution2d): { auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(); - //std::vector infos = {input, weights}; auto desc = PolymorphicDowncast(&base.GetParameters()); if (desc->m_BiasEnabled) { auto bias = base.GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo(); - GpuFsaConvolution2dCreateOp(input, + GpuFsaConvolution2dCreateOp(preCompiledBlobPtr, + input, *desc, weights, bias); } else { - GpuFsaConvolution2dCreateOp(input, + GpuFsaConvolution2dCreateOp(preCompiledBlobPtr, + input, *desc, weights, EmptyOptional()); @@ -270,7 +273,8 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra continue; } - auto compiledBlob = std::make_unique(sketch.release(), DeleteAsType); + auto compiledBlob = + std::make_unique(preCompiledBlobPtr, DeleteAsType); IConnectableLayer* preCompiledLayer = optimizationViews.GetINetwork()->AddPrecompiledLayer( PreCompiledDescriptor(base.GetNumInputSlots(), base.GetNumOutputSlots()), @@ -289,7 +293,7 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra CreateOutputsFrom(&base), {&base}); - optimizationViews.AddSubstitution({ *substituteSubgraph, SubgraphView(preCompiledLayer) }); + optimizationViews.AddSubstitution({ std::move(*substituteSubgraph), SubgraphView(preCompiledLayer) }); untouched.erase(base.GetGuid()); } diff --git a/src/backends/gpuFsa/GpuFsaBackend.hpp b/src/backends/gpuFsa/GpuFsaBackend.hpp index 26960065c7..4c2a5f02e3 100644 --- a/src/backends/gpuFsa/GpuFsaBackend.hpp +++ b/src/backends/gpuFsa/GpuFsaBackend.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #pragma once @@ -11,6 +11,8 @@ #include #include #include +#include +#include // System includes for mapping and unmapping memory #include @@ -18,13 +20,31 @@ namespace armnn { +/** + * A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend + * + * @param[in, out] sketch A unique pointer to the sketch containing the operators which have been fused. + * @param[in, out] TensorInfos A shared pointer to a GpuWorkloadContext which contains TensorInfos + * @param[in, out] inputIds A unique pointer to a vector of input Ids used to access workloadContext TensorInfos + * @param[in, out] outputIds A unique pointer to a vector of output Ids used to access workloadContext TensorInfos + * + */ +struct GpuFsaPreCompiledBlob +{ + std::unique_ptr sketch = nullptr; + std::shared_ptr workloadContext = nullptr; + + std::unique_ptr> inputIds = nullptr; + std::unique_ptr> outputIds = nullptr; +}; + // add new capabilities here.. const BackendCapabilities gpuFsaCapabilities("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, - {"ConstantTensorsAsInputs", false}, + {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, diff --git a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp index 063af2732e..96c986ab33 100644 --- a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp +++ b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -10,7 +10,7 @@ #include #if defined(ARMCOMPUTEGPUFSA_ENABLED) -#include "layerValidators/GpuFsaConvolution2dValidate.hpp" +#include "layers/GpuFsaConvolution2d.hpp" #endif #include diff --git a/src/backends/gpuFsa/GpuFsaWorkloadFactory.cpp b/src/backends/gpuFsa/GpuFsaWorkloadFactory.cpp index 6d13879f51..faa0d38386 100644 --- a/src/backends/gpuFsa/GpuFsaWorkloadFactory.cpp +++ b/src/backends/gpuFsa/GpuFsaWorkloadFactory.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -9,6 +9,11 @@ #include "GpuFsaBackendId.hpp" #include "GpuFsaTensorHandle.hpp" +#include "workloads/GpuFsaConstantWorkload.hpp" +#include "workloads/GpuFsaPreCompiledWorkload.hpp" + +#include + namespace armnn { @@ -43,11 +48,13 @@ bool IsDataType(const WorkloadInfo& info) GpuFsaWorkloadFactory::GpuFsaWorkloadFactory(const std::shared_ptr& memoryManager) : m_MemoryManager(memoryManager) { + InitializeCLCompileContext(); } GpuFsaWorkloadFactory::GpuFsaWorkloadFactory() : m_MemoryManager(new GpuFsaMemoryManager()) { + InitializeCLCompileContext(); } const BackendId& GpuFsaWorkloadFactory::GetBackendId() const @@ -81,11 +88,52 @@ std::unique_ptr GpuFsaWorkloadFactory::CreateTensorHandle(const T return tensorHandle; } -std::unique_ptr GpuFsaWorkloadFactory::CreateWorkload(LayerType /*type*/, - const QueueDescriptor& /*descriptor*/, - const WorkloadInfo& /*info*/) const + +void GpuFsaWorkloadFactory::InitializeCLCompileContext() { + // Initialize our m_CLCompileContext using default device and context + auto context = arm_compute::CLKernelLibrary::get().context(); + auto device = arm_compute::CLKernelLibrary::get().get_device(); + m_CLCompileContext = arm_compute::CLCompileContext(context, device); +} + +std::unique_ptr GpuFsaWorkloadFactory::CreateWorkload(LayerType type, + const QueueDescriptor& descriptor, + const WorkloadInfo& info) const { - return nullptr; + switch(type) + { + case LayerType::Constant : + { + auto constQueueDescriptor = PolymorphicDowncast(&descriptor); + return std::make_unique(*constQueueDescriptor, info, m_CLCompileContext); + } + case LayerType::Input : + { + auto inputQueueDescriptor = PolymorphicDowncast(&descriptor); + return std::make_unique(*inputQueueDescriptor, info); + } + case LayerType::Output : + { + auto outputQueueDescriptor = PolymorphicDowncast(&descriptor); + return std::make_unique(*outputQueueDescriptor, info); + } + case LayerType::MemCopy : + { + auto memCopyQueueDescriptor = PolymorphicDowncast(&descriptor); + if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0]) + { + throw InvalidArgumentException("GpuFsaWorkloadFactory: Invalid null input for MemCopy workload"); + } + return std::make_unique(*memCopyQueueDescriptor, info); + } + case LayerType::PreCompiled : + { + auto precompiledQueueDescriptor = PolymorphicDowncast(&descriptor); + return std::make_unique(*precompiledQueueDescriptor, info); + } + default : + return nullptr; + } } } // namespace armnn \ No newline at end of file diff --git a/src/backends/gpuFsa/GpuFsaWorkloadFactory.hpp b/src/backends/gpuFsa/GpuFsaWorkloadFactory.hpp index 9b97070766..04074cf0ab 100644 --- a/src/backends/gpuFsa/GpuFsaWorkloadFactory.hpp +++ b/src/backends/gpuFsa/GpuFsaWorkloadFactory.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #pragma once @@ -8,6 +8,8 @@ #include +#include + namespace armnn { @@ -44,6 +46,7 @@ public: std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout, const bool IsMemoryManaged = true) const override; + void InitializeCLCompileContext(); std::unique_ptr CreateWorkload(LayerType type, const QueueDescriptor& descriptor, @@ -54,6 +57,7 @@ private: std::unique_ptr MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info) const; mutable std::shared_ptr m_MemoryManager; + arm_compute::CLCompileContext m_CLCompileContext; }; } // namespace armnn diff --git a/src/backends/gpuFsa/backend.mk b/src/backends/gpuFsa/backend.mk index d8d254205b..a219ad4fec 100644 --- a/src/backends/gpuFsa/backend.mk +++ b/src/backends/gpuFsa/backend.mk @@ -1,5 +1,5 @@ # -# Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # @@ -22,34 +22,28 @@ BACKEND_SOURCES := \ GpuFsaRegistryInitializer.cpp \ GpuFsaTensorHandleFactory.cpp \ GpuFsaWorkloadFactory.cpp \ - layerValidators/GpuFsaConvolution2dValidate.cpp -else - -# ARMNN_COMPUTE_GPUFSA_ENABLED == 0 -# No source file will be compiled for the GPU Dynamic Fusion backend - -BACKEND_SOURCES := - -endif + layers/GpuFsaConvolution2d.cpp # BACKEND_TEST_SOURCES contains the list of files to be included # in the Android unit test build (armnn-tests) and it is picked # up by the Android.mk file in the root of ArmNN -# The variable to enable/disable the GPU Dynamic Fusion backend -# (ARMNN_COMPUTE_GPUFSA_ENABLED is declared in android-nn-driver/Android.mk) -ifeq ($(ARMNN_COMPUTE_GPUFSA_ENABLED),1) - # ARMNN_COMPUTE_GPUFSA_ENABLED == 1 # Include the source files for the GPU Dynamic Fusion backend tests BACKEND_TEST_SOURCES := \ - test/GpuFsaEndToEndTests.cpp \ + test/GpuFsaEndToEndTests.cpp \ test/GpuFsaLayerSupportTests.cpp \ test/GpuFsaLayerTests.cpp \ test/GpuFsaOptimizedNetworkTests.cpp else +# ARMNN_COMPUTE_GPUFSA_ENABLED == 0 +# No source file will be compiled for the GPU Dynamic Fusion backend + +BACKEND_SOURCES := + + # ARMNN_COMPUTE_GPUFSA_ENABLED == 0 # No source file will be compiled for the GPU Dynamic Fusion backend tests diff --git a/src/backends/gpuFsa/layerValidators/CMakeLists.txt b/src/backends/gpuFsa/layerValidators/CMakeLists.txt deleted file mode 100644 index 57ea41d56c..0000000000 --- a/src/backends/gpuFsa/layerValidators/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -# -# Copyright © 2023 Arm Ltd and Contributors. All rights reserved. 
-# SPDX-License-Identifier: MIT -# - -list(APPEND armnnGpuFsaBackendLayerValidators_sources - GpuFsaConvolution2dValidate.cpp - GpuFsaConvolution2dValidate.hpp - ) - -add_library(armnnGpuFsaBackendLayerValidators OBJECT ${armnnGpuFsaBackendLayerValidators_sources}) -target_include_directories(armnnGpuFsaBackendLayerValidators PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) -target_include_directories(armnnGpuFsaBackendLayerValidators PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) -target_include_directories(armnnGpuFsaBackendLayerValidators PRIVATE ${PROJECT_SOURCE_DIR}/src/backends) diff --git a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp deleted file mode 100644 index bed7b26f74..0000000000 --- a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp +++ /dev/null @@ -1,126 +0,0 @@ -// -// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "GpuFsaConvolution2dValidate.hpp" - -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -namespace armnn -{ - -using namespace armcomputetensorutils; - -inline arm_compute::Status ValidateAndCreateOp(const TensorInfo& input, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional& biases, - const bool createOp = false) -{ - // Create a new workload sketch, for validation purposes - auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context(); - auto gpuCtx = GpuWorkloadContext(&compileCtx); - GpuWorkloadSketch sketch{ &gpuCtx }; - - // Build and create tensor infos using the sketch - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); - aclWeightsInfo.set_are_values_constant(weights.IsConstant()); - - auto inputInfo = gpuCtx.create_tensor_info(aclInputInfo); - auto weightInfo = gpuCtx.create_tensor_info(aclWeightsInfo); - - // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op - arm_compute::TensorInfo aclBiasInfo; - arm_compute::TensorInfo biasSketchInfo; - arm_compute::TensorInfo* biasSketchInfoPtr = nullptr; - - if (descriptor.m_BiasEnabled) - { - if(!biases.has_value()) - { - throw InvalidArgumentException("GpuFsaConvolution2dValidate: No biases set when biases are enabled"); - } - aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); - aclBiasInfo.set_are_values_constant(biases.value().IsConstant()); - - biasSketchInfo = gpuCtx.create_tensor_info(aclBiasInfo); - biasSketchInfoPtr = &biasSketchInfo; - } - - // Set Conv2d attributes using descriptor - const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, - descriptor.m_DilationY); - const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor); - const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); - - Conv2dAttributes conv2DAttributes{}; - conv2DAttributes.dilation(aclDilationInfo); - conv2DAttributes.pad(aclPadInfo); - conv2DAttributes.stride(aclStrideInfo); - - // Validate operator, check status and update reasonIfUnsupported - arm_compute::Status aclStatus = GpuConv2d::validate_op(sketch, - &inputInfo, - &weightInfo, - biasSketchInfoPtr, - conv2DAttributes); - - if 
(createOp) - { - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported) - { - throw BackendCapabilityException("\"GpuFsa\" backend failed during operation validation when attempting " - "to fuse a GpuConv2d operator into the existing workload sketch."); - } - - arm_compute::ITensorInfo* convOutInfo = GpuConv2d::create_op(sketch, - &inputInfo, - &weightInfo, - biasSketchInfoPtr, - conv2DAttributes); - - // Temporary fix until fusing attempt is make for GpuFsa backend and Output layer workload is created. - auto outputInfo = gpuCtx.create_tensor_info(); - GpuOutput::create_op(sketch, convOutInfo, &outputInfo); - } - - return aclStatus; -} - -arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional& biases) -{ - return ValidateAndCreateOp(input, descriptor, weights, biases); -} - -void GpuFsaConvolution2dCreateOp(const TensorInfo& input, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional& biases) -{ - ValidateAndCreateOp(input, descriptor, weights, biases, true); -} - -} // namespace armnn \ No newline at end of file diff --git a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp deleted file mode 100644 index 120060e8ad..0000000000 --- a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include -#include - -#include -#include - -namespace armnn -{ - -using namespace arm_compute::experimental::dynamic_fusion; - -arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional& biases); - -void GpuFsaConvolution2dCreateOp(const TensorInfo& input, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional& biases); - -} // namespace armnn \ No newline at end of file diff --git a/src/backends/gpuFsa/layers/CMakeLists.txt b/src/backends/gpuFsa/layers/CMakeLists.txt new file mode 100644 index 0000000000..3a02ce1a77 --- /dev/null +++ b/src/backends/gpuFsa/layers/CMakeLists.txt @@ -0,0 +1,14 @@ +# +# Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved. +# SPDX-License-Identifier: MIT +# + +list(APPEND armnnGpuFsaBackendLayerValidators_sources + GpuFsaConvolution2d.cpp + GpuFsaConvolution2d.hpp + ) + +add_library(armnnGpuFsaBackendLayerValidators OBJECT ${armnnGpuFsaBackendLayerValidators_sources}) +target_include_directories(armnnGpuFsaBackendLayerValidators PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnGpuFsaBackendLayerValidators PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) +target_include_directories(armnnGpuFsaBackendLayerValidators PRIVATE ${PROJECT_SOURCE_DIR}/src/backends) diff --git a/src/backends/gpuFsa/layers/GpuFsaConvolution2d.cpp b/src/backends/gpuFsa/layers/GpuFsaConvolution2d.cpp new file mode 100644 index 0000000000..c7137d7ac8 --- /dev/null +++ b/src/backends/gpuFsa/layers/GpuFsaConvolution2d.cpp @@ -0,0 +1,180 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "GpuFsaConvolution2d.hpp" + +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases) +{ + // Create a new workload sketch, for validation purposes + auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context(); + auto workloadContext = GpuWorkloadContext(&compileCtx); + GpuWorkloadSketch sketch{ &workloadContext }; + + // Build and create tensor infos using the sketch + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + aclWeightsInfo.set_are_values_constant(weights.IsConstant()); + + auto inputInfo = workloadContext.create_tensor_info(aclInputInfo); + auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo); + + // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op + arm_compute::TensorInfo aclBiasInfo; + arm_compute::TensorInfo biasSketchInfo; + arm_compute::TensorInfo* biasSketchInfoPtr = nullptr; + + if (descriptor.m_BiasEnabled) + { + if(!biases.has_value()) + { + throw InvalidArgumentException("GpuFsaConvolution2d::ValidateOp: No biases set when biases are enabled"); + } + aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); + aclBiasInfo.set_are_values_constant(biases.value().IsConstant()); + + biasSketchInfo = workloadContext.create_tensor_info(aclBiasInfo); + biasSketchInfoPtr = &biasSketchInfo; + } + + // Set Conv2d attributes using descriptor + const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, + descriptor.m_DilationY); + const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor); + const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); + + Conv2dAttributes conv2DAttributes{}; + conv2DAttributes.dilation(aclDilationInfo); + conv2DAttributes.pad(aclPadInfo); + conv2DAttributes.stride(aclStrideInfo); + + // Validate operator, check status and update reasonIfUnsupported + arm_compute::Status aclStatus = GpuConv2d::validate_op(sketch, + &inputInfo, + &weightInfo, + biasSketchInfoPtr, + conv2DAttributes); + + return aclStatus; +} + +void GpuFsaConvolution2dCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases) +{ +/* + * Creating an Op for the GpuFds backend requires us to create and maintain quite a bit of data, which is then stored + * in a GpuFsaPreCompiledBlob for execution later. Specifically we need: + * GpuWorkloadContext, this contains the TensorInfos and is unique to the Graph being executed + * Sketch, this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together + * using a single sketch. + * The TensorInfoIds, these are the ids of the TensorInfos used when creating the sketch. They refer to the TensorInfos + * stored within the GpuWorkloadContext and are used to fetch them later when executing the sketch. 
+ */ + using namespace arm_compute::experimental::dynamic_fusion; + GpuWorkloadSketch* sketch = blob->sketch.get(); + GpuWorkloadContext* workloadContext = blob->workloadContext.get(); + std::vector inputIds = {}; + std::vector outputIds = {}; + + // Build and create tensor infos using the sketch + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + aclWeightsInfo.set_are_values_constant(weights.IsConstant()); + auto inputInfo = workloadContext->create_tensor_info(aclInputInfo); + aclWeightsInfo.set_are_values_constant(weights.IsConstant()); + inputIds.emplace_back(inputInfo.id()); + + auto weightInfo = workloadContext->create_tensor_info(aclWeightsInfo); + inputIds.emplace_back(weightInfo.id()); + + // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op + arm_compute::TensorInfo aclBiasInfo; + arm_compute::TensorInfo biasSketchInfo; + arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr; + + if (descriptor.m_BiasEnabled) + { + if(!biases.has_value()) + { + throw InvalidArgumentException("GpuFsaConvolution2d::CreateOp: No biases set when biases are enabled"); + } + aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); + aclBiasInfo.set_are_values_constant(biases.value().IsConstant()); + + biasSketchInfo = workloadContext->create_tensor_info(aclBiasInfo); + inputIds.emplace_back(biasSketchInfo.id()); + biasSketchInfoPtr = workloadContext->implementation().get_tensor_info(biasSketchInfo.id()); + } + + // Set Conv2d attributes using descriptor + const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, + descriptor.m_DilationY); + const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor); + const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); + + Conv2dAttributes conv2DAttributes{}; + conv2DAttributes.dilation(aclDilationInfo); + conv2DAttributes.pad(aclPadInfo); + conv2DAttributes.stride(aclStrideInfo); + + // Validate operator, check status and update reasonIfUnsupported + arm_compute::Status aclStatus = + GpuConv2d::validate_op(*sketch, + workloadContext->implementation().get_tensor_info(inputInfo.id()), + workloadContext->implementation().get_tensor_info(weightInfo.id()), + biasSketchInfoPtr, + conv2DAttributes); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported) + { + throw BackendCapabilityException("\"GpuFsa\" backend failed during Convolution2D operation validation"); + } + + arm_compute::ITensorInfo* convOutInfo = + GpuConv2d::create_op(*sketch, + workloadContext->implementation().get_tensor_info(inputInfo.id()), + workloadContext->implementation().get_tensor_info(weightInfo.id()), + biasSketchInfoPtr, + conv2DAttributes); + + arm_compute::TensorInfo outputDstInfo = workloadContext->create_tensor_info(); + outputIds.emplace_back(outputDstInfo.id()); + + GpuOutput::create_op(*sketch, convOutInfo, workloadContext->implementation().get_tensor_info(outputDstInfo.id())); + blob->inputIds = std::make_unique>(inputIds); + blob->outputIds = std::make_unique>(outputIds); +} + +} // namespace armnn diff --git a/src/backends/gpuFsa/layers/GpuFsaConvolution2d.hpp b/src/backends/gpuFsa/layers/GpuFsaConvolution2d.hpp new file mode 100644 index 0000000000..3346dc1028 --- /dev/null +++ 
b/src/backends/gpuFsa/layers/GpuFsaConvolution2d.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include + +#include +#include +#include + +namespace armnn +{ + +using namespace arm_compute::experimental::dynamic_fusion; + +arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases); + +void GpuFsaConvolution2dCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases); + +} // namespace armnn diff --git a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp index 1d6b99a31f..c2cdd57574 100644 --- a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp +++ b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp @@ -1,8 +1,27 @@ // -// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "backendsCommon/test/EndToEndTestImpl.hpp" -#include \ No newline at end of file +#include "backendsCommon/test/Convolution2dEndToEndTestImpl.hpp" +#include + +TEST_SUITE("GpuFsaEndToEnd") +{ + +std::vector gpuFsaDefaultBackends = {"GpuFsa"}; + +// Conv2d +TEST_CASE("GpuFsaConv2dEndtoEndTestFloat32") +{ + Convolution2dEndToEnd(gpuFsaDefaultBackends, armnn::DataLayout::NHWC); +} + +TEST_CASE("GpuFsaConv2dWithoutBiasEndtoEndTestFloat32") +{ + Convolution2dEndToEnd(gpuFsaDefaultBackends, armnn::DataLayout::NHWC, false); +} + +} diff --git a/src/backends/gpuFsa/workloads/CMakeLists.txt b/src/backends/gpuFsa/workloads/CMakeLists.txt index 4d100123ea..9edc9e9d3c 100644 --- a/src/backends/gpuFsa/workloads/CMakeLists.txt +++ b/src/backends/gpuFsa/workloads/CMakeLists.txt @@ -1,10 +1,15 @@ # -# Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # list(APPEND armnnGpuFsaBackendWorkloads_sources GpuFsaBaseWorkload.hpp + GpuFsaConstantWorkload.hpp + GpuFsaConstantWorkload.cpp + GpuFsaPreCompiledWorkload.hpp + GpuFsaPreCompiledWorkload.cpp + GpuFsaWorkloadUtils.hpp ) add_library(armnnGpuFsaBackendWorkloads OBJECT ${armnnGpuFsaBackendWorkloads_sources}) diff --git a/src/backends/gpuFsa/workloads/GpuFsaConstantWorkload.cpp b/src/backends/gpuFsa/workloads/GpuFsaConstantWorkload.cpp new file mode 100644 index 0000000000..39d3c0ddab --- /dev/null +++ b/src/backends/gpuFsa/workloads/GpuFsaConstantWorkload.cpp @@ -0,0 +1,114 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "GpuFsaConstantWorkload.hpp" +#include "GpuFsaWorkloadUtils.hpp" + +#include +#include +#include +#include + +namespace armnn +{ + +arm_compute::Status GpuFsaConstantWorkloadValidate(const TensorInfo& output) +{ + const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + std::array supportedTypes = { + arm_compute::DataType::F16, + arm_compute::DataType::F32, + arm_compute::DataType::QASYMM8, + arm_compute::DataType::QASYMM8_SIGNED, + arm_compute::DataType::QSYMM16, + arm_compute::DataType::QSYMM8, + arm_compute::DataType::QSYMM8_PER_CHANNEL, + arm_compute::DataType::S32 + }; + auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type()); + + if (it != end(supportedTypes)) + { + return arm_compute::Status{}; + } + else + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"}; + } +} + +GpuFsaConstantWorkload::GpuFsaConstantWorkload(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext&) + : GpuFsaBaseWorkload(descriptor, info) + , m_RanOnce(false) +{ +} + +void GpuFsaConstantWorkload::Execute() const +{ + // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data + // on the first inference, then reused for subsequent inferences. + // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not + // have been configured at the time. + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + ARMNN_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::CLTensor& output = static_cast(data.m_Outputs[0])->GetTensor(); + arm_compute::DataType computeDataType = static_cast(data.m_Outputs[0])->GetDataType(); + + switch (computeDataType) + { + case arm_compute::DataType::F16: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); + break; + } + case arm_compute::DataType::F32: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); + break; + } + case arm_compute::DataType::QASYMM8: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); + break; + } + case arm_compute::DataType::QASYMM8_SIGNED: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); + break; + } + case arm_compute::DataType::QSYMM16: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); + break; + } + case arm_compute::DataType::QSYMM8: + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); + break; + } + case arm_compute::DataType::S32: + { + CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); + break; + } + default: + { + ARMNN_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } +} + +} //namespace armnn diff --git a/src/backends/gpuFsa/workloads/GpuFsaConstantWorkload.hpp b/src/backends/gpuFsa/workloads/GpuFsaConstantWorkload.hpp new file mode 100644 index 0000000000..98b383b89f --- /dev/null +++ b/src/backends/gpuFsa/workloads/GpuFsaConstantWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "GpuFsaBaseWorkload.hpp" + +#include +#include + +namespace armnn +{ + arm_compute::Status GpuFsaConstantWorkloadValidate(const TensorInfo& output); + + class GpuFsaConstantWorkload : public GpuFsaBaseWorkload + { + public: + GpuFsaConstantWorkload(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); + + void Execute() const override; + + private: + mutable bool m_RanOnce; + }; + +} //namespace armnn diff --git a/src/backends/gpuFsa/workloads/GpuFsaPreCompiledWorkload.cpp b/src/backends/gpuFsa/workloads/GpuFsaPreCompiledWorkload.cpp new file mode 100644 index 0000000000..20386b5d86 --- /dev/null +++ b/src/backends/gpuFsa/workloads/GpuFsaPreCompiledWorkload.cpp @@ -0,0 +1,106 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "GpuFsaPreCompiledWorkload.hpp" +#include "GpuFsaWorkloadUtils.hpp" +#include "armnn/utility/PolymorphicDowncast.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace armnn { + +GpuFsaPreCompiledWorkload::GpuFsaPreCompiledWorkload(const PreCompiledQueueDescriptor &descriptor, + const WorkloadInfo &info) + : BaseWorkload(descriptor, info), m_workloadInfo(info) +{ + // Check that the workload is holding a pointer to a valid pre-compiled object + if (m_Data.m_PreCompiledObject == nullptr) + { + throw InvalidArgumentException( + "GpuFsaPrecompiledWorkload requires a valid pre-compiled object (GpuWorkloadSketch)."); + } +} + +void GpuFsaPreCompiledWorkload::Execute() const +{ +/* + * The Execute function of the GpuFsa Backends PreCompiled workload needs to jump through various hoops in order to + * create a valid sketch and runtime that can execute the kernel + * First we need all of the data stored within the PreCompiled blob which was used to setup the workload, namely: + * The GpuWorkloadContext, this is a context which contains the TensorInfos and is unique to the graph being run + * The Sketch, this can contain one or many ops and acts as a subgraph within the context + * The TensorInfoIds, These are the ids of the TensorInfos used during the creation of the Sketch and stored within + * the context. + * It is very important that the Tensors passed into the Runtime being used to execute this sketch are created with + * the same TensorInfos as used when creating the sketch. We do this by creating new tensors, getting the original + * TensorInfos from the GpuWorkloadContext via their ids, and then importing the buffers from our own TensorHandles + * directly into these newly created Tensors. This allows us to link the externally visible Tensors from ArmNN to the + * Tensors which are needed to execute with the Sketch. 
+ * + */ + using namespace arm_compute::experimental::dynamic_fusion; + // Get the runtime and configure it with the precompiled sketch + ClWorkloadRuntime runtime; + GpuFsaPreCompiledBlob *preCompiledBlob = static_cast(m_Data.m_PreCompiledObject); + auto workloadContext = + &(preCompiledBlob->workloadContext->implementation()); + auto sketch = preCompiledBlob->sketch.release(); + std::vector inputIds = *(preCompiledBlob->inputIds.get()); + std::vector outputIds = *(preCompiledBlob->outputIds.get()); + auto status = runtime.configure(*sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for(auto &data : runtime.get_auxiliary_tensors()) + { + arm_compute::CLTensor* tensor = std::get<0>(data); + arm_compute::TensorInfo info = std::get<1>(data); + arm_compute::experimental::dynamic_fusion::AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Create and initialize user tensors + std::vector inputsWeightsOutputs; + inputsWeightsOutputs.reserve(m_Data.m_Inputs.size() + m_Data.m_Outputs.size()); + + for (uint32_t inputSlotIdx = 0; inputSlotIdx < m_Data.m_Inputs.size(); ++inputSlotIdx) + { + arm_compute::CLTensor* input = new arm_compute::CLTensor{}; + input->allocator()->init(*(dynamic_cast( + workloadContext->get_tensor_info(inputIds[inputSlotIdx])))); + auto* inputHandle = PolymorphicDowncast(m_Data.m_Inputs[inputSlotIdx]); + input->allocator()->import_memory(inputHandle->GetTensor().cl_buffer()); + inputsWeightsOutputs.emplace_back(std::move(input)); + } + // Set the outputs + for (uint32_t outputSlotIdx = 0; outputSlotIdx < m_Data.m_Outputs.size(); ++outputSlotIdx) + { + arm_compute::CLTensor* output = new arm_compute::CLTensor{}; + output->allocator()->init(*(dynamic_cast( + workloadContext->get_tensor_info(outputIds[outputSlotIdx])))); + auto* outputHandle = PolymorphicDowncast(m_Data.m_Outputs[outputSlotIdx]); + output->allocator()->import_memory(outputHandle->GetTensor().cl_buffer()); + inputsWeightsOutputs.emplace_back(std::move(output)); + } + runtime.run(inputsWeightsOutputs); +} +} // namespace armnn \ No newline at end of file diff --git a/src/backends/gpuFsa/workloads/GpuFsaPreCompiledWorkload.hpp b/src/backends/gpuFsa/workloads/GpuFsaPreCompiledWorkload.hpp new file mode 100644 index 0000000000..d29bf37e69 --- /dev/null +++ b/src/backends/gpuFsa/workloads/GpuFsaPreCompiledWorkload.hpp @@ -0,0 +1,56 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "armnn/backends/Workload.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +namespace armnn +{ + +bool GpuFsaPreCompiledWorkloadValidate(std::string* reasonIfUnsupported); + +class GpuFsaPreCompiledWorkload : public BaseWorkload +{ +public: + GpuFsaPreCompiledWorkload(const PreCompiledQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; + +private: + bool SupportsTensorHandleReplacement() const override + { + return true; + } + + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override + { + this->m_Data.m_Inputs[slot] = tensorHandle; + } + + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override + { + this->m_Data.m_Outputs[slot] = tensorHandle; + } + + WorkloadInfo m_workloadInfo; +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/gpuFsa/workloads/GpuFsaWorkloadUtils.hpp b/src/backends/gpuFsa/workloads/GpuFsaWorkloadUtils.hpp new file mode 100644 index 0000000000..10954b07b5 --- /dev/null +++ b/src/backends/gpuFsa/workloads/GpuFsaWorkloadUtils.hpp @@ -0,0 +1,163 @@ +// +// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include + +#include +#include + +#include + +#include +#include + +#include + + +namespace armnn +{ + + inline std::string GetConvolutionMethodString(arm_compute::ConvolutionMethod& convolutionMethod) + { + switch (convolutionMethod) + { + case arm_compute::ConvolutionMethod::FFT: + return "FFT"; + case arm_compute::ConvolutionMethod::DIRECT: + return "Direct"; + case arm_compute::ConvolutionMethod::GEMM: + return "GEMM"; + case arm_compute::ConvolutionMethod::WINOGRAD: + return "Winograd"; + default: + return "Unknown"; + } + } + + template + void CopyArmComputeClTensorData(arm_compute::CLTensor& dstTensor, const T* srcData) + { + { + dstTensor.map(true); + } + + { + armcomputetensorutils::CopyArmComputeITensorData(srcData, dstTensor); + } + + dstTensor.unmap(); + } + + inline auto SetClStridedSliceData(const std::vector& m_begin, + const std::vector& m_end, + const std::vector& m_stride) + { + arm_compute::Coordinates starts; + arm_compute::Coordinates ends; + arm_compute::Coordinates strides; + + unsigned int num_dims = static_cast(m_begin.size()); + + for (unsigned int i = 0; i < num_dims; i++) { + unsigned int revertedIndex = num_dims - i - 1; + + starts.set(i, static_cast(m_begin[revertedIndex])); + ends.set(i, static_cast(m_end[revertedIndex])); + strides.set(i, static_cast(m_stride[revertedIndex])); + } + + return std::make_tuple(starts, ends, strides); + } + + inline auto SetClSliceData(const std::vector& m_begin, + const std::vector& m_size) + { + // This function must translate the size vector given to an end vector + // expected by the ACL NESlice workload + arm_compute::Coordinates starts; + arm_compute::Coordinates ends; + + unsigned int num_dims = static_cast(m_begin.size()); + + // For strided slices, we have the relationship size = (end - begin) / stride + // For slice, we assume stride to be a vector of all ones, yielding the formula + // size = (end - begin) therefore we know end = size + begin + for (unsigned int i = 0; i < num_dims; i++) + { + unsigned int revertedIndex = num_dims - i - 1; + + starts.set(i, static_cast(m_begin[revertedIndex])); + ends.set(i, static_cast(m_begin[revertedIndex] 
+ m_size[revertedIndex])); + } + + return std::make_tuple(starts, ends); + } + + inline void InitializeArmComputeClTensorData(arm_compute::CLTensor& clTensor, + const ConstTensorHandle* handle) + { + ARMNN_ASSERT(handle); + + armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor); + switch(handle->GetTensorInfo().GetDataType()) + { + case DataType::Float16: + CopyArmComputeClTensorData(clTensor, handle->GetConstTensor()); + break; + case DataType::Float32: + CopyArmComputeClTensorData(clTensor, handle->GetConstTensor()); + break; + case DataType::QAsymmU8: + CopyArmComputeClTensorData(clTensor, handle->GetConstTensor()); + break; + case DataType::QAsymmS8: + case DataType::QSymmS8: + CopyArmComputeClTensorData(clTensor, handle->GetConstTensor()); + break; + case DataType::QSymmS16: + CopyArmComputeClTensorData(clTensor, handle->GetConstTensor()); + break; + case DataType::Signed32: + CopyArmComputeClTensorData(clTensor, handle->GetConstTensor()); + break; + case DataType::BFloat16: + CopyArmComputeClTensorData(clTensor, handle->GetConstTensor()); + break; + default: + // Throw exception; assertion not called in release build. + throw Exception("Unexpected tensor type during InitializeArmComputeClTensorData()."); + } + }; + + inline RuntimeException WrapClError(const cl::Error& clError, const CheckLocation& location) + { + std::stringstream message; + message << "CL error: " << clError.what() << ". Error code: " << clError.err(); + + return RuntimeException(message.str(), location); + } + + inline void RunClFunction(arm_compute::IFunction& function, const CheckLocation& location) + { + try + { + function.run(); + } + catch (cl::Error& error) + { + throw WrapClError(error, location); + } + } + + template + DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data) + { + ITensorHandle* tensorHandle = data.m_Outputs[idx]; + return reinterpret_cast(tensorHandle->Map()); + } + +} //namespace armnn -- cgit v1.2.1
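
Note (not part of the patch): the new GpuFsaConv2dEndtoEndTestFloat32 cases above exercise this change through the shared Convolution2dEndToEnd test helper. The outline below is a rough, untested sketch of how an application could drive the same path through the public ArmNN API, with the convolution weights supplied as a constant input layer, matching the "ConstantTensorsAsInputs" capability this patch switches on. Shapes, layer names and data values are illustrative only and are not taken from the patch.

#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>
#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>

#include <vector>

int main()
{
    using namespace armnn;

    // Build a small graph: Input -> Convolution2d -> Output, with weights as a constant input.
    INetworkPtr network = INetwork::Create();

    Convolution2dDescriptor convDesc;
    convDesc.m_StrideX     = 1;
    convDesc.m_StrideY     = 1;
    convDesc.m_BiasEnabled = false;
    convDesc.m_DataLayout  = DataLayout::NHWC;

    IConnectableLayer* inputLayer  = network->AddInputLayer(0, "input");
    IConnectableLayer* convLayer   = network->AddConvolution2dLayer(convDesc, "conv2d");
    IConnectableLayer* outputLayer = network->AddOutputLayer(0, "output");

    // 1x3x3x1 weights (OHWI for NHWC), flagged constant so the backend can treat them as constant inputs.
    TensorInfo weightsInfo({1, 3, 3, 1}, DataType::Float32, 0.0f, 0, true);
    std::vector<float> weightsData(9, 1.0f);
    ConstTensor weights(weightsInfo, weightsData);
    IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "weights");

    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
    weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1));
    convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({1, 5, 5, 1}, DataType::Float32));
    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
    convLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({1, 3, 3, 1}, DataType::Float32));

    // Optimize for the GpuFsa backend: supported layers are substituted with a PreCompiled layer
    // holding a GpuFsaPreCompiledBlob, which GpuFsaPreCompiledWorkload executes at inference time.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime = IRuntime::Create(options);
    std::vector<BackendId> backends = { BackendId("GpuFsa") };
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    std::vector<float> inputData(25, 1.0f);
    std::vector<float> outputData(9, 0.0f);

    // The ConstTensor wrapper used for inputs requires the TensorInfo to be flagged constant.
    TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
    inputTensorInfo.SetConstant(true);

    InputTensors  inputTensors  { { 0, ConstTensor(inputTensorInfo, inputData.data()) } };
    OutputTensors outputTensors { { 0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) } };

    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
    return 0;
}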