diff options
Diffstat (limited to 'src/backends/gpuFsa')
18 files changed, 348 insertions, 104 deletions
diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp index 8b62aec9e6..f14687b8e0 100644 --- a/src/backends/gpuFsa/GpuFsaBackend.cpp +++ b/src/backends/gpuFsa/GpuFsaBackend.cpp @@ -24,6 +24,7 @@ #include "layers/GpuFsaDepthwiseConvolution2d.hpp" #include "layers/GpuFsaElementwiseBinaryAdd.hpp" #include "layers/GpuFsaElementwiseBinarySub.hpp" +#include "layers/GpuFsaPooling2d.hpp" namespace armnn { @@ -315,6 +316,13 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra } break; } + case (LayerType::Pooling2d): + { + auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); + auto desc = PolymorphicDowncast<const Pooling2dDescriptor*>(&base.GetParameters()); + GpuFsaPooling2dCreateOp(preCompiledBlobPtr, input, *desc); + break; + } default: // unsupported layer for GpuFsa backend continue; diff --git a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp index 2e5c7d5a53..b73b3e9088 100644 --- a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp +++ b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp @@ -14,6 +14,7 @@ #include "layers/GpuFsaDepthwiseConvolution2d.hpp" #include "layers/GpuFsaElementwiseBinaryAdd.hpp" #include "layers/GpuFsaElementwiseBinarySub.hpp" +#include "layers/GpuFsaPooling2d.hpp" #endif #include <vector> @@ -156,7 +157,21 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, { throw InvalidArgumentException("Invalid ElementwiseBinary BinaryOperation operation."); } - return false; + } + case LayerType::Pooling2d: + { + if (infos.size() != 2) + { + throw InvalidArgumentException("Invalid number of Pooling2d TensorInfos. 
" + "TensorInfos should be of format: {input, output}."); + } + + auto desc = PolymorphicDowncast<const Pooling2dDescriptor*>(&descriptor); + + FORWARD_LAYER_VALIDATE_FUNC(GpuFsaPooling2dValidate, + reasonIfUnsupported, + infos[0], + *desc); } case LayerType::Constant: case LayerType::Input: diff --git a/src/backends/gpuFsa/backend.cmake b/src/backends/gpuFsa/backend.cmake index 16473336e0..67e9be7fab 100644 --- a/src/backends/gpuFsa/backend.cmake +++ b/src/backends/gpuFsa/backend.cmake @@ -1,5 +1,5 @@ # -# Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # @@ -7,7 +7,7 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/gpuFsa) list(APPEND armnnLibraries armnnGpuFsaBackend) if(ARMCOMPUTEGPUFSA) - list(APPEND armnnLibraries armnnGpuFsaBackendLayerValidators) + list(APPEND armnnLibraries armnnGpuFsaBackendLayers) list(APPEND armnnLibraries armnnGpuFsaBackendWorkloads) list(APPEND armnnUnitTestLibraries armnnGpuFsaBackendUnitTests) else() diff --git a/src/backends/gpuFsa/layers/CMakeLists.txt b/src/backends/gpuFsa/layers/CMakeLists.txt index 182a32c121..5e0d0e7486 100644 --- a/src/backends/gpuFsa/layers/CMakeLists.txt +++ b/src/backends/gpuFsa/layers/CMakeLists.txt @@ -3,7 +3,7 @@ # SPDX-License-Identifier: MIT # -list(APPEND armnnGpuFsaBackendLayerValidators_sources +list(APPEND armnnGpuFsaBackendLayers_sources GpuFsaConvolution2d.cpp GpuFsaConvolution2d.hpp GpuFsaDepthwiseConvolution2d.cpp @@ -12,9 +12,13 @@ list(APPEND armnnGpuFsaBackendLayerValidators_sources GpuFsaElementwiseBinaryAdd.hpp GpuFsaElementwiseBinarySub.cpp GpuFsaElementwiseBinarySub.hpp + GpuFsaPooling2d.cpp + GpuFsaPooling2d.hpp + UtilsGpuFsa.cpp + UtilsGpuFsa.hpp ) -add_library(armnnGpuFsaBackendLayerValidators OBJECT ${armnnGpuFsaBackendLayerValidators_sources}) -target_include_directories(armnnGpuFsaBackendLayerValidators PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) 
-target_include_directories(armnnGpuFsaBackendLayerValidators PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) -target_include_directories(armnnGpuFsaBackendLayerValidators PRIVATE ${PROJECT_SOURCE_DIR}/src/backends) +add_library(armnnGpuFsaBackendLayers OBJECT ${armnnGpuFsaBackendLayers_sources}) +target_include_directories(armnnGpuFsaBackendLayers PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnGpuFsaBackendLayers PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) +target_include_directories(armnnGpuFsaBackendLayers PRIVATE ${PROJECT_SOURCE_DIR}/src/backends) diff --git a/src/backends/gpuFsa/layers/GpuFsaConvolution2d.cpp b/src/backends/gpuFsa/layers/GpuFsaConvolution2d.cpp index 90b0ddc78c..e9409634ed 100644 --- a/src/backends/gpuFsa/layers/GpuFsaConvolution2d.cpp +++ b/src/backends/gpuFsa/layers/GpuFsaConvolution2d.cpp @@ -4,29 +4,23 @@ // #include "GpuFsaConvolution2d.hpp" - -//#include <armnn/Types.hpp> +#include "UtilsGpuFsa.hpp" #include <aclCommon/ArmComputeTensorUtils.hpp> -//#include <arm_compute/core/ITensorInfo.h> -//#include <arm_compute/core/TensorInfo.h> -//#include <arm_compute/core/TensorShape.h> -//#include <arm_compute/core/CL/CLKernelLibrary.h> -//#include <arm_compute/core/CL/CLCompileContext.h> - -//#include <arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h> #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h> +#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h> #include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h> #include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h> #include <vector> +using namespace arm_compute::experimental::dynamic_fusion; +using namespace armnn::armcomputetensorutils; + namespace armnn { -using namespace armcomputetensorutils; - arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input, const Convolution2dDescriptor& descriptor, const TensorInfo& weights, @@ -61,23 +55,14 @@ arm_compute::Status 
GpuFsaConvolution2dValidate(const TensorInfo& input, biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo); } - // Set Conv2d attributes using descriptor - const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, - descriptor.m_DilationY); - const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor); - const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); - - Conv2dAttributes conv2DAttributes{}; - conv2DAttributes.dilation(aclDilationInfo); - conv2DAttributes.pad(aclPadInfo); - conv2DAttributes.stride(aclStrideInfo); + Conv2dAttributes conv2dAttributes = CreateConv2dAttributes(descriptor); // Validate operator, check status and update reasonIfUnsupported arm_compute::Status aclStatus = GpuConv2d::validate_op(sketch, inputInfo, weightInfo, biasSketchInfoPtr, - conv2DAttributes); + conv2dAttributes); return aclStatus; } @@ -99,7 +84,6 @@ void GpuFsaConvolution2dCreateOp(GpuFsaPreCompiledBlob* blob, * as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime * doesn't know which Tensors to use. 
*/ - using namespace arm_compute::experimental::dynamic_fusion; GpuWorkloadSketch* sketch = blob->sketch.get(); GpuWorkloadContext* workloadContext = blob->workloadContext.get(); std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {}; @@ -130,23 +114,14 @@ void GpuFsaConvolution2dCreateOp(GpuFsaPreCompiledBlob* blob, biasSketchInfoPtr = inputTensorInfos[2]; } - // Set Conv2d attributes using descriptor - const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, - descriptor.m_DilationY); - const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor); - const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); - - Conv2dAttributes conv2DAttributes{}; - conv2DAttributes.dilation(aclDilationInfo); - conv2DAttributes.pad(aclPadInfo); - conv2DAttributes.stride(aclStrideInfo); + Conv2dAttributes conv2dAttributes = CreateConv2dAttributes(descriptor); // Validate operator, check status and update reasonIfUnsupported arm_compute::Status aclStatus = GpuConv2d::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1], biasSketchInfoPtr, - conv2DAttributes); + conv2dAttributes); const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); if (!supported) @@ -159,7 +134,7 @@ void GpuFsaConvolution2dCreateOp(GpuFsaPreCompiledBlob* blob, inputTensorInfos[0], inputTensorInfos[1], biasSketchInfoPtr, - conv2DAttributes); + conv2dAttributes); // Create the Output outputTensorInfos.emplace_back(workloadContext->create_tensor_info()); diff --git a/src/backends/gpuFsa/layers/GpuFsaConvolution2d.hpp b/src/backends/gpuFsa/layers/GpuFsaConvolution2d.hpp index 424ba41f56..55067f0b5d 100644 --- a/src/backends/gpuFsa/layers/GpuFsaConvolution2d.hpp +++ b/src/backends/gpuFsa/layers/GpuFsaConvolution2d.hpp @@ -10,9 +10,6 @@ namespace armnn { - -using namespace arm_compute::experimental::dynamic_fusion; - arm_compute::Status GpuFsaConvolution2dValidate(const 
TensorInfo& input, const Convolution2dDescriptor& descriptor, const TensorInfo& weights, diff --git a/src/backends/gpuFsa/layers/GpuFsaDepthwiseConvolution2d.cpp b/src/backends/gpuFsa/layers/GpuFsaDepthwiseConvolution2d.cpp index a3c3dd9e44..21077afbfb 100644 --- a/src/backends/gpuFsa/layers/GpuFsaDepthwiseConvolution2d.cpp +++ b/src/backends/gpuFsa/layers/GpuFsaDepthwiseConvolution2d.cpp @@ -4,22 +4,25 @@ // #include "GpuFsaDepthwiseConvolution2d.hpp" +#include "UtilsGpuFsa.hpp" + #include <backendsCommon/WorkloadUtils.hpp> #include <aclCommon/ArmComputeTensorUtils.hpp> -#include <arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h> #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h> +#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h> #include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h> #include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h> #include <vector> +using namespace arm_compute::experimental::dynamic_fusion; +using namespace armnn::armcomputetensorutils; + namespace armnn { -using namespace armcomputetensorutils; - arm_compute::Status GpuFsaDepthwiseConvolution2dValidate(const TensorInfo& input, const DepthwiseConvolution2dDescriptor& descriptor, const TensorInfo& weights, @@ -71,17 +74,7 @@ arm_compute::Status GpuFsaDepthwiseConvolution2dValidate(const TensorInfo& input biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo); } - // Set DepthwiseConv2d attributes using descriptor - const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, - descriptor.m_DilationY); - const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor); - const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); - - DepthwiseConv2dAttributes depthwiseConv2dAttributes{}; - depthwiseConv2dAttributes.pad(aclPadInfo); - depthwiseConv2dAttributes.stride(aclStrideInfo); - 
depthwiseConv2dAttributes.dilation(aclDilationInfo); - depthwiseConv2dAttributes.depth_multiplier(aclDepthMultiplier); + DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier); // Validate operator, check status and update reasonIfUnsupported arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(sketch, @@ -110,7 +103,6 @@ void GpuFsaDepthwiseConvolution2dCreateOp(GpuFsaPreCompiledBlob* blob, * as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime * doesn't know which Tensors to use. */ - using namespace arm_compute::experimental::dynamic_fusion; GpuWorkloadSketch* sketch = blob->sketch.get(); GpuWorkloadContext* workloadContext = blob->workloadContext.get(); std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {}; @@ -157,17 +149,7 @@ void GpuFsaDepthwiseConvolution2dCreateOp(GpuFsaPreCompiledBlob* blob, biasSketchInfoPtr = inputTensorInfos[2]; } - // Set DepthwiseConv2d attributes using descriptor - const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, - descriptor.m_DilationY); - const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor); - const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); - - DepthwiseConv2dAttributes depthwiseConv2dAttributes{}; - depthwiseConv2dAttributes.pad(aclPadInfo); - depthwiseConv2dAttributes.stride(aclStrideInfo); - depthwiseConv2dAttributes.dilation(aclDilationInfo); - depthwiseConv2dAttributes.depth_multiplier(aclDepthMultiplier); + DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier); // Validate operator, check status and update reasonIfUnsupported arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(*sketch, diff --git a/src/backends/gpuFsa/layers/GpuFsaDepthwiseConvolution2d.hpp 
b/src/backends/gpuFsa/layers/GpuFsaDepthwiseConvolution2d.hpp index d3e562d5a4..924d1d3999 100644 --- a/src/backends/gpuFsa/layers/GpuFsaDepthwiseConvolution2d.hpp +++ b/src/backends/gpuFsa/layers/GpuFsaDepthwiseConvolution2d.hpp @@ -10,9 +10,6 @@ namespace armnn { - -using namespace arm_compute::experimental::dynamic_fusion; - arm_compute::Status GpuFsaDepthwiseConvolution2dValidate(const TensorInfo& input, const DepthwiseConvolution2dDescriptor& descriptor, const TensorInfo& weights, diff --git a/src/backends/gpuFsa/layers/GpuFsaElementwiseBinaryAdd.cpp b/src/backends/gpuFsa/layers/GpuFsaElementwiseBinaryAdd.cpp index fa016a6815..d6404dd67e 100644 --- a/src/backends/gpuFsa/layers/GpuFsaElementwiseBinaryAdd.cpp +++ b/src/backends/gpuFsa/layers/GpuFsaElementwiseBinaryAdd.cpp @@ -7,12 +7,13 @@ #include <aclCommon/ArmComputeTensorUtils.hpp> -#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h> -#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h> #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h> #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h> +#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h> +#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h> using namespace arm_compute::experimental::dynamic_fusion; +using namespace armnn::armcomputetensorutils; namespace armnn { @@ -20,8 +21,6 @@ namespace armnn arm_compute::Status GpuFsaElementwiseBinaryAddValidate(const TensorInfo& input0, const TensorInfo& input1) { - using namespace armcomputetensorutils; - // Create a new workload sketch, for validation purposes auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context(); auto workloadContext = GpuWorkloadContext(&compileCtx); @@ -43,8 +42,6 @@ void GpuFsaElementwiseBinaryAddCreateOp(GpuFsaPreCompiledBlob* blob, const TensorInfo& input0, const TensorInfo& input1) { - using namespace armcomputetensorutils; - GpuWorkloadSketch* sketch = 
blob->sketch.get(); GpuWorkloadContext* workloadContext = blob->workloadContext.get(); std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {}; diff --git a/src/backends/gpuFsa/layers/GpuFsaElementwiseBinaryAdd.hpp b/src/backends/gpuFsa/layers/GpuFsaElementwiseBinaryAdd.hpp index 73f1fcbd58..1392d01257 100644 --- a/src/backends/gpuFsa/layers/GpuFsaElementwiseBinaryAdd.hpp +++ b/src/backends/gpuFsa/layers/GpuFsaElementwiseBinaryAdd.hpp @@ -10,14 +10,11 @@ namespace armnn { +arm_compute::Status GpuFsaElementwiseBinaryAddValidate(const TensorInfo& input0, + const TensorInfo& input1); - using namespace arm_compute::experimental::dynamic_fusion; - - arm_compute::Status GpuFsaElementwiseBinaryAddValidate(const TensorInfo& input0, - const TensorInfo& input1); - - void GpuFsaElementwiseBinaryAddCreateOp(GpuFsaPreCompiledBlob* blob, - const TensorInfo& input0, - const TensorInfo& input1); +void GpuFsaElementwiseBinaryAddCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input0, + const TensorInfo& input1); } // namespace armnn
\ No newline at end of file diff --git a/src/backends/gpuFsa/layers/GpuFsaElementwiseBinarySub.cpp b/src/backends/gpuFsa/layers/GpuFsaElementwiseBinarySub.cpp index 4e7eb77190..5e0f478686 100644 --- a/src/backends/gpuFsa/layers/GpuFsaElementwiseBinarySub.cpp +++ b/src/backends/gpuFsa/layers/GpuFsaElementwiseBinarySub.cpp @@ -7,12 +7,13 @@ #include <aclCommon/ArmComputeTensorUtils.hpp> -#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h> -#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h> #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h> #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h> +#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h> +#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h> using namespace arm_compute::experimental::dynamic_fusion; +using namespace armnn::armcomputetensorutils; namespace armnn { @@ -20,8 +21,6 @@ namespace armnn arm_compute::Status GpuFsaElementwiseBinarySubValidate(const TensorInfo& input0, const TensorInfo& input1) { - using namespace armcomputetensorutils; - // Create a new workload sketch, for validation purposes auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context(); auto workloadContext = GpuWorkloadContext(&compileCtx); @@ -43,8 +42,6 @@ void GpuFsaElementwiseBinarySubCreateOp(GpuFsaPreCompiledBlob* blob, const TensorInfo& input0, const TensorInfo& input1) { - using namespace armcomputetensorutils; - GpuWorkloadSketch* sketch = blob->sketch.get(); GpuWorkloadContext* workloadContext = blob->workloadContext.get(); std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {}; diff --git a/src/backends/gpuFsa/layers/GpuFsaElementwiseBinarySub.hpp b/src/backends/gpuFsa/layers/GpuFsaElementwiseBinarySub.hpp index 59d8189f1f..4d58f313b6 100644 --- a/src/backends/gpuFsa/layers/GpuFsaElementwiseBinarySub.hpp +++ b/src/backends/gpuFsa/layers/GpuFsaElementwiseBinarySub.hpp @@ -2,7 +2,6 @@ // Copyright © 
2024 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // - #pragma once #include <armnn/Descriptors.hpp> @@ -11,13 +10,11 @@ namespace armnn { +arm_compute::Status GpuFsaElementwiseBinarySubValidate(const TensorInfo& input0, + const TensorInfo& input1); - using namespace arm_compute::experimental::dynamic_fusion; - - arm_compute::Status GpuFsaElementwiseBinarySubValidate(const TensorInfo& input0, - const TensorInfo& input1); +void GpuFsaElementwiseBinarySubCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input0, + const TensorInfo& input1); - void GpuFsaElementwiseBinarySubCreateOp(GpuFsaPreCompiledBlob* blob, - const TensorInfo& input0, - const TensorInfo& input1); -}
\ No newline at end of file +} // namespace armnn
\ No newline at end of file diff --git a/src/backends/gpuFsa/layers/GpuFsaPooling2d.cpp b/src/backends/gpuFsa/layers/GpuFsaPooling2d.cpp new file mode 100644 index 0000000000..4575d21421 --- /dev/null +++ b/src/backends/gpuFsa/layers/GpuFsaPooling2d.cpp @@ -0,0 +1,83 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "GpuFsaPooling2d.hpp" +#include "UtilsGpuFsa.hpp" + +#include <aclCommon/ArmComputeTensorUtils.hpp> + +#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h> +#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h> +#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h> +#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h> + +using namespace arm_compute::experimental::dynamic_fusion; +using namespace armnn::armcomputetensorutils; + +namespace armnn +{ + +arm_compute::Status GpuFsaPooling2dValidate(const TensorInfo& input, + const Pooling2dDescriptor& descriptor) +{ + // Create a new workload sketch, for validation purposes + auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context(); + auto workloadContext = GpuWorkloadContext(&compileCtx); + GpuWorkloadSketch sketch{ &workloadContext }; + + arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + aclInputInfo.set_are_values_constant(input.IsConstant()); + arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo); + + Pool2dAttributes pool2dAttributes = CreatePool2dAttributes(descriptor); + GpuPool2dSettings pool2dSettings{}; + + return GpuPool2d::validate_op(sketch, inputInfo, pool2dAttributes, pool2dSettings); +} + +void GpuFsaPooling2dCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input, + const Pooling2dDescriptor& descriptor) +{ + GpuWorkloadSketch* sketch = blob->sketch.get(); + GpuWorkloadContext* workloadContext = blob->workloadContext.get(); + 
std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {}; + std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {}; + + arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + aclInputInfo.set_are_values_constant(input.IsConstant()); + + inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo)); + + Pool2dAttributes pool2dAttributes = CreatePool2dAttributes(descriptor); + GpuPool2dSettings pool2dSettings{}; + + // Validate operator, check status and update reasonIfUnsupported + arm_compute::Status aclStatus = GpuPool2d::validate_op(*sketch, + inputTensorInfos[0], + pool2dAttributes, + pool2dSettings); + + const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK; + if (!supported) + { + throw BackendCapabilityException("\"GpuFsa\" backend failed during pooling 2d validation"); + } + + arm_compute::ITensorInfo* addOutputInfo = GpuPool2d::create_op(*sketch, + inputTensorInfos[0], + pool2dAttributes, + pool2dSettings); + + // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created. + outputTensorInfos.emplace_back(workloadContext->create_tensor_info()); + GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]); + + // Store the TensorInfos within the blob as unique_ptrs to be used later + blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos); + blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos); +} + +} // namespace armnn
\ No newline at end of file diff --git a/src/backends/gpuFsa/layers/GpuFsaPooling2d.hpp b/src/backends/gpuFsa/layers/GpuFsaPooling2d.hpp new file mode 100644 index 0000000000..25f6e72cd0 --- /dev/null +++ b/src/backends/gpuFsa/layers/GpuFsaPooling2d.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include <armnn/Descriptors.hpp> + +#include <gpuFsa/GpuFsaBackend.hpp> + +namespace armnn +{ +arm_compute::Status GpuFsaPooling2dValidate(const TensorInfo& input, + const Pooling2dDescriptor& descriptor); + +void GpuFsaPooling2dCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input, + const Pooling2dDescriptor& descriptor); + +} // namespace armnn
\ No newline at end of file diff --git a/src/backends/gpuFsa/layers/UtilsGpuFsa.cpp b/src/backends/gpuFsa/layers/UtilsGpuFsa.cpp new file mode 100644 index 0000000000..a1d96f0ec1 --- /dev/null +++ b/src/backends/gpuFsa/layers/UtilsGpuFsa.cpp @@ -0,0 +1,61 @@ +// +// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "UtilsGpuFsa.hpp" +#include "aclCommon/ArmComputeTensorUtils.hpp" +#include "aclCommon/ArmComputeUtils.hpp" + +using namespace armnn; +using namespace armnn::armcomputetensorutils; +using namespace arm_compute::experimental::dynamic_fusion; + +Conv2dAttributes CreateConv2dAttributes(const Convolution2dDescriptor& descriptor) +{ + const arm_compute::Padding2D padInfo = BuildArmComputePaddingInfo(descriptor); + const arm_compute::Size2D strideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); + const arm_compute::Size2D dilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, descriptor.m_DilationY); + + arm_compute::experimental::dynamic_fusion::Conv2dAttributes conv2dAttributes{}; + conv2dAttributes.pad(padInfo); + conv2dAttributes.stride(strideInfo); + conv2dAttributes.dilation(dilationInfo); + + return conv2dAttributes; +} + +arm_compute::experimental::dynamic_fusion::DepthwiseConv2dAttributes +CreateDWConv2dAttributes(const DepthwiseConvolution2dDescriptor& descriptor, const unsigned int aclDepthMultiplier) +{ + const arm_compute::Padding2D padInfo = BuildArmComputePaddingInfo(descriptor); + const arm_compute::Size2D strideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); + const arm_compute::Size2D dilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, descriptor.m_DilationY); + + arm_compute::experimental::dynamic_fusion::DepthwiseConv2dAttributes depthwiseConv2dAttributes{}; + depthwiseConv2dAttributes.pad(padInfo); + depthwiseConv2dAttributes.stride(strideInfo); + depthwiseConv2dAttributes.dilation(dilationInfo); + 
depthwiseConv2dAttributes.depth_multiplier(aclDepthMultiplier); + + return depthwiseConv2dAttributes; +} + +arm_compute::experimental::dynamic_fusion::Pool2dAttributes +CreatePool2dAttributes(const Pooling2dDescriptor& descriptor) +{ + const arm_compute::PoolingType poolType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType); + const arm_compute::Padding2D padding = BuildArmComputePaddingInfo(descriptor); + const arm_compute::Size2D poolSize = BuildArmComputeSize2D(descriptor.m_PoolWidth, descriptor.m_PoolHeight); + const arm_compute::Size2D strides = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY); + const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude); + + arm_compute::experimental::dynamic_fusion::Pool2dAttributes pool2dAttributes{}; + pool2dAttributes.pool_type(poolType); + pool2dAttributes.pad(padding); + pool2dAttributes.pool_size(poolSize); + pool2dAttributes.stride(strides); + pool2dAttributes.exclude_padding(excludePadding); + + return pool2dAttributes; +}
\ No newline at end of file diff --git a/src/backends/gpuFsa/layers/UtilsGpuFsa.hpp b/src/backends/gpuFsa/layers/UtilsGpuFsa.hpp new file mode 100644 index 0000000000..6c1d97ad87 --- /dev/null +++ b/src/backends/gpuFsa/layers/UtilsGpuFsa.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "armnn/Descriptors.hpp" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" + +/// Utility function used to setup an arm_compute::Conv2dAttributes object from given descriptor +/// @param[in] armnn::Convolution2dDescriptor +/// @return arm_compute::experimental::dynamic_fusion::Conv2dAttributes +arm_compute::experimental::dynamic_fusion::Conv2dAttributes +CreateConv2dAttributes(const armnn::Convolution2dDescriptor& descriptor); + +/// Utility function used to setup an arm_compute::DepthwiseConv2dAttributes object from given descriptor +/// @param[in] armnn::DepthwiseConvolution2dDescriptor +/// @return arm_compute::experimental::dynamic_fusion::DepthwiseConv2dAttributes +arm_compute::experimental::dynamic_fusion::DepthwiseConv2dAttributes +CreateDWConv2dAttributes(const armnn::DepthwiseConvolution2dDescriptor& descriptor, + const unsigned int aclDepthMultiplier); + +/// Utility function used to setup an arm_compute::Pool2dAttributes object from given descriptor +/// @param[in] armnn::Pooling2dDescriptor +/// @return arm_compute::experimental::dynamic_fusion::Pool2dAttributes +arm_compute::experimental::dynamic_fusion::Pool2dAttributes +CreatePool2dAttributes(const armnn::Pooling2dDescriptor& descriptor); diff --git a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp index 26c7cb8d63..93a4a81f9d 100644 --- 
a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp +++ b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp @@ -9,6 +9,8 @@ #include "backendsCommon/test/DepthwiseConvolution2dEndToEndTests.hpp" #include "backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp" +#include "backendsCommon/test/Pooling2dEndToEndTestImpl.hpp" + #include <doctest/doctest.h> @@ -56,4 +58,56 @@ TEST_CASE("GpuFsaElementwiseBinarySubTestFloat16") ElementwiseBinarySimple3DEndToEnd<armnn::DataType::Float16>(gpuFsaDefaultBackends, BinaryOperation::Sub); } +// Pooling 2D +// Average Pool 2D +TEST_CASE("GpuFsaAvgPool2DEndtoEndTestFloat32") +{ + AvgPool2dEndToEnd<DataType::Float32>(gpuFsaDefaultBackends); +} + +TEST_CASE("GpuFsaAvgPool2DEndtoEndTestFloat16") +{ + + AvgPool2dEndToEndFloat16<DataType::Float16>(gpuFsaDefaultBackends); +} + +TEST_CASE("UNSUPPORTED_GpuFsaAvgPool2DIgnoreValueEndtoEndTestFloat32") +{ + // Exclude padding must be set to true in Attributes! to be supported by GPU + try + { + AvgPool2dEndToEnd<DataType::Float32>(gpuFsaDefaultBackends, PaddingMethod::IgnoreValue); + FAIL("An exception should have been thrown"); + } + catch (const armnn::InvalidArgumentException& e) + { + CHECK(strcmp(e.what(), "Failed to assign a backend to each layer") == 0); + } +} + +// Max Pool 2D +TEST_CASE("GpuFsaMaxPool2DEndtoEndTestFloat32") +{ + MaxPool2dEndToEnd<DataType::Float32>(gpuFsaDefaultBackends); +} + +TEST_CASE("GpuFsaMaxPool2DEndtoEndTestFloat16") +{ + MaxPool2dEndToEndFloat16<DataType::Float16>(gpuFsaDefaultBackends); +} + +TEST_CASE("UNSUPPORTED_GpuFsaMaxPool2DIgnoreValueEndtoEndTestFloat32") +{ + // Exclude padding must be set to true in Attributes! 
to be supported by GPU + try + { + MaxPool2dEndToEnd<DataType::Float32>(gpuFsaDefaultBackends, PaddingMethod::IgnoreValue); + FAIL("An exception should have been thrown"); + } + catch (const armnn::InvalidArgumentException& e) + { + CHECK(strcmp(e.what(), "Failed to assign a backend to each layer") == 0); + } +} + } diff --git a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp index 9d4b3b9367..fee0d07820 100644 --- a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp +++ b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp @@ -101,4 +101,34 @@ TEST_CASE("IsLayerSupportedGpuFsaElementWiseBinarySub") CHECK(supported); } +TEST_CASE("IsLayerSupportedGpuFsaPooling2d") +{ + TensorInfo inputInfo({ 1, 3, 4, 1 }, DataType::Float32); + TensorInfo outputInfo({ 1, 2, 2, 1 }, DataType::Float32); + + Pooling2dDescriptor desc{}; + desc.m_PoolType = PoolingAlgorithm::Max; + desc.m_PadLeft = 0; + desc.m_PadRight = 0; + desc.m_PadTop = 0; + desc.m_PadBottom = 0; + desc.m_PoolWidth = 2; + desc.m_PoolHeight = 2; + desc.m_StrideX = 1; + desc.m_StrideY = 1; + desc.m_OutputShapeRounding = OutputShapeRounding::Floor; + desc.m_PaddingMethod = PaddingMethod::Exclude; + desc.m_DataLayout = DataLayout::NHWC; + + GpuFsaLayerSupport supportChecker; + std::string reasonIfNotSupported; + auto supported = supportChecker.IsLayerSupported(LayerType::Pooling2d, + {inputInfo, outputInfo}, + desc, + EmptyOptional(), + EmptyOptional(), + reasonIfNotSupported); + CHECK(supported); +} + }
\ No newline at end of file |