From ddbda6a1ed13f7bef7e0dce07a37e91b062ce98a Mon Sep 17 00:00:00 2001 From: Teresa Charlin Date: Wed, 7 Feb 2024 22:58:29 +0000 Subject: IVGCVSW-8275 GpuFsa Op: Add Activation functions available * Currently Sigmoid and TanH Functions are implemented. Signed-off-by: Teresa Charlin Change-Id: If9483be9201dfe47b86acc41ec7932725ac2e39e --- src/backends/gpuFsa/GpuFsaBackend.cpp | 8 ++ src/backends/gpuFsa/GpuFsaLayerSupport.cpp | 19 +++- src/backends/gpuFsa/layers/CMakeLists.txt | 2 + src/backends/gpuFsa/layers/GpuFsaActivation.cpp | 126 +++++++++++++++++++++ src/backends/gpuFsa/layers/GpuFsaActivation.hpp | 20 ++++ src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp | 13 +++ .../gpuFsa/test/GpuFsaLayerSupportTests.cpp | 19 +++- .../gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp | 54 ++++++++- 8 files changed, 251 insertions(+), 10 deletions(-) create mode 100644 src/backends/gpuFsa/layers/GpuFsaActivation.cpp create mode 100644 src/backends/gpuFsa/layers/GpuFsaActivation.hpp diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp index ec82f3ddf1..29eb1adb6c 100644 --- a/src/backends/gpuFsa/GpuFsaBackend.cpp +++ b/src/backends/gpuFsa/GpuFsaBackend.cpp @@ -20,6 +20,7 @@ #include #include +#include "layers/GpuFsaActivation.hpp" #include "layers/GpuFsaBatchMatMul.hpp" #include "layers/GpuFsaCast.hpp" #include "layers/GpuFsaConvolution2d.hpp" @@ -250,6 +251,13 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra // Configure and setup the sketch for each supported op. Their data will be wrapped into a PreCompiled layer switch (base.GetType()) { + case (LayerType::Activation): + { + auto desc = PolymorphicDowncast(&base.GetParameters()); + auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); + GpuFsaActivationCreateOp(preCompiledBlobPtr, input, *desc); + break; + } case (LayerType::Cast): { auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); diff --git a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp index 98fb4300b8..d75f18ccdb 100644 --- a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp +++ b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp @@ -10,6 +10,7 @@ #include #if defined(ARMCOMPUTEGPUFSA_ENABLED) +#include "layers/GpuFsaActivation.hpp" #include "layers/GpuFsaBatchMatMul.hpp" #include "layers/GpuFsaCast.hpp" #include "layers/GpuFsaConvolution2d.hpp" @@ -78,6 +79,20 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, switch (type) { + case LayerType::Activation: + { + if (infos.size() != 2) + { + throw InvalidArgumentException("Invalid number of Activation TensorInfos. " + "TensorInfos should be of format: {input, output}."); + } + + auto desc = PolymorphicDowncast(&descriptor); + FORWARD_LAYER_VALIDATE_FUNC(GpuFsaActivationValidate, + reasonIfUnsupported, + infos[0], + *desc); + } case LayerType::BatchMatMul: { if (infos.size() != 3) @@ -87,7 +102,6 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, } auto desc = PolymorphicDowncast(&descriptor); - FORWARD_LAYER_VALIDATE_FUNC(GpuFsaBatchMatMulValidate, reasonIfUnsupported, infos[0], @@ -101,6 +115,7 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, throw InvalidArgumentException("Invalid number of cast TensorInfos. " "TensorInfos should be of format: {input, output}."); } + FORWARD_LAYER_VALIDATE_FUNC(GpuFsaCastValidate, reasonIfUnsupported, infos[0], @@ -186,7 +201,6 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, } auto desc = PolymorphicDowncast(&descriptor); - FORWARD_LAYER_VALIDATE_FUNC(GpuFsaPooling2dValidate, reasonIfUnsupported, infos[0], @@ -201,7 +215,6 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, } auto desc = PolymorphicDowncast(&descriptor); - FORWARD_LAYER_VALIDATE_FUNC(GpuFsaResizeValidate, reasonIfUnsupported, infos[0], diff --git a/src/backends/gpuFsa/layers/CMakeLists.txt b/src/backends/gpuFsa/layers/CMakeLists.txt index c7b83ae438..38d551140b 100644 --- a/src/backends/gpuFsa/layers/CMakeLists.txt +++ b/src/backends/gpuFsa/layers/CMakeLists.txt @@ -4,6 +4,8 @@ # list(APPEND armnnGpuFsaBackendLayers_sources + GpuFsaActivation.cpp + GpuFsaActivation.hpp GpuFsaBatchMatMul.cpp GpuFsaBatchMatMul.hpp GpuFsaCast.cpp diff --git a/src/backends/gpuFsa/layers/GpuFsaActivation.cpp b/src/backends/gpuFsa/layers/GpuFsaActivation.cpp new file mode 100644 index 0000000000..4b0773fb48 --- /dev/null +++ b/src/backends/gpuFsa/layers/GpuFsaActivation.cpp @@ -0,0 +1,126 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "GpuFsaActivation.hpp" + +#include + +#include +#include +#include +#include +#include + +using namespace arm_compute::experimental::dynamic_fusion; +using namespace armnn::armcomputetensorutils; + +namespace armnn +{ + +arm_compute::Status GpuFsaActivationValidate(const TensorInfo& input, + const ActivationDescriptor& descriptor) +{ + // Create a new workload sketch, for validation purposes + auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context(); + auto workloadContext = GpuWorkloadContext(&compileCtx); + GpuWorkloadSketch sketch{ &workloadContext }; + + arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions()); + aclInputInfo.set_are_values_constant(input.IsConstant()); + + arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo); + + switch (descriptor.m_Function) + { + case ActivationFunction::TanH: + { + if ( descriptor.m_A != 1 || descriptor.m_B != 1) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, + "Activation function TanH only works with a=1 and b=1"); + } + return GpuTanh::validate_op(sketch, inputInfo); + } + case ActivationFunction::Sigmoid: + { + return GpuSigmoid::validate_op(sketch, inputInfo); + } + default: + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, + std::string("Activation function currently not supported in GpuFsa: ") + + GetActivationFunctionAsCString(descriptor.m_Function)); + } + +} + +void GpuFsaActivationCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input, + const ActivationDescriptor& descriptor) +{ + GpuWorkloadSketch* sketch = blob->sketch.get(); + GpuWorkloadContext* workloadContext = blob->workloadContext.get(); + std::vector inputTensorInfos = {}; + std::vector outputTensorInfos = {}; + + arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input, input.GetNumDimensions()); + + aclInput0Info.set_are_values_constant(input.IsConstant()); + + inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info)); + + // Validate operator, check status and update reasonIfUnsupported + arm_compute::Status aclStatus{}; + switch (descriptor.m_Function) + { + case ActivationFunction::TanH: + { + aclStatus = GpuTanh::validate_op(*sketch, inputTensorInfos[0]); + break; + } + case ActivationFunction::Sigmoid: + { + aclStatus = GpuSigmoid::validate_op(*sketch, inputTensorInfos[0]); + break; + } + default: + throw InvalidArgumentException(std::string("Activation function currently not supported in GpuFsa: ") + + GetActivationFunctionAsCString(descriptor.m_Function)); + + } + const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK; + if (!supported) + { + throw BackendCapabilityException("\"GpuFsa\" backend failed during Activation layer validation"); + } + + arm_compute::ITensorInfo* activationOutputInfo{}; + switch (descriptor.m_Function) + { + case ActivationFunction::TanH: + { + activationOutputInfo = GpuTanh::create_op(*sketch, inputTensorInfos[0]); + break; + } + case ActivationFunction::Sigmoid: + { + activationOutputInfo = GpuSigmoid::create_op(*sketch, inputTensorInfos[0]); + break; + } + default: + throw InvalidArgumentException(std::string("Activation function currently not supported in GpuFsa: ") + + GetActivationFunctionAsCString(descriptor.m_Function)); + + } + + // Temporary fix until fusing attempt is make for GpuFsa backend and Output layer workload is created. + outputTensorInfos.emplace_back(workloadContext->create_tensor_info()); + GpuOutput::create_op(*sketch, activationOutputInfo, outputTensorInfos[0]); + + // Store the TensorInfos within the blob as unique_ptrs to be used later + blob->inputTensorInfos = std::make_unique>(inputTensorInfos); + blob->outputTensorInfos = std::make_unique>(outputTensorInfos); +} + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/gpuFsa/layers/GpuFsaActivation.hpp b/src/backends/gpuFsa/layers/GpuFsaActivation.hpp new file mode 100644 index 0000000000..bbf93482b7 --- /dev/null +++ b/src/backends/gpuFsa/layers/GpuFsaActivation.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include + +namespace armnn +{ +arm_compute::Status GpuFsaActivationValidate(const TensorInfo& input, + const ActivationDescriptor& descriptor); + +void GpuFsaActivationCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input, + const ActivationDescriptor& descriptor); + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp index da6431f857..06b2a71dee 100644 --- a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp +++ b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp @@ -5,6 +5,7 @@ #include "backendsCommon/test/EndToEndTestImpl.hpp" +#include "backendsCommon/test/ActivationEndToEndTestImpl.hpp" #include "backendsCommon/test/BatchMatMulEndToEndTestImpl.hpp" #include "backendsCommon/test/Convolution2dEndToEndTestImpl.hpp" #include "backendsCommon/test/layerTests/CastTestImpl.hpp" @@ -21,6 +22,18 @@ TEST_SUITE("GpuFsaEndToEnd") std::vector gpuFsaDefaultBackends = {"GpuFsa"}; +// Activation +// TanH +TEST_CASE("GpuFsaTanHEndToEndTestFloat32") +{ + ActivationEndToEndTest(gpuFsaDefaultBackends, ActivationFunction::TanH, 1.f, 0, 1.f, 1.f); +} +// Sigmoid +TEST_CASE("GpuFsaSigmoidEndToEndTestFloat32") +{ + ActivationEndToEndTest(gpuFsaDefaultBackends, ActivationFunction::Sigmoid); +} + // BatchMatMul TEST_CASE("RefBatchMatMulEndToEndFloat32Test") { diff --git a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp index cb1ddd8182..cf465c28ff 100644 --- a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp +++ b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp @@ -17,6 +17,24 @@ using namespace armnn; TEST_SUITE("GpuFsaLayerSupport") { +TEST_CASE("IsLayerSupportedGpuFsaActivation") +{ + TensorInfo inputInfo ({ 1, 5, 5, 1 }, DataType::Float32); + TensorInfo outputInfo({ 1, 5, 5, 1 }, DataType::Float32); + + ActivationDescriptor desc{}; + + GpuFsaLayerSupport supportChecker; + std::string reasonIfNotSupported; + auto supported = supportChecker.IsLayerSupported(LayerType::Activation, + {inputInfo, outputInfo}, + desc, + EmptyOptional(), + EmptyOptional(), + reasonIfNotSupported); + CHECK(supported); +} + TEST_CASE("IsLayerSupportedGpuFsaBatchMatMul") { TensorInfo input0Info({ 2, 2 }, DataType::Float32); @@ -82,7 +100,6 @@ TEST_CASE("IsLayerSupportedGpuFsaConv2dUnsupported") TensorInfo outputInfo({ 1, 3, 3, 1 }, DataType::Float32); TensorInfo weightsInfo({ 1, 3, 3, 1 }, DataType::Float32, 0.0f, 0, true); - // NCHW is unsupported. Convolution2dDescriptor desc; desc.m_DataLayout = DataLayout::NCHW; diff --git a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp index 1e5c976c00..ac341c2476 100644 --- a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp +++ b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp @@ -15,10 +15,56 @@ using namespace armnn; TEST_SUITE("GpuFsaOptimizedNetwork") { -TEST_CASE("BatchMatMulSupportedOptimizedNetwork") +TEST_CASE("ActivationSupportedOptimizedNetwork") { - using namespace armnn; + const float qScale = 1.0f; + const int32_t qOffset = 0; + + const TensorShape& inputShape = { 2, 2, 2 }; + const TensorShape& outputShape = { 2, 2, 2 }; + + TensorInfo inputTensorInfo(inputShape, DataType::Float32, qScale, qOffset, true); + TensorInfo outputTensorInfo(outputShape, DataType::Float32, qScale, qOffset); + + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + INetworkPtr network(INetwork::Create()); + + ActivationDescriptor desc; + SUBCASE("TanH") + { + desc.m_Function = ActivationFunction::TanH; + desc.m_A = 1.f; + desc.m_B = 1.f; + } + SUBCASE("Sigmoid") + { + desc.m_Function = ActivationFunction::Sigmoid; + } + + IConnectableLayer* input = network->AddInputLayer(0, "input"); + IConnectableLayer* activationLayer = network->AddActivationLayer(desc, "activation"); + IConnectableLayer* output = network->AddOutputLayer(1, "output"); + + Connect(input, activationLayer, inputTensorInfo, 0, 0); + Connect(activationLayer, output, outputTensorInfo, 0, 0); + std::vector backends = { "GpuFsa" }; + + OptimizerOptionsOpaque optimizedOptions; + IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optimizedOptions); + CHECK(optNet); + + Graph& graph = GetGraphForTesting(optNet.get()); + + // Check graph layer sequence to ensure that the network has been replaced with a PreCompiledLayer + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); +} +TEST_CASE("BatchMatMulSupportedOptimizedNetwork") +{ const float qScale = 1.0f; const int32_t qOffset = 0; @@ -63,8 +109,6 @@ TEST_CASE("BatchMatMulSupportedOptimizedNetwork") TEST_CASE("CastSupportedOptimizedNetwork") { - using namespace armnn; - const float qScale = 1.0f; const int32_t qOffset = 0; @@ -221,8 +265,6 @@ TEST_CASE("TwoConv2dSupportedOptimizedNetwork") TEST_CASE("ElementwiseBinarySupportedOptimizedNetwork") { - using namespace armnn; - const float qScale = 1.0f; const int32_t qOffset = 0; -- cgit v1.2.1