author:    Tracy Narine <tracy.narine@arm.com>  2024-02-06 15:22:41 +0000
committer: TeresaARM <teresa.charlinreyes@arm.com>  2024-02-07 21:04:02 +0000
commit:    bc5a5d59e07d2365c2c88b3430b4a311d186ebbc
tree:      345691d4b83824e13b99348617dd68866e5f530f /src/backends/gpuFsa
parent:    7bcae3c835468d9b0770514dc7127f02d47cec5f
download:  armnn-bc5a5d59e07d2365c2c88b3430b4a311d186ebbc.tar.gz
IVGCVSW-7622 GpuFsa Op: Add Cast operator
* Added cast operator support
Signed-off-by: Tracy Narine <tracy.narine@arm.com>
Change-Id: Ie12cb1559a7a059ff35e1c395bc77243499243cd
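
This patch wires Cast into the GpuFsa backend's layer-support query, subgraph compilation, and tests. For orientation, a network that exercises the new path can be built through the public Arm NN graph API; the following is a minimal sketch distilled from the CastSupportedOptimizedNetwork test added in this patch (shapes, binding ids, and the standalone main are illustrative, not part of the change):

    #include <armnn/ArmNN.hpp>

    int main()
    {
        using namespace armnn;

        IRuntime::CreationOptions options;
        IRuntimePtr runtime = IRuntime::Create(options);
        INetworkPtr network = INetwork::Create();

        // Input -> Cast -> Output; the GpuFsa backend compiles the Cast into a PreCompiled layer.
        IConnectableLayer* input  = network->AddInputLayer(0, "input");
        IConnectableLayer* cast   = network->AddCastLayer("cast");
        IConnectableLayer* output = network->AddOutputLayer(0, "output");

        input->GetOutputSlot(0).Connect(cast->GetInputSlot(0));
        input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 2, 2 }, DataType::Float32));

        cast->GetOutputSlot(0).Connect(output->GetInputSlot(0));
        cast->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 2, 2 }, DataType::Float16));

        // If the backend accepts the cast, the resulting graph is Input -> PreCompiled -> Output.
        IOptimizedNetworkPtr optNet = Optimize(*network, { "GpuFsa" }, runtime->GetDeviceSpec());
        return optNet ? 0 : 1;
    }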
Diffstat (limited to 'src/backends/gpuFsa')
 src/backends/gpuFsa/GpuFsaBackend.cpp                    |   8
 src/backends/gpuFsa/GpuFsaLayerSupport.cpp               |  20
 src/backends/gpuFsa/layers/CMakeLists.txt                |   2
 src/backends/gpuFsa/layers/GpuFsaCast.cpp                | 101
 src/backends/gpuFsa/layers/GpuFsaCast.hpp                |  21
 src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp         |  19
 src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp     |  18
 src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp |  39
 8 files changed, 225 insertions(+), 3 deletions(-)
diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp
index f14687b8e0..4a410309e3 100644
--- a/src/backends/gpuFsa/GpuFsaBackend.cpp
+++ b/src/backends/gpuFsa/GpuFsaBackend.cpp
@@ -20,6 +20,7 @@
 #include <arm_compute/core/CL/CLKernelLibrary.h>
 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
 
+#include "layers/GpuFsaCast.hpp"
 #include "layers/GpuFsaConvolution2d.hpp"
 #include "layers/GpuFsaDepthwiseConvolution2d.hpp"
 #include "layers/GpuFsaElementwiseBinaryAdd.hpp"
@@ -247,6 +248,13 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra
         // Configure and setup the sketch for each supported op. Their data will be wrapped into a PreCompiled layer
         switch (base.GetType())
         {
+            case (LayerType::Cast):
+            {
+                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+                auto output = base.GetOutputSlot(0).GetTensorInfo();
+                GpuFsaCastCreateOp(preCompiledBlobPtr, input, output);
+                break;
+            }
             case (LayerType::Convolution2d):
             {
                 auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
diff --git a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
index b73b3e9088..d9d95e5c92 100644
--- a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
+++ b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
@@ -10,6 +10,7 @@
 #include <armnn/utility/PolymorphicDowncast.hpp>
 
 #if defined(ARMCOMPUTEGPUFSA_ENABLED)
+#include "layers/GpuFsaCast.hpp"
 #include "layers/GpuFsaConvolution2d.hpp"
 #include "layers/GpuFsaDepthwiseConvolution2d.hpp"
 #include "layers/GpuFsaElementwiseBinaryAdd.hpp"
@@ -73,7 +74,20 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type,
     IgnoreUnused(lstmParamsInfo);
     IgnoreUnused(quantizedLstmInputParamsInfo);
 
-    switch (type) {
+    switch (type)
+    {
+        case LayerType::Cast:
+        {
+            if (infos.size() != 2)
+            {
+                throw InvalidArgumentException("Invalid number of cast TensorInfos. "
" + "TensorInfos should be of format: {input, output}."); + } + FORWARD_LAYER_VALIDATE_FUNC(GpuFsaCastValidate, + reasonIfUnsupported, + infos[0], + infos[1]); + } case LayerType::Convolution2d: { if (infos.size() != 4) @@ -110,7 +124,7 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, "TensorInfos should be of format: {input, output, weights, biases}."); } - auto desc = PolymorphicDowncast<const DepthwiseConvolution2dDescriptor *>(&descriptor); + auto desc = PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&descriptor); if (infos[3] == TensorInfo()) { FORWARD_LAYER_VALIDATE_FUNC(GpuFsaDepthwiseConvolution2dValidate, @@ -138,7 +152,7 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, "TensorInfos should be of format: {input0, input1, output0}."); } - auto desc = PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&descriptor); + auto desc = PolymorphicDowncast<const ElementwiseBinaryDescriptor*>(&descriptor); if (desc->m_Operation == BinaryOperation::Add) { FORWARD_LAYER_VALIDATE_FUNC(GpuFsaElementwiseBinaryAddValidate, diff --git a/src/backends/gpuFsa/layers/CMakeLists.txt b/src/backends/gpuFsa/layers/CMakeLists.txt index 5e0d0e7486..8ffa934561 100644 --- a/src/backends/gpuFsa/layers/CMakeLists.txt +++ b/src/backends/gpuFsa/layers/CMakeLists.txt @@ -4,6 +4,8 @@ # list(APPEND armnnGpuFsaBackendLayers_sources + GpuFsaCast.cpp + GpuFsaCast.hpp GpuFsaConvolution2d.cpp GpuFsaConvolution2d.hpp GpuFsaDepthwiseConvolution2d.cpp diff --git a/src/backends/gpuFsa/layers/GpuFsaCast.cpp b/src/backends/gpuFsa/layers/GpuFsaCast.cpp new file mode 100644 index 0000000000..b5a0747428 --- /dev/null +++ b/src/backends/gpuFsa/layers/GpuFsaCast.cpp @@ -0,0 +1,101 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "GpuFsaCast.hpp"
+#include "gpuFsa/GpuFsaBackendId.hpp"
+
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace armnn
+{
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+static CastAttributes CastAttributesFromTensorInfo(const TensorInfo& input)
+{
+    using namespace armcomputetensorutils;
+
+    CastAttributes cast_attr;
+    arm_compute::DataType dataType = GetArmComputeDataType(input.GetDataType(), false);
+    cast_attr.data_type(dataType).convert_policy(g_AclConvertPolicy);
+    return cast_attr;
+}
+
+arm_compute::Status GpuFsaCastValidate(const TensorInfo& input, const TensorInfo& output)
+{
+    using namespace armcomputetensorutils;
+
+    // Create a new workload sketch, for validation purposes
+    auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
+    auto workloadContext = GpuWorkloadContext(&compileCtx);
+    GpuWorkloadSketch sketch{ &workloadContext };
+
+    arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
+
+    aclinputInfo.set_are_values_constant(input.IsConstant());
+
+    arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclinputInfo);
+
+    CastAttributes cast_attr = CastAttributesFromTensorInfo(output);
+
+    arm_compute::Status aclStatus = GpuCast::validate_op(sketch, inputInfo0, cast_attr);
+#ifndef NDEBUG
+    const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
+    if (!validated)
+    {
+        std::cout << "GpuFsaCastValidate failed: " << aclStatus.error_description() << std::endl;
+    }
+#endif
+    return aclStatus;
+}
+
+void GpuFsaCastCreateOp(GpuFsaPreCompiledBlob* blob,
+                        const TensorInfo& input,
+                        const TensorInfo& output)
+{
+    using namespace armcomputetensorutils;
+
+    GpuWorkloadSketch* sketch = blob->sketch.get();
+    GpuWorkloadContext* workloadContext = blob->workloadContext.get();
+    std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
+    std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
+
+    arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
+
+    aclinputInfo.set_are_values_constant(input.IsConstant());
+
+    inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclinputInfo));
+
+    CastAttributes cast_attr = CastAttributesFromTensorInfo(output);
+
+    // Validate operator, check status and update reasonIfUnsupported
+    arm_compute::Status aclStatus = GpuCast::validate_op(*sketch, inputTensorInfos[0], cast_attr);
+    const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
+    if (!validated)
+    {
+        throw BackendCapabilityException("\"" + std::string(GpuFsaBackendId()) +
+                                         "\" backend failed during cast operator validation");
+    }
+
+    arm_compute::ITensorInfo* castOutputInfo =
+            GpuCast::create_op(*sketch, inputTensorInfos[0], cast_attr);
+
+    // Temporary fix until a fusing attempt is made for the GpuFsa backend and an Output layer workload is created.
+    outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
+    GpuOutput::create_op(*sketch, castOutputInfo, outputTensorInfos[0]);
+
+    // Store the TensorInfos within the blob as unique_ptrs to be used later
+    blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
+    blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
+}
+
+} // namespace armnn
\ No newline at end of file
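
The new source follows the two-phase pattern used by the other GpuFsa operators: validate_op against a scratch sketch for support queries, then create_op into the blob's long-lived sketch for compilation, terminated by an explicit GpuOutput. A condensed, illustrative view of the recording sequence above (the free function RecordCast is a hypothetical name, not part of the patch):

    // Illustrative condensation of GpuFsaCastCreateOp; error handling trimmed.
    using namespace arm_compute::experimental::dynamic_fusion;

    void RecordCast(GpuWorkloadSketch& sketch,
                    GpuWorkloadContext& context,
                    const arm_compute::TensorInfo& aclInput,
                    const CastAttributes& castAttributes)
    {
        arm_compute::ITensorInfo* in = context.create_tensor_info(aclInput);

        // Validate first: create_op must only be called with arguments that pass validation.
        if (GpuCast::validate_op(sketch, in, castAttributes).error_code() != arm_compute::ErrorCode::OK)
        {
            return;
        }

        // Record the cast; the returned ITensorInfo describes its virtual (fusable) output.
        arm_compute::ITensorInfo* castOut = GpuCast::create_op(sketch, in, castAttributes);

        // Materialise the result with an explicit GpuOutput until operator fusion is attempted.
        GpuOutput::create_op(sketch, castOut, context.create_tensor_info());
    }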
diff --git a/src/backends/gpuFsa/layers/GpuFsaCast.hpp b/src/backends/gpuFsa/layers/GpuFsaCast.hpp
new file mode 100644
index 0000000000..026142ad50
--- /dev/null
+++ b/src/backends/gpuFsa/layers/GpuFsaCast.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/Descriptors.hpp>
+
+#include <gpuFsa/GpuFsaBackend.hpp>
+
+namespace armnn
+{
+
+arm_compute::Status GpuFsaCastValidate(const TensorInfo& input,
+                                       const TensorInfo& output);
+
+void GpuFsaCastCreateOp(GpuFsaPreCompiledBlob* blob,
+                        const TensorInfo& input,
+                        const TensorInfo& output);
+
+} // namespace armnn
\ No newline at end of file
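
The end-to-end test below exercises this path through the shared CastSimpleTest helper. For reference, the equivalent raw calls against the public IRuntime API look roughly like this, continuing the Optimize() sketch above (binding ids and buffer sizes are illustrative):

    // Rough sketch: load the optimized cast network and run one inference.
    armnn::NetworkId netId = 0;
    runtime->LoadNetwork(netId, std::move(optNet));

    std::vector<float>       inputData(8, -1.5f);   // Float32 source values
    std::vector<armnn::Half> outputData(8);         // Float16 destination buffer

    armnn::TensorInfo inputInfo = runtime->GetInputTensorInfo(netId, 0);
    inputInfo.SetConstant(true);                    // input TensorInfos must be constant for ConstTensor

    armnn::InputTensors  inputs  { { 0, armnn::ConstTensor(inputInfo, inputData.data()) } };
    armnn::OutputTensors outputs { { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0),
                                                      outputData.data()) } };

    runtime->EnqueueWorkload(netId, inputs, outputs);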
diff --git a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp
index 93a4a81f9d..ccab0bf4f9 100644
--- a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp
+++ b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp
@@ -6,6 +6,7 @@
 #include "backendsCommon/test/EndToEndTestImpl.hpp"
 #include "backendsCommon/test/Convolution2dEndToEndTestImpl.hpp"
+#include "backendsCommon/test/layerTests/CastTestImpl.hpp"
 #include "backendsCommon/test/DepthwiseConvolution2dEndToEndTests.hpp"
 #include "backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp"
 
@@ -19,6 +20,24 @@ TEST_SUITE("GpuFsaEndToEnd")
 
 std::vector<BackendId> gpuFsaDefaultBackends = {"GpuFsa"};
 
+TEST_CASE("GpuFsaCastEndtoEndTestFloat32ToFloat16")
+{
+    using namespace half_float::literal;
+
+    std::vector<unsigned int> inputShape { 2, 2, 2 };
+
+    std::vector<float> inputValues { -3.5f, -1.2f, -8.6f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f };
+
+    std::vector<armnn::Half> outputValues { -3.50_h, -1.20_h, -8.6_h, -2._h, -1.50_h, -1.30_h, -0.50_h, -0.40_h };
+
+    CastSimpleTest<DataType::Float32, DataType::Float16, float, armnn::Half>(gpuFsaDefaultBackends,
+                                                                             inputShape,
+                                                                             inputValues,
+                                                                             outputValues,
+                                                                             1.0f,
+                                                                             0);
+}
+
 // Conv2d
 TEST_CASE("GpuFsaConv2dEndtoEndTestFloat32")
 {
diff --git a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp
index fee0d07820..4e39a80b35 100644
--- a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp
+++ b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp
@@ -17,6 +17,24 @@ using namespace armnn;
 
 TEST_SUITE("GpuFsaLayerSupport")
 {
+TEST_CASE("IsLayerSupportedCast")
+{
+    armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
+    armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
+
+    BaseDescriptor desc;
+
+    GpuFsaLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(LayerType::Cast,
+                                                     {inputTensorInfo, outputTensorInfo},
+                                                     desc,
+                                                     EmptyOptional(),
+                                                     EmptyOptional(),
+                                                     reasonIfNotSupported);
+    CHECK(supported);
+}
+
 TEST_CASE("IsLayerSupportedGpuFsaConv2d")
 {
     TensorInfo inputInfo ({ 1, 5, 5, 1 }, DataType::Float32);
diff --git a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp
index 4b27f3bff0..aad3a0ff6f 100644
--- a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp
+++ b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp
@@ -15,6 +15,45 @@ using namespace armnn;
 
 TEST_SUITE("GpuFsaOptimizedNetwork")
 {
+TEST_CASE("CastSupportedOptimizedNetwork")
+{
+    using namespace armnn;
+
+    const float qScale = 1.0f;
+    const int32_t qOffset = 0;
+
+    const TensorShape& inputShape  = { 2, 2, 2 };
+    const TensorShape& outputShape = { 2, 2, 2 };
+
+    TensorInfo inputTensorInfo(inputShape, DataType::Float32, qScale, qOffset, true);
+    TensorInfo outputTensorInfo(outputShape, DataType::Float16, qScale, qOffset);
+
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+    INetworkPtr network(INetwork::Create());
+
+    IConnectableLayer* input = network->AddInputLayer(0, "input");
+    IConnectableLayer* castLayer = network->AddCastLayer("cast");
+    IConnectableLayer* output = network->AddOutputLayer(1, "output");
+
+    Connect(input, castLayer, inputTensorInfo, 0, 0);
+    Connect(castLayer, output, outputTensorInfo, 0, 0);
+
+    std::vector<BackendId> backends = { "GpuFsa" };
+
+    OptimizerOptionsOpaque optimizedOptions;
+    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optimizedOptions);
+    CHECK(optNet);
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    // Check graph layer sequence to ensure that the network has been replaced with a PreCompiledLayer
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        &IsLayerOfType<InputLayer>,
+                        &IsLayerOfType<PreCompiledLayer>,
+                        &IsLayerOfType<OutputLayer>));
+}
+
 TEST_CASE("SingleConv2dSupportedOptimizedNetwork")
 {
     IRuntime::CreationOptions options;