From bc5a5d59e07d2365c2c88b3430b4a311d186ebbc Mon Sep 17 00:00:00 2001 From: Tracy Narine Date: Tue, 6 Feb 2024 15:22:41 +0000 Subject: IVGCVSW-7622 GpuFsa Op: Add Cast operator * Added cast operator support Signed-off-by: Tracy Narine Change-Id: Ie12cb1559a7a059ff35e1c395bc77243499243cd --- .../test/layerTests/CastTestImpl.hpp | 40 +++++++- src/backends/gpuFsa/GpuFsaBackend.cpp | 8 ++ src/backends/gpuFsa/GpuFsaLayerSupport.cpp | 20 +++- src/backends/gpuFsa/layers/CMakeLists.txt | 2 + src/backends/gpuFsa/layers/GpuFsaCast.cpp | 101 +++++++++++++++++++++ src/backends/gpuFsa/layers/GpuFsaCast.hpp | 21 +++++ src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp | 19 ++++ .../gpuFsa/test/GpuFsaLayerSupportTests.cpp | 18 ++++ .../gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp | 39 ++++++++ 9 files changed, 264 insertions(+), 4 deletions(-) create mode 100644 src/backends/gpuFsa/layers/GpuFsaCast.cpp create mode 100644 src/backends/gpuFsa/layers/GpuFsaCast.hpp diff --git a/src/backends/backendsCommon/test/layerTests/CastTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/CastTestImpl.hpp index a727950bb8..13512ad700 100644 --- a/src/backends/backendsCommon/test/layerTests/CastTestImpl.hpp +++ b/src/backends/backendsCommon/test/layerTests/CastTestImpl.hpp @@ -1,10 +1,12 @@ // -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2021, 2024 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #pragma once +#include + #include #include @@ -82,3 +84,39 @@ LayerTestResult CastFloat32ToUInt82dTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory); + +template +void CastSimpleTest(const std::vector& backends, + const std::vector& shape, + const std::vector& inputValues, + const std::vector& outputValues, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + using namespace armnn; + + const TensorShape inputShape(static_cast(shape.size()), shape.data()); + const TensorShape outputShape(static_cast(shape.size()), shape.data()); + + TensorInfo inputTensorInfo(inputShape, inputDataType, qScale, qOffset, true); + TensorInfo outputTensorInfo(outputShape, outputDataType, qScale, qOffset); + + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + INetworkPtr network(INetwork::Create()); + + IConnectableLayer* input = network->AddInputLayer(0, "input"); + IConnectableLayer* castLayer = network->AddCastLayer("cast"); + IConnectableLayer* output = network->AddOutputLayer(0, "output"); + + Connect(input, castLayer, inputTensorInfo, 0, 0); + Connect(castLayer, output, outputTensorInfo, 0, 0); + + std::map> inputTensorData = {{ 0, inputValues }}; + std::map> expectedOutputData = {{ 0, outputValues }}; + + EndToEndLayerTestImpl(std::move(network), + inputTensorData, + expectedOutputData, + backends); +} \ No newline at end of file diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp index f14687b8e0..4a410309e3 100644 --- a/src/backends/gpuFsa/GpuFsaBackend.cpp +++ b/src/backends/gpuFsa/GpuFsaBackend.cpp @@ -20,6 +20,7 @@ #include #include +#include "layers/GpuFsaCast.hpp" #include "layers/GpuFsaConvolution2d.hpp" #include "layers/GpuFsaDepthwiseConvolution2d.hpp" #include "layers/GpuFsaElementwiseBinaryAdd.hpp" @@ -247,6 +248,13 @@ OptimizationViews 
GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra // Configure and setup the sketch for each supported op. Their data will be wrapped into a PreCompiled layer switch (base.GetType()) { + case (LayerType::Cast): + { + auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); + auto output = base.GetOutputSlot(0).GetTensorInfo(); + GpuFsaCastCreateOp(preCompiledBlobPtr, input, output); + break; + } case (LayerType::Convolution2d): { auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); diff --git a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp index b73b3e9088..d9d95e5c92 100644 --- a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp +++ b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp @@ -10,6 +10,7 @@ #include #if defined(ARMCOMPUTEGPUFSA_ENABLED) +#include "layers/GpuFsaCast.hpp" #include "layers/GpuFsaConvolution2d.hpp" #include "layers/GpuFsaDepthwiseConvolution2d.hpp" #include "layers/GpuFsaElementwiseBinaryAdd.hpp" @@ -73,7 +74,20 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, IgnoreUnused(lstmParamsInfo); IgnoreUnused(quantizedLstmInputParamsInfo); - switch (type) { + switch (type) + { + case LayerType::Cast: + { + if (infos.size() != 2) + { + throw InvalidArgumentException("Invalid number of cast TensorInfos. 
" + "TensorInfos should be of format: {input, output}."); + } + FORWARD_LAYER_VALIDATE_FUNC(GpuFsaCastValidate, + reasonIfUnsupported, + infos[0], + infos[1]); + } case LayerType::Convolution2d: { if (infos.size() != 4) @@ -110,7 +124,7 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, "TensorInfos should be of format: {input, output, weights, biases}."); } - auto desc = PolymorphicDowncast(&descriptor); + auto desc = PolymorphicDowncast(&descriptor); if (infos[3] == TensorInfo()) { FORWARD_LAYER_VALIDATE_FUNC(GpuFsaDepthwiseConvolution2dValidate, @@ -138,7 +152,7 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type, "TensorInfos should be of format: {input0, input1, output0}."); } - auto desc = PolymorphicDowncast(&descriptor); + auto desc = PolymorphicDowncast(&descriptor); if (desc->m_Operation == BinaryOperation::Add) { FORWARD_LAYER_VALIDATE_FUNC(GpuFsaElementwiseBinaryAddValidate, diff --git a/src/backends/gpuFsa/layers/CMakeLists.txt b/src/backends/gpuFsa/layers/CMakeLists.txt index 5e0d0e7486..8ffa934561 100644 --- a/src/backends/gpuFsa/layers/CMakeLists.txt +++ b/src/backends/gpuFsa/layers/CMakeLists.txt @@ -4,6 +4,8 @@ # list(APPEND armnnGpuFsaBackendLayers_sources + GpuFsaCast.cpp + GpuFsaCast.hpp GpuFsaConvolution2d.cpp GpuFsaConvolution2d.hpp GpuFsaDepthwiseConvolution2d.cpp diff --git a/src/backends/gpuFsa/layers/GpuFsaCast.cpp b/src/backends/gpuFsa/layers/GpuFsaCast.cpp new file mode 100644 index 0000000000..b5a0747428 --- /dev/null +++ b/src/backends/gpuFsa/layers/GpuFsaCast.cpp @@ -0,0 +1,101 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "GpuFsaCast.hpp" +#include "gpuFsa/GpuFsaBackendId.hpp" + +#include + +#include +#include +#include +#include + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace armnn +{ + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +static CastAttributes CastAttributesFromTensorInfo(const TensorInfo& input) +{ + using namespace armcomputetensorutils; + + CastAttributes cast_attr; + arm_compute::DataType dataType = GetArmComputeDataType(input.GetDataType(), false); + cast_attr.data_type(dataType).convert_policy(g_AclConvertPolicy); + return cast_attr; +} + +arm_compute::Status GpuFsaCastValidate(const TensorInfo& input, const TensorInfo& output) +{ + using namespace armcomputetensorutils; + + // Create a new workload sketch, for validation purposes + auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context(); + auto workloadContext = GpuWorkloadContext(&compileCtx); + GpuWorkloadSketch sketch{ &workloadContext }; + + arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions()); + + aclinputInfo.set_are_values_constant(input.IsConstant()); + + arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclinputInfo); + + CastAttributes cast_attr = CastAttributesFromTensorInfo(output); + + arm_compute::Status aclStatus = GpuCast::validate_op(sketch, inputInfo0, cast_attr); +#ifndef NDEBUG + const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK; + if (!validated) + { + std::cout << "GpuFsaCastValidate failed: " << aclStatus.error_description() << std::endl; + } +#endif + return aclStatus; +} + +void GpuFsaCastCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input, + const TensorInfo& output) +{ + using namespace armcomputetensorutils; + + GpuWorkloadSketch* sketch = blob->sketch.get(); + GpuWorkloadContext* workloadContext = blob->workloadContext.get(); + 
std::vector inputTensorInfos = {}; + std::vector outputTensorInfos = {}; + + arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions()); + + aclinputInfo.set_are_values_constant(input.IsConstant()); + + inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclinputInfo)); + + CastAttributes cast_attr = CastAttributesFromTensorInfo(output); + + // Validate the operator and throw if the backend cannot support it + arm_compute::Status aclStatus = GpuCast::validate_op(*sketch, inputTensorInfos[0], cast_attr); + const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK; + if (!validated) + { + throw BackendCapabilityException("\"" + std::string(GpuFsaBackendId()) + + "\" backend failed during cast operator validation"); + } + + arm_compute::ITensorInfo* castOutputInfo = + GpuCast::create_op(*sketch, inputTensorInfos[0], cast_attr); + + // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created. + outputTensorInfos.emplace_back(workloadContext->create_tensor_info()); + GpuOutput::create_op(*sketch, castOutputInfo, outputTensorInfos[0]); + + // Store the TensorInfos within the blob as unique_ptrs to be used later + blob->inputTensorInfos = std::make_unique>(inputTensorInfos); + blob->outputTensorInfos = std::make_unique>(outputTensorInfos); +} + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/gpuFsa/layers/GpuFsaCast.hpp b/src/backends/gpuFsa/layers/GpuFsaCast.hpp new file mode 100644 index 0000000000..026142ad50 --- /dev/null +++ b/src/backends/gpuFsa/layers/GpuFsaCast.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include + +namespace armnn +{ + +arm_compute::Status GpuFsaCastValidate(const TensorInfo& input, + const TensorInfo& output); + +void GpuFsaCastCreateOp(GpuFsaPreCompiledBlob* blob, + const TensorInfo& input, + const TensorInfo& output); + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp index 93a4a81f9d..ccab0bf4f9 100644 --- a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp +++ b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp @@ -6,6 +6,7 @@ #include "backendsCommon/test/EndToEndTestImpl.hpp" #include "backendsCommon/test/Convolution2dEndToEndTestImpl.hpp" +#include "backendsCommon/test/layerTests/CastTestImpl.hpp" #include "backendsCommon/test/DepthwiseConvolution2dEndToEndTests.hpp" #include "backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp" @@ -19,6 +20,24 @@ TEST_SUITE("GpuFsaEndToEnd") std::vector gpuFsaDefaultBackends = {"GpuFsa"}; +TEST_CASE("GpuFsaCastEndtoEndTestFloat32ToFloat16") +{ + using namespace half_float::literal; + + std::vector inputShape { 2, 2, 2 }; + + std::vector inputValues { -3.5f, -1.2f, -8.6f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f }; + + std::vector outputValues { -3.50_h, -1.20_h, -8.6_h, -2._h, -1.50_h, -1.30_h, -0.50_h, -0.40_h }; + + CastSimpleTest(gpuFsaDefaultBackends, + inputShape, + inputValues, + outputValues, + 1.0f, + 0); +} + // Conv2d TEST_CASE("GpuFsaConv2dEndtoEndTestFloat32") { diff --git a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp index fee0d07820..4e39a80b35 100644 --- a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp +++ b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp @@ -17,6 +17,24 @@ using namespace armnn; TEST_SUITE("GpuFsaLayerSupport") { +TEST_CASE("IsLayerSupportedCast") +{ + armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); 
+ armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16); + + BaseDescriptor desc; + + GpuFsaLayerSupport supportChecker; + std::string reasonIfNotSupported; + auto supported = supportChecker.IsLayerSupported(LayerType::Cast, + {inputTensorInfo, outputTensorInfo}, + desc, + EmptyOptional(), + EmptyOptional(), + reasonIfNotSupported); + CHECK(supported); +} + TEST_CASE("IsLayerSupportedGpuFsaConv2d") { TensorInfo inputInfo ({ 1, 5, 5, 1 }, DataType::Float32); diff --git a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp index 4b27f3bff0..aad3a0ff6f 100644 --- a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp +++ b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp @@ -15,6 +15,45 @@ using namespace armnn; TEST_SUITE("GpuFsaOptimizedNetwork") { +TEST_CASE("CastSupportedOptimizedNetwork") +{ + using namespace armnn; + + const float qScale = 1.0f; + const int32_t qOffset = 0; + + const TensorShape& inputShape = { 2, 2, 2 }; + const TensorShape& outputShape = { 2, 2, 2 }; + + TensorInfo inputTensorInfo(inputShape, DataType::Float32, qScale, qOffset, true); + TensorInfo outputTensorInfo(outputShape, DataType::Float16, qScale, qOffset); + + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + INetworkPtr network(INetwork::Create()); + + IConnectableLayer* input = network->AddInputLayer(0, "input"); + IConnectableLayer* castLayer = network->AddCastLayer("cast"); + IConnectableLayer* output = network->AddOutputLayer(1, "output"); + + Connect(input, castLayer, inputTensorInfo, 0, 0); + Connect(castLayer, output, outputTensorInfo, 0, 0); + + std::vector backends = { "GpuFsa" }; + + OptimizerOptionsOpaque optimizedOptions; + IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optimizedOptions); + CHECK(optNet); + + Graph& graph = GetGraphForTesting(optNet.get()); + + // Check graph layer sequence to ensure that 
the network has been replaced with a PreCompiledLayer + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); +} + TEST_CASE("SingleConv2dSupportedOptimizedNetwork") { IRuntime::CreationOptions options; -- cgit v1.2.1