From 3b3dcbf0321fadcb2b7b5b550a4d03f510d7cb7b Mon Sep 17 00:00:00 2001 From: Cian McGriskin Date: Wed, 26 Jul 2023 11:52:47 +0100 Subject: IVGCVSW-2291 TILE Operator CL Implementation * Added Tile Operator Implementation to CL * Added calls to the existing UnitTests * Added Documentation Signed-off-by: Cian McGriskin Change-Id: If7d25c7aa669c24e7816e5d445c7a3b9ce6972d4 --- delegate/test/TileTest.cpp | 2 - docs/02_operator_list.dox | 11 ++++- .../backendsCommon/test/TileEndToEndTestImpl.hpp | 18 +++---- .../test/layerTests/TileTestImpl.cpp | 56 +++++++++++++--------- src/backends/cl/ClLayerSupport.cpp | 18 +++++++ src/backends/cl/ClLayerSupport.hpp | 5 ++ src/backends/cl/ClWorkloadFactory.cpp | 5 ++ src/backends/cl/backend.mk | 1 + src/backends/cl/test/ClEndToEndTests.cpp | 37 ++++++++++++++ src/backends/cl/test/ClLayerTests.cpp | 18 +++++++ src/backends/cl/workloads/CMakeLists.txt | 2 + src/backends/cl/workloads/ClTileWorkload.cpp | 50 +++++++++++++++++++ src/backends/cl/workloads/ClTileWorkload.hpp | 27 +++++++++++ src/backends/cl/workloads/ClWorkloads.hpp | 1 + 14 files changed, 216 insertions(+), 35 deletions(-) create mode 100644 src/backends/cl/workloads/ClTileWorkload.cpp create mode 100644 src/backends/cl/workloads/ClTileWorkload.hpp diff --git a/delegate/test/TileTest.cpp b/delegate/test/TileTest.cpp index 2e20859f5b..32f5610bce 100644 --- a/delegate/test/TileTest.cpp +++ b/delegate/test/TileTest.cpp @@ -53,7 +53,6 @@ void TileFloat32Test(std::vector& backends) expectedOutputShape); } -#if defined(TILE_GPUACC) TEST_SUITE("TileTests_GpuAccTests") { @@ -64,7 +63,6 @@ TEST_SUITE("TileTests_GpuAccTests") } } // TEST_SUITE("Tile_Float32_GpuAcc_Test") -#endif TEST_SUITE("TileTests_CpuAccTests") { diff --git a/docs/02_operator_list.dox b/docs/02_operator_list.dox index e3c9f8c1f2..a37b6fa3a7 100644 --- a/docs/02_operator_list.dox +++ b/docs/02_operator_list.dox @@ -3328,6 +3328,7 @@ where N = batches, C = channels, H = height, W = width FLOAT32 QASYMMS8 QASYMMU8 + QSYMMS8 QSYMMS16 SIGNED32 @@ -3352,12 +3353,18 @@ where N = batches, C = channels, H = height, W = width GpuAcc
    -
  • None +
  • All
-
None +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS8 +
QSYMMS16 +
SIGNED32
TransposeConvolution2dLayer diff --git a/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp index 4047e5ad8e..03b76849e1 100644 --- a/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp +++ b/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp @@ -32,25 +32,25 @@ void TileEndToEnd(const std::vector& backends) int32_t qOffset = 0; bool qConst = true; - const TensorShape inputTensorShape = { 2, 3 }; - const TensorShape outputTensorShape = { 4, 6 }; + const TensorShape inputTensorShape = { 6 }; + const TensorShape outputTensorShape = { 30 }; TensorInfo inputInfo (inputTensorShape, ArmnnType, qScale, qOffset, qConst); TensorInfo outputInfo (outputTensorShape, ArmnnType,qScale, qOffset); std::vector inputData = armnnUtils::QuantizedVector({ - 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f + 65, 144, 91, 161, 56, 73 }, qScale, qOffset); std::vector expectedOutputData = armnnUtils::QuantizedVector({ - 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, - 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, 3.f, 4.f, 5.f + 65, 144, 91, 161, 56, 73, + 65, 144, 91, 161, 56, 73, + 65, 144, 91, 161, 56, 73, + 65, 144, 91, 161, 56, 73, + 65, 144, 91, 161, 56, 73 }, qScale, qOffset); - auto descriptor = armnn::TileDescriptor(std::vector{ 2, 2 }); + auto descriptor = armnn::TileDescriptor(std::vector{ 5 }); INetworkPtr network = CreateTileNetwork(descriptor, inputInfo, outputInfo); std::map> inputTensor = { { 0, inputData } }; diff --git a/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp index 0b13bba425..c61da26bb2 100644 --- a/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp +++ b/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp @@ -110,13 +110,13 @@ LayerTestResult Tile2dTest(armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory) { - auto descriptor = armnn::TileDescriptor(std::vector{ 2, 2 }); + auto descriptor = armnn::TileDescriptor(std::vector{ 2, 5 }); float qScale = 1.0f; int32_t qOffset = 0; armnn::TensorShape inputShape = { 2, 3 }; - armnn::TensorShape outputShape = { 4, 6 }; + armnn::TensorShape outputShape = { 4, 15 }; armnn::TensorInfo inputInfo(inputShape, ArmnnType); armnn::TensorInfo outputInfo(outputShape, ArmnnType); @@ -130,11 +130,11 @@ LayerTestResult Tile2dTest(armnn::IWorkloadFactory& workloadFactory, std::vector expectedOutput = armnnUtils::QuantizedVector( { - 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, + 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, - 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, 3.f, 4.f, 5.f + 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f }, qScale, qOffset); return TileTestImpl(workloadFactory, @@ -152,37 +152,49 @@ LayerTestResult Tile3dTest(armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory) { - auto descriptor = armnn::TileDescriptor(std::vector{ 1, 2, 1 }); + auto descriptor = armnn::TileDescriptor(std::vector{ 1, 5, 5 }); float qScale = 1.0f; int32_t qOffset = 0; - armnn::TensorShape inputShape = { 2, 2, 3 }; - armnn::TensorShape outputShape = { 2, 4, 3 }; + armnn::TensorShape inputShape = { 2, 2, 2 }; + armnn::TensorShape outputShape = { 2, 10, 10 }; armnn::TensorInfo inputInfo(inputShape, ArmnnType); armnn::TensorInfo outputInfo(outputShape, ArmnnType); std::vector input = armnnUtils::QuantizedVector( { - 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, + 1.1f, 2.12f, + 3.3f, 4.12234f, - 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f + 1.1f, 2.12f, + 3.3f, 4.12234f, }, qScale, qOffset); std::vector expectedOutput = armnnUtils::QuantizedVector( { - 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, - 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, - - 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f, - 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f }, qScale, qOffset); return TileTestImpl(workloadFactory, diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index ff2b576f3d..60dab0538a 100644 --- a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -81,6 +81,7 @@ #include "workloads/ClStackWorkload.hpp" #include "workloads/ClStridedSliceWorkload.hpp" #include "workloads/ClSubtractionWorkload.hpp" +#include "workloads/ClTileWorkload.hpp" #include "workloads/ClTransposeConvolution2dWorkload.hpp" #include "workloads/ClTransposeWorkload.hpp" #include "workloads/ClUnidirectionalSequenceLstmFloatWorkload.hpp" @@ -627,6 +628,11 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type, ARMNN_NO_DEPRECATE_WARN_BEGIN return IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); ARMNN_NO_DEPRECATE_WARN_END + case LayerType::Tile: + return IsTileSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Transpose: return IsTransposeSupported(infos[0], infos[1], @@ -1527,6 +1533,18 @@ bool ClLayerSupport::IsSubtractionSupported(const TensorInfo& input0, nullptr); } +bool ClLayerSupport::IsTileSupported(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor, + Optional reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClTileWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + bool ClLayerSupport::IsTransposeConvolution2dSupported(const TensorInfo& input, const TensorInfo& output, const TransposeConvolution2dDescriptor& descriptor, diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp index 737ab7a850..9a8f896cab 100644 --- a/src/backends/cl/ClLayerSupport.hpp +++ b/src/backends/cl/ClLayerSupport.hpp @@ -343,6 +343,11 @@ public: const Optional& biases, Optional reasonIfUnsupported = EmptyOptional()) const; + bool IsTileSupported(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()) const; + bool IsTransposeSupported(const TensorInfo& input, const TensorInfo& output, const TransposeDescriptor& descriptor, diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index 7b4e9be908..da05f67711 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -755,6 +755,11 @@ std::unique_ptr ClWorkloadFactory::CreateWorkload(LayerType type, auto subtractionQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*subtractionQueueDescriptor, info, m_CLCompileContext); } + case LayerType::Tile: + { + auto tileQueueDescriptor = PolymorphicDowncast(&descriptor); + return MakeWorkload(*tileQueueDescriptor, info, m_CLCompileContext); + } case LayerType::Transpose : { auto transposeQueueDescriptor = PolymorphicDowncast(&descriptor); diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 5ceab700e4..34993059c2 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -90,6 +90,7 @@ BACKEND_SOURCES := \ workloads/ClStackWorkload.cpp \ workloads/ClStridedSliceWorkload.cpp \ workloads/ClSubtractionWorkload.cpp \ + workloads/ClTileWorkload.cpp \ workloads/ClTransposeConvolution2dWorkload.cpp \ workloads/ClTransposeWorkload.cpp \ workloads/ClUnidirectionalSequenceLstmFloatWorkload.cpp diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp index 2d7a813e60..50ba1dbb99 100644 --- a/src/backends/cl/test/ClEndToEndTests.cpp +++ b/src/backends/cl/test/ClEndToEndTests.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -466,6 +467,42 @@ TEST_CASE("ClSplitter4dDim3EndToEndUint8Test") Splitter4dDim3EndToEnd(clDefaultBackends); } +// Tile +TEST_CASE("ClTileEndToEndFloat32") +{ + TileEndToEnd(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndFloat16") +{ + TileEndToEnd(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQAsymmS8") +{ + TileEndToEnd(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQAsymmU8") +{ + TileEndToEnd(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQSymmS8") +{ + TileEndToEnd(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQSymmS16") +{ + TileEndToEnd(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndSigned32") +{ + TileEndToEnd(clDefaultBackends); +} + // TransposeConvolution2d TEST_CASE("ClTransposeConvolution2dEndToEndFloatNchwTest") { diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp index 93ad653441..d8d451ead3 100644 --- a/src/backends/cl/test/ClLayerTests.cpp +++ b/src/backends/cl/test/ClLayerTests.cpp @@ -2033,6 +2033,24 @@ ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleInt16, ClContextControlFixture, Dequanti ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleUint8ToFp16, ClContextControlFixture, DequantizeSimpleUint8ToFp16Test) ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleInt16ToFp16, ClContextControlFixture, DequantizeSimpleInt16ToFp16Test) +//Tile +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat32, ClContextControlFixture, Tile1dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat32, ClContextControlFixture, Tile2dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat32, ClContextControlFixture, Tile3dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat32, ClContextControlFixture, Tile4dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat16, ClContextControlFixture, Tile1dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat16, ClContextControlFixture, Tile2dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat16, ClContextControlFixture, Tile3dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat16, ClContextControlFixture, Tile4dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestInt8, ClContextControlFixture, Tile1dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestInt8, ClContextControlFixture, Tile2dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestInt8, ClContextControlFixture, Tile3dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestInt8, ClContextControlFixture, Tile4dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestUint8, ClContextControlFixture, Tile1dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestUint8, ClContextControlFixture, Tile2dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestUint8, ClContextControlFixture, Tile3dTest) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestUint8, ClContextControlFixture, Tile4dTest) + // Transpose ARMNN_AUTO_TEST_FIXTURE_WITH_THF( SimpleTransposeFloat32, ClContextControlFixture, SimpleTransposeTest) diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt index 030d71988f..cb16ab19e8 100644 --- a/src/backends/cl/workloads/CMakeLists.txt +++ b/src/backends/cl/workloads/CMakeLists.txt @@ -131,6 +131,8 @@ list(APPEND armnnClBackendWorkloads_sources ClStridedSliceWorkload.hpp ClSubtractionWorkload.cpp ClSubtractionWorkload.hpp + ClTileWorkload.cpp + ClTileWorkload.hpp ClTransposeConvolution2dWorkload.cpp ClTransposeConvolution2dWorkload.hpp ClTransposeWorkload.cpp diff --git a/src/backends/cl/workloads/ClTileWorkload.cpp b/src/backends/cl/workloads/ClTileWorkload.cpp new file mode 100644 index 0000000000..2c2f63faac --- /dev/null +++ b/src/backends/cl/workloads/ClTileWorkload.cpp @@ -0,0 +1,50 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClTileWorkload.hpp" +#include "ClWorkloadUtils.hpp" +#include +#include +#include +#include + +using namespace armnn::armcomputetensorutils; +namespace armnn +{ +arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + + std::vector aclMultiples = descriptor.m_Multiples; + std::reverse(aclMultiples.begin(),aclMultiples.end()); + + return arm_compute::CLTile::validate(&aclInput, &aclOutput, aclMultiples); +} + +ClTileWorkload::ClTileWorkload(const armnn::TileQueueDescriptor& descriptor, + const armnn::WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) + : BaseWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClTileWorkload", 1, 1); + + std::vector aclMultiples = descriptor.m_Parameters.m_Multiples; + std::reverse(aclMultiples.begin(),aclMultiples.end()); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(clCompileContext, &input, &output, aclMultiples); +} + +void ClTileWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClTileWorkload_Execute", this->GetGuid()); + m_Layer.run(); +} + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/cl/workloads/ClTileWorkload.hpp b/src/backends/cl/workloads/ClTileWorkload.hpp new file mode 100644 index 0000000000..0f68559f85 --- /dev/null +++ b/src/backends/cl/workloads/ClTileWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once +#include "ClBaseWorkload.hpp" +#include + +namespace armnn +{ +arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor); + +class ClTileWorkload : public BaseWorkload { +public: + ClTileWorkload(const TileQueueDescriptor &descriptor, + const WorkloadInfo &info, + const arm_compute::CLCompileContext& clCompileContext); + void Execute() const override; + +private: + mutable arm_compute::CLTile m_Layer; +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp index d862aab949..cec8706911 100644 --- a/src/backends/cl/workloads/ClWorkloads.hpp +++ b/src/backends/cl/workloads/ClWorkloads.hpp @@ -68,6 +68,7 @@ #include "ClSubtractionWorkload.hpp" #include "ClConvertFp16ToFp32Workload.hpp" #include "ClConvertFp32ToFp16Workload.hpp" +#include "ClTileWorkload.hpp" #include "ClTransposeConvolution2dWorkload.hpp" #include "ClTransposeWorkload.hpp" #include "ClUnidirectionalSequenceLstmFloatWorkload.hpp" -- cgit v1.2.1