diff options
Diffstat (limited to 'src/backends')
 src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp     | 18
 src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp  | 56
 src/backends/cl/ClLayerSupport.cpp                            | 18
 src/backends/cl/ClLayerSupport.hpp                            |  5
 src/backends/cl/ClWorkloadFactory.cpp                         |  5
 src/backends/cl/backend.mk                                    |  1
 src/backends/cl/test/ClEndToEndTests.cpp                      | 37
 src/backends/cl/test/ClLayerTests.cpp                         | 18
 src/backends/cl/workloads/CMakeLists.txt                      |  2
 src/backends/cl/workloads/ClTileWorkload.cpp                  | 50
 src/backends/cl/workloads/ClTileWorkload.hpp                  | 27
 src/backends/cl/workloads/ClWorkloads.hpp                     |  1
12 files changed, 207 insertions, 31 deletions
diff --git a/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp index 4047e5ad8e..03b76849e1 100644 --- a/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp +++ b/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp @@ -32,25 +32,25 @@ void TileEndToEnd(const std::vector<BackendId>& backends) int32_t qOffset = 0; bool qConst = true; - const TensorShape inputTensorShape = { 2, 3 }; - const TensorShape outputTensorShape = { 4, 6 }; + const TensorShape inputTensorShape = { 6 }; + const TensorShape outputTensorShape = { 30 }; TensorInfo inputInfo (inputTensorShape, ArmnnType, qScale, qOffset, qConst); TensorInfo outputInfo (outputTensorShape, ArmnnType,qScale, qOffset); std::vector<T> inputData = armnnUtils::QuantizedVector<T>({ - 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f + 65, 144, 91, 161, 56, 73 }, qScale, qOffset); std::vector<T> expectedOutputData = armnnUtils::QuantizedVector<T>({ - 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, - 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, 3.f, 4.f, 5.f + 65, 144, 91, 161, 56, 73, + 65, 144, 91, 161, 56, 73, + 65, 144, 91, 161, 56, 73, + 65, 144, 91, 161, 56, 73, + 65, 144, 91, 161, 56, 73 }, qScale, qOffset); - auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 2, 2 }); + auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 5 }); INetworkPtr network = CreateTileNetwork(descriptor, inputInfo, outputInfo); std::map<int, std::vector<T>> inputTensor = { { 0, inputData } }; diff --git a/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp index 0b13bba425..c61da26bb2 100644 --- a/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp +++ b/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp @@ -110,13 +110,13 @@ LayerTestResult<T, 2> Tile2dTest(armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& 
memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory) { - auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 2, 2 }); + auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 2, 5 }); float qScale = 1.0f; int32_t qOffset = 0; armnn::TensorShape inputShape = { 2, 3 }; - armnn::TensorShape outputShape = { 4, 6 }; + armnn::TensorShape outputShape = { 4, 15 }; armnn::TensorInfo inputInfo(inputShape, ArmnnType); armnn::TensorInfo outputInfo(outputShape, ArmnnType); @@ -130,11 +130,11 @@ LayerTestResult<T, 2> Tile2dTest(armnn::IWorkloadFactory& workloadFactory, std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>( { - 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, + 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, - 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, 3.f, 4.f, 5.f + 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f }, qScale, qOffset); return TileTestImpl<T, 2>(workloadFactory, @@ -152,37 +152,49 @@ LayerTestResult<T, 3> Tile3dTest(armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory) { - auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 1, 2, 1 }); + auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 1, 5, 5 }); float qScale = 1.0f; int32_t qOffset = 0; - armnn::TensorShape inputShape = { 2, 2, 3 }; - armnn::TensorShape outputShape = { 2, 4, 3 }; + armnn::TensorShape inputShape = { 2, 2, 2 }; + armnn::TensorShape outputShape = { 2, 10, 10 }; armnn::TensorInfo inputInfo(inputShape, ArmnnType); armnn::TensorInfo outputInfo(outputShape, ArmnnType); std::vector<T> input = armnnUtils::QuantizedVector<T>( { - 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, + 1.1f, 2.12f, + 3.3f, 
4.12234f, - 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f + 1.1f, 2.12f, + 3.3f, 4.12234f, }, qScale, qOffset); std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>( { - 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, - 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, - - 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f, - 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, + 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, + 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f }, qScale, qOffset); return TileTestImpl<T, 3>(workloadFactory, diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index ff2b576f3d..60dab0538a 100644 --- 
a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -81,6 +81,7 @@ #include "workloads/ClStackWorkload.hpp" #include "workloads/ClStridedSliceWorkload.hpp" #include "workloads/ClSubtractionWorkload.hpp" +#include "workloads/ClTileWorkload.hpp" #include "workloads/ClTransposeConvolution2dWorkload.hpp" #include "workloads/ClTransposeWorkload.hpp" #include "workloads/ClUnidirectionalSequenceLstmFloatWorkload.hpp" @@ -627,6 +628,11 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type, ARMNN_NO_DEPRECATE_WARN_BEGIN return IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); ARMNN_NO_DEPRECATE_WARN_END + case LayerType::Tile: + return IsTileSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const TileDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Transpose: return IsTransposeSupported(infos[0], infos[1], @@ -1527,6 +1533,18 @@ bool ClLayerSupport::IsSubtractionSupported(const TensorInfo& input0, nullptr); } +bool ClLayerSupport::IsTileSupported(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClTileWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + bool ClLayerSupport::IsTransposeConvolution2dSupported(const TensorInfo& input, const TensorInfo& output, const TransposeConvolution2dDescriptor& descriptor, diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp index 737ab7a850..9a8f896cab 100644 --- a/src/backends/cl/ClLayerSupport.hpp +++ b/src/backends/cl/ClLayerSupport.hpp @@ -343,6 +343,11 @@ public: const Optional<TensorInfo>& biases, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; + bool IsTileSupported(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; + bool 
IsTransposeSupported(const TensorInfo& input, const TensorInfo& output, const TransposeDescriptor& descriptor, diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index 7b4e9be908..da05f67711 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -755,6 +755,11 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type, auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor); return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor, info, m_CLCompileContext); } + case LayerType::Tile: + { + auto tileQueueDescriptor = PolymorphicDowncast<const TileQueueDescriptor*>(&descriptor); + return MakeWorkload<ClTileWorkload>(*tileQueueDescriptor, info, m_CLCompileContext); + } case LayerType::Transpose : { auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor); diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 5ceab700e4..34993059c2 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -90,6 +90,7 @@ BACKEND_SOURCES := \ workloads/ClStackWorkload.cpp \ workloads/ClStridedSliceWorkload.cpp \ workloads/ClSubtractionWorkload.cpp \ + workloads/ClTileWorkload.cpp \ workloads/ClTransposeConvolution2dWorkload.cpp \ workloads/ClTransposeWorkload.cpp \ workloads/ClUnidirectionalSequenceLstmFloatWorkload.cpp diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp index 2d7a813e60..50ba1dbb99 100644 --- a/src/backends/cl/test/ClEndToEndTests.cpp +++ b/src/backends/cl/test/ClEndToEndTests.cpp @@ -26,6 +26,7 @@ #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp> #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp> #include <backendsCommon/test/SubgraphUtilsTest.hpp> +#include <backendsCommon/test/TileEndToEndTestImpl.hpp> #include 
<backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp> #include <backendsCommon/test/TransposeEndToEndTestImpl.hpp> @@ -466,6 +467,42 @@ TEST_CASE("ClSplitter4dDim3EndToEndUint8Test") Splitter4dDim3EndToEnd<armnn::DataType::QAsymmU8>(clDefaultBackends); } +// Tile +TEST_CASE("ClTileEndToEndFloat32") +{ + TileEndToEnd<armnn::DataType::Float32>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndFloat16") +{ + TileEndToEnd<armnn::DataType::Float16>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQAsymmS8") +{ + TileEndToEnd<armnn::DataType::QAsymmS8>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQAsymmU8") +{ + TileEndToEnd<armnn::DataType::QAsymmU8>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQSymmS8") +{ + TileEndToEnd<armnn::DataType::QSymmS8>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQSymmS16") +{ + TileEndToEnd<armnn::DataType::QSymmS16>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndSigned32") +{ + TileEndToEnd<armnn::DataType::Signed32>(clDefaultBackends); +} + // TransposeConvolution2d TEST_CASE("ClTransposeConvolution2dEndToEndFloatNchwTest") { diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp index 93ad653441..d8d451ead3 100644 --- a/src/backends/cl/test/ClLayerTests.cpp +++ b/src/backends/cl/test/ClLayerTests.cpp @@ -2033,6 +2033,24 @@ ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleInt16, ClContextControlFixture, Dequanti ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleUint8ToFp16, ClContextControlFixture, DequantizeSimpleUint8ToFp16Test) ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleInt16ToFp16, ClContextControlFixture, DequantizeSimpleInt16ToFp16Test) +//Tile +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat32, ClContextControlFixture, Tile1dTest<DataType::Float32>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat32, ClContextControlFixture, Tile2dTest<DataType::Float32>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat32, ClContextControlFixture, Tile3dTest<DataType::Float32>) 
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat32, ClContextControlFixture, Tile4dTest<DataType::Float32>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat16, ClContextControlFixture, Tile1dTest<DataType::Float16>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat16, ClContextControlFixture, Tile2dTest<DataType::Float16>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat16, ClContextControlFixture, Tile3dTest<DataType::Float16>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat16, ClContextControlFixture, Tile4dTest<DataType::Float16>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestInt8, ClContextControlFixture, Tile1dTest<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestInt8, ClContextControlFixture, Tile2dTest<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestInt8, ClContextControlFixture, Tile3dTest<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestInt8, ClContextControlFixture, Tile4dTest<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestUint8, ClContextControlFixture, Tile1dTest<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestUint8, ClContextControlFixture, Tile2dTest<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestUint8, ClContextControlFixture, Tile3dTest<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestUint8, ClContextControlFixture, Tile4dTest<DataType::QAsymmU8>) + // Transpose ARMNN_AUTO_TEST_FIXTURE_WITH_THF( SimpleTransposeFloat32, ClContextControlFixture, SimpleTransposeTest<DataType::Float32>) diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt index 030d71988f..cb16ab19e8 100644 --- a/src/backends/cl/workloads/CMakeLists.txt +++ b/src/backends/cl/workloads/CMakeLists.txt @@ -131,6 +131,8 @@ list(APPEND armnnClBackendWorkloads_sources ClStridedSliceWorkload.hpp ClSubtractionWorkload.cpp ClSubtractionWorkload.hpp + ClTileWorkload.cpp + ClTileWorkload.hpp ClTransposeConvolution2dWorkload.cpp 
ClTransposeConvolution2dWorkload.hpp ClTransposeWorkload.cpp diff --git a/src/backends/cl/workloads/ClTileWorkload.cpp b/src/backends/cl/workloads/ClTileWorkload.cpp new file mode 100644 index 0000000000..2c2f63faac --- /dev/null +++ b/src/backends/cl/workloads/ClTileWorkload.cpp @@ -0,0 +1,50 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClTileWorkload.hpp" +#include "ClWorkloadUtils.hpp" +#include <aclCommon/ArmComputeUtils.hpp> +#include <cl/ClTensorHandle.hpp> +#include <vector> +#include <algorithm> + +using namespace armnn::armcomputetensorutils; +namespace armnn +{ +arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + + std::vector<uint32_t> aclMultiples = descriptor.m_Multiples; + std::reverse(aclMultiples.begin(),aclMultiples.end()); + + return arm_compute::CLTile::validate(&aclInput, &aclOutput, aclMultiples); +} + +ClTileWorkload::ClTileWorkload(const armnn::TileQueueDescriptor& descriptor, + const armnn::WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) + : BaseWorkload<TileQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClTileWorkload", 1, 1); + + std::vector<uint32_t> aclMultiples = descriptor.m_Parameters.m_Multiples; + std::reverse(aclMultiples.begin(),aclMultiples.end()); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(clCompileContext, &input, &output, aclMultiples); +} + +void ClTileWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClTileWorkload_Execute", this->GetGuid()); + m_Layer.run(); +} + +} 
//namespace armnn
\ No newline at end of file diff --git a/src/backends/cl/workloads/ClTileWorkload.hpp b/src/backends/cl/workloads/ClTileWorkload.hpp new file mode 100644 index 0000000000..0f68559f85 --- /dev/null +++ b/src/backends/cl/workloads/ClTileWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once +#include "ClBaseWorkload.hpp" +#include <arm_compute/runtime/CL/functions/CLTile.h> + +namespace armnn +{ +arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor); + +class ClTileWorkload : public BaseWorkload<TileQueueDescriptor> { +public: + ClTileWorkload(const TileQueueDescriptor &descriptor, + const WorkloadInfo &info, + const arm_compute::CLCompileContext& clCompileContext); + void Execute() const override; + +private: + mutable arm_compute::CLTile m_Layer; +}; + +} //namespace armnn
\ No newline at end of file diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp index d862aab949..cec8706911 100644 --- a/src/backends/cl/workloads/ClWorkloads.hpp +++ b/src/backends/cl/workloads/ClWorkloads.hpp @@ -68,6 +68,7 @@ #include "ClSubtractionWorkload.hpp" #include "ClConvertFp16ToFp32Workload.hpp" #include "ClConvertFp32ToFp16Workload.hpp" +#include "ClTileWorkload.hpp" #include "ClTransposeConvolution2dWorkload.hpp" #include "ClTransposeWorkload.hpp" #include "ClUnidirectionalSequenceLstmFloatWorkload.hpp" |