author    Cian McGriskin <cian.mcgriskin@arm.com>  2023-07-26 11:52:47 +0100
committer Nikhil Raj <nikhil.raj@arm.com>          2023-08-04 20:48:03 +0100
commit    3b3dcbf0321fadcb2b7b5b550a4d03f510d7cb7b (patch)
tree      9658a4d20fc73bfb8498aa8617202c49406f9eaa
parent    88bef3acd73069df13256e818f32555b19e1a8b4 (diff)
download  armnn-3b3dcbf0321fadcb2b7b5b550a4d03f510d7cb7b.tar.gz
IVGCVSW-2291 TILE Operator CL Implementation
* Added Tile Operator Implementation to CL
* Added calls to the existing UnitTests
* Added Documentation

Signed-off-by: Cian McGriskin <cian.mcgriskin@arm.com>
Change-Id: If7d25c7aa669c24e7816e5d445c7a3b9ce6972d4
-rw-r--r--  delegate/test/TileTest.cpp                                    |  2
-rw-r--r--  docs/02_operator_list.dox                                     | 11
-rw-r--r--  src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp     | 18
-rw-r--r--  src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp  | 56
-rw-r--r--  src/backends/cl/ClLayerSupport.cpp                            | 18
-rw-r--r--  src/backends/cl/ClLayerSupport.hpp                            |  5
-rw-r--r--  src/backends/cl/ClWorkloadFactory.cpp                         |  5
-rw-r--r--  src/backends/cl/backend.mk                                    |  1
-rw-r--r--  src/backends/cl/test/ClEndToEndTests.cpp                      | 37
-rw-r--r--  src/backends/cl/test/ClLayerTests.cpp                         | 18
-rw-r--r--  src/backends/cl/workloads/CMakeLists.txt                      |  2
-rw-r--r--  src/backends/cl/workloads/ClTileWorkload.cpp                  | 50
-rw-r--r--  src/backends/cl/workloads/ClTileWorkload.hpp                  | 27
-rw-r--r--  src/backends/cl/workloads/ClWorkloads.hpp                     |  1
14 files changed, 216 insertions, 35 deletions
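
For orientation, a minimal sketch (not part of the commit) of how the new GpuAcc Tile path could be driven through the public Arm NN API. The AddTileLayer call and the exact wiring are assumptions inferred from the end-to-end tests touched below, so treat the snippet as illustrative rather than canonical:

#include <armnn/ArmNN.hpp>
#include <vector>

int main()
{
    using namespace armnn;

    // Repeat each of the two dimensions twice: { 2, 3 } -> { 4, 6 }.
    TileDescriptor descriptor(std::vector<uint32_t>{ 2, 2 });
    TensorInfo inputInfo ({ 2, 3 }, DataType::Float32);
    TensorInfo outputInfo({ 4, 6 }, DataType::Float32);

    INetworkPtr network = INetwork::Create();
    IConnectableLayer* input  = network->AddInputLayer(0);
    IConnectableLayer* tile   = network->AddTileLayer(descriptor, "tile"); // assumed layer-add API
    IConnectableLayer* output = network->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(tile->GetInputSlot(0));
    tile->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(inputInfo);
    tile->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Optimize for the CL backend and load it into a runtime.
    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());
    IOptimizedNetworkPtr optNet = Optimize(*network, { BackendId("GpuAcc") },
                                           runtime->GetDeviceSpec());
    NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optNet));
    return 0;
}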
diff --git a/delegate/test/TileTest.cpp b/delegate/test/TileTest.cpp
index 2e20859f5b..32f5610bce 100644
--- a/delegate/test/TileTest.cpp
+++ b/delegate/test/TileTest.cpp
@@ -53,7 +53,6 @@ void TileFloat32Test(std::vector<armnn::BackendId>& backends)
expectedOutputShape);
}
-#if defined(TILE_GPUACC)
TEST_SUITE("TileTests_GpuAccTests")
{
@@ -64,7 +63,6 @@ TEST_SUITE("TileTests_GpuAccTests")
}
} // TEST_SUITE("Tile_Float32_GpuAcc_Test")
-#endif
TEST_SUITE("TileTests_CpuAccTests")
{
diff --git a/docs/02_operator_list.dox b/docs/02_operator_list.dox
index e3c9f8c1f2..a37b6fa3a7 100644
--- a/docs/02_operator_list.dox
+++ b/docs/02_operator_list.dox
@@ -3328,6 +3328,7 @@ where N = batches, C = channels, H = height, W = width
<tr><td>FLOAT32
<tr><td>QASYMMS8
<tr><td>QASYMMU8
+ <tr><td>QSYMMS8
<tr><td>QSYMMS16
<tr><td>SIGNED32
</table>
@@ -3352,12 +3353,18 @@ where N = batches, C = channels, H = height, W = width
<td>GpuAcc
<td>
<ul>
- <li>None
+ <li>All
</ul>
<td>
<table>
<tr><th>
- <tr><td>None
+ <tr><td>FLOAT16
+ <tr><td>FLOAT32
+ <tr><td>QASYMMS8
+ <tr><td>QASYMMU8
+ <tr><td>QSYMMS8
+ <tr><td>QSYMMS16
+ <tr><td>SIGNED32
</table>
<tr>
<td rowspan="3">TransposeConvolution2dLayer
diff --git a/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp
index 4047e5ad8e..03b76849e1 100644
--- a/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/TileEndToEndTestImpl.hpp
@@ -32,25 +32,25 @@ void TileEndToEnd(const std::vector<BackendId>& backends)
int32_t qOffset = 0;
bool qConst = true;
- const TensorShape inputTensorShape = { 2, 3 };
- const TensorShape outputTensorShape = { 4, 6 };
+ const TensorShape inputTensorShape = { 6 };
+ const TensorShape outputTensorShape = { 30 };
TensorInfo inputInfo (inputTensorShape, ArmnnType, qScale, qOffset, qConst);
TensorInfo outputInfo (outputTensorShape, ArmnnType,qScale, qOffset);
std::vector<T> inputData = armnnUtils::QuantizedVector<T>({
- 0.f, 1.f, 2.f,
- 3.f, 4.f, 5.f
+ 65, 144, 91, 161, 56, 73
}, qScale, qOffset);
std::vector<T> expectedOutputData = armnnUtils::QuantizedVector<T>({
- 0.f, 1.f, 2.f, 0.f, 1.f, 2.f,
- 3.f, 4.f, 5.f, 3.f, 4.f, 5.f,
- 0.f, 1.f, 2.f, 0.f, 1.f, 2.f,
- 3.f, 4.f, 5.f, 3.f, 4.f, 5.f
+ 65, 144, 91, 161, 56, 73,
+ 65, 144, 91, 161, 56, 73,
+ 65, 144, 91, 161, 56, 73,
+ 65, 144, 91, 161, 56, 73,
+ 65, 144, 91, 161, 56, 73
}, qScale, qOffset);
- auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 2, 2 });
+ auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 5 });
INetworkPtr network = CreateTileNetwork(descriptor, inputInfo, outputInfo);
std::map<int, std::vector<T>> inputTensor = { { 0, inputData } };
diff --git a/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp
index 0b13bba425..c61da26bb2 100644
--- a/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/TileTestImpl.cpp
@@ -110,13 +110,13 @@ LayerTestResult<T, 2> Tile2dTest(armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
const armnn::ITensorHandleFactory& tensorHandleFactory)
{
- auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 2, 2 });
+ auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 2, 5 });
float qScale = 1.0f;
int32_t qOffset = 0;
armnn::TensorShape inputShape = { 2, 3 };
- armnn::TensorShape outputShape = { 4, 6 };
+ armnn::TensorShape outputShape = { 4, 15 };
armnn::TensorInfo inputInfo(inputShape, ArmnnType);
armnn::TensorInfo outputInfo(outputShape, ArmnnType);
@@ -130,11 +130,11 @@ LayerTestResult<T, 2> Tile2dTest(armnn::IWorkloadFactory& workloadFactory,
std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>(
{
- 0.f, 1.f, 2.f, 0.f, 1.f, 2.f,
- 3.f, 4.f, 5.f, 3.f, 4.f, 5.f,
+ 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f,
- 0.f, 1.f, 2.f, 0.f, 1.f, 2.f,
- 3.f, 4.f, 5.f, 3.f, 4.f, 5.f
+ 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f, 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f, 3.f, 4.f, 5.f
}, qScale, qOffset);
return TileTestImpl<T, 2>(workloadFactory,
@@ -152,37 +152,49 @@ LayerTestResult<T, 3> Tile3dTest(armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
const armnn::ITensorHandleFactory& tensorHandleFactory)
{
- auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 1, 2, 1 });
+ auto descriptor = armnn::TileDescriptor(std::vector<uint32_t>{ 1, 5, 5 });
float qScale = 1.0f;
int32_t qOffset = 0;
- armnn::TensorShape inputShape = { 2, 2, 3 };
- armnn::TensorShape outputShape = { 2, 4, 3 };
+ armnn::TensorShape inputShape = { 2, 2, 2 };
+ armnn::TensorShape outputShape = { 2, 10, 10 };
armnn::TensorInfo inputInfo(inputShape, ArmnnType);
armnn::TensorInfo outputInfo(outputShape, ArmnnType);
std::vector<T> input = armnnUtils::QuantizedVector<T>(
{
- 0.f, 1.f, 2.f,
- 3.f, 4.f, 5.f,
+ 1.1f, 2.12f,
+ 3.3f, 4.12234f,
- 6.f, 7.f, 8.f,
- 9.f, 10.f, 11.f
+ 1.1f, 2.12f,
+ 3.3f, 4.12234f,
}, qScale, qOffset);
std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>(
{
- 0.f, 1.f, 2.f,
- 3.f, 4.f, 5.f,
- 0.f, 1.f, 2.f,
- 3.f, 4.f, 5.f,
-
- 6.f, 7.f, 8.f,
- 9.f, 10.f, 11.f,
- 6.f, 7.f, 8.f,
- 9.f, 10.f, 11.f
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f,
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f,
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f,
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f,
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f,
+
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f,
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f,
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f,
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f,
+ 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f, 1.1f, 2.12f,
+ 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f, 3.3f, 4.12234f
}, qScale, qOffset);
return TileTestImpl<T, 3>(workloadFactory,
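
The revised shapes in these hunks follow directly from the Tile semantics: each output dimension is the matching input dimension multiplied by its entry in m_Multiples. A hypothetical stand-alone helper (not Arm NN code) that reproduces the arithmetic behind the updated test expectations:

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical helper, not part of Arm NN: output dim i = input dim i * multiples[i].
std::vector<unsigned int> TiledShape(const std::vector<unsigned int>& inputShape,
                                     const std::vector<uint32_t>& multiples)
{
    std::vector<unsigned int> outputShape(inputShape.size());
    for (std::size_t i = 0; i < inputShape.size(); ++i)
    {
        outputShape[i] = inputShape[i] * multiples[i];
    }
    return outputShape;
}

// TiledShape({ 2, 3 },    { 2, 5 })    -> { 4, 15 }     (Tile2dTest above)
// TiledShape({ 2, 2, 2 }, { 1, 5, 5 }) -> { 2, 10, 10 } (Tile3dTest above)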
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index ff2b576f3d..60dab0538a 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -81,6 +81,7 @@
#include "workloads/ClStackWorkload.hpp"
#include "workloads/ClStridedSliceWorkload.hpp"
#include "workloads/ClSubtractionWorkload.hpp"
+#include "workloads/ClTileWorkload.hpp"
#include "workloads/ClTransposeConvolution2dWorkload.hpp"
#include "workloads/ClTransposeWorkload.hpp"
#include "workloads/ClUnidirectionalSequenceLstmFloatWorkload.hpp"
@@ -627,6 +628,11 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type,
ARMNN_NO_DEPRECATE_WARN_BEGIN
return IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
ARMNN_NO_DEPRECATE_WARN_END
+ case LayerType::Tile:
+ return IsTileSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const TileDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Transpose:
return IsTransposeSupported(infos[0],
infos[1],
@@ -1527,6 +1533,18 @@ bool ClLayerSupport::IsSubtractionSupported(const TensorInfo& input0,
nullptr);
}
+bool ClLayerSupport::IsTileSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const TileDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClTileWorkloadValidate,
+ reasonIfUnsupported,
+ input,
+ output,
+ descriptor);
+}
+
bool ClLayerSupport::IsTransposeConvolution2dSupported(const TensorInfo& input,
const TensorInfo& output,
const TransposeConvolution2dDescriptor& descriptor,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index 737ab7a850..9a8f896cab 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -343,6 +343,11 @@ public:
const Optional<TensorInfo>& biases,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
+ bool IsTileSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const TileDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
+
bool IsTransposeSupported(const TensorInfo& input,
const TensorInfo& output,
const TransposeDescriptor& descriptor,
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 7b4e9be908..da05f67711 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -755,6 +755,11 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type,
auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor, info, m_CLCompileContext);
}
+ case LayerType::Tile:
+ {
+ auto tileQueueDescriptor = PolymorphicDowncast<const TileQueueDescriptor*>(&descriptor);
+ return MakeWorkload<ClTileWorkload>(*tileQueueDescriptor, info, m_CLCompileContext);
+ }
case LayerType::Transpose :
{
auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 5ceab700e4..34993059c2 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -90,6 +90,7 @@ BACKEND_SOURCES := \
workloads/ClStackWorkload.cpp \
workloads/ClStridedSliceWorkload.cpp \
workloads/ClSubtractionWorkload.cpp \
+ workloads/ClTileWorkload.cpp \
workloads/ClTransposeConvolution2dWorkload.cpp \
workloads/ClTransposeWorkload.cpp \
workloads/ClUnidirectionalSequenceLstmFloatWorkload.cpp
diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp
index 2d7a813e60..50ba1dbb99 100644
--- a/src/backends/cl/test/ClEndToEndTests.cpp
+++ b/src/backends/cl/test/ClEndToEndTests.cpp
@@ -26,6 +26,7 @@
#include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
#include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
#include <backendsCommon/test/SubgraphUtilsTest.hpp>
+#include <backendsCommon/test/TileEndToEndTestImpl.hpp>
#include <backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp>
#include <backendsCommon/test/TransposeEndToEndTestImpl.hpp>
@@ -466,6 +467,42 @@ TEST_CASE("ClSplitter4dDim3EndToEndUint8Test")
Splitter4dDim3EndToEnd<armnn::DataType::QAsymmU8>(clDefaultBackends);
}
+// Tile
+TEST_CASE("ClTileEndToEndFloat32")
+{
+ TileEndToEnd<armnn::DataType::Float32>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndFloat16")
+{
+ TileEndToEnd<armnn::DataType::Float16>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndQAsymmS8")
+{
+ TileEndToEnd<armnn::DataType::QAsymmS8>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndQAsymmU8")
+{
+ TileEndToEnd<armnn::DataType::QAsymmU8>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndQSymmS8")
+{
+ TileEndToEnd<armnn::DataType::QSymmS8>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndQSymmS16")
+{
+ TileEndToEnd<armnn::DataType::QSymmS16>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndSigned32")
+{
+ TileEndToEnd<armnn::DataType::Signed32>(clDefaultBackends);
+}
+
// TransposeConvolution2d
TEST_CASE("ClTransposeConvolution2dEndToEndFloatNchwTest")
{
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index 93ad653441..d8d451ead3 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -2033,6 +2033,24 @@ ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleInt16, ClContextControlFixture, Dequanti
ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleUint8ToFp16, ClContextControlFixture, DequantizeSimpleUint8ToFp16Test)
ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleInt16ToFp16, ClContextControlFixture, DequantizeSimpleInt16ToFp16Test)
+//Tile
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat32, ClContextControlFixture, Tile1dTest<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat32, ClContextControlFixture, Tile2dTest<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat32, ClContextControlFixture, Tile3dTest<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat32, ClContextControlFixture, Tile4dTest<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat16, ClContextControlFixture, Tile1dTest<DataType::Float16>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat16, ClContextControlFixture, Tile2dTest<DataType::Float16>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat16, ClContextControlFixture, Tile3dTest<DataType::Float16>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat16, ClContextControlFixture, Tile4dTest<DataType::Float16>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestInt8, ClContextControlFixture, Tile1dTest<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestInt8, ClContextControlFixture, Tile2dTest<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestInt8, ClContextControlFixture, Tile3dTest<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestInt8, ClContextControlFixture, Tile4dTest<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestUint8, ClContextControlFixture, Tile1dTest<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestUint8, ClContextControlFixture, Tile2dTest<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestUint8, ClContextControlFixture, Tile3dTest<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestUint8, ClContextControlFixture, Tile4dTest<DataType::QAsymmU8>)
+
// Transpose
ARMNN_AUTO_TEST_FIXTURE_WITH_THF(
SimpleTransposeFloat32, ClContextControlFixture, SimpleTransposeTest<DataType::Float32>)
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index 030d71988f..cb16ab19e8 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -131,6 +131,8 @@ list(APPEND armnnClBackendWorkloads_sources
ClStridedSliceWorkload.hpp
ClSubtractionWorkload.cpp
ClSubtractionWorkload.hpp
+ ClTileWorkload.cpp
+ ClTileWorkload.hpp
ClTransposeConvolution2dWorkload.cpp
ClTransposeConvolution2dWorkload.hpp
ClTransposeWorkload.cpp
diff --git a/src/backends/cl/workloads/ClTileWorkload.cpp b/src/backends/cl/workloads/ClTileWorkload.cpp
new file mode 100644
index 0000000000..2c2f63faac
--- /dev/null
+++ b/src/backends/cl/workloads/ClTileWorkload.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClTileWorkload.hpp"
+#include "ClWorkloadUtils.hpp"
+#include <aclCommon/ArmComputeUtils.hpp>
+#include <cl/ClTensorHandle.hpp>
+#include <vector>
+#include <algorithm>
+
+using namespace armnn::armcomputetensorutils;
+namespace armnn
+{
+arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TileDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+
+ std::vector<uint32_t> aclMultiples = descriptor.m_Multiples;
+ std::reverse(aclMultiples.begin(),aclMultiples.end());
+
+ return arm_compute::CLTile::validate(&aclInput, &aclOutput, aclMultiples);
+}
+
+ClTileWorkload::ClTileWorkload(const armnn::TileQueueDescriptor& descriptor,
+ const armnn::WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
+ : BaseWorkload<TileQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClTileWorkload", 1, 1);
+
+ std::vector<uint32_t> aclMultiples = descriptor.m_Parameters.m_Multiples;
+ std::reverse(aclMultiples.begin(),aclMultiples.end());
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(clCompileContext, &input, &output, aclMultiples);
+}
+
+void ClTileWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClTileWorkload_Execute", this->GetGuid());
+ m_Layer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
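
Both ClTileWorkloadValidate and the workload constructor above reverse m_Multiples before passing them to CLTile. The Compute Library indexes tensor dimensions innermost-first, the opposite of Arm NN's descriptor order, so each multiple has to be re-aligned with its dimension. An isolated sketch of the same transformation (illustrative only):

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative only: Arm NN lists multiples outermost-first, while the Compute
// Library indexes dimensions innermost-first, so the vector is reversed before
// it reaches arm_compute::CLTile.
std::vector<uint32_t> ToAclMultiples(std::vector<uint32_t> multiples)
{
    std::reverse(multiples.begin(), multiples.end());
    return multiples; // e.g. { 1, 5, 5 } (Arm NN order) -> { 5, 5, 1 } (ACL order)
}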
diff --git a/src/backends/cl/workloads/ClTileWorkload.hpp b/src/backends/cl/workloads/ClTileWorkload.hpp
new file mode 100644
index 0000000000..0f68559f85
--- /dev/null
+++ b/src/backends/cl/workloads/ClTileWorkload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+#include "ClBaseWorkload.hpp"
+#include <arm_compute/runtime/CL/functions/CLTile.h>
+
+namespace armnn
+{
+arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TileDescriptor& descriptor);
+
+class ClTileWorkload : public BaseWorkload<TileQueueDescriptor> {
+public:
+ ClTileWorkload(const TileQueueDescriptor &descriptor,
+ const WorkloadInfo &info,
+ const arm_compute::CLCompileContext& clCompileContext);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLTile m_Layer;
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index d862aab949..cec8706911 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -68,6 +68,7 @@
#include "ClSubtractionWorkload.hpp"
#include "ClConvertFp16ToFp32Workload.hpp"
#include "ClConvertFp32ToFp16Workload.hpp"
+#include "ClTileWorkload.hpp"
#include "ClTransposeConvolution2dWorkload.hpp"
#include "ClTransposeWorkload.hpp"
#include "ClUnidirectionalSequenceLstmFloatWorkload.hpp"