diff options
Diffstat (limited to 'src/backends/cl')
-rw-r--r-- | src/backends/cl/ClLayerSupport.cpp | 18 | ||||
-rw-r--r-- | src/backends/cl/ClLayerSupport.hpp | 5 | ||||
-rw-r--r-- | src/backends/cl/ClWorkloadFactory.cpp | 5 | ||||
-rw-r--r-- | src/backends/cl/backend.mk | 1 | ||||
-rw-r--r-- | src/backends/cl/test/ClEndToEndTests.cpp | 37 | ||||
-rw-r--r-- | src/backends/cl/test/ClLayerTests.cpp | 18 | ||||
-rw-r--r-- | src/backends/cl/workloads/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/backends/cl/workloads/ClTileWorkload.cpp | 50 | ||||
-rw-r--r-- | src/backends/cl/workloads/ClTileWorkload.hpp | 27 | ||||
-rw-r--r-- | src/backends/cl/workloads/ClWorkloads.hpp | 1 |
10 files changed, 164 insertions, 0 deletions
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index ff2b576f3d..60dab0538a 100644 --- a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -81,6 +81,7 @@ #include "workloads/ClStackWorkload.hpp" #include "workloads/ClStridedSliceWorkload.hpp" #include "workloads/ClSubtractionWorkload.hpp" +#include "workloads/ClTileWorkload.hpp" #include "workloads/ClTransposeConvolution2dWorkload.hpp" #include "workloads/ClTransposeWorkload.hpp" #include "workloads/ClUnidirectionalSequenceLstmFloatWorkload.hpp" @@ -627,6 +628,11 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type, ARMNN_NO_DEPRECATE_WARN_BEGIN return IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); ARMNN_NO_DEPRECATE_WARN_END + case LayerType::Tile: + return IsTileSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const TileDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Transpose: return IsTransposeSupported(infos[0], infos[1], @@ -1527,6 +1533,18 @@ bool ClLayerSupport::IsSubtractionSupported(const TensorInfo& input0, nullptr); } +bool ClLayerSupport::IsTileSupported(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClTileWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + bool ClLayerSupport::IsTransposeConvolution2dSupported(const TensorInfo& input, const TensorInfo& output, const TransposeConvolution2dDescriptor& descriptor, diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp index 737ab7a850..9a8f896cab 100644 --- a/src/backends/cl/ClLayerSupport.hpp +++ b/src/backends/cl/ClLayerSupport.hpp @@ -343,6 +343,11 @@ public: const Optional<TensorInfo>& biases, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; + bool IsTileSupported(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; + bool IsTransposeSupported(const TensorInfo& input, const TensorInfo& output, const TransposeDescriptor& descriptor, diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index 7b4e9be908..da05f67711 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -755,6 +755,11 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type, auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor); return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor, info, m_CLCompileContext); } + case LayerType::Tile: + { + auto tileQueueDescriptor = PolymorphicDowncast<const TileQueueDescriptor*>(&descriptor); + return MakeWorkload<ClTileWorkload>(*tileQueueDescriptor, info, m_CLCompileContext); + } case LayerType::Transpose : { auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor); diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 5ceab700e4..34993059c2 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -90,6 +90,7 @@ BACKEND_SOURCES := \ workloads/ClStackWorkload.cpp \ workloads/ClStridedSliceWorkload.cpp \ workloads/ClSubtractionWorkload.cpp \ + workloads/ClTileWorkload.cpp \ workloads/ClTransposeConvolution2dWorkload.cpp \ workloads/ClTransposeWorkload.cpp \ workloads/ClUnidirectionalSequenceLstmFloatWorkload.cpp diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp index 2d7a813e60..50ba1dbb99 100644 --- a/src/backends/cl/test/ClEndToEndTests.cpp +++ b/src/backends/cl/test/ClEndToEndTests.cpp @@ -26,6 +26,7 @@ #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp> #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp> #include <backendsCommon/test/SubgraphUtilsTest.hpp> +#include <backendsCommon/test/TileEndToEndTestImpl.hpp> #include <backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp> #include <backendsCommon/test/TransposeEndToEndTestImpl.hpp> @@ -466,6 +467,42 @@ TEST_CASE("ClSplitter4dDim3EndToEndUint8Test") Splitter4dDim3EndToEnd<armnn::DataType::QAsymmU8>(clDefaultBackends); } +// Tile +TEST_CASE("ClTileEndToEndFloat32") +{ + TileEndToEnd<armnn::DataType::Float32>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndFloat16") +{ + TileEndToEnd<armnn::DataType::Float16>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQAsymmS8") +{ + TileEndToEnd<armnn::DataType::QAsymmS8>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQAsymmU8") +{ + TileEndToEnd<armnn::DataType::QAsymmU8>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQSymmS8") +{ + TileEndToEnd<armnn::DataType::QSymmS8>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndQSymmS16") +{ + TileEndToEnd<armnn::DataType::QSymmS16>(clDefaultBackends); +} + +TEST_CASE("ClTileEndToEndSigned32") +{ + TileEndToEnd<armnn::DataType::Signed32>(clDefaultBackends); +} + // TransposeConvolution2d TEST_CASE("ClTransposeConvolution2dEndToEndFloatNchwTest") { diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp index 93ad653441..d8d451ead3 100644 --- a/src/backends/cl/test/ClLayerTests.cpp +++ b/src/backends/cl/test/ClLayerTests.cpp @@ -2033,6 +2033,24 @@ ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleInt16, ClContextControlFixture, Dequanti ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleUint8ToFp16, ClContextControlFixture, DequantizeSimpleUint8ToFp16Test) ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleInt16ToFp16, ClContextControlFixture, DequantizeSimpleInt16ToFp16Test) +//Tile +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat32, ClContextControlFixture, Tile1dTest<DataType::Float32>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat32, ClContextControlFixture, Tile2dTest<DataType::Float32>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat32, ClContextControlFixture, Tile3dTest<DataType::Float32>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat32, ClContextControlFixture, Tile4dTest<DataType::Float32>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat16, ClContextControlFixture, Tile1dTest<DataType::Float16>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat16, ClContextControlFixture, Tile2dTest<DataType::Float16>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat16, ClContextControlFixture, Tile3dTest<DataType::Float16>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat16, ClContextControlFixture, Tile4dTest<DataType::Float16>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestInt8, ClContextControlFixture, Tile1dTest<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestInt8, ClContextControlFixture, Tile2dTest<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestInt8, ClContextControlFixture, Tile3dTest<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestInt8, ClContextControlFixture, Tile4dTest<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestUint8, ClContextControlFixture, Tile1dTest<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestUint8, ClContextControlFixture, Tile2dTest<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestUint8, ClContextControlFixture, Tile3dTest<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestUint8, ClContextControlFixture, Tile4dTest<DataType::QAsymmU8>) + // Transpose ARMNN_AUTO_TEST_FIXTURE_WITH_THF( SimpleTransposeFloat32, ClContextControlFixture, SimpleTransposeTest<DataType::Float32>) diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt index 030d71988f..cb16ab19e8 100644 --- a/src/backends/cl/workloads/CMakeLists.txt +++ b/src/backends/cl/workloads/CMakeLists.txt @@ -131,6 +131,8 @@ list(APPEND armnnClBackendWorkloads_sources ClStridedSliceWorkload.hpp ClSubtractionWorkload.cpp ClSubtractionWorkload.hpp + ClTileWorkload.cpp + ClTileWorkload.hpp ClTransposeConvolution2dWorkload.cpp ClTransposeConvolution2dWorkload.hpp ClTransposeWorkload.cpp diff --git a/src/backends/cl/workloads/ClTileWorkload.cpp b/src/backends/cl/workloads/ClTileWorkload.cpp new file mode 100644 index 0000000000..2c2f63faac --- /dev/null +++ b/src/backends/cl/workloads/ClTileWorkload.cpp @@ -0,0 +1,50 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClTileWorkload.hpp" +#include "ClWorkloadUtils.hpp" +#include <aclCommon/ArmComputeUtils.hpp> +#include <cl/ClTensorHandle.hpp> +#include <vector> +#include <algorithm> + +using namespace armnn::armcomputetensorutils; +namespace armnn +{ +arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + + std::vector<uint32_t> aclMultiples = descriptor.m_Multiples; + std::reverse(aclMultiples.begin(),aclMultiples.end()); + + return arm_compute::CLTile::validate(&aclInput, &aclOutput, aclMultiples); +} + +ClTileWorkload::ClTileWorkload(const armnn::TileQueueDescriptor& descriptor, + const armnn::WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) + : BaseWorkload<TileQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClTileWorkload", 1, 1); + + std::vector<uint32_t> aclMultiples = descriptor.m_Parameters.m_Multiples; + std::reverse(aclMultiples.begin(),aclMultiples.end()); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(clCompileContext, &input, &output, aclMultiples); +} + +void ClTileWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClTileWorkload_Execute", this->GetGuid()); + m_Layer.run(); +} + +} //namespace armnn
\ No newline at end of file diff --git a/src/backends/cl/workloads/ClTileWorkload.hpp b/src/backends/cl/workloads/ClTileWorkload.hpp new file mode 100644 index 0000000000..0f68559f85 --- /dev/null +++ b/src/backends/cl/workloads/ClTileWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once +#include "ClBaseWorkload.hpp" +#include <arm_compute/runtime/CL/functions/CLTile.h> + +namespace armnn +{ +arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TileDescriptor& descriptor); + +class ClTileWorkload : public BaseWorkload<TileQueueDescriptor> { +public: + ClTileWorkload(const TileQueueDescriptor &descriptor, + const WorkloadInfo &info, + const arm_compute::CLCompileContext& clCompileContext); + void Execute() const override; + +private: + mutable arm_compute::CLTile m_Layer; +}; + +} //namespace armnn
\ No newline at end of file diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp index d862aab949..cec8706911 100644 --- a/src/backends/cl/workloads/ClWorkloads.hpp +++ b/src/backends/cl/workloads/ClWorkloads.hpp @@ -68,6 +68,7 @@ #include "ClSubtractionWorkload.hpp" #include "ClConvertFp16ToFp32Workload.hpp" #include "ClConvertFp32ToFp16Workload.hpp" +#include "ClTileWorkload.hpp" #include "ClTransposeConvolution2dWorkload.hpp" #include "ClTransposeWorkload.hpp" #include "ClUnidirectionalSequenceLstmFloatWorkload.hpp" |