aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Jackson <matthew.jackson@arm.com>2019-07-11 12:07:09 +0100
committerÁron Virginás-Tar <aron.virginas-tar@arm.com>2019-07-17 14:31:02 +0000
commit81e601c5a5ebf3de3dd6418942708158de50252a (patch)
tree48307f6d49639d7bc9bfa2db96a2de33d1095861
parent01bfd1781a18508577b9135408465ee76f346ae5 (diff)
downloadarmnn-81e601c5a5ebf3de3dd6418942708158de50252a.tar.gz
IVGCVSW-3419 Add reference workload support for the new Stack layer
* Added reference workload for the Stack layer * Added factory methods * Added validation support * Added unit tests Signed-off-by: Matthew Jackson <matthew.jackson@arm.com> Change-Id: Ib14b72c15f53a2a2ca152afc357ce2aa405ccc88
-rw-r--r--include/armnn/ILayerSupport.hpp2
-rw-r--r--src/armnn/test/CreateWorkload.hpp47
-rw-r--r--src/backends/backendsCommon/LayerSupportBase.cpp2
-rw-r--r--src/backends/backendsCommon/LayerSupportBase.hpp2
-rw-r--r--src/backends/backendsCommon/test/LayerTests.hpp386
-rw-r--r--src/backends/reference/RefLayerSupport.cpp31
-rw-r--r--src/backends/reference/RefLayerSupport.hpp5
-rw-r--r--src/backends/reference/RefWorkloadFactory.cpp10
-rw-r--r--src/backends/reference/RefWorkloadFactory.hpp3
-rw-r--r--src/backends/reference/backend.mk2
-rw-r--r--src/backends/reference/test/RefCreateWorkloadTests.cpp37
-rw-r--r--src/backends/reference/test/RefLayerTests.cpp7
-rw-r--r--src/backends/reference/workloads/CMakeLists.txt4
-rw-r--r--src/backends/reference/workloads/RefStackWorkload.cpp57
-rw-r--r--src/backends/reference/workloads/RefStackWorkload.hpp22
-rw-r--r--src/backends/reference/workloads/RefWorkloads.hpp1
-rw-r--r--src/backends/reference/workloads/Stack.cpp115
-rw-r--r--src/backends/reference/workloads/Stack.hpp20
18 files changed, 750 insertions, 3 deletions
diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
index 3cc6eabe9f..4301f9a196 100644
--- a/include/armnn/ILayerSupport.hpp
+++ b/include/armnn/ILayerSupport.hpp
@@ -271,7 +271,7 @@ public:
const ViewsDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
- virtual bool IsStackSupported(const std::vector<const TensorInfo*> inputs,
+ virtual bool IsStackSupported(const std::vector<const TensorInfo*>& inputs,
const TensorInfo& output,
const StackDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index 834aa0e620..4181db26d0 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -1376,4 +1376,51 @@ std::unique_ptr<SpaceToDepthWorkload> CreateSpaceToDepthWorkloadTest(armnn::IWor
return workload;
}
+template <typename StackWorkload>
+std::unique_ptr<StackWorkload> CreateStackWorkloadTest(armnn::IWorkloadFactory& factory,
+ armnn::Graph& graph,
+ const armnn::TensorShape& inputShape,
+ const armnn::TensorShape& outputShape,
+ unsigned int axis,
+ unsigned int numInputs,
+ armnn::DataType dataType)
+{
+ armnn::TensorInfo inputTensorInfo(inputShape, dataType);
+ armnn::TensorInfo outputTensorInfo(outputShape, dataType);
+
+ // Constructs the Stack layer.
+ armnn::StackDescriptor descriptor(axis, numInputs, inputShape);
+ Layer* const stackLayer = graph.AddLayer<StackLayer>(descriptor, "stack");
+ BOOST_CHECK(stackLayer != nullptr);
+
+ // Constructs layer inputs and output.
+ std::vector<Layer*> inputs;
+ for (unsigned int i=0; i<numInputs; ++i)
+ {
+ inputs.push_back(graph.AddLayer<InputLayer>(
+ static_cast<int>(i),
+ ("input" + std::to_string(i)).c_str()
+ ));
+ BOOST_CHECK(inputs[i] != nullptr);
+ }
+ Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+ BOOST_CHECK(output != nullptr);
+
+ // Adds connections.
+ for (unsigned int i=0; i<numInputs; ++i)
+ {
+ Connect(inputs[i], stackLayer, inputTensorInfo, 0, i);
+ }
+ Connect(stackLayer, output, outputTensorInfo, 0, 0);
+
+ CreateTensorHandles(graph, factory);
+
+ auto stackWorkload = MakeAndCheckWorkload<StackWorkload>(*stackLayer, graph, factory);
+ StackQueueDescriptor queueDescriptor = stackWorkload->GetData();
+ BOOST_TEST(queueDescriptor.m_Inputs.size() == numInputs);
+ BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+ return stackWorkload;
+}
+
} // Anonymous namespace
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index 26b98a22b2..e843423a92 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -415,7 +415,7 @@ bool LayerSupportBase::IsSplitterSupported(const TensorInfo& input,
return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
}
-bool LayerSupportBase::IsStackSupported(const std::vector<const TensorInfo*> inputs,
+bool LayerSupportBase::IsStackSupported(const std::vector<const TensorInfo*>& inputs,
const TensorInfo& output,
const StackDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported) const
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index dad07981fd..d49fc3e371 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -257,7 +257,7 @@ public:
const ViewsDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
- bool IsStackSupported(const std::vector<const TensorInfo*> inputs,
+ bool IsStackSupported(const std::vector<const TensorInfo*>& inputs,
const TensorInfo& output,
const StackDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index d6747f5898..3db826f379 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -4483,3 +4483,389 @@ LayerTestResult<int16_t, 4> UnbiasedStridedTransposeConvolution2dInt16NchwTest(
LayerTestResult<int16_t, 4> UnbiasedStridedTransposeConvolution2dInt16NhwcTest(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template<armnn::DataType ArmnnType, typename T, std::size_t outputDimLength>
+LayerTestResult<T, outputDimLength> StackTestHelper(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::TensorInfo& inputTensorInfo,
+ const armnn::TensorInfo& outputTensorInfo,
+ unsigned int axis,
+ const std::vector<std::vector<T>>& inputData,
+ const std::vector<T>& outputExpectedData)
+{
+ unsigned int numInputs = static_cast<unsigned int>(inputData.size());
+ std::vector<boost::multi_array<T, outputDimLength-1>> inputs;
+ for (unsigned int i = 0; i < numInputs; ++i)
+ {
+ inputs.push_back(MakeTensor<T, outputDimLength-1>(inputTensorInfo, inputData[i]));
+ }
+
+ LayerTestResult<T, outputDimLength> result(outputTensorInfo);
+ result.outputExpected = MakeTensor<T, outputDimLength>(outputTensorInfo, outputExpectedData);
+
+ std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
+ for (unsigned int i = 0; i < numInputs; ++i)
+ {
+ inputHandles.push_back(workloadFactory.CreateTensorHandle(inputTensorInfo));
+ }
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ armnn::StackQueueDescriptor descriptor;
+ descriptor.m_Parameters.m_Axis = axis;
+ descriptor.m_Parameters.m_InputShape = inputTensorInfo.GetShape();
+ descriptor.m_Parameters.m_NumInputs = numInputs;
+
+ armnn::WorkloadInfo info;
+ for (unsigned int i = 0; i < numInputs; ++i)
+ {
+ std::unique_ptr<armnn::ITensorHandle>& inputHandle = inputHandles[i];
+ AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+ inputHandle->Allocate();
+ CopyDataToITensorHandle(inputHandle.get(), inputs[i].origin());
+ }
+
+ AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+ outputHandle->Allocate();
+
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateStack(descriptor, info);
+
+ workload->Execute();
+
+ CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
+
+ return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Stack0AxisTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
+ armnn::TensorInfo outputTensorInfo({ 2, 3, 2, 3 }, ArmnnType);
+
+ std::vector<std::vector<T>> inputData;
+
+ inputData.push_back(
+ {
+ 1, 2, 3,
+ 4, 5, 6,
+
+ 7, 8, 9,
+ 10, 11, 12,
+
+ 13, 14, 15,
+ 16, 17, 18
+ });
+
+ inputData.push_back(
+ {
+ 19, 20, 21,
+ 22, 23, 24,
+
+ 25, 26, 27,
+ 28, 29, 30,
+
+ 31, 32, 33,
+ 34, 35, 36
+ });
+
+ std::vector<T> outputExpectedData =
+ {
+ 1, 2, 3,
+ 4, 5, 6,
+
+ 7, 8, 9,
+ 10, 11, 12,
+
+ 13, 14, 15,
+ 16, 17, 18,
+
+
+ 19, 20, 21,
+ 22, 23, 24,
+
+ 25, 26, 27,
+ 28, 29, 30,
+
+ 31, 32, 33,
+ 34, 35, 36
+ };
+
+ return StackTestHelper<ArmnnType, T, 4>(
+ workloadFactory,
+ memoryManager,
+ inputTensorInfo,
+ outputTensorInfo,
+ 0U,
+ inputData,
+ outputExpectedData
+ );
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Stack4dOutput1AxisTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
+ armnn::TensorInfo outputTensorInfo({ 3, 2, 2, 3 }, ArmnnType);
+
+ std::vector<std::vector<T>> inputData;
+
+ inputData.push_back(
+ {
+ 1, 2, 3,
+ 4, 5, 6,
+
+ 7, 8, 9,
+ 10, 11, 12,
+
+ 13, 14, 15,
+ 16, 17, 18
+ });
+
+ inputData.push_back(
+ {
+ 19, 20, 21,
+ 22, 23, 24,
+
+ 25, 26, 27,
+ 28, 29, 30,
+
+ 31, 32, 33,
+ 34, 35, 36
+ });
+
+ std::vector<T> outputExpectedData =
+ {
+ 1, 2, 3,
+ 4, 5, 6,
+
+ 19, 20, 21,
+ 22, 23, 24,
+
+
+ 7, 8, 9,
+ 10, 11, 12,
+
+ 25, 26, 27,
+ 28, 29, 30,
+
+
+ 13, 14, 15,
+ 16, 17, 18,
+
+ 31, 32, 33,
+ 34, 35, 36
+ };
+
+ return StackTestHelper<ArmnnType, T, 4>(
+ workloadFactory,
+ memoryManager,
+ inputTensorInfo,
+ outputTensorInfo,
+ 1U,
+ inputData,
+ outputExpectedData
+ );
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Stack4dOutput2AxisTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
+ armnn::TensorInfo outputTensorInfo({ 3, 2, 2, 3 }, ArmnnType);
+
+ std::vector<std::vector<T>> inputData;
+
+ inputData.push_back(
+ {
+ 1, 2, 3,
+ 4, 5, 6,
+
+ 7, 8, 9,
+ 10, 11, 12,
+
+ 13, 14, 15,
+ 16, 17, 18
+ });
+
+ inputData.push_back(
+ {
+ 19, 20, 21,
+ 22, 23, 24,
+
+ 25, 26, 27,
+ 28, 29, 30,
+
+ 31, 32, 33,
+ 34, 35, 36
+ });
+
+ std::vector<T> outputExpectedData =
+ {
+ 1, 2, 3,
+ 19, 20, 21,
+
+ 4, 5, 6,
+ 22, 23, 24,
+
+
+ 7, 8, 9,
+ 25, 26, 27,
+
+ 10, 11, 12,
+ 28, 29, 30,
+
+ 13, 14, 15,
+ 31, 32, 33,
+
+ 16, 17, 18,
+ 34, 35, 36
+ };
+
+ return StackTestHelper<ArmnnType, T, 4>(
+ workloadFactory,
+ memoryManager,
+ inputTensorInfo,
+ outputTensorInfo,
+ 2U,
+ inputData,
+ outputExpectedData
+ );
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Stack4dOutput3AxisTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
+ armnn::TensorInfo outputTensorInfo({ 3, 2, 3, 2 }, ArmnnType);
+
+ std::vector<std::vector<T>> inputData;
+
+ inputData.push_back(
+ {
+ 1, 2, 3,
+ 4, 5, 6,
+
+ 7, 8, 9,
+ 10, 11, 12,
+
+ 13, 14, 15,
+ 16, 17, 18
+ });
+
+ inputData.push_back(
+ {
+ 19, 20, 21,
+ 22, 23, 24,
+
+ 25, 26, 27,
+ 28, 29, 30,
+
+ 31, 32, 33,
+ 34, 35, 36
+ });
+
+ std::vector<T> outputExpectedData =
+ {
+ 1, 19,
+ 2, 20,
+ 3, 21,
+
+ 4, 22,
+ 5, 23,
+ 6, 24,
+
+
+ 7, 25,
+ 8, 26,
+ 9, 27,
+
+ 10, 28,
+ 11, 29,
+ 12, 30,
+
+
+ 13, 31,
+ 14, 32,
+ 15, 33,
+
+ 16, 34,
+ 17, 35,
+ 18, 36
+ };
+
+ return StackTestHelper<ArmnnType, T, 4>(
+ workloadFactory,
+ memoryManager,
+ inputTensorInfo,
+ outputTensorInfo,
+ 3U,
+ inputData,
+ outputExpectedData
+ );
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Stack3dOutput1Axis3InputTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ armnn::TensorInfo inputTensorInfo ({ 3, 3 }, ArmnnType);
+ armnn::TensorInfo outputTensorInfo({ 3, 3, 3 }, ArmnnType);
+
+ std::vector<std::vector<T>> inputData;
+
+ inputData.push_back(
+ {
+ 1, 2, 3,
+ 4, 5, 6,
+ 7, 8, 9
+ });
+
+ inputData.push_back(
+ {
+ 10, 11, 12,
+ 13, 14, 15,
+ 16, 17, 18
+ });
+
+ inputData.push_back(
+ {
+ 19, 20, 21,
+ 22, 23, 24,
+ 25, 26, 27
+ });
+
+ std::vector<T> outputExpectedData =
+ {
+ 1, 2, 3,
+ 10, 11, 12,
+ 19, 20, 21,
+
+ 4, 5, 6,
+ 13, 14, 15,
+ 22, 23, 24,
+
+ 7, 8, 9,
+ 16, 17, 18,
+ 25, 26, 27
+ };
+
+ return StackTestHelper<ArmnnType, T, 3>(
+ workloadFactory,
+ memoryManager,
+ inputTensorInfo,
+ outputTensorInfo,
+ 1U,
+ inputData,
+ outputExpectedData
+ );
+}
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 59c14c4490..b9aa126a8c 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -419,6 +419,7 @@ bool RefLayerSupport::IsConcatSupported(const std::vector<const TensorInfo*> inp
"Reference concatenation: output type not supported");
for (const TensorInfo* input : inputs)
{
+ BOOST_ASSERT(input != nullptr);
supported &= CheckSupportRule(TypeAnyOf(*input, supportedTypes), reasonIfUnsupported,
"Reference concatenation: input type not supported");
@@ -1592,6 +1593,36 @@ bool RefLayerSupport::IsSplitterSupported(const TensorInfo& input,
return supported;
}
+bool RefLayerSupport::IsStackSupported(const std::vector<const TensorInfo*>& inputs,
+ const TensorInfo& output,
+ const StackDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ ignore_unused(descriptor);
+
+ bool supported = true;
+ std::array<DataType,3> supportedTypes =
+ {
+ DataType::Float32,
+ DataType::QuantisedAsymm8,
+ DataType::QuantisedSymm16
+ };
+
+ supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
+ "Reference stack: output type not supported");
+ for (const TensorInfo* input : inputs)
+ {
+ BOOST_ASSERT(input != nullptr);
+ supported &= CheckSupportRule(TypeAnyOf(*input, supportedTypes), reasonIfUnsupported,
+ "Reference stack: input type not supported");
+
+ supported &= CheckSupportRule(TypesAreEqual(*input, output), reasonIfUnsupported,
+ "Reference stack: input and output types mismatched.");
+ }
+
+ return supported;
+}
+
bool RefLayerSupport::IsStridedSliceSupported(const TensorInfo& input,
const TensorInfo& output,
const StridedSliceDescriptor& descriptor,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index c0bf18824e..f8bbeb78bf 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -241,6 +241,11 @@ public:
const ViewsDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ bool IsStackSupported(const std::vector<const TensorInfo*>& inputs,
+ const TensorInfo& output,
+ const StackDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsStridedSliceSupported(const TensorInfo& input,
const TensorInfo& output,
const StridedSliceDescriptor& descriptor,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 183103c40c..925eb6ad90 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -506,4 +506,14 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTransposeConvolution2d(
return std::make_unique<RefTransposeConvolution2dWorkload>(descriptor, info);
}
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ if (IsFloat16(info))
+ {
+ return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+ }
+ return std::make_unique<RefStackWorkload>(descriptor, info);
+}
+
} // namespace armnn
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 9ef15221ef..b012fbc6f6 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -203,6 +203,9 @@ public:
std::unique_ptr<IWorkload> CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ std::unique_ptr<IWorkload> CreateStack(const StackQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
private:
template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 411ab7e615..6e1360a1db 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -62,6 +62,7 @@ BACKEND_SOURCES := \
workloads/RefSoftmaxWorkload.cpp \
workloads/RefSpaceToBatchNdWorkload.cpp \
workloads/RefSpaceToDepthWorkload.cpp \
+ workloads/RefStackWorkload.cpp \
workloads/RefStridedSliceWorkload.cpp \
workloads/RefSplitterWorkload.cpp \
workloads/RefTransposeConvolution2dWorkload.cpp \
@@ -69,6 +70,7 @@ BACKEND_SOURCES := \
workloads/Rsqrt.cpp \
workloads/SpaceToBatchNd.cpp \
workloads/SpaceToDepth.cpp \
+ workloads/Stack.cpp \
workloads/StridedSlice.cpp \
workloads/StringMapping.cpp \
workloads/Softmax.cpp \
diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp
index 2fa6cbfd5d..f7999d0ffe 100644
--- a/src/backends/reference/test/RefCreateWorkloadTests.cpp
+++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp
@@ -990,4 +990,41 @@ BOOST_AUTO_TEST_CASE(CreateSpaceToDepthWorkloadQSymm16)
RefCreateSpaceToDepthWorkloadTest<RefSpaceToDepthWorkload, armnn::DataType::QuantisedSymm16>();
}
+static void RefCreateStackWorkloadTest(const armnn::TensorShape& inputShape,
+ const armnn::TensorShape& outputShape,
+ unsigned int axis,
+ unsigned int numInputs,
+ armnn::DataType dataType)
+{
+ armnn::Graph graph;
+ RefWorkloadFactory factory;
+ auto workload = CreateStackWorkloadTest<RefStackWorkload>(factory,
+ graph,
+ inputShape,
+ outputShape,
+ axis,
+ numInputs,
+ dataType);
+
+ // Check output is as expected
+ auto queueDescriptor = workload->GetData();
+ auto outputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+ BOOST_TEST((outputHandle->GetTensorInfo() == TensorInfo(outputShape, dataType)));
+}
+
+BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
+{
+ RefCreateStackWorkloadTest({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2, armnn::DataType::Float32);
+}
+
+BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
+{
+ RefCreateStackWorkloadTest({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2, armnn::DataType::QuantisedAsymm8);
+}
+
+BOOST_AUTO_TEST_CASE(CreateStackUint16Workload)
+{
+ RefCreateStackWorkloadTest({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2, armnn::DataType::QuantisedSymm16);
+}
+
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 509dbf7960..4f46d27883 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -1175,4 +1175,11 @@ ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dUint8Nchw, UnbiasedStr
ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dInt16Nhwc, UnbiasedStridedTransposeConvolution2dInt16NhwcTest)
ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dInt16Nchw, UnbiasedStridedTransposeConvolution2dInt16NchwTest)
+// Stack
+ARMNN_AUTO_TEST_CASE(Stack0Axis, Stack0AxisTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack4dOutput1Axis, Stack4dOutput1AxisTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack4dOutput2Axis, Stack4dOutput2AxisTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack4dOutput3Axis, Stack4dOutput3AxisTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack3dOutput1Axis3Input, Stack3dOutput1Axis3InputTest<armnn::DataType::Float32>)
+
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 696605d662..c9db057be5 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -109,6 +109,8 @@ list(APPEND armnnRefBackendWorkloads_sources
RefSpaceToDepthWorkload.hpp
RefSplitterWorkload.cpp
RefSplitterWorkload.hpp
+ RefStackWorkload.cpp
+ RefStackWorkload.hpp
RefStridedSliceWorkload.cpp
RefStridedSliceWorkload.hpp
RefTransposeConvolution2dWorkload.cpp
@@ -127,6 +129,8 @@ list(APPEND armnnRefBackendWorkloads_sources
SpaceToDepth.cpp
Splitter.hpp
Splitter.cpp
+ Stack.cpp
+ Stack.hpp
StridedSlice.hpp
StridedSlice.cpp
StringMapping.cpp
diff --git a/src/backends/reference/workloads/RefStackWorkload.cpp b/src/backends/reference/workloads/RefStackWorkload.cpp
new file mode 100644
index 0000000000..be36f40633
--- /dev/null
+++ b/src/backends/reference/workloads/RefStackWorkload.cpp
@@ -0,0 +1,57 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefStackWorkload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "Stack.hpp"
+
+#include <Profiling.hpp>
+
+namespace armnn
+{
+
// Constructs the reference Stack workload; all of the work happens in Execute().
RefStackWorkload::RefStackWorkload(const StackQueueDescriptor& descriptor,
                                   const WorkloadInfo& info)
    : BaseWorkload(descriptor, info)
{}
+
+void RefStackWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStackWorkload_Execute");
+
+ // Can perform a simple concatenation when axis == 0
+ if (!m_Data.m_Parameters.m_Axis)
+ {
+ float* output = GetOutputTensorData<float>(0, m_Data);
+ BOOST_ASSERT(output != nullptr);
+
+ unsigned int numInputs = m_Data.m_Parameters.m_NumInputs;
+ unsigned int inputLength = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+
+ for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
+ {
+ const float* input = GetInputTensorData<float>(inputIdx, m_Data);
+ for (unsigned int elmt=0; elmt<inputLength; ++elmt)
+ {
+ output[(inputIdx * inputLength) + elmt] = input[elmt];
+ }
+ }
+ return;
+ }
+
+ std::vector<std::unique_ptr<Decoder<float>>> inputDecoders;
+ for (unsigned int i=0; i<m_Data.m_Inputs.size(); ++i)
+ {
+ inputDecoders.push_back(MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[i]),
+ m_Data.m_Inputs[i]->Map()));
+ }
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]),
+ m_Data.m_Outputs[0]->Map());
+
+ Stack(m_Data, inputDecoders, *outputEncoder);
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefStackWorkload.hpp b/src/backends/reference/workloads/RefStackWorkload.hpp
new file mode 100644
index 0000000000..ceb27d9f60
--- /dev/null
+++ b/src/backends/reference/workloads/RefStackWorkload.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
// Reference (CPU) workload for the Stack layer: joins the descriptor's
// m_NumInputs input tensors into a single output along m_Axis.
class RefStackWorkload : public BaseWorkload<StackQueueDescriptor>
{
public:
    explicit RefStackWorkload(const StackQueueDescriptor& descriptor,
                              const WorkloadInfo& info);
    virtual void Execute() const override;
};
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 4bdf05daa8..e86dccd5bf 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -46,6 +46,7 @@
#include "RefSplitterWorkload.hpp"
#include "RefSoftmaxWorkload.hpp"
#include "RefSpaceToBatchNdWorkload.hpp"
+#include "RefStackWorkload.hpp"
#include "RefStridedSliceWorkload.hpp"
#include "RefSpaceToDepthWorkload.hpp"
#include "RefTransposeConvolution2dWorkload.hpp"
diff --git a/src/backends/reference/workloads/Stack.cpp b/src/backends/reference/workloads/Stack.cpp
new file mode 100644
index 0000000000..386c8992eb
--- /dev/null
+++ b/src/backends/reference/workloads/Stack.cpp
@@ -0,0 +1,115 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Stack.hpp"
+#include "RefWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void Stack(const StackQueueDescriptor& data,
+ std::vector<std::unique_ptr<Decoder<float>>>& inputs,
+ Encoder<float>& output)
+{
+ const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
+
+ unsigned int outputNumDims = outputInfo.GetNumDimensions();
+ unsigned int inputNumDims = inputInfo.GetNumDimensions();
+
+ const armnn::TensorShape& outputDims = outputInfo.GetShape();
+ const armnn::TensorShape& inputDims = inputInfo.GetShape();
+
+ unsigned int axis = data.m_Parameters.m_Axis;
+
+ // Initialise output data
+ unsigned int numOutputElements = 1;
+ for (unsigned int i=0; i<outputNumDims; ++i)
+ {
+ numOutputElements *= outputDims[i];
+ }
+
+ const unsigned int iNumTensors = static_cast<unsigned int>(data.m_Inputs.size());
+ const unsigned int iBatchSize = inputDims[0];
+ const unsigned int iChannels = (inputNumDims > 1) ? inputDims[1] : 1;
+ const unsigned int iHeight = (inputNumDims > 2) ? inputDims[2] : 1;
+ const unsigned int iWidth = (inputNumDims > 3) ? inputDims[3] : 1;
+
+ const unsigned int oBatchSize = outputDims[1];
+ const unsigned int oChannels = (outputNumDims > 2) ? outputDims[2] : 1;
+ const unsigned int oHeight = (outputNumDims > 3) ? outputDims[3] : 1;
+ const unsigned int oWidth = (outputNumDims > 4) ? outputDims[4] : 1;
+
+ // Array to store the input coordinates
+ // iCoordinates[0] = i, iCoordinates[1] = bi, iCoordinates[2] = ci
+ // iCoordinates[3] = hi, iCoordinates[4] = wi, iCoordinates[5] = 0
+ // iCoordinates[5] will be always zero and used for not incrementing
+ // the output when the input has less than 4 dimensions
+ std::array<unsigned int, 6> iCoordinates{ 0 };
+
+ // Array of pointers used to map the output coordinates to the input ones, in accordance with the axis
+ // This array is initialized with &iCoordinates[5] since this will be always zero
+ std::array<unsigned int *, 5> oCoordinates = { &iCoordinates[5],
+ &iCoordinates[5],
+ &iCoordinates[5],
+ &iCoordinates[5],
+ &iCoordinates[5] };
+
+ // Set the axis coordinate
+ oCoordinates[axis] = &iCoordinates[0];
+
+ // Map the output coordinates, accounting for the axis
+ unsigned int dim_shift = 0;
+ for(unsigned int dim = 0; dim < inputNumDims; ++dim)
+ {
+ if(dim == axis)
+ {
+ dim_shift++;
+ }
+ oCoordinates[dim + dim_shift] = &iCoordinates[dim + 1];
+ }
+
+ // Alias for the input coordinates
+ unsigned int &i = iCoordinates[0];
+ unsigned int &bi = iCoordinates[1];
+ unsigned int &ci = iCoordinates[2];
+ unsigned int &hi = iCoordinates[3];
+ unsigned int &wi = iCoordinates[4];
+
+ // Alias for the output coordinates
+ unsigned int &o = *(oCoordinates[0]);
+ unsigned int &bo = *(oCoordinates[1]);
+ unsigned int &co = *(oCoordinates[2]);
+ unsigned int &ho = *(oCoordinates[3]);
+ unsigned int &wo = *(oCoordinates[4]);
+
+ // Stack tensors
+ for(; i < iNumTensors; ++(i))
+ {
+ for(bi = 0; bi < iBatchSize; ++(bi))
+ {
+ for(ci = 0; ci < iChannels; ++(ci))
+ {
+ for(hi = 0; hi < iHeight; ++(hi))
+ {
+ for(wi = 0; wi < iWidth; ++(wi))
+ {
+ output[o * oWidth * oHeight * oChannels * oBatchSize +
+ bo * oWidth * oHeight * oChannels +
+ co * oWidth * oHeight +
+ ho * oWidth +
+ wo];
+
+ output.Set(inputs[i]->Get());
+
+ ++(*(inputs[i]));
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/Stack.hpp b/src/backends/reference/workloads/Stack.hpp
new file mode 100644
index 0000000000..cd86d41552
--- /dev/null
+++ b/src/backends/reference/workloads/Stack.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Encoders.hpp"
+#include "Decoders.hpp"
+
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
// Stacks the tensors in data.m_Inputs along the axis given by
// data.m_Parameters.m_Axis into the single output, reading through the
// supplied per-input decoders and writing through the output encoder.
void Stack (const StackQueueDescriptor& data,
            std::vector<std::unique_ptr<Decoder<float>>>& inputs,
            Encoder<float>& output);
+
+} // namespace armnn