aboutsummaryrefslogtreecommitdiff
path: root/src/backends/reference
diff options
context:
space:
mode:
authorAron Virginas-Tar <Aron.Virginas-Tar@arm.com>2019-06-26 15:02:47 +0100
committerÁron Virginás-Tar <aron.virginas-tar@arm.com>2019-06-27 11:52:47 +0000
commit735a450d3b53a2d745b9a7a6d85747e25ec37ede (patch)
tree4f5af0ddada102cb51fe1f4ba84e3ccf8f51c6ab /src/backends/reference
parent05bf054f40eb551ea76722163b6ed1a1fde7bbf0 (diff)
downloadarmnn-735a450d3b53a2d745b9a7a6d85747e25ec37ede.tar.gz
IVGCVSW-3320 Add reference workload support for TransposeConvolution2dLayer
Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com> Change-Id: Icc64f8148c9d8a0d14d772e6e4e7865e70585cd9
Diffstat (limited to 'src/backends/reference')
-rw-r--r--src/backends/reference/CMakeLists.txt1
-rw-r--r--src/backends/reference/RefWorkloadFactory.cpp7
-rw-r--r--src/backends/reference/RefWorkloadFactory.hpp3
-rw-r--r--src/backends/reference/backend.mk4
-rw-r--r--src/backends/reference/test/RefLayerTests.cpp44
-rw-r--r--src/backends/reference/workloads/CMakeLists.txt16
-rw-r--r--src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp67
-rw-r--r--src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp43
-rw-r--r--src/backends/reference/workloads/RefWorkloads.hpp1
-rw-r--r--src/backends/reference/workloads/TransposeConvolution2d.cpp248
-rw-r--r--src/backends/reference/workloads/TransposeConvolution2d.hpp26
11 files changed, 452 insertions, 8 deletions
diff --git a/src/backends/reference/CMakeLists.txt b/src/backends/reference/CMakeLists.txt
index 82880cf524..ff16f185b4 100644
--- a/src/backends/reference/CMakeLists.txt
+++ b/src/backends/reference/CMakeLists.txt
@@ -11,7 +11,6 @@ list(APPEND armnnRefBackend_sources
RefLayerSupport.hpp
RefWorkloadFactory.cpp
RefWorkloadFactory.hpp
-
)
add_library(armnnRefBackend OBJECT ${armnnRefBackend_sources})
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 035a28b71a..5ede8b3f02 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -464,4 +464,11 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePrelu(const PreluQueueDescr
return std::make_unique<RefPreluWorkload>(descriptor, info);
}
+// Creates the reference-backend workload for a TransposeConvolution2d layer from the
+// given queue descriptor and workload info.
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTransposeConvolution2d(
+    const TransposeConvolution2dQueueDescriptor& descriptor,
+    const WorkloadInfo& info) const
+{
+    std::unique_ptr<IWorkload> workload =
+        std::make_unique<RefTransposeConvolution2dWorkload>(descriptor, info);
+    return workload;
+}
+
} // namespace armnn
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index ed513e1e3b..44cb079ea7 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -193,6 +193,9 @@ public:
std::unique_ptr<IWorkload> CreatePrelu(const PreluQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ std::unique_ptr<IWorkload> CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
private:
template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 849d87c4ca..6fb17b563f 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -60,6 +60,7 @@ BACKEND_SOURCES := \
workloads/RefSpaceToDepthWorkload.cpp \
workloads/RefStridedSliceWorkload.cpp \
workloads/RefSplitterWorkload.cpp \
+ workloads/RefTransposeConvolution2dWorkload.cpp \
workloads/ResizeBilinear.cpp \
workloads/Rsqrt.cpp \
workloads/SpaceToBatchNd.cpp \
@@ -67,7 +68,8 @@ BACKEND_SOURCES := \
workloads/StridedSlice.cpp \
workloads/StringMapping.cpp \
workloads/Softmax.cpp \
- workloads/Splitter.cpp
+ workloads/Splitter.cpp \
+ workloads/TransposeConvolution2d.cpp
# BACKEND_TEST_SOURCES contains the list of files to be included
# in the Android unit test build (armnn-tests) and it is picked
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index b997a14e9d..3556df18b1 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -10,6 +10,7 @@
#include <reference/RefWorkloadFactory.hpp>
#include <backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp>
+#include <backendsCommon/test/TransposeConvolution2dTestImpl.hpp>
#include <backendsCommon/test/LayerTests.hpp>
#include <boost/test/unit_test.hpp>
@@ -966,4 +967,47 @@ ARMNN_AUTO_TEST_CASE(PreluFloat32, PreluTest<armnn::DataType::Float32>)
ARMNN_AUTO_TEST_CASE(PreluUint8, PreluTest<armnn::DataType::QuantisedAsymm8>)
ARMNN_AUTO_TEST_CASE(PreluInt16, PreluTest<armnn::DataType::QuantisedSymm16>)
+// TransposeConvolution2d
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dFloatNchw, SimpleTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dFloatNhwc, SimpleTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dUint8Nchw, SimpleTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dUint8Nhwc, SimpleTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dInt16Nchw, SimpleTransposeConvolution2dInt16NchwTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dInt16Nhwc, SimpleTransposeConvolution2dInt16NhwcTest)
+
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dFloatNchw, UnbiasedSimpleTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dFloatNhwc, UnbiasedSimpleTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dUint8Nchw, UnbiasedSimpleTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dUint8Nhwc, UnbiasedSimpleTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dInt16Nchw, UnbiasedSimpleTransposeConvolution2dInt16NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dInt16Nhwc, UnbiasedSimpleTransposeConvolution2dInt16NhwcTest)
+
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dFloatNchw, PaddedTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dFloatNhwc, PaddedTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dUint8Nchw, PaddedTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dUint8Nhwc, PaddedTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dInt16Nchw, PaddedTransposeConvolution2dInt16NchwTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dInt16Nhwc, PaddedTransposeConvolution2dInt16NhwcTest)
+
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dFloatNchw, UnbiasedPaddedTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dFloatNhwc, UnbiasedPaddedTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dUint8Nchw, UnbiasedPaddedTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dUint8Nhwc, UnbiasedPaddedTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dInt16Nchw, UnbiasedPaddedTransposeConvolution2dInt16NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dInt16Nhwc, UnbiasedPaddedTransposeConvolution2dInt16NhwcTest)
+
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dFloatNhwc, StridedTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dFloatNchw, StridedTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dUint8Nhwc, StridedTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dUint8Nchw, StridedTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dInt16Nhwc, StridedTransposeConvolution2dInt16NhwcTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dInt16Nchw, StridedTransposeConvolution2dInt16NchwTest)
+
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dFloatNhwc, UnbiasedStridedTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dFloatNchw, UnbiasedStridedTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dUint8Nhwc, UnbiasedStridedTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dUint8Nchw, UnbiasedStridedTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dInt16Nhwc, UnbiasedStridedTransposeConvolution2dInt16NhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dInt16Nchw, UnbiasedStridedTransposeConvolution2dInt16NchwTest)
+
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index daa004345c..9be245b1a7 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -29,6 +29,8 @@ list(APPEND armnnRefBackendWorkloads_sources
Gather.hpp
LstmUtils.hpp
Maximum.hpp
+ Mean.cpp
+ Mean.hpp
Concatenate.hpp
Concatenate.cpp
Minimum.hpp
@@ -44,6 +46,8 @@ list(APPEND armnnRefBackendWorkloads_sources
RefBatchNormalizationWorkload.hpp
RefBatchToSpaceNdWorkload.cpp
RefBatchToSpaceNdWorkload.hpp
+ RefConcatWorkload.cpp
+ RefConcatWorkload.hpp
RefConstantWorkload.cpp
RefConstantWorkload.hpp
RefConvertFp16ToFp32Workload.cpp
@@ -74,8 +78,8 @@ list(APPEND armnnRefBackendWorkloads_sources
RefL2NormalizationWorkload.hpp
RefLstmWorkload.cpp
RefLstmWorkload.hpp
- RefConcatWorkload.cpp
- RefConcatWorkload.hpp
+ RefMeanWorkload.cpp
+ RefMeanWorkload.hpp
RefNormalizationWorkload.cpp
RefNormalizationWorkload.hpp
RefPadWorkload.cpp
@@ -104,6 +108,8 @@ list(APPEND armnnRefBackendWorkloads_sources
RefSplitterWorkload.hpp
RefStridedSliceWorkload.cpp
RefStridedSliceWorkload.hpp
+ RefTransposeConvolution2dWorkload.cpp
+ RefTransposeConvolution2dWorkload.hpp
RefWorkloads.hpp
RefWorkloadUtils.hpp
ResizeBilinear.cpp
@@ -123,10 +129,8 @@ list(APPEND armnnRefBackendWorkloads_sources
StringMapping.cpp
StringMapping.hpp
TensorBufferArrayView.hpp
- Mean.cpp
- Mean.hpp
- RefMeanWorkload.cpp
- RefMeanWorkload.hpp
+ TransposeConvolution2d.cpp
+ TransposeConvolution2d.hpp
)
add_library(armnnRefBackendWorkloads OBJECT ${armnnRefBackendWorkloads_sources})
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
new file mode 100644
index 0000000000..50dafcac3c
--- /dev/null
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
@@ -0,0 +1,67 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefTransposeConvolution2dWorkload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "TransposeConvolution2d.hpp"
+
+#include <Profiling.hpp>
+
+namespace armnn
+{
+
+// Builds the workload and prepares float decoders for the constant tensors: the weights
+// and, when the descriptor enables them, the biases. Input and output tensors are set up
+// later in PostAllocationConfigure().
+RefTransposeConvolution2dWorkload::RefTransposeConvolution2dWorkload(
+    const TransposeConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) :
+    BaseWorkload<TransposeConvolution2dQueueDescriptor>(descriptor, info)
+{
+    // weights: keep them in a workload-owned handle and expose them as floats
+    m_Weights = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight));
+
+    const TensorInfo& kernelInfo = GetTensorInfo(m_Weights.get());
+    m_WeightsShape   = kernelInfo.GetShape();
+    m_WeightsDecoder = MakeDecoder<float>(kernelInfo, m_Weights->Map(true));
+
+    // without bias there is nothing more to prepare; m_Biases / m_BiasesDecoder stay null
+    if (!descriptor.m_Parameters.m_BiasEnabled)
+    {
+        return;
+    }
+
+    // biases: same treatment as the weights
+    m_Biases = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias));
+
+    const TensorInfo& biasInfo = GetTensorInfo(m_Biases.get());
+    m_BiasesDecoder = MakeDecoder<float>(biasInfo, m_Biases->Map(true));
+}
+
+// Maps the workload's first input and first output tensor handles, wraps them in a float
+// decoder / encoder and caches both shapes for use by Execute().
+void RefTransposeConvolution2dWorkload::PostAllocationConfigure()
+{
+    // input side
+    const ITensorHandle* inputHandle = m_Data.m_Inputs[0];
+    const TensorInfo& inInfo         = GetTensorInfo(inputHandle);
+
+    m_InputDecoder = MakeDecoder<float>(inInfo, inputHandle->Map());
+    m_InputShape   = inInfo.GetShape();
+
+    // output side
+    ITensorHandle* outputHandle = m_Data.m_Outputs[0];
+    const TensorInfo& outInfo   = GetTensorInfo(outputHandle);
+
+    m_OutputEncoder = MakeEncoder<float>(outInfo, outputHandle->Map());
+    m_OutputShape   = outInfo.GetShape();
+}
+
+// Runs the reference transpose convolution with the decoders, encoder and shapes that
+// the constructor and PostAllocationConfigure() prepared.
+void RefTransposeConvolution2dWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefTransposeConvolution2dWorkload_Execute");
+
+    Decoder<float>& input   = *m_InputDecoder;
+    Encoder<float>& output  = *m_OutputEncoder;
+    Decoder<float>& weights = *m_WeightsDecoder;
+
+    // stays null when the constructor did not create a bias decoder
+    Decoder<float>* const biases = m_BiasesDecoder.get();
+
+    TransposeConvolution2dImpl(m_Data.m_Parameters,
+                               m_InputShape,
+                               input,
+                               m_OutputShape,
+                               output,
+                               m_WeightsShape,
+                               weights,
+                               biases);
+}
+
+} // namespace armnn \ No newline at end of file
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
new file mode 100644
index 0000000000..9ded8c971f
--- /dev/null
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/Workload.hpp>
+
+namespace armnn
+{
+
+// Reference-backend workload for the TransposeConvolution2d layer.
+// All tensor data is accessed as float through Decoder<float> / Encoder<float>.
+class RefTransposeConvolution2dWorkload : public BaseWorkload<TransposeConvolution2dQueueDescriptor>
+{
+public:
+    RefTransposeConvolution2dWorkload(const TransposeConvolution2dQueueDescriptor& descriptor,
+                                      const WorkloadInfo& info);
+    ~RefTransposeConvolution2dWorkload() = default;
+
+    // Performs the transpose convolution on the configured tensors.
+    void Execute() const override;
+
+    // Sets up the input decoder and output encoder and records the input / output shapes.
+    void PostAllocationConfigure() override;
+
+private:
+    // Workload-owned handles for the constant tensors; m_Biases is only set when bias is enabled.
+    std::unique_ptr<ScopedCpuTensorHandle> m_Weights;
+    std::unique_ptr<ScopedCpuTensorHandle> m_Biases;
+
+    // Float accessors for the input and output tensors.
+    std::unique_ptr<Decoder<float>> m_InputDecoder;
+    std::unique_ptr<Encoder<float>> m_OutputEncoder;
+
+    // Float accessors for the constant tensors; m_BiasesDecoder is only set when bias is enabled.
+    std::unique_ptr<Decoder<float>> m_WeightsDecoder;
+    std::unique_ptr<Decoder<float>> m_BiasesDecoder;
+
+    // Shapes cached at configuration time and passed to the implementation on every Execute().
+    TensorShape m_InputShape;
+    TensorShape m_OutputShape;
+    TensorShape m_WeightsShape;
+};
+
+} // namespace armnn \ No newline at end of file
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 90582812e7..3a094c8a32 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -47,6 +47,7 @@
#include "RefSpaceToBatchNdWorkload.hpp"
#include "RefStridedSliceWorkload.hpp"
#include "RefSpaceToDepthWorkload.hpp"
+#include "RefTransposeConvolution2dWorkload.hpp"
#include "RefWorkloadUtils.hpp"
#include "ResizeBilinear.hpp"
#include "Softmax.hpp"
diff --git a/src/backends/reference/workloads/TransposeConvolution2d.cpp b/src/backends/reference/workloads/TransposeConvolution2d.cpp
new file mode 100644
index 0000000000..db15cefe10
--- /dev/null
+++ b/src/backends/reference/workloads/TransposeConvolution2d.cpp
@@ -0,0 +1,248 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "TransposeConvolution2d.hpp"
+
+#include <DataLayoutIndexed.hpp>
+
+namespace armnn
+{
+
+using namespace armnnUtils;
+
+// Intermediate float tensor: a shape plus the flattened element data. Elements are
+// addressed with DataLayoutIndexed::GetIndex() using that shape.
+struct TensorData
+{
+    TensorShape shape;
+    std::vector<float> data;
+};
+
+// Produces a zero-filled copy of the input in which neighbouring input elements are
+// placed m_StrideX columns / m_StrideY rows apart.
+//
+// inputShape        - shape of the tensor read through inputDecoder
+// inputDecoder      - float view of the input data
+// descriptor        - supplies m_StrideX and m_StrideY
+// dataLayoutIndexed - resolves dimension positions and flat indices for the data layout
+//
+// Returns the expanded tensor. Each spatial extent is 1 + stride * (size - 1), or 0 when
+// the corresponding input extent is 0.
+TensorData SetUpStridedInput(const TensorShape& inputShape,
+                             Decoder<float>& inputDecoder,
+                             const TransposeConvolution2dDescriptor& descriptor,
+                             const DataLayoutIndexed& dataLayoutIndexed)
+{
+    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
+
+    const unsigned int batches  = inputShape[0];
+    const unsigned int channels = inputShape[cIndex];
+
+    const unsigned int wInput = inputShape[wIndex];
+    const unsigned int hInput = inputShape[hIndex];
+
+    // An empty spatial extent must stay empty: evaluating (size - 1) on an unsigned zero
+    // would wrap around and request an enormous buffer.
+    const unsigned int wStridedInput = (wInput == 0u) ? 0u : 1u + descriptor.m_StrideX * (wInput - 1u);
+    const unsigned int hStridedInput = (hInput == 0u) ? 0u : 1u + descriptor.m_StrideY * (hInput - 1u);
+
+    TensorData stridedInput;
+    stridedInput.data  = std::vector<float>(batches * channels * wStridedInput * hStridedInput, 0.0f);
+    stridedInput.shape = TensorShape(4);
+
+    stridedInput.shape[0]      = batches;
+    stridedInput.shape[cIndex] = channels;
+    stridedInput.shape[hIndex] = hStridedInput;
+    stridedInput.shape[wIndex] = wStridedInput;
+
+    // scatter every input element to its strided position; the gaps keep their zero fill
+    for (unsigned int batchIdx = 0u; batchIdx < batches; ++batchIdx)
+    {
+        for (unsigned int cInput = 0u; cInput < channels; ++cInput)
+        {
+            for (unsigned int yInput = 0u; yInput < hInput; ++yInput)
+            {
+                const unsigned int yStrided = yInput * descriptor.m_StrideY;
+
+                for (unsigned int xInput = 0u; xInput < wInput; ++xInput)
+                {
+                    const unsigned int xStrided = xInput * descriptor.m_StrideX;
+
+                    const unsigned int inputIdx =
+                        dataLayoutIndexed.GetIndex(inputShape, batchIdx, cInput, yInput, xInput);
+                    const unsigned int stridedInputIdx =
+                        dataLayoutIndexed.GetIndex(stridedInput.shape, batchIdx, cInput, yStrided, xStrided);
+
+                    // position the decoder on the element, then read it
+                    inputDecoder[inputIdx];
+                    stridedInput.data[stridedInputIdx] = inputDecoder.Get();
+                }
+            }
+        }
+    }
+
+    return stridedInput;
+}
+
+// Allocates the zero-initialised scratch tensor that the deconvolution accumulates into.
+// It has the batch and channel counts of outputShape, with the width enlarged by
+// m_PadLeft + m_PadRight and the height enlarged by m_PadTop + m_PadBottom.
+TensorData SetUpEmptyPaddedOutput(const TensorShape& outputShape,
+                                  const TransposeConvolution2dDescriptor& descriptor,
+                                  const DataLayoutIndexed& dataLayoutIndexed)
+{
+    const unsigned int channelDim = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int heightDim  = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int widthDim   = dataLayoutIndexed.GetWidthIndex();
+
+    const unsigned int numBatches  = outputShape[0];
+    const unsigned int numChannels = outputShape[channelDim];
+
+    // spatial extents including the padding border
+    const unsigned int paddedWidth  = outputShape[widthDim] + descriptor.m_PadLeft + descriptor.m_PadRight;
+    const unsigned int paddedHeight = outputShape[heightDim] + descriptor.m_PadTop + descriptor.m_PadBottom;
+
+    TensorData result;
+
+    result.shape             = TensorShape(4);
+    result.shape[0]          = numBatches;
+    result.shape[channelDim] = numChannels;
+    result.shape[heightDim]  = paddedHeight;
+    result.shape[widthDim]   = paddedWidth;
+
+    result.data.assign(numBatches * numChannels * paddedWidth * paddedHeight, 0.0f);
+
+    return result;
+}
+
+// Accumulates the stride-1 transpose convolution of stridedInput with the kernel into
+// paddedOutput (bias is not applied here):
+//
+//   paddedOutput[b, cOut, y + ky, x + kx] += stridedInput[b, cIn, y, x] * weights[cOut, cIn, ky, kx]
+//
+// stridedInput      - input already expanded by the strides
+// paddedOutput      - accumulation target; expected to be zero-initialised by the caller
+// weightsShape      - shape of the tensor read through weightsDecoder
+// weightsDecoder    - float view of the kernel
+// dataLayoutIndexed - resolves dimension positions and flat indices for the data layout
+//
+// The kernel is addressed by (output channel, input channel, y, x) and never by the batch
+// index: the same kernel applies to every batch.
+// NOTE(review): this assumes dimension 0 of the weights is the output channel and the
+// channels dimension is the input channel - confirm against the layer's weight layout.
+void Deconvolve(const TensorData& stridedInput,
+                TensorData& paddedOutput,
+                const TensorShape& weightsShape,
+                Decoder<float>& weightsDecoder,
+                const DataLayoutIndexed& dataLayoutIndexed)
+{
+    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
+
+    const unsigned int batches        = stridedInput.shape[0];
+    const unsigned int inputChannels  = stridedInput.shape[cIndex];
+    const unsigned int outputChannels = paddedOutput.shape[cIndex];
+
+    const unsigned int wKernel = weightsShape[wIndex];
+    const unsigned int hKernel = weightsShape[hIndex];
+
+    const unsigned int wStridedInput = stridedInput.shape[wIndex];
+    const unsigned int hStridedInput = stridedInput.shape[hIndex];
+
+    const unsigned int wPaddedOutput = paddedOutput.shape[wIndex];
+    const unsigned int hPaddedOutput = paddedOutput.shape[hIndex];
+
+    for (unsigned int batchIdx = 0u; batchIdx < batches; ++batchIdx)
+    {
+        for (unsigned int cOutput = 0u; cOutput < outputChannels; ++cOutput)
+        {
+            for (unsigned int cInput = 0u; cInput < inputChannels; ++cInput)
+            {
+                for (unsigned int yInput = 0u; yInput < hStridedInput; ++yInput)
+                {
+                    for (unsigned int xInput = 0u; xInput < wStridedInput; ++xInput)
+                    {
+                        // obtain input value
+                        const unsigned int inputIdx =
+                            dataLayoutIndexed.GetIndex(stridedInput.shape, batchIdx, cInput, yInput, xInput);
+                        const float inputValue = stridedInput.data[inputIdx];
+
+                        // loop through kernel
+                        for (unsigned int yKernel = 0u; yKernel < hKernel; ++yKernel)
+                        {
+                            const unsigned int yOutput = yInput + yKernel;
+                            if (yOutput >= hPaddedOutput)
+                            {
+                                // all further kernel rows would land below the scratch buffer
+                                break;
+                            }
+
+                            for (unsigned int xKernel = 0u; xKernel < wKernel; ++xKernel)
+                            {
+                                const unsigned int xOutput = xInput + xKernel;
+                                if (xOutput >= wPaddedOutput)
+                                {
+                                    // all further kernel columns would land right of the scratch buffer
+                                    break;
+                                }
+
+                                const unsigned int kernelIdx =
+                                    dataLayoutIndexed.GetIndex(weightsShape, cOutput, cInput, yKernel, xKernel);
+
+                                // position the decoder on the kernel element, then read it
+                                weightsDecoder[kernelIdx];
+                                const float kernelValue = weightsDecoder.Get();
+
+                                const unsigned int outputIdx = dataLayoutIndexed.GetIndex(paddedOutput.shape,
+                                                                                          batchIdx,
+                                                                                          cOutput,
+                                                                                          yOutput,
+                                                                                          xOutput);
+
+                                // accumulate this contribution
+                                paddedOutput.data[outputIdx] += inputValue * kernelValue;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Reference implementation of a 2D transpose convolution.
+//
+// The input is first expanded by the strides, then deconvolved with the kernel into a
+// scratch buffer that is larger than the output by the padding amounts. Finally the
+// padding border is dropped and the (optionally biased) result is written to the output.
+//
+// descriptor     - strides, padding, data layout and the bias-enabled flag
+// inputShape     - shape of the tensor read through inputDecoder
+// inputDecoder   - float view of the input
+// outputShape    - shape of the tensor written through outputEncoder
+// outputEncoder  - float view of the output
+// weightsShape   - shape of the tensor read through weightsDecoder
+// weightsDecoder - float view of the kernel
+// biasesDecoder  - float view of the biases, indexed by output channel; may be null
+//                  only when descriptor.m_BiasEnabled is false
+//
+// Throws InvalidArgumentException if bias is enabled but biasesDecoder is null.
+void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descriptor,
+                                const TensorShape& inputShape,
+                                Decoder<float>& inputDecoder,
+                                const TensorShape& outputShape,
+                                Encoder<float>& outputEncoder,
+                                const TensorShape& weightsShape,
+                                Decoder<float>& weightsDecoder,
+                                Decoder<float>* biasesDecoder)
+{
+    if (descriptor.m_BiasEnabled && !biasesDecoder)
+    {
+        throw InvalidArgumentException("Biases enabled but no bias data provided");
+    }
+
+    const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
+
+    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
+
+    // The final copy walks the output tensor, so its extents come from the output shape.
+    const unsigned int numBatches  = outputShape[0];
+    const unsigned int numChannels = outputShape[cIndex];
+    const unsigned int hOutput     = outputShape[hIndex];
+    const unsigned int wOutput     = outputShape[wIndex];
+
+    // set up temporary strided input
+    TensorData stridedInput = SetUpStridedInput(inputShape, inputDecoder, descriptor, dataLayoutIndexed);
+
+    // set up temporary (empty) padded output
+    TensorData paddedOutput = SetUpEmptyPaddedOutput(outputShape, descriptor, dataLayoutIndexed);
+
+    // run deconvolution (without biases) on strided input to produce padded output
+    Deconvolve(stridedInput, paddedOutput, weightsShape, weightsDecoder, dataLayoutIndexed);
+
+    // remove padding and apply bias (if enabled)
+    for (unsigned int batchIdx = 0u; batchIdx < numBatches; ++batchIdx)
+    {
+        for (unsigned int cOutput = 0u; cOutput < numChannels; ++cOutput)
+        {
+            // the bias is constant per output channel, so read it once
+            float biasValue = 0.0f;
+            if (descriptor.m_BiasEnabled)
+            {
+                (*biasesDecoder)[cOutput];
+                biasValue = biasesDecoder->Get();
+            }
+
+            for (unsigned int yOutput = 0u; yOutput < hOutput; ++yOutput)
+            {
+                const unsigned int yPaddedOutput = yOutput + descriptor.m_PadTop;
+
+                for (unsigned int xOutput = 0u; xOutput < wOutput; ++xOutput)
+                {
+                    const unsigned int xPaddedOutput = xOutput + descriptor.m_PadLeft;
+
+                    const unsigned int outputIdx =
+                        dataLayoutIndexed.GetIndex(outputShape, batchIdx, cOutput, yOutput, xOutput);
+                    const unsigned int paddedOutputIdx =
+                        dataLayoutIndexed.GetIndex(paddedOutput.shape, batchIdx, cOutput, yPaddedOutput, xPaddedOutput);
+
+                    // Add the bias in float and encode exactly once. Writing the unbiased value
+                    // and reading it back through the encoder would push the intermediate
+                    // through the encoder's own conversion before the bias is added.
+                    float outputValue = paddedOutput.data[paddedOutputIdx];
+                    if (descriptor.m_BiasEnabled)
+                    {
+                        outputValue += biasValue;
+                    }
+
+                    outputEncoder[outputIdx];
+                    outputEncoder.Set(outputValue);
+                }
+            }
+        }
+    }
+}
+
+} // namespace armnn \ No newline at end of file
diff --git a/src/backends/reference/workloads/TransposeConvolution2d.hpp b/src/backends/reference/workloads/TransposeConvolution2d.hpp
new file mode 100644
index 0000000000..f20f327b38
--- /dev/null
+++ b/src/backends/reference/workloads/TransposeConvolution2d.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descriptor,
+ const TensorShape& inputShape,
+ Decoder<float>& inputDecoder,
+ const TensorShape& outputShape,
+ Encoder<float>& outputEncoder,
+ const TensorShape& weightsShape,
+ Decoder<float>& weightsDecoder,
+ Decoder<float>* biasesDecoder);
+
+} // namespace armnn \ No newline at end of file