IVGCVSW-3320 Add reference workload support for TransposeConvolution2dLayer

Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com> Change-Id: Icc64f8148c9d8a0d14d772e6e4e7865e70585cd9
author: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com> 2019-06-26 15:02:47 +0100
committer: Áron Virginás-Tar <aron.virginas-tar@arm.com> 2019-06-27 11:52:47 +0000
commit: 735a450d3b53a2d745b9a7a6d85747e25ec37ede (patch)
tree: 4f5af0ddada102cb51fe1f4ba84e3ccf8f51c6ab /src/backends/reference
parent: 05bf054f40eb551ea76722163b6ed1a1fde7bbf0 (diff)
download: armnn-735a450d3b53a2d745b9a7a6d85747e25ec37ede.tar.gz
11 files changed, 452 insertions, 8 deletions
diff --git a/src/backends/reference/CMakeLists.txt b/src/backends/reference/CMakeLists.txt
index 82880cf524..ff16f185b4 100644
--- a/src/backends/reference/CMakeLists.txt
+++ b/src/backends/reference/CMakeLists.txt
@@ -11,7 +11,6 @@ list(APPEND armnnRefBackend_sources
     RefLayerSupport.hpp
     RefWorkloadFactory.cpp
     RefWorkloadFactory.hpp
-
 )
 
 add_library(armnnRefBackend OBJECT ${armnnRefBackend_sources})
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 035a28b71a..5ede8b3f02 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -464,4 +464,11 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePrelu(const PreluQueueDescr
     return std::make_unique<RefPreluWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTransposeConvolution2d(
+    const TransposeConvolution2dQueueDescriptor& descriptor,
+    const WorkloadInfo& info) const
+{
+    return std::make_unique<RefTransposeConvolution2dWorkload>(descriptor, info); // both arguments forwarded as-is
+}
+
 } // namespace armnn
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index ed513e1e3b..44cb079ea7 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -193,6 +193,9 @@ public:
     std::unique_ptr<IWorkload> CreatePrelu(const PreluQueueDescriptor& descriptor,
                                            const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const override;
+
 private:
 
     template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 849d87c4ca..6fb17b563f 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -60,6 +60,7 @@ BACKEND_SOURCES := \
         workloads/RefSpaceToDepthWorkload.cpp \
         workloads/RefStridedSliceWorkload.cpp \
         workloads/RefSplitterWorkload.cpp \
+        workloads/RefTransposeConvolution2dWorkload.cpp \
         workloads/ResizeBilinear.cpp \
         workloads/Rsqrt.cpp \
         workloads/SpaceToBatchNd.cpp \
@@ -67,7 +68,8 @@ BACKEND_SOURCES := \
         workloads/StridedSlice.cpp \
         workloads/StringMapping.cpp \
         workloads/Softmax.cpp \
-        workloads/Splitter.cpp
+        workloads/Splitter.cpp \
+        workloads/TransposeConvolution2d.cpp
 
 # BACKEND_TEST_SOURCES contains the list of files to be included
 # in the Android unit test build (armnn-tests) and it is picked
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index b997a14e9d..3556df18b1 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -10,6 +10,7 @@
 
 #include <reference/RefWorkloadFactory.hpp>
 #include <backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp>
+#include <backendsCommon/test/TransposeConvolution2dTestImpl.hpp>
 #include <backendsCommon/test/LayerTests.hpp>
 
 #include <boost/test/unit_test.hpp>
@@ -966,4 +967,47 @@ ARMNN_AUTO_TEST_CASE(PreluFloat32, PreluTest<armnn::DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(PreluUint8,   PreluTest<armnn::DataType::QuantisedAsymm8>)
 ARMNN_AUTO_TEST_CASE(PreluInt16,   PreluTest<armnn::DataType::QuantisedSymm16>)
 
+// TransposeConvolution2d (each variant below is registered for Float, Uint8 and Int16 in both NCHW and NHWC)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dFloatNchw, SimpleTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dFloatNhwc, SimpleTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dUint8Nchw, SimpleTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dUint8Nhwc, SimpleTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dInt16Nchw, SimpleTransposeConvolution2dInt16NchwTest)
+ARMNN_AUTO_TEST_CASE(SimpleTransposeConvolution2dInt16Nhwc, SimpleTransposeConvolution2dInt16NhwcTest)
+
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dFloatNchw, UnbiasedSimpleTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dFloatNhwc, UnbiasedSimpleTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dUint8Nchw, UnbiasedSimpleTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dUint8Nhwc, UnbiasedSimpleTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dInt16Nchw, UnbiasedSimpleTransposeConvolution2dInt16NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedSimpleTransposeConvolution2dInt16Nhwc, UnbiasedSimpleTransposeConvolution2dInt16NhwcTest)
+
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dFloatNchw, PaddedTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dFloatNhwc, PaddedTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dUint8Nchw, PaddedTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dUint8Nhwc, PaddedTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dInt16Nchw, PaddedTransposeConvolution2dInt16NchwTest)
+ARMNN_AUTO_TEST_CASE(PaddedTransposeConvolution2dInt16Nhwc, PaddedTransposeConvolution2dInt16NhwcTest)
+
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dFloatNchw, UnbiasedPaddedTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dFloatNhwc, UnbiasedPaddedTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dUint8Nchw, UnbiasedPaddedTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dUint8Nhwc, UnbiasedPaddedTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dInt16Nchw, UnbiasedPaddedTransposeConvolution2dInt16NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedPaddedTransposeConvolution2dInt16Nhwc, UnbiasedPaddedTransposeConvolution2dInt16NhwcTest)
+
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dFloatNhwc, StridedTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dFloatNchw, StridedTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dUint8Nhwc, StridedTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dUint8Nchw, StridedTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dInt16Nhwc, StridedTransposeConvolution2dInt16NhwcTest)
+ARMNN_AUTO_TEST_CASE(StridedTransposeConvolution2dInt16Nchw, StridedTransposeConvolution2dInt16NchwTest)
+
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dFloatNhwc, UnbiasedStridedTransposeConvolution2dFloatNhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dFloatNchw, UnbiasedStridedTransposeConvolution2dFloatNchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dUint8Nhwc, UnbiasedStridedTransposeConvolution2dUint8NhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dUint8Nchw, UnbiasedStridedTransposeConvolution2dUint8NchwTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dInt16Nhwc, UnbiasedStridedTransposeConvolution2dInt16NhwcTest)
+ARMNN_AUTO_TEST_CASE(UnbiasedStridedTransposeConvolution2dInt16Nchw, UnbiasedStridedTransposeConvolution2dInt16NchwTest)
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index daa004345c..9be245b1a7 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -29,6 +29,8 @@ list(APPEND armnnRefBackendWorkloads_sources
     Gather.hpp
     LstmUtils.hpp
     Maximum.hpp
+    Mean.cpp
+    Mean.hpp
     Concatenate.hpp
     Concatenate.cpp
     Minimum.hpp
@@ -44,6 +46,8 @@ list(APPEND armnnRefBackendWorkloads_sources
     RefBatchNormalizationWorkload.hpp
     RefBatchToSpaceNdWorkload.cpp
     RefBatchToSpaceNdWorkload.hpp
+    RefConcatWorkload.cpp
+    RefConcatWorkload.hpp
     RefConstantWorkload.cpp
     RefConstantWorkload.hpp
     RefConvertFp16ToFp32Workload.cpp
@@ -74,8 +78,8 @@ list(APPEND armnnRefBackendWorkloads_sources
     RefL2NormalizationWorkload.hpp
     RefLstmWorkload.cpp
     RefLstmWorkload.hpp
-    RefConcatWorkload.cpp
-    RefConcatWorkload.hpp
+    RefMeanWorkload.cpp
+    RefMeanWorkload.hpp
     RefNormalizationWorkload.cpp
     RefNormalizationWorkload.hpp
     RefPadWorkload.cpp
@@ -104,6 +108,8 @@ list(APPEND armnnRefBackendWorkloads_sources
     RefSplitterWorkload.hpp
     RefStridedSliceWorkload.cpp
     RefStridedSliceWorkload.hpp
+    RefTransposeConvolution2dWorkload.cpp
+    RefTransposeConvolution2dWorkload.hpp
     RefWorkloads.hpp
     RefWorkloadUtils.hpp
     ResizeBilinear.cpp
@@ -123,10 +129,8 @@ list(APPEND armnnRefBackendWorkloads_sources
     StringMapping.cpp
     StringMapping.hpp
     TensorBufferArrayView.hpp
-    Mean.cpp
-    Mean.hpp
-    RefMeanWorkload.cpp
-    RefMeanWorkload.hpp
+    TransposeConvolution2d.cpp
+    TransposeConvolution2d.hpp
 )
 
 add_library(armnnRefBackendWorkloads OBJECT ${armnnRefBackendWorkloads_sources})
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
new file mode 100644
index 0000000000..50dafcac3c
--- /dev/null
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
@@ -0,0 +1,67 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefTransposeConvolution2dWorkload.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "TransposeConvolution2d.hpp"
+
+#include <Profiling.hpp>
+
+namespace armnn
+{
+
+RefTransposeConvolution2dWorkload::RefTransposeConvolution2dWorkload(
+    const TransposeConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) :
+    BaseWorkload<TransposeConvolution2dQueueDescriptor>(descriptor, info)
+{
+    // wrap the descriptor's weights in a workload-owned handle and set up a float decoder over it
+    m_Weights = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight));
+    const TensorInfo& weightsInfo = GetTensorInfo(m_Weights.get());
+    // the weights shape is cached because Execute() passes it to TransposeConvolution2dImpl
+    m_WeightsDecoder = MakeDecoder<float>(weightsInfo, m_Weights.get()->Map(true));
+    m_WeightsShape   = weightsInfo.GetShape();
+
+    // set up biases decoder (only when biases are enabled; m_Biases and m_BiasesDecoder stay null otherwise)
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        m_Biases = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias));
+        const TensorInfo& biasesInfo = GetTensorInfo(m_Biases.get());
+        m_BiasesDecoder = MakeDecoder<float>(biasesInfo, m_Biases.get()->Map(true));
+    }
+}
+
+void RefTransposeConvolution2dWorkload::PostAllocationConfigure()
+{
+    // set up input decoder over the mapped memory of the first input handle
+    const ITensorHandle* input  = m_Data.m_Inputs[0];
+    const TensorInfo& inputInfo = GetTensorInfo(input);
+    // the shape is cached because Execute() passes it to TransposeConvolution2dImpl
+    m_InputShape   = inputInfo.GetShape();
+    m_InputDecoder = MakeDecoder<float>(inputInfo, input->Map());
+
+    // set up output encoder over the mapped memory of the first output handle
+    ITensorHandle* output        = m_Data.m_Outputs[0];
+    const TensorInfo& outputInfo = GetTensorInfo(output);
+    // the shape is cached because Execute() passes it to TransposeConvolution2dImpl
+    m_OutputShape   = outputInfo.GetShape();
+    m_OutputEncoder = MakeEncoder<float>(outputInfo, output->Map());
+}
+
+void RefTransposeConvolution2dWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefTransposeConvolution2dWorkload_Execute");
+    // decoders, encoder and shapes were prepared by the constructor and PostAllocationConfigure()
+    TransposeConvolution2dImpl(m_Data.m_Parameters,
+                               m_InputShape,
+                               *m_InputDecoder,
+                               m_OutputShape,
+                               *m_OutputEncoder,
+                               m_WeightsShape,
+                               *m_WeightsDecoder,
+                               m_BiasesDecoder.get()); // null when biases are disabled
+}
+
+} // namespace armnn
+\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
new file mode 100644
index 0000000000..9ded8c971f
--- /dev/null
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/Workload.hpp>
+
+namespace armnn
+{
+
+class RefTransposeConvolution2dWorkload : public BaseWorkload<TransposeConvolution2dQueueDescriptor>
+{
+public:
+    RefTransposeConvolution2dWorkload(const TransposeConvolution2dQueueDescriptor& descriptor,
+                                      const WorkloadInfo& info);
+    ~RefTransposeConvolution2dWorkload() = default;
+    // builds the input decoder and output encoder from the mapped input/output tensor handles
+    void PostAllocationConfigure() override;
+    // runs TransposeConvolution2dImpl with the decoders, encoder and shapes held below
+    void Execute() const override;
+
+private:
+    std::unique_ptr<ScopedCpuTensorHandle> m_Weights; // workload-owned handle built from the descriptor's weights
+    std::unique_ptr<ScopedCpuTensorHandle> m_Biases;  // only created when biases are enabled
+    // created in PostAllocationConfigure()
+    std::unique_ptr<Decoder<float>> m_InputDecoder;
+    std::unique_ptr<Encoder<float>> m_OutputEncoder;
+    // created in the constructor; m_BiasesDecoder is only set when biases are enabled
+    std::unique_ptr<Decoder<float>> m_WeightsDecoder;
+    std::unique_ptr<Decoder<float>> m_BiasesDecoder;
+    // shapes cached next to the decoders/encoder and handed to TransposeConvolution2dImpl
+    TensorShape m_InputShape;
+    TensorShape m_OutputShape;
+    TensorShape m_WeightsShape;
+};
+
+} // namespace armnn
+\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 90582812e7..3a094c8a32 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -47,6 +47,7 @@
 #include "RefSpaceToBatchNdWorkload.hpp"
 #include "RefStridedSliceWorkload.hpp"
 #include "RefSpaceToDepthWorkload.hpp"
+#include "RefTransposeConvolution2dWorkload.hpp"
 #include "RefWorkloadUtils.hpp"
 #include "ResizeBilinear.hpp"
 #include "Softmax.hpp"
diff --git a/src/backends/reference/workloads/TransposeConvolution2d.cpp b/src/backends/reference/workloads/TransposeConvolution2d.cpp
new file mode 100644
index 0000000000..db15cefe10
--- /dev/null
+++ b/src/backends/reference/workloads/TransposeConvolution2d.cpp
@@ -0,0 +1,248 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "TransposeConvolution2d.hpp"
+
+#include <DataLayoutIndexed.hpp>
+
+namespace armnn
+{
+
+using namespace armnnUtils;
+
+struct TensorData // temporary float tensor used for the strided input and the padded output
+{
+    TensorShape        shape; // extents of the buffer, filled in per data layout by the set-up helpers
+    std::vector<float> data;  // flat element storage, addressed through DataLayoutIndexed::GetIndex
+};
+// Returns a zero-filled copy of the input whose elements are placed m_StrideX / m_StrideY positions apart.
+TensorData SetUpStridedInput(const TensorShape& inputShape,
+                             Decoder<float>& inputDecoder,
+                             const TransposeConvolution2dDescriptor& descriptor,
+                             const DataLayoutIndexed& dataLayoutIndexed)
+{
+    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
+
+    const unsigned int batches  = inputShape[0];
+    const unsigned int channels = inputShape[cIndex];
+
+    const unsigned int wInput = inputShape[wIndex];
+    const unsigned int hInput = inputShape[hIndex];
+    // NOTE(review): unsigned arithmetic, so (wInput - 1) / (hInput - 1) wrap around if a spatial extent is 0
+    const unsigned int wStridedInput = 1u + descriptor.m_StrideX * (wInput - 1);
+    const unsigned int hStridedInput = 1u + descriptor.m_StrideY * (hInput - 1);
+    // every element starts at 0.0f; only the strided positions are overwritten by the loop below
+    TensorData stridedInput;
+    stridedInput.data  = std::vector<float>(batches * channels * wStridedInput * hStridedInput, 0.0f);
+    stridedInput.shape = TensorShape(4);
+
+    stridedInput.shape[0]      = batches;
+    stridedInput.shape[cIndex] = channels;
+    stridedInput.shape[hIndex] = hStridedInput;
+    stridedInput.shape[wIndex] = wStridedInput;
+
+    // expand input data with strides: input (y, x) is copied to strided (y * m_StrideY, x * m_StrideX)
+    for (unsigned int batchIdx = 0u; batchIdx < batches; ++batchIdx)
+    {
+        for (unsigned int cInput = 0u; cInput < channels; ++cInput)
+        {
+            for (unsigned int yInput = 0u, yStrided = 0u;
+                 yInput < hInput && yStrided < hStridedInput;
+                 ++yInput, yStrided += descriptor.m_StrideY)
+            {
+                for (unsigned int xInput = 0u, xStrided = 0u;
+                     xInput < wInput && xStrided < wStridedInput;
+                     ++xInput, xStrided += descriptor.m_StrideX)
+                {
+                    unsigned int inputIdx =
+                        dataLayoutIndexed.GetIndex(inputShape, batchIdx, cInput, yInput, xInput);
+                    unsigned int stridedInputIdx =
+                        dataLayoutIndexed.GetIndex(stridedInput.shape, batchIdx, cInput, yStrided, xStrided);
+                    // index the decoder first, then read the float value at that position
+                    inputDecoder[inputIdx];
+                    stridedInput.data[stridedInputIdx] = inputDecoder.Get();
+                }
+            }
+        }
+    }
+
+    return stridedInput;
+}
+// Returns a zero-filled buffer shaped like the output but enlarged by the descriptor's padding.
+TensorData SetUpEmptyPaddedOutput(const TensorShape& outputShape,
+                                  const TransposeConvolution2dDescriptor& descriptor,
+                                  const DataLayoutIndexed& dataLayoutIndexed)
+{
+    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
+
+    const unsigned int batches  = outputShape[0];
+    const unsigned int channels = outputShape[cIndex];
+
+    const unsigned int wOutput = outputShape[wIndex];
+    const unsigned int hOutput = outputShape[hIndex];
+    // width grows by left + right padding, height by top + bottom padding
+    const unsigned int wPaddedOutput = wOutput + descriptor.m_PadLeft + descriptor.m_PadRight;
+    const unsigned int hPaddedOutput = hOutput + descriptor.m_PadTop  + descriptor.m_PadBottom;
+    // all elements start at 0.0f so that results can be accumulated into the buffer
+    TensorData paddedOutput;
+    paddedOutput.data  = std::vector<float>(batches * channels * wPaddedOutput * hPaddedOutput, 0.0f);
+    paddedOutput.shape = TensorShape(4);
+
+    paddedOutput.shape[0]      = batches;
+    paddedOutput.shape[cIndex] = channels;
+    paddedOutput.shape[hIndex] = hPaddedOutput;
+    paddedOutput.shape[wIndex] = wPaddedOutput;
+
+    return paddedOutput;
+}
+
+// Scatters every element of stridedInput through the kernel and accumulates the products in paddedOutput.
+// Weights are addressed as (output channel, input channel, y, x) via dataLayoutIndexed, so each output
+// channel sums the contributions of all input channels and the same kernels are used for every batch.
+void Deconvolve(const TensorData& stridedInput,
+                TensorData& paddedOutput,
+                const TensorShape& weightsShape,
+                Decoder<float>& weightsDecoder,
+                const DataLayoutIndexed& dataLayoutIndexed)
+{
+    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
+
+    const unsigned int batches        = stridedInput.shape[0];
+    const unsigned int inputChannels  = stridedInput.shape[cIndex];
+    const unsigned int outputChannels = paddedOutput.shape[cIndex];
+
+    const unsigned int wKernel = weightsShape[wIndex];
+    const unsigned int hKernel = weightsShape[hIndex];
+
+    const unsigned int wStridedInput = stridedInput.shape[wIndex];
+    const unsigned int hStridedInput = stridedInput.shape[hIndex];
+
+    // loop through all input elements, once per output channel
+    for (unsigned int batchIdx = 0u; batchIdx < batches; ++batchIdx)
+    {
+        for (unsigned int cOutput = 0u; cOutput < outputChannels; ++cOutput)
+        {
+            // every input channel contributes to every output channel
+            for (unsigned int cInput = 0u; cInput < inputChannels; ++cInput)
+            {
+                for (unsigned int yInput = 0u; yInput < hStridedInput; ++yInput)
+                {
+                    for (unsigned int xInput = 0u; xInput < wStridedInput; ++xInput)
+                    {
+                        // obtain input value
+                        const unsigned int inputIdx =
+                            dataLayoutIndexed.GetIndex(stridedInput.shape, batchIdx, cInput, yInput, xInput);
+                        const float inputValue = stridedInput.data[inputIdx];
+
+                        // loop through kernel
+                        for (unsigned int yKernel = 0u; yKernel < hKernel; ++yKernel)
+                        {
+                            for (unsigned int xKernel = 0u; xKernel < wKernel; ++xKernel)
+                            {
+                                // the kernel is selected by the channel pair, never by the batch index
+                                const unsigned int kernelIdx =
+                                    dataLayoutIndexed.GetIndex(weightsShape, cOutput, cInput, yKernel, xKernel);
+                                weightsDecoder[kernelIdx];
+                                const float kernelValue = weightsDecoder.Get();
+
+                                // each input element lands in a kernel-sized window of the output
+                                const unsigned int outputIdx = dataLayoutIndexed.GetIndex(
+                                    paddedOutput.shape, batchIdx, cOutput, yInput + yKernel, xInput + xKernel);
+
+                                // accumulate output increment
+                                paddedOutput.data[outputIdx] += inputValue * kernelValue;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Reference transpose convolution: expands the input by the strides, scatters it through the kernel
+// into a padded temporary buffer, then crops the padding and adds the optional per-channel bias.
+// Throws InvalidArgumentException if biases are enabled but biasesDecoder is null.
+void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descriptor,
+                                const TensorShape& inputShape,
+                                Decoder<float>& inputDecoder,
+                                const TensorShape& outputShape,
+                                Encoder<float>& outputEncoder,
+                                const TensorShape& weightsShape,
+                                Decoder<float>& weightsDecoder,
+                                Decoder<float>* biasesDecoder)
+{
+    if (descriptor.m_BiasEnabled && !biasesDecoder)
+    {
+        throw InvalidArgumentException("Biases enabled but no bias data provided");
+    }
+
+    const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
+
+    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
+
+    const unsigned int numBatches  = inputShape[0];
+    const unsigned int numChannels = outputShape[cIndex]; // the crop loop below walks output channels
+
+    // set up temporary strided input
+    TensorData stridedInput = SetUpStridedInput(inputShape, inputDecoder, descriptor, dataLayoutIndexed);
+
+    // set up temporary (empty) padded output
+    TensorData paddedOutput = SetUpEmptyPaddedOutput(outputShape, descriptor, dataLayoutIndexed);
+
+    // run deconvolution (without biases) on strided input to produce padded output
+    Deconvolve(stridedInput, paddedOutput, weightsShape, weightsDecoder, dataLayoutIndexed);
+
+    const unsigned int wPaddedOutput = paddedOutput.shape[wIndex];
+    const unsigned int hPaddedOutput = paddedOutput.shape[hIndex];
+
+    // remove padding and apply bias (if enabled)
+    for (unsigned int batchIdx = 0u; batchIdx < numBatches; ++batchIdx)
+    {
+        for (unsigned int cOutput = 0u; cOutput < numChannels; ++cOutput)
+        {
+            // update bias decoder iterator
+            if (descriptor.m_BiasEnabled)
+            {
+                (*biasesDecoder)[cOutput];
+            }
+
+            for (unsigned int yPaddedOutput = descriptor.m_PadTop;
+                 yPaddedOutput < (hPaddedOutput - descriptor.m_PadBottom);
+                 ++yPaddedOutput)
+            {
+                for (unsigned int xPaddedOutput = descriptor.m_PadLeft;
+                     xPaddedOutput < (wPaddedOutput - descriptor.m_PadRight);
+                     ++xPaddedOutput)
+                {
+                    unsigned int xOutput = xPaddedOutput - descriptor.m_PadLeft;
+                    unsigned int yOutput = yPaddedOutput - descriptor.m_PadTop;
+
+                    unsigned int outputIdx =
+                        dataLayoutIndexed.GetIndex(outputShape, batchIdx, cOutput, yOutput, xOutput);
+                    unsigned int paddedOutputIdx =
+                        dataLayoutIndexed.GetIndex(paddedOutput.shape, batchIdx, cOutput, yPaddedOutput, xPaddedOutput);
+
+                    // add the bias in float and encode once: encoding first and reading the value back
+                    // would round (and clamp) the unbiased value when the output type is quantised
+                    const float value  = paddedOutput.data[paddedOutputIdx];
+                    const float result = descriptor.m_BiasEnabled ? value + biasesDecoder->Get() : value;
+                    outputEncoder[outputIdx];
+                    outputEncoder.Set(result);
+                }
+            }
+        }
+    }
+}
+
+} // namespace armnn
+\ No newline at end of file
diff --git a/src/backends/reference/workloads/TransposeConvolution2d.hpp b/src/backends/reference/workloads/TransposeConvolution2d.hpp
new file mode 100644
index 0000000000..f20f327b38
--- /dev/null
+++ b/src/backends/reference/workloads/TransposeConvolution2d.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descriptor,
+                                const TensorShape& inputShape,
+                                Decoder<float>& inputDecoder,
+                                const TensorShape& outputShape,
+                                Encoder<float>& outputEncoder,
+                                const TensorShape& weightsShape,
+                                Decoder<float>& weightsDecoder,
+                                Decoder<float>* biasesDecoder);
+
+} // namespace armnn
+\ No newline at end of file
author	Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>	2019-06-26 15:02:47 +0100
committer	Áron Virginás-Tar <aron.virginas-tar@arm.com>	2019-06-27 11:52:47 +0000
commit	735a450d3b53a2d745b9a7a6d85747e25ec37ede (patch)
tree	4f5af0ddada102cb51fe1f4ba84e3ccf8f51c6ab /src/backends/reference
parent	05bf054f40eb551ea76722163b6ed1a1fde7bbf0 (diff)
download	armnn-735a450d3b53a2d745b9a7a6d85747e25ec37ede.tar.gz