IVGCVSW-3615 Update Transpose Convolution algorithm

* Now supports channel multiplier != 1 in Ref TransposeConvolution2d

Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: Ice95444025311b8b952c8f7f5fa0f1ecf483df41
author: Mike Kelly <mike.kelly@arm.com> 2019-08-13 10:06:25 +0100
committer: Áron Virginás-Tar <aron.virginas-tar@arm.com> 2019-08-13 16:33:55 +0000
commit: a24d9c7970028e4aa2f210d908b2c4299bf4979d (patch)
tree: fc8601abd3c39790925b08e66039b0d344d00155
parent: 68db21f6d94237fee981a8b9e64fca76197bb07d (diff)
download: armnn-a24d9c7970028e4aa2f210d908b2c4299bf4979d.tar.gz
1 files changed, 84 insertions, 197 deletions
diff --git a/src/backends/reference/workloads/TransposeConvolution2d.cpp b/src/backends/reference/workloads/TransposeConvolution2d.cpp
index db15cefe10..b633d0db8a 100644
--- a/src/backends/reference/workloads/TransposeConvolution2d.cpp
+++ b/src/backends/reference/workloads/TransposeConvolution2d.cpp
@@ -12,232 +12,119 @@ namespace armnn
 
 using namespace armnnUtils;
 
-struct TensorData
-{
-    TensorShape        shape;
-    std::vector<float> data;
-};
-
-TensorData SetUpStridedInput(const TensorShape& inputShape,
-                             Decoder<float>& inputDecoder,
-                             const TransposeConvolution2dDescriptor& descriptor,
-                             const DataLayoutIndexed& dataLayoutIndexed)
+void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descriptor,
+                                const TensorShape& inputShape,
+                                Decoder<float>& inputDecoder,
+                                const TensorShape& outputShape,
+                                Encoder<float>& outputEncoder,
+                                const TensorShape& weightsShape,
+                                Decoder<float>& weightsDecoder,
+                                Decoder<float>* biasesDecoder)
 {
-    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
-    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
-    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
-
-    const unsigned int batches  = inputShape[0];
-    const unsigned int channels = inputShape[cIndex];
-
-    const unsigned int wInput = inputShape[wIndex];
-    const unsigned int hInput = inputShape[hIndex];
-
-    const unsigned int wStridedInput = 1u + descriptor.m_StrideX * (wInput - 1);
-    const unsigned int hStridedInput = 1u + descriptor.m_StrideY * (hInput - 1);
-
-    TensorData stridedInput;
-    stridedInput.data  = std::vector<float>(batches * channels * wStridedInput * hStridedInput, 0.0f);
-    stridedInput.shape = TensorShape(4);
-
-    stridedInput.shape[0]      = batches;
-    stridedInput.shape[cIndex] = channels;
-    stridedInput.shape[hIndex] = hStridedInput;
-    stridedInput.shape[wIndex] = wStridedInput;
-
-    // expand input data with strides
-    for (unsigned int batchIdx = 0u; batchIdx < batches; ++batchIdx)
+    if (descriptor.m_BiasEnabled && !biasesDecoder)
     {
-        for (unsigned int cInput = 0u; cInput < channels; ++cInput)
-        {
-            for (unsigned int yInput = 0u, yStrided = 0u;
-                 yInput < hInput && yStrided < hStridedInput;
-                 ++yInput, yStrided += descriptor.m_StrideY)
-            {
-                for (unsigned int xInput = 0u, xStrided = 0u;
-                     xInput < wInput && xStrided < wStridedInput;
-                     ++xInput, xStrided += descriptor.m_StrideX)
-                {
-                    unsigned int inputIdx =
-                        dataLayoutIndexed.GetIndex(inputShape, batchIdx, cInput, yInput, xInput);
-                    unsigned int stridedInputIdx =
-                        dataLayoutIndexed.GetIndex(stridedInput.shape, batchIdx, cInput, yStrided, xStrided);
-
-                    inputDecoder[inputIdx];
-                    stridedInput.data[stridedInputIdx] = inputDecoder.Get();
-                }
-            }
-        }
+        throw InvalidArgumentException("Biases enabled but no bias data provided");
     }
+    const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
+    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();
 
-    return stridedInput;
-}
-
-TensorData SetUpEmptyPaddedOutput(const TensorShape& outputShape,
-                                  const TransposeConvolution2dDescriptor& descriptor,
-                                  const DataLayoutIndexed& dataLayoutIndexed)
-{
-    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
-    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
-    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
-
-    const unsigned int batches  = outputShape[0];
-    const unsigned int channels = outputShape[cIndex];
+    unsigned int numBatches = inputShape[0];
 
-    const unsigned int wOutput = outputShape[wIndex];
-    const unsigned int hOutput = outputShape[hIndex];
+    unsigned int inputWidth  = inputShape[widthIndex];
+    unsigned int inputHeight = inputShape[heightIndex];
+    unsigned int inputDepth  = inputShape[channelsIndex];
 
-    const unsigned int wPaddedOutput = wOutput + descriptor.m_PadLeft + descriptor.m_PadRight;
-    const unsigned int hPaddedOutput = hOutput + descriptor.m_PadTop  + descriptor.m_PadBottom;
+    unsigned int weightsHeight = weightsShape[heightIndex];
+    unsigned int weightsWidth  = weightsShape[widthIndex];
 
-    TensorData paddedOutput;
-    paddedOutput.data  = std::vector<float>(batches * channels * wPaddedOutput * hPaddedOutput, 0.0f);
-    paddedOutput.shape = TensorShape(4);
+    unsigned int outputHeight = outputShape[heightIndex];
+    unsigned int outputWidth  = outputShape[widthIndex];
+    unsigned int outputDepth  = outputShape[channelsIndex];
 
-    paddedOutput.shape[0]      = batches;
-    paddedOutput.shape[cIndex] = channels;
-    paddedOutput.shape[hIndex] = hPaddedOutput;
-    paddedOutput.shape[wIndex] = wPaddedOutput;
+    unsigned int paddingLeft = descriptor.m_PadLeft;
+    unsigned int paddingTop  = descriptor.m_PadTop;
 
-    return paddedOutput;
-}
-
-void Deconvolve(const TensorData& stridedInput,
-                TensorData& paddedOutput,
-                const TensorShape& weightsShape,
-                Decoder<float>& weightsDecoder,
-                const DataLayoutIndexed& dataLayoutIndexed)
-{
-    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
-    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
-    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
+    unsigned int strideX = descriptor.m_StrideX;
+    unsigned int strideY = descriptor.m_StrideY;
 
-    const unsigned int batches  = stridedInput.shape[0];
-    const unsigned int channels = stridedInput.shape[cIndex];
-
-    const unsigned int wKernel = weightsShape[wIndex];
-    const unsigned int hKernel = weightsShape[hIndex];
-
-    const unsigned int wStridedInput = stridedInput.shape[wIndex];
-    const unsigned int hStridedInput = stridedInput.shape[hIndex];
+    // Set the initial output values to be logically 0 otherwise the algorithm doesn't work.
+    for (unsigned int i = 0u; i < outputShape.GetNumElements(); ++i)
+    {
+        outputEncoder.Set(0.f);
+        ++outputEncoder;
+    }
 
-    // loop through all input elements
-    for (unsigned int batchIdx = 0u; batchIdx < batches; ++batchIdx)
+    for (unsigned int batch = 0u; batch < numBatches; ++batch)
     {
-        for (unsigned int cInput = 0u; cInput < channels; ++cInput)
+        for (unsigned int yInput = 0u; yInput < inputHeight; ++yInput)
         {
-            for (unsigned int yInput = 0u; yInput < hStridedInput; ++yInput)
+            for (unsigned int xInput = 0u; xInput < inputWidth; ++xInput)
             {
-                for (unsigned int xInput = 0u; xInput < wStridedInput; ++xInput)
-                {
-                    // obtain input value
-                    unsigned int inputIdx =
-                        dataLayoutIndexed.GetIndex(stridedInput.shape, batchIdx, cInput, yInput, xInput);
-                    float inputValue = stridedInput.data[inputIdx];
+                unsigned int xOutputOrigin = xInput * strideX - paddingLeft;
+                unsigned int yOutputOrigin = yInput * strideY - paddingTop;
 
-                    // loop through kernel
-                    for (unsigned int yKernel = 0u; yKernel < hKernel; ++yKernel)
+                unsigned int weightsBaseIndex = 0u;
+                for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
+                {
+                    for (unsigned int yWeights = 0u; yWeights < weightsHeight; ++yWeights)
                     {
-                        for (unsigned int xKernel = 0; xKernel < wKernel; ++xKernel)
+                        for (unsigned int xWeights = 0u; xWeights < weightsWidth;
+                             ++xWeights, weightsBaseIndex += inputDepth)
                         {
-                            unsigned int kernelIdx =
-                                dataLayoutIndexed.GetIndex(weightsShape, batchIdx, cInput, yKernel, xKernel);
-
-                            weightsDecoder[kernelIdx];
-                            float kernelValue = weightsDecoder.Get();
-
-                            unsigned int xOutput = xInput + xKernel;
-                            unsigned int yOutput = yInput + yKernel;
-
-                            // compute output increment
-                            float outputValue = inputValue * kernelValue;
-
-                            unsigned int outputIdx = dataLayoutIndexed.GetIndex(paddedOutput.shape,
-                                                                                batchIdx,
-                                                                                cInput,
-                                                                                yOutput,
-                                                                                xOutput);
-
-                            // set output value
-                            paddedOutput.data[outputIdx] += outputValue;
+                            unsigned int yOutput = yOutputOrigin + yWeights;
+                            unsigned int xOutput = xOutputOrigin + xWeights;
+
+                            if (yOutput < outputHeight && xOutput< outputWidth)
+                            {
+                                for (unsigned int dInput = 0u; dInput < inputDepth; dInput++)
+                                {
+                                    const unsigned int inputIndex =
+                                        dataLayoutIndexed.GetIndex(inputShape, batch, dInput, yInput, xInput);
+                                    inputDecoder[inputIndex];
+
+                                    const unsigned int weightsIndex =
+                                        dataLayoutIndexed.GetIndex(weightsShape, batch, dOutput, yWeights, xWeights);
+                                    weightsDecoder[weightsIndex];
+
+                                    const unsigned int outputIndex =
+                                        dataLayoutIndexed.GetIndex(outputShape, batch, dOutput, yOutput, xOutput);
+                                    outputEncoder[outputIndex];
+
+                                    float output = outputEncoder.Get();
+                                    output += inputDecoder.Get() * weightsDecoder.Get();
+
+                                    outputEncoder.Set(output);
+                                }
+                            }
                         }
                     }
                 }
             }
         }
     }
-}
 
-void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descriptor,
-                                const TensorShape& inputShape,
-                                Decoder<float>& inputDecoder,
-                                const TensorShape& outputShape,
-                                Encoder<float>& outputEncoder,
-                                const TensorShape& weightsShape,
-                                Decoder<float>& weightsDecoder,
-                                Decoder<float>* biasesDecoder)
-{
-    if (descriptor.m_BiasEnabled && !biasesDecoder)
+    // Apply bias (if enabled)
+    if (descriptor.m_BiasEnabled)
     {
-        throw InvalidArgumentException("Biases enabled but no bias data provided");
-    }
-
-    const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
-
-    const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex();
-    const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex();
-    const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex();
-
-    const unsigned int numBatches  = inputShape[0];
-    const unsigned int numChannels = inputShape[cIndex];
-
-    // set up temporary strided input
-    TensorData stridedInput = SetUpStridedInput(inputShape, inputDecoder, descriptor, dataLayoutIndexed);
-
-    // set up temporary (empty) padded output
-    TensorData paddedOutput = SetUpEmptyPaddedOutput(outputShape, descriptor, dataLayoutIndexed);
-
-    // run deconvolution (without biases) on strided input to produce padded output
-    Deconvolve(stridedInput, paddedOutput, weightsShape, weightsDecoder, dataLayoutIndexed);
+        outputEncoder[0];
+        Decoder<float>& rBiasesDecoder = *biasesDecoder;
 
-    const unsigned int wPaddedOutput = paddedOutput.shape[wIndex];
-    const unsigned int hPaddedOutput = paddedOutput.shape[hIndex];
-
-    // remove padding and apply bias (if enabled)
-    for (unsigned int batchIdx = 0u; batchIdx < numBatches; ++batchIdx)
-    {
-        for (unsigned int cOutput = 0u; cOutput < numChannels; ++cOutput)
+        for (unsigned int batch = 0u; batch < numBatches; ++batch)
         {
-            // update bias decoder iterator
-            if (descriptor.m_BiasEnabled)
-            {
-                (*biasesDecoder)[cOutput];
-            }
-
-            for (unsigned int yPaddedOutput = descriptor.m_PadTop;
-                 yPaddedOutput < (hPaddedOutput - descriptor.m_PadBottom);
-                 ++yPaddedOutput)
+            for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
             {
-                for (unsigned int xPaddedOutput = descriptor.m_PadLeft;
-                     xPaddedOutput < (wPaddedOutput - descriptor.m_PadRight);
-                     ++xPaddedOutput)
+                rBiasesDecoder[dOutput];
+                for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput)
                 {
-                    unsigned int xOutput = xPaddedOutput - descriptor.m_PadLeft;
-                    unsigned int yOutput = yPaddedOutput - descriptor.m_PadTop;
-
-                    unsigned int outputIdx =
-                        dataLayoutIndexed.GetIndex(outputShape, batchIdx, cOutput, yOutput, xOutput);
-                    unsigned int paddedOutputIdx =
-                        dataLayoutIndexed.GetIndex(paddedOutput.shape, batchIdx, cOutput, yPaddedOutput, xPaddedOutput);
-
-                    // encode (copy) output data
-                    outputEncoder[outputIdx];
-                    outputEncoder.Set(paddedOutput.data[paddedOutputIdx]);
-
-                    // apply bias (if enabled)
-                    if (descriptor.m_BiasEnabled)
+                    for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput)
                     {
-                        outputEncoder.Set(outputEncoder.Get() + biasesDecoder->Get());
+                        const unsigned int outputIndex =
+                            dataLayoutIndexed.GetIndex(outputShape, batch, dOutput, yOutput, xOutput);
+
+                        outputEncoder[outputIndex];
+                        outputEncoder.Set(outputEncoder.Get() + rBiasesDecoder.Get());
                     }
                 }
             }
@@ -245,4 +132,4 @@ void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descript
     }
 }
 
-} // namespace armnn
-\ No newline at end of file
+} // namespace armnn
author	Mike Kelly <mike.kelly@arm.com>	2019-08-13 10:06:25 +0100
committer	Áron Virginás-Tar <aron.virginas-tar@arm.com>	2019-08-13 16:33:55 +0000
commit	a24d9c7970028e4aa2f210d908b2c4299bf4979d (patch)
tree	fc8601abd3c39790925b08e66039b0d344d00155
parent	68db21f6d94237fee981a8b9e64fca76197bb07d (diff)
download	armnn-a24d9c7970028e4aa2f210d908b2c4299bf4979d.tar.gz