From a24d9c7970028e4aa2f210d908b2c4299bf4979d Mon Sep 17 00:00:00 2001 From: Mike Kelly Date: Tue, 13 Aug 2019 10:06:25 +0100 Subject: IVGCVSW-3615 Update Transpose Convolution algorithm * Now supports channel multiplier != 1 in Ref TransposeConvolution2d Signed-off-by: Mike Kelly Change-Id: Ice95444025311b8b952c8f7f5fa0f1ecf483df41 --- .../reference/workloads/TransposeConvolution2d.cpp | 281 ++++++--------------- 1 file changed, 84 insertions(+), 197 deletions(-) diff --git a/src/backends/reference/workloads/TransposeConvolution2d.cpp b/src/backends/reference/workloads/TransposeConvolution2d.cpp index db15cefe10..b633d0db8a 100644 --- a/src/backends/reference/workloads/TransposeConvolution2d.cpp +++ b/src/backends/reference/workloads/TransposeConvolution2d.cpp @@ -12,232 +12,119 @@ namespace armnn using namespace armnnUtils; -struct TensorData -{ - TensorShape shape; - std::vector data; -}; - -TensorData SetUpStridedInput(const TensorShape& inputShape, - Decoder& inputDecoder, - const TransposeConvolution2dDescriptor& descriptor, - const DataLayoutIndexed& dataLayoutIndexed) +void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descriptor, + const TensorShape& inputShape, + Decoder& inputDecoder, + const TensorShape& outputShape, + Encoder& outputEncoder, + const TensorShape& weightsShape, + Decoder& weightsDecoder, + Decoder* biasesDecoder) { - const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex(); - const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex(); - const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex(); - - const unsigned int batches = inputShape[0]; - const unsigned int channels = inputShape[cIndex]; - - const unsigned int wInput = inputShape[wIndex]; - const unsigned int hInput = inputShape[hIndex]; - - const unsigned int wStridedInput = 1u + descriptor.m_StrideX * (wInput - 1); - const unsigned int hStridedInput = 1u + descriptor.m_StrideY * (hInput - 1); - - TensorData stridedInput; - stridedInput.data = std::vector(batches * channels * wStridedInput * hStridedInput, 0.0f); - stridedInput.shape = TensorShape(4); - - stridedInput.shape[0] = batches; - stridedInput.shape[cIndex] = channels; - stridedInput.shape[hIndex] = hStridedInput; - stridedInput.shape[wIndex] = wStridedInput; - - // expand input data with strides - for (unsigned int batchIdx = 0u; batchIdx < batches; ++batchIdx) + if (descriptor.m_BiasEnabled && !biasesDecoder) { - for (unsigned int cInput = 0u; cInput < channels; ++cInput) - { - for (unsigned int yInput = 0u, yStrided = 0u; - yInput < hInput && yStrided < hStridedInput; - ++yInput, yStrided += descriptor.m_StrideY) - { - for (unsigned int xInput = 0u, xStrided = 0u; - xInput < wInput && xStrided < wStridedInput; - ++xInput, xStrided += descriptor.m_StrideX) - { - unsigned int inputIdx = - dataLayoutIndexed.GetIndex(inputShape, batchIdx, cInput, yInput, xInput); - unsigned int stridedInputIdx = - dataLayoutIndexed.GetIndex(stridedInput.shape, batchIdx, cInput, yStrided, xStrided); - - inputDecoder[inputIdx]; - stridedInput.data[stridedInputIdx] = inputDecoder.Get(); - } - } - } + throw InvalidArgumentException("Biases enabled but no bias data provided"); } + const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout); + const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex(); + const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); + const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); - return stridedInput; -} - -TensorData SetUpEmptyPaddedOutput(const TensorShape& outputShape, - const TransposeConvolution2dDescriptor& descriptor, - const DataLayoutIndexed& dataLayoutIndexed) -{ - const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex(); - const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex(); - const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex(); - - const unsigned int batches = outputShape[0]; - const unsigned int channels = outputShape[cIndex]; + unsigned int numBatches = inputShape[0]; - const unsigned int wOutput = outputShape[wIndex]; - const unsigned int hOutput = outputShape[hIndex]; + unsigned int inputWidth = inputShape[widthIndex]; + unsigned int inputHeight = inputShape[heightIndex]; + unsigned int inputDepth = inputShape[channelsIndex]; - const unsigned int wPaddedOutput = wOutput + descriptor.m_PadLeft + descriptor.m_PadRight; - const unsigned int hPaddedOutput = hOutput + descriptor.m_PadTop + descriptor.m_PadBottom; + unsigned int weightsHeight = weightsShape[heightIndex]; + unsigned int weightsWidth = weightsShape[widthIndex]; - TensorData paddedOutput; - paddedOutput.data = std::vector(batches * channels * wPaddedOutput * hPaddedOutput, 0.0f); - paddedOutput.shape = TensorShape(4); + unsigned int outputHeight = outputShape[heightIndex]; + unsigned int outputWidth = outputShape[widthIndex]; + unsigned int outputDepth = outputShape[channelsIndex]; - paddedOutput.shape[0] = batches; - paddedOutput.shape[cIndex] = channels; - paddedOutput.shape[hIndex] = hPaddedOutput; - paddedOutput.shape[wIndex] = wPaddedOutput; + unsigned int paddingLeft = descriptor.m_PadLeft; + unsigned int paddingTop = descriptor.m_PadTop; - return paddedOutput; -} - -void Deconvolve(const TensorData& stridedInput, - TensorData& paddedOutput, - const TensorShape& weightsShape, - Decoder& weightsDecoder, - const DataLayoutIndexed& dataLayoutIndexed) -{ - const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex(); - const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex(); - const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex(); + unsigned int strideX = descriptor.m_StrideX; + unsigned int strideY = descriptor.m_StrideY; - const unsigned int batches = stridedInput.shape[0]; - const unsigned int channels = stridedInput.shape[cIndex]; - - const unsigned int wKernel = weightsShape[wIndex]; - const unsigned int hKernel = weightsShape[hIndex]; - - const unsigned int wStridedInput = stridedInput.shape[wIndex]; - const unsigned int hStridedInput = stridedInput.shape[hIndex]; + // Set the initial output values to be logically 0 otherwise the algorithm doesn't work. + for (unsigned int i = 0u; i < outputShape.GetNumElements(); ++i) + { + outputEncoder.Set(0.f); + ++outputEncoder; + } - // loop through all input elements - for (unsigned int batchIdx = 0u; batchIdx < batches; ++batchIdx) + for (unsigned int batch = 0u; batch < numBatches; ++batch) { - for (unsigned int cInput = 0u; cInput < channels; ++cInput) + for (unsigned int yInput = 0u; yInput < inputHeight; ++yInput) { - for (unsigned int yInput = 0u; yInput < hStridedInput; ++yInput) + for (unsigned int xInput = 0u; xInput < inputWidth; ++xInput) { - for (unsigned int xInput = 0u; xInput < wStridedInput; ++xInput) - { - // obtain input value - unsigned int inputIdx = - dataLayoutIndexed.GetIndex(stridedInput.shape, batchIdx, cInput, yInput, xInput); - float inputValue = stridedInput.data[inputIdx]; + unsigned int xOutputOrigin = xInput * strideX - paddingLeft; + unsigned int yOutputOrigin = yInput * strideY - paddingTop; - // loop through kernel - for (unsigned int yKernel = 0u; yKernel < hKernel; ++yKernel) + unsigned int weightsBaseIndex = 0u; + for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput) + { + for (unsigned int yWeights = 0u; yWeights < weightsHeight; ++yWeights) { - for (unsigned int xKernel = 0; xKernel < wKernel; ++xKernel) + for (unsigned int xWeights = 0u; xWeights < weightsWidth; + ++xWeights, weightsBaseIndex += inputDepth) { - unsigned int kernelIdx = - dataLayoutIndexed.GetIndex(weightsShape, batchIdx, cInput, yKernel, xKernel); - - weightsDecoder[kernelIdx]; - float kernelValue = weightsDecoder.Get(); - - unsigned int xOutput = xInput + xKernel; - unsigned int yOutput = yInput + yKernel; - - // compute output increment - float outputValue = inputValue * kernelValue; - - unsigned int outputIdx = dataLayoutIndexed.GetIndex(paddedOutput.shape, - batchIdx, - cInput, - yOutput, - xOutput); - - // set output value - paddedOutput.data[outputIdx] += outputValue; + unsigned int yOutput = yOutputOrigin + yWeights; + unsigned int xOutput = xOutputOrigin + xWeights; + + if (yOutput < outputHeight && xOutput< outputWidth) + { + for (unsigned int dInput = 0u; dInput < inputDepth; dInput++) + { + const unsigned int inputIndex = + dataLayoutIndexed.GetIndex(inputShape, batch, dInput, yInput, xInput); + inputDecoder[inputIndex]; + + const unsigned int weightsIndex = + dataLayoutIndexed.GetIndex(weightsShape, batch, dOutput, yWeights, xWeights); + weightsDecoder[weightsIndex]; + + const unsigned int outputIndex = + dataLayoutIndexed.GetIndex(outputShape, batch, dOutput, yOutput, xOutput); + outputEncoder[outputIndex]; + + float output = outputEncoder.Get(); + output += inputDecoder.Get() * weightsDecoder.Get(); + + outputEncoder.Set(output); + } + } } } } } } } -} -void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descriptor, - const TensorShape& inputShape, - Decoder& inputDecoder, - const TensorShape& outputShape, - Encoder& outputEncoder, - const TensorShape& weightsShape, - Decoder& weightsDecoder, - Decoder* biasesDecoder) -{ - if (descriptor.m_BiasEnabled && !biasesDecoder) + // Apply bias (if enabled) + if (descriptor.m_BiasEnabled) { - throw InvalidArgumentException("Biases enabled but no bias data provided"); - } - - const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout); - - const unsigned int cIndex = dataLayoutIndexed.GetChannelsIndex(); - const unsigned int hIndex = dataLayoutIndexed.GetHeightIndex(); - const unsigned int wIndex = dataLayoutIndexed.GetWidthIndex(); - - const unsigned int numBatches = inputShape[0]; - const unsigned int numChannels = inputShape[cIndex]; - - // set up temporary strided input - TensorData stridedInput = SetUpStridedInput(inputShape, inputDecoder, descriptor, dataLayoutIndexed); - - // set up temporary (empty) padded output - TensorData paddedOutput = SetUpEmptyPaddedOutput(outputShape, descriptor, dataLayoutIndexed); - - // run deconvolution (without biases) on strided input to produce padded output - Deconvolve(stridedInput, paddedOutput, weightsShape, weightsDecoder, dataLayoutIndexed); + outputEncoder[0]; + Decoder& rBiasesDecoder = *biasesDecoder; - const unsigned int wPaddedOutput = paddedOutput.shape[wIndex]; - const unsigned int hPaddedOutput = paddedOutput.shape[hIndex]; - - // remove padding and apply bias (if enabled) - for (unsigned int batchIdx = 0u; batchIdx < numBatches; ++batchIdx) - { - for (unsigned int cOutput = 0u; cOutput < numChannels; ++cOutput) + for (unsigned int batch = 0u; batch < numBatches; ++batch) { - // update bias decoder iterator - if (descriptor.m_BiasEnabled) - { - (*biasesDecoder)[cOutput]; - } - - for (unsigned int yPaddedOutput = descriptor.m_PadTop; - yPaddedOutput < (hPaddedOutput - descriptor.m_PadBottom); - ++yPaddedOutput) + for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput) { - for (unsigned int xPaddedOutput = descriptor.m_PadLeft; - xPaddedOutput < (wPaddedOutput - descriptor.m_PadRight); - ++xPaddedOutput) + rBiasesDecoder[dOutput]; + for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput) { - unsigned int xOutput = xPaddedOutput - descriptor.m_PadLeft; - unsigned int yOutput = yPaddedOutput - descriptor.m_PadTop; - - unsigned int outputIdx = - dataLayoutIndexed.GetIndex(outputShape, batchIdx, cOutput, yOutput, xOutput); - unsigned int paddedOutputIdx = - dataLayoutIndexed.GetIndex(paddedOutput.shape, batchIdx, cOutput, yPaddedOutput, xPaddedOutput); - - // encode (copy) output data - outputEncoder[outputIdx]; - outputEncoder.Set(paddedOutput.data[paddedOutputIdx]); - - // apply bias (if enabled) - if (descriptor.m_BiasEnabled) + for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput) { - outputEncoder.Set(outputEncoder.Get() + biasesDecoder->Get()); + const unsigned int outputIndex = + dataLayoutIndexed.GetIndex(outputShape, batch, dOutput, yOutput, xOutput); + + outputEncoder[outputIndex]; + outputEncoder.Set(outputEncoder.Get() + rBiasesDecoder.Get()); } } } @@ -245,4 +132,4 @@ void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descript } } -} // namespace armnn \ No newline at end of file +} // namespace armnn -- cgit v1.2.1