plain/23.08/_conv3d_impl_8cpp_source.html

//

// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.

// SPDX-License-Identifier: MIT

//


#include "Conv3dImpl.hpp"


namespace armnn

{


void Convolve3d(const TensorShape& rInputShape,

                Decoder<float>& rInputDecoder,

                const TensorShape& rOutputShape,

                Encoder<float>& rOutputEncoder,

                const TensorShape& rFilterShape,

                Decoder<float>& rFilterDecoder,

                bool biasEnabled,

                Decoder<float>* pBiasDecoder,

                DataLayout dataLayout,

                unsigned int paddingTop,

                unsigned int paddingLeft,

                unsigned int paddingFront,

                unsigned int xStride,

                unsigned int yStride,

                unsigned int zStride,

                unsigned int xDilation,

                unsigned int yDilation,

                unsigned int zDilation)

{

    if (biasEnabled && !pBiasDecoder)

    {

        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");

    }

    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);


    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();

    const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();

    const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();

    const unsigned int depthIndex    = dataLayoutIndexed.GetDepthIndex();


    const unsigned int inChannels   = rInputShape[channelsIndex];

    const unsigned int outChannels  = rOutputShape[channelsIndex];


    const unsigned int batchSize    = rOutputShape[0];

    const unsigned int outputHeight = rOutputShape[heightIndex];

    const unsigned int outputWidth  = rOutputShape[widthIndex];

    const unsigned int outputDepth  = rOutputShape[depthIndex];

    const unsigned int inputHeight  = rInputShape[heightIndex];

    const unsigned int inputWidth   = rInputShape[widthIndex];

    const unsigned int inputDepth   = rInputShape[depthIndex];


    // Conv3d weights layout: [D,H,W,I,O]

    const unsigned int filterDepth  = rFilterShape[0];

    const unsigned int filterHeight = rFilterShape[1];

    const unsigned int filterWidth  = rFilterShape[2];


    const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);

    const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape);


    const TensorShape biasShape{outChannels};

    const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();


    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)

    {

        for (unsigned int zOutput = 0; zOutput < outputDepth; zOutput++)

        {

            for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)

            {

                for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)

                {

                    for (unsigned int cOutput = 0; cOutput < outChannels; cOutput++)

                    {

                        // This loop goes over each output element.

                        float sum = 0.0f;


                        // Loop over each input channel.

                        for (unsigned int zFilter = 0; zFilter < filterDepth; zFilter++)

                        {

                            for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)

                            {

                                for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)

                                {

                                    for (unsigned int cInput = 0; cInput < inChannels; cInput++)

                                    {

                                        // This loop goes over each input element for each output element.

                                        unsigned int filterIndex = 0;


                                        // Conv3d weights layout: [D,H,W,I,O]

                                        // Keep this implementation, as using DataLayoutIndexed::GetIndex

                                        // causes large performance regression.

                                        filterIndex = zFilter * filterHeight * filterWidth * inChannels * outChannels +

                                                      yFilter * filterWidth * inChannels * outChannels +

                                                      xFilter * inChannels * outChannels +

                                                      cInput * outChannels +

                                                      cOutput;


                                        unsigned int yInput = yOutput * yStride + yFilter * yDilation;

                                        unsigned int xInput = xOutput * xStride + xFilter * xDilation;

                                        unsigned int zInput = zOutput * zStride + zFilter * zDilation;


                                        float inputValue;


                                        // Check if we're in the padding.

                                        if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||

                                            xInput < paddingLeft || xInput >= inputWidth + paddingLeft ||

                                            zInput < paddingFront || zInput >= inputDepth + paddingFront)

                                        {

                                            inputValue = 0.0f;

                                        }

                                        else

                                        {

                                            unsigned int inputIndex = 0;


                                            // Keep this implementation, as using DataLayoutIndexed::GetIndex

                                            // causes large performance regression.

                                            if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)

                                            {

                                                inputIndex =

                                                        batchIdx * inputDepth * inputHeight * inputWidth * inChannels +

                                                        (zInput-paddingFront) * inputHeight * inputWidth * inChannels +

                                                        (yInput-paddingTop) * inputWidth * inChannels +

                                                        (xInput-paddingLeft) * inChannels +

                                                        cInput;

                                            }

                                            else

                                            {

                                                // NCDHW DataLayout

                                                inputIndex =

                                                        batchIdx * inputDepth * inputHeight * inputWidth * inChannels +

                                                        inputDepth * inputHeight * inputWidth * cInput +

                                                        (zInput-paddingFront) * inputHeight * inputWidth +

                                                        (yInput-paddingTop) * inputWidth +

                                                        xInput-paddingLeft;

                                            }


                                            inputValue = inputVec[inputIndex];

                                        }


                                        sum += filterVec[filterIndex] * inputValue;

                                    }

                                }

                            }

                        }


                        if (biasEnabled)

                        {

                            sum += biasVec[cOutput];

                        }


                        unsigned int outIdx;

                        if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)

                        {

                            outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +

                                     zOutput * outputHeight * outputWidth * outChannels +

                                     yOutput * outputWidth * outChannels +

                                     xOutput * outChannels +

                                     cOutput;

                        }

                        else

                        {

                            // NCDHW DataLayout

                            outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +

                                     cOutput * outputDepth * outputHeight * outputWidth +

                                     zOutput * outputHeight * outputWidth +

                                     yOutput * outputWidth +

                                     xOutput;

                        }


                        rOutputEncoder[outIdx];

                        rOutputEncoder.Set(sum);

                    }

                }

            }

        }

    }

}


} // namespace armnn