From 7b885b3cce70154596b1994b013ea91527117c26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tam=C3=A1s=20Ny=C3=ADri?= <tamas.nyiri@arm.com>
Date: Tue, 26 Oct 2021 14:47:57 +0100
Subject: IVGCVSW-6509 Front End + Reference Workload implementation

Subtask of story: IVGCVSW-6164 Add a Pooling3d FrontEnd and Ref Implementation

* Add front end
* Add reference workload
* Add corresponding unit tests

Change-Id: Icce4146dd0a06a1da46a2def00a82d343e171750
Signed-off-by: Tamas Nyiri <tamas.nyiri@arm.com>
---
 src/backends/reference/workloads/CMakeLists.txt    |   4 +
 src/backends/reference/workloads/Pooling3d.cpp     | 328 +++++++++++++++++++++
 src/backends/reference/workloads/Pooling3d.hpp     |  21 ++
 .../reference/workloads/RefPooling3dWorkload.cpp   |  42 +++
 .../reference/workloads/RefPooling3dWorkload.hpp   |  26 ++
 src/backends/reference/workloads/RefWorkloads.hpp  |   2 +
 6 files changed, 423 insertions(+)
 create mode 100644 src/backends/reference/workloads/Pooling3d.cpp
 create mode 100644 src/backends/reference/workloads/Pooling3d.hpp
 create mode 100644 src/backends/reference/workloads/RefPooling3dWorkload.cpp
 create mode 100644 src/backends/reference/workloads/RefPooling3dWorkload.hpp

(limited to 'src/backends/reference/workloads')
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index f212522895..60d8255454 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -58,6 +58,8 @@ list(APPEND armnnRefBackendWorkloads_sources
     Pad.hpp
     Pooling2d.cpp
     Pooling2d.hpp
+    Pooling3d.cpp
+    Pooling3d.hpp
     PreluImpl.cpp
     PreluImpl.hpp
     Reduce.cpp
@@ -139,6 +141,8 @@ list(APPEND armnnRefBackendWorkloads_sources
     RefPermuteWorkload.hpp
     RefPooling2dWorkload.cpp
     RefPooling2dWorkload.hpp
+    RefPooling3dWorkload.cpp
+    RefPooling3dWorkload.hpp
     RefPreluWorkload.cpp
     RefPreluWorkload.hpp
     RefQuantizeWorkload.cpp
diff --git a/src/backends/reference/workloads/Pooling3d.cpp b/src/backends/reference/workloads/Pooling3d.cpp
new file mode 100644
index 0000000000..3cae2a94b9
--- /dev/null
+++ b/src/backends/reference/workloads/Pooling3d.cpp
@@ -0,0 +1,328 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Pooling3d.hpp"
+
+#include <armnn/Exceptions.hpp>
+#include <armnn/Types.hpp>
+
+#include <armnnUtils/DataLayoutIndexed.hpp>
+#include <armnn/utility/NumericCast.hpp>
+
+#include <limits>
+#include <algorithm>
+#include <functional>
+
+namespace
+{
+    using PoolingAlgorithm = armnn::PoolingAlgorithm;
+
+    float DefaultInitializer(PoolingAlgorithm algorithm)
+    {
+        switch (algorithm)
+        {
+            case PoolingAlgorithm::Max:
+            {
+                return std::numeric_limits<float>::lowest();
+            }
+            case PoolingAlgorithm::Average:
+            case PoolingAlgorithm::L2:
+            {
+                return 0.0f;
+            }
+            default:
+            {
+                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
+            }
+        }
+    }
+
+    using Accumulator = std::function<void(float & accu, float value)>;
+
+    Accumulator GetAccumulator(PoolingAlgorithm algorithm)
+    {
+        switch (algorithm)
+        {
+            case PoolingAlgorithm::Max:
+            {
+                return [](float & accu, float value) {
+                    if (value > accu) {
+                        accu = value;
+                    }
+                };
+            }
+
+            case PoolingAlgorithm::Average:
+            {
+                return [](float & accu, float value) {
+                    accu += value;
+                };
+            }
+
+            case PoolingAlgorithm::L2:
+            {
+                return [](float & accu, float value) {
+                    accu += (value*value);
+                };
+            }
+
+            default:
+            {
+                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
+            }
+        }
+    }
+
+    using Executor = std::function<void(float & accumulated, float kernelSize)>;
+
+    Executor GetExecutor(PoolingAlgorithm algorithm)
+    {
+        switch (algorithm)
+        {
+            case PoolingAlgorithm::Max:
+            {
+                return [](float & /*accumulated*/, float /*kernelSize*/) {};
+            }
+
+            case PoolingAlgorithm::Average:
+            {
+                return [](float & accumulated, float kernelSize) {
+                    accumulated /= kernelSize;
+                };
+            }
+
+            case PoolingAlgorithm::L2:
+            {
+                return [](float & accumulated, float kernelSize) {
+                    accumulated = sqrtf(accumulated / kernelSize);
+                };
+            }
+
+            default:
+            {
+                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
+            }
+        }
+    }
+
+    bool OnPaddingOnly(int start, int end, int maxRange)
+    {
+        if (end <= 0 || start > maxRange)
+        {
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+
+    bool ClampRange(int & start, int & end, int maxRange)
+    {
+        if (start < 0 || end > maxRange)
+        {
+            start = std::min(std::max(start, 0), maxRange);
+            end   = std::min(std::max(end, 0), maxRange);
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    int CalculateIndex(int channels, int depth, int height, int width,
+                             int n, int c, int z, int y, int x,
+                            armnnUtils::DataLayoutIndexed dataLayout) {
+        switch (dataLayout.GetDataLayout())
+        {
+            case armnn::DataLayout::NDHWC:
+            {
+                int outputIndex = n * depth * height * width * channels +
+                            z * height * width * channels +
+                            y * width * channels +
+                            x * channels +
+                            c;
+                return outputIndex;
+            }
+            case armnn::DataLayout::NCDHW:
+            {
+                int outputIndex = n * channels * depth * height * width +
+                            c * depth * height * width +
+                            z * height * width +
+                            y * width +
+                            x;
+                return outputIndex;
+            }
+            default:
+            {
+                throw armnn::InvalidArgumentException("Unsupported data layout.");
+            }
+        }
+    }
+}
+
+using namespace armnnUtils;
+
+namespace armnn
+{
+void Pooling3d(Decoder<float>& rInputDecoder,
+               Encoder<float>& rOutputEncoder,
+               const TensorInfo& inputInfo,
+               const TensorInfo& outputInfo,
+               const Pooling3dDescriptor& params)
+{
+    const DataLayoutIndexed dataLayout(params.m_DataLayout);
+
+    auto channelsIndex = dataLayout.GetChannelsIndex();
+
+    auto depthIndex = dataLayout.GetDepthIndex();
+    auto heightIndex = dataLayout.GetHeightIndex();
+    auto widthIndex = dataLayout.GetWidthIndex();
+
+    const int batchSize    = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
+    const int channels     = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
+
+    const int depthOutput  = armnn::numeric_cast<int>(outputInfo.GetShape()[depthIndex]);
+    const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
+    const int widthOutput  = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
+
+    const int depthInput   = armnn::numeric_cast<int>(inputInfo.GetShape()[depthIndex]);
+    const int heightInput  = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
+    const int widthInput   = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
+
+    const int padLeft      = armnn::numeric_cast<int>(params.m_PadLeft);
+    const int padRight     = armnn::numeric_cast<int>(params.m_PadRight);
+    const int padTop       = armnn::numeric_cast<int>(params.m_PadTop);
+    const int padBottom    = armnn::numeric_cast<int>(params.m_PadBottom);
+    const int padFront     = armnn::numeric_cast<int>(params.m_PadFront);
+    const int padBack      = armnn::numeric_cast<int>(params.m_PadBack);
+
+    const int strideX      = armnn::numeric_cast<int>(params.m_StrideX);
+    const int strideY      = armnn::numeric_cast<int>(params.m_StrideY);
+    const int strideZ      = armnn::numeric_cast<int>(params.m_StrideZ);
+
+    const int poolHeight   = armnn::numeric_cast<int>(params.m_PoolHeight);
+    const int poolWidth    = armnn::numeric_cast<int>(params.m_PoolWidth);
+    const int poolDepth    = armnn::numeric_cast<int>(params.m_PoolDepth);
+
+    float defaultInitializer = DefaultInitializer(params.m_PoolType);
+    Accumulator accumulate = GetAccumulator(params.m_PoolType);
+    Executor execute       = GetExecutor(params.m_PoolType);
+
+    // Check supported padding methods outside the loop to simplify
+    // the inner loop.
+    if (params.m_PaddingMethod != PaddingMethod::Exclude &&
+        params.m_PaddingMethod != PaddingMethod::IgnoreValue)
+    {
+        throw armnn::InvalidArgumentException("Unsupported padding type");
+    }
+
+    const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
+
+    for (int n = 0; n < batchSize; n++)
+    {
+        for (int c = 0; c < channels; c++)
+        {
+            for (int zOutput = 0; zOutput < depthOutput; zOutput++)
+            {
+                //  Calculate values independent of the x and y axis
+                int dstart = (zOutput * strideZ) - padFront;
+                int dend = dstart + poolDepth;
+                // Clamp the pooling region inside the valid input area (which includes the padding).
+                // This is necessary because the final pooling in a row may overlap beyond the padding.
+                dend = std::min(dend, depthInput + padBack);
+
+                int depth = dend - dstart;
+                bool dclamped = ClampRange(dstart, dend, depthInput);
+                int depthClamped = dend - dstart;
+
+                for (int yOutput = 0; yOutput < heightOutput; yOutput++)
+                {
+                    int hstart = (yOutput * strideY) - padTop;
+                    int hend = hstart + poolHeight;
+                    // Clamp the pooling region inside the valid input area (which includes the padding).
+                    // This is necessary because the final pooling in a row may overlap beyond the padding.
+                    hend = std::min(hend, heightInput + padBottom);
+
+                    int height = hend - hstart;
+                    bool hclamped = ClampRange(hstart, hend, heightInput);
+                    int heightClamped = hend - hstart;
+
+                    for (int xOutput = 0; xOutput < widthOutput; xOutput++)
+                    {
+                        int wstart = (xOutput * strideX) - padLeft;
+                        int wend = wstart + poolWidth;
+                        // Clamp the pooling region inside the valid input area (which includes the padding).
+                        // This is necessary because the final pooling in a row may overlap beyond the padding.
+                        wend = std::min(wend, widthInput + padRight);
+
+                        int width = wend - wstart;
+                        bool wclamped = ClampRange(wstart, wend, widthInput);
+                        int widthClamped = wend - wstart;
+
+                        float result = defaultInitializer;
+                        float poolAreaSize = armnn::numeric_cast<float>(depth * height * width);
+
+                        // Special case: when the pooling kernel is over a padding region and the padding
+                        //               size is larger or equal to the kernel and the kernel only covers
+                        //               padding and no real values, then we initialize the result as zero
+                        //               by convention. This is because we need to choose a value here and
+                        //               all values we have are padding, which we ignore.
+                        if (OnPaddingOnly(dstart, dend, depthInput) ||
+                            OnPaddingOnly(hstart, hend, heightInput) ||
+                            OnPaddingOnly(wstart, wend, widthInput))
+                        {
+                            result = 0.0f;
+
+                            int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
+                                n, c, zOutput, yOutput, xOutput, dataLayout);
+
+                            rOutputEncoder[static_cast<unsigned int>(outputIndex)];
+                            rOutputEncoder.Set(result);
+
+                            continue;
+                        }
+
+                        bool clamped = (dclamped | hclamped | wclamped);
+
+                        if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
+                        {
+                            // When we exclude the padding, it means we calculate with a smaller
+                            // kernel size, so I changed the divisor here.
+                            poolAreaSize = armnn::numeric_cast<float>(depthClamped * heightClamped * widthClamped);
+                        }
+
+                        for (auto zInput = dstart; zInput < dend; zInput++)
+                        {
+                            for (auto yInput = hstart; yInput < hend; yInput++)
+                            {
+                                for (auto xInput = wstart; xInput < wend; xInput++)
+                                {
+
+                                    int inputIndex = CalculateIndex(channels, depthInput, heightInput, widthInput,
+                                n, c, zInput, yInput, xInput, dataLayout);
+
+                                    accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
+                                }
+                            }
+                        }
+
+                        execute(result, poolAreaSize);
+
+                        int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
+                            n, c, zOutput, yOutput, xOutput, dataLayout);
+
+                        rOutputEncoder[static_cast<unsigned int>(outputIndex)];
+                        rOutputEncoder.Set(result);
+                    }
+                }
+            }
+        }
+    }
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/Pooling3d.hpp b/src/backends/reference/workloads/Pooling3d.hpp
new file mode 100644
index 0000000000..dd3c919975
--- /dev/null
+++ b/src/backends/reference/workloads/Pooling3d.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+#include "BaseIterator.hpp"
+
+namespace armnn
+{
+/// Computes the Pooling3d operation.
+void Pooling3d(Decoder<float>& rInputDecoder,
+               Encoder<float>& rOutputEncoder,
+               const TensorInfo& inputInfo,
+               const TensorInfo& outputInfo,
+               const Pooling3dDescriptor& params);
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPooling3dWorkload.cpp b/src/backends/reference/workloads/RefPooling3dWorkload.cpp
new file mode 100644
index 0000000000..d1e00aa5f7
--- /dev/null
+++ b/src/backends/reference/workloads/RefPooling3dWorkload.cpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefPooling3dWorkload.hpp"
+
+#include "Pooling3d.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+#include "BaseIterator.hpp"
+
+namespace armnn
+{
+void RefPooling3dWorkload::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefPooling3dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefPooling3dWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling3dWorkload_Execute");
+
+    const TensorInfo& inputInfo  = GetTensorInfo(inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+
+    auto inputDecoder  = MakeDecoder<float>(inputInfo,  inputs[0] ->Map());
+    auto outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
+
+    Pooling3d(*inputDecoder,
+              *outputEncoder,
+              inputInfo,
+              outputInfo,
+              m_Data.m_Parameters);
+}
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPooling3dWorkload.hpp b/src/backends/reference/workloads/RefPooling3dWorkload.hpp
new file mode 100644
index 0000000000..1188af23ca
--- /dev/null
+++ b/src/backends/reference/workloads/RefPooling3dWorkload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+namespace armnn
+{
+class RefPooling3dWorkload : public BaseWorkload<Pooling3dQueueDescriptor>
+{
+public:
+    using BaseWorkload<Pooling3dQueueDescriptor>::BaseWorkload;
+
+    void Execute() const override;
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor)  override;
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
+};
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 914137c23d..700a1d6184 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -14,6 +14,7 @@
 #include "FullyConnected.hpp"
 #include "Gather.hpp"
 #include "Pooling2d.hpp"
+#include "Pooling3d.hpp"
 #include "RefActivationWorkload.hpp"
 #include "RefArgMinMaxWorkload.hpp"
 #include "RefBatchNormalizationWorkload.hpp"
@@ -51,6 +52,7 @@
 #include "RefMeanWorkload.hpp"
 #include "RefNormalizationWorkload.hpp"
 #include "RefPooling2dWorkload.hpp"
+#include "RefPooling3dWorkload.hpp"
 #include "RefPermuteWorkload.hpp"
 #include "RefPadWorkload.hpp"
 #include "RefPreluWorkload.hpp"
-- 
cgit v1.2.1