From 8eb675eb77865b5d2491f5b2d650ce993cab738c Mon Sep 17 00:00:00 2001
From: Matteo Martincigh <matteo.martincigh@arm.com>
Date: Wed, 17 Oct 2018 14:43:29 +0100
Subject: IVGCVSW-2038 + IVGCVSW-2039 + IVGCVSW-2040 Add NHWC support to the
 Float32 and UInt8 BatchNormalization workloads

 * Enabled NHWC support in RefBatchNormalizationFloat32Workload
 * Added NHWC unit tests for both FP32 and U8
 * Refactored the existing unit tests

Change-Id: I6aa18f1dcc0666b80a17a7ed229cf53607bae147
---
 include/armnn/Descriptors.hpp                      |   2 +
 src/backends/reference/test/RefLayerTests.cpp      |   2 +
 src/backends/reference/workloads/BatchNormImpl.hpp |  36 +++--
 .../RefBatchNormalizationFloat32Workload.hpp       |   2 +-
 src/backends/test/BatchNormTestImpl.hpp            |  97 ++++++-------
 src/backends/test/LayerTests.cpp                   | 152 ++++++++++++++++++++-
 src/backends/test/LayerTests.hpp                   |   4 +-
 7 files changed, 218 insertions(+), 77 deletions(-)
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index c0510055f2..a5b1d64732 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -289,9 +289,11 @@ struct BatchNormalizationDescriptor
 {
     BatchNormalizationDescriptor()
     : m_Eps(0.0001f)
+    , m_DataLayout(DataLayout::NCHW)
     {}
 
     float m_Eps;
+    DataLayout m_DataLayout;
 };
 
 struct FakeQuantizationDescriptor
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 2815e342c0..6cfa4a3926 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -176,7 +176,9 @@ ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadca
 
 // Batch Norm
 ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
+ARMNN_AUTO_TEST_CASE(BatchNormNhwc, BatchNormNhwcTest)
 ARMNN_AUTO_TEST_CASE(BatchNormUint8, BatchNormUint8Test)
+ARMNN_AUTO_TEST_CASE(BatchNormUint8Nhwc, BatchNormUint8NhwcTest)
 
 // Resize Bilinear - NCHW
 ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, SimpleResizeBilinearTest)
diff --git a/src/backends/reference/workloads/BatchNormImpl.hpp b/src/backends/reference/workloads/BatchNormImpl.hpp
index a7579c8373..fbcb2fdf5a 100644
--- a/src/backends/reference/workloads/BatchNormImpl.hpp
+++ b/src/backends/reference/workloads/BatchNormImpl.hpp
@@ -6,6 +6,7 @@
 #pragma once
 
 #include "RefWorkloadUtils.hpp"
+#include "TensorBufferArrayView.hpp"
 
 #include <armnn/Tensor.hpp>
 
@@ -15,16 +16,27 @@ namespace armnn
 {
 
 template<typename NormData>
-static void BatchNormImpl(NormData data,
+static void BatchNormImpl(NormData     data,
                           const float* varIn,
                           const float* meanIn,
                           const float* gammaIn,
                           const float* betaIn,
-                          float * outputData,
-                          const float * inputData)
+                          float*       outputData,
+                          const float* inputData)
 {
-    const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
-    for (unsigned int c = 0; c < inputInfo0.GetShape()[1]; c++)
+    const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
+
+    TensorBufferArrayView<const float> input(inputInfo.GetShape(),
+                                             inputData,
+                                             data.m_Parameters.m_DataLayout);
+    TensorBufferArrayView<float> output(outputInfo.GetShape(),
+                                        outputData,
+                                        data.m_Parameters.m_DataLayout);
+
+    DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
+
+    for (unsigned int c = 0; c < inputInfo.GetShape()[dataLayout.GetChannelsIndex()]; c++)
     {
         float var   = varIn[c];
         float mean  = meanIn[c];
@@ -34,19 +46,13 @@ static void BatchNormImpl(NormData data,
         float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
         float add  = beta - mult * mean;
 
-        for (unsigned int n = 0; n < inputInfo0.GetShape()[0]; n++)
+        for (unsigned int n = 0; n < inputInfo.GetShape()[0]; n++)
         {
-            for (unsigned int j = 0; j < inputInfo0.GetShape()[2]; j++)
+            for (unsigned int h = 0; h < inputInfo.GetShape()[dataLayout.GetHeightIndex()]; h++)
             {
-                for (unsigned int i = 0; i < inputInfo0.GetShape()[3]; i++)
+                for (unsigned int w = 0; w < inputInfo.GetShape()[dataLayout.GetWidthIndex()]; w++)
                 {
-                    unsigned int index = i +
-                                         j*inputInfo0.GetShape()[3] +
-                                         c*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] +
-                                         n*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2]
-                                                                     * inputInfo0.GetShape()[1];
-
-                    outputData[index] = mult * inputData[index] + add;
+                    output.Get(n, c, h, w) = mult * input.Get(n, c, h, w) + add;
                 }
             }
         }
diff --git a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp
index 17f80ca5e0..b51d94f979 100644
--- a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp
+++ b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp
@@ -15,7 +15,7 @@ class RefBatchNormalizationFloat32Workload : public Float32Workload<BatchNormali
 {
 public:
     explicit RefBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor,
-                                          const WorkloadInfo& info);
+                                                  const WorkloadInfo& info);
     virtual void Execute() const override;
 
 private:
diff --git a/src/backends/test/BatchNormTestImpl.hpp b/src/backends/test/BatchNormTestImpl.hpp
index ab5413d277..4941b00a49 100644
--- a/src/backends/test/BatchNormTestImpl.hpp
+++ b/src/backends/test/BatchNormTestImpl.hpp
@@ -14,23 +14,25 @@
 
 #include <backends/test/QuantizeHelper.hpp>
 
-
 template<typename T>
-LayerTestResult<T,4> BatchNormTestImpl(armnn::IWorkloadFactory& workloadFactory,
-                                       float qScale,
-                                       int32_t qOffset)
+LayerTestResult<T, 4> BatchNormTestImpl(armnn::IWorkloadFactory& workloadFactory,
+                                        const armnn::TensorShape& inputOutputTensorShape,
+                                        const std::vector<float>& inputValues,
+                                        const std::vector<float>& expectedOutputValues,
+                                        float qScale,
+                                        int32_t qOffset,
+                                        armnn::DataLayout dataLayout)
 {
-    const unsigned int width    = 2;
-    const unsigned int height   = 3;
-    const unsigned int channels = 2;
-    const unsigned int num      = 1;
+    armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, armnn::GetDataType<T>());
+    armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, armnn::GetDataType<T>());
+
+    armnn::DataLayoutIndexed dataLayoutIndexed(dataLayout);
 
-    armnn::TensorInfo inputTensorInfo({num, channels, height, width}, armnn::GetDataType<T>());
-    armnn::TensorInfo outputTensorInfo({num, channels, height, width}, armnn::GetDataType<T>());
-    armnn::TensorInfo tensorInfo({channels}, armnn::GetDataType<T>());
+    armnn::TensorInfo tensorInfo({ inputOutputTensorShape[dataLayoutIndexed.GetChannelsIndex()] },
+                                 armnn::GetDataType<T>());
 
     // Set quantization parameters if the requested type is a quantized type.
-    if(armnn::IsQuantizedType<T>())
+    if (armnn::IsQuantizedType<T>())
     {
         inputTensorInfo.SetQuantizationScale(qScale);
         inputTensorInfo.SetQuantizationOffset(qOffset);
@@ -40,73 +42,56 @@ LayerTestResult<T,4> BatchNormTestImpl(armnn::IWorkloadFactory& workloadFactory,
         tensorInfo.SetQuantizationOffset(qOffset);
     }
 
-    auto input = MakeTensor<T, 4>(inputTensorInfo,
-        QuantizedVector<T>(qScale, qOffset,
-        {
-            1.f, 4.f,
-            4.f, 2.f,
-            1.f, 6.f,
-
-            1.f, 1.f,
-            4.f, 1.f,
-            -2.f, 4.f
-        }));
+    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo,
+                                        QuantizedVector<T>(qScale, qOffset, inputValues));
+
     // These values are per-channel of the input.
     auto mean     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, -2}));
-    auto variance = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {4, 9}));
-    auto beta     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, 2}));
-    auto gamma    = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {2, 1}));
-    LayerTestResult<T,4> ret(outputTensorInfo);
+    auto variance = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {4,  9}));
+    auto beta     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3,  2}));
+    auto gamma    = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {2,  1}));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+
+    result.outputExpected = MakeTensor<T, 4>(inputTensorInfo,
+                                             QuantizedVector<T>(qScale, qOffset, expectedOutputValues));
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
 
-    armnn::BatchNormalizationQueueDescriptor data;
-    armnn::WorkloadInfo info;
     armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
     armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
     armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
     armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
 
+    armnn::BatchNormalizationQueueDescriptor descriptor;
+    descriptor.m_Mean                    = &meanTensor;
+    descriptor.m_Variance                = &varianceTensor;
+    descriptor.m_Beta                    = &betaTensor;
+    descriptor.m_Gamma                   = &gammaTensor;
+    descriptor.m_Parameters.m_Eps        = 0.0f;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+    armnn::WorkloadInfo info;
+
     AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
     AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
     AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
     AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
 
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-    data.m_Mean             = &meanTensor;
-    data.m_Variance         = &varianceTensor;
-    data.m_Beta             = &betaTensor;
-    data.m_Gamma            = &gammaTensor;
-    data.m_Parameters.m_Eps = 0.0f;
-
-    // For each channel:
-    // substract mean, divide by standard deviation (with an epsilon to avoid div by 0),
-    // multiply by gamma and add beta
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-        QuantizedVector<T>(qScale, qOffset,
-        {
-            1.f, 4.f,
-            4.f, 2.f,
-            1.f, 6.f,
-
-            3.f, 3.f,
-            4.f, 3.f,
-            2.f, 4.f
-        }));
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(descriptor, info);
 
     inputHandle->Allocate();
     outputHandle->Allocate();
 
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
 
     workloadFactory.Finalize();
     workload->Execute();
 
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
 
-    return ret;
+    return result;
 }
diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp
index c28a1d46ad..1faacacb5c 100755
--- a/src/backends/test/LayerTests.cpp
+++ b/src/backends/test/LayerTests.cpp
@@ -5338,14 +5338,158 @@ LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory&
 
 LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory)
 {
-    auto ret = BatchNormTestImpl<float>(workloadFactory, 0.f, 0);
-    return ret;
+    // BatchSize: 1 (NCHW; per-channel mean/variance/beta/gamma are hard-coded in BatchNormTestImpl)
+    // Channels: 2
+    // Height: 3
+    // Width: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+         1.f, 4.f,
+         4.f, 2.f,
+         1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+         1.f, 1.f,
+         4.f, 1.f,
+        -2.f, 4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2): gamma * (x - mean) / sqrt(var) + beta = 2 * (x - 3) / 2 + 3
+        1.f, 4.f,
+        4.f, 2.f,
+        1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2): gamma * (x - mean) / sqrt(var) + beta = 1 * (x + 2) / 3 + 2
+        3.f, 3.f,
+        4.f, 3.f,
+        2.f, 4.f
+    };
+
+    return BatchNormTestImpl<float>(workloadFactory, inputOutputShape, inputValues, expectedOutputValues,
+                                    0.f, 0, armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<float, 4> BatchNormNhwcTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    // BatchSize: 1 (NHWC; per-channel mean/variance/beta/gamma are hard-coded in BatchNormTestImpl)
+    // Height: 3
+    // Width: 2
+    // Channels: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2) - each row is one width position: { channel 0, channel 1 }
+        1.f,  1.f,
+        4.f,  1.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f,  4.f,
+        2.f,  1.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, -2.f,
+        6.f,  4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2): ch0 = 2 * (x - 3) / 2 + 3, ch1 = 1 * (x + 2) / 3 + 2
+        1.f, 3.f,
+        4.f, 3.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f, 4.f,
+        2.f, 3.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, 2.f,
+        6.f, 4.f
+    };
+
+    return BatchNormTestImpl<float>(workloadFactory, inputOutputShape, inputValues, expectedOutputValues,
+                                    0.f, 0, armnn::DataLayout::NHWC);
 }
 
 LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory)
 {
-    auto ret = BatchNormTestImpl<uint8_t>(workloadFactory, 1.f/20.f, 50);
-    return ret;
+    // BatchSize: 1 (NCHW; the float values below are quantized to uint8 inside BatchNormTestImpl)
+    // Channels: 2
+    // Height: 3
+    // Width: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+         1.f, 4.f,
+         4.f, 2.f,
+         1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+         1.f, 1.f,
+         4.f, 1.f,
+        -2.f, 4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2): gamma * (x - mean) / sqrt(var) + beta = 2 * (x - 3) / 2 + 3
+        1.f, 4.f,
+        4.f, 2.f,
+        1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2): gamma * (x - mean) / sqrt(var) + beta = 1 * (x + 2) / 3 + 2
+        3.f, 3.f,
+        4.f, 3.f,
+        2.f, 4.f
+    };
+
+    return BatchNormTestImpl<uint8_t>(workloadFactory, inputOutputShape, inputValues, expectedOutputValues,
+                                      1.f/20.f, 50, armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<uint8_t, 4> BatchNormUint8NhwcTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    // BatchSize: 1 (NHWC; the float values below are quantized to uint8 inside BatchNormTestImpl)
+    // Height: 3
+    // Width: 2
+    // Channels: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2) - each row is one width position: { channel 0, channel 1 }
+        1.f,  1.f,
+        4.f,  1.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f,  4.f,
+        2.f,  1.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, -2.f,
+        6.f,  4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2): ch0 = 2 * (x - 3) / 2 + 3, ch1 = 1 * (x + 2) / 3 + 2
+        1.f, 3.f,
+        4.f, 3.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f, 4.f,
+        2.f, 3.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, 2.f,
+        6.f, 4.f
+    };
+
+    return BatchNormTestImpl<uint8_t>(workloadFactory, inputOutputShape, inputValues, expectedOutputValues,
+                                      1.f/20.f, 50, armnn::DataLayout::NHWC);
 }
 
 LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory)
diff --git a/src/backends/test/LayerTests.hpp b/src/backends/test/LayerTests.hpp
index d9d4fb909e..b6651ce070 100644
--- a/src/backends/test/LayerTests.hpp
+++ b/src/backends/test/LayerTests.hpp
@@ -217,6 +217,7 @@ LayerTestResult<float, 4> CompareMultiplicationTest(armnn::IWorkloadFactory& wor
                                              armnn::IWorkloadFactory& refWorkloadFactory);
 
 LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> BatchNormNhwcTest(armnn::IWorkloadFactory& workloadFactory);
 
 LayerTestResult<float, 4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory,
                                         armnn::IWorkloadFactory& refWorkloadFactory);
@@ -329,6 +330,7 @@ LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory&
 LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory);
 
 LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> BatchNormUint8NhwcTest(armnn::IWorkloadFactory& workloadFactory);
 
 LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory);
 
@@ -381,4 +383,4 @@ LayerTestResult<float, 4> MeanFloatKeepDimsTest(armnn::IWorkloadFactory& workloa
 LayerTestResult<float, 4> MeanFloatMultipleDimsTest(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 1> MeanVtsFloat1Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 3> MeanVtsFloat2Test(armnn::IWorkloadFactory& workloadFactory);
-LayerTestResult<float, 4> AdditionAfterMaxPoolTest(armnn::IWorkloadFactory& workloadFactory);
\ No newline at end of file
+LayerTestResult<float, 4> AdditionAfterMaxPoolTest(armnn::IWorkloadFactory& workloadFactory);
-- 
cgit v1.2.1