From 7e9391bb14d219cda310bff355669b5964b1f576 Mon Sep 17 00:00:00 2001
From: Michalis Spyrou <michalis.spyrou@arm.com>
Date: Fri, 5 Oct 2018 14:49:28 +0100
Subject: COMPMID-1574 Implement ReduceMean in OpenCL

Change-Id: Id331199f569f52a37280a9ada5bf84694580b93c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/152843
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
---
 tests/validation/reference/ReductionOperation.cpp | 184 ++++++++++++++++++++--
 1 file changed, 173 insertions(+), 11 deletions(-)

(limited to 'tests/validation/reference/ReductionOperation.cpp')
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index 871a761b1a..11947bd293 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -47,6 +47,15 @@ struct square
     }
 };
 
+/** Binary addition functor.
+ *
+ *  Passed to std::accumulate by reduce_operation for the SUM and MEAN_SUM operations. */
+template <typename T>
+struct sum
+{
+    T operator()(const T &accumulated, const T &value) const { return accumulated + value; }
+};
+
 template <typename T>
 T reduce_operation(T *ptr, int reduce_elements, ReductionOperation op)
 {
@@ -54,6 +63,9 @@ T reduce_operation(T *ptr, int reduce_elements, ReductionOperation op)
     {
         case ReductionOperation::SUM_SQUARE:
             return std::accumulate(ptr, ptr + reduce_elements, static_cast<T>(0), square<T>());
+        case ReductionOperation::SUM:
+        case ReductionOperation::MEAN_SUM:
+            return std::accumulate(ptr, ptr + reduce_elements, static_cast<T>(0), sum<T>());
         default:
             ARM_COMPUTE_ERROR("Unsupported reduction operation");
     }
@@ -64,23 +76,172 @@ template <typename T>
 SimpleTensor<T> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
 {
     // Create reference
-    SimpleTensor<T> dst{ dst_shape, src.data_type() };
+    SimpleTensor<T>    dst{ dst_shape, src.data_type() };
+    const unsigned int src_width  = src.shape().x(); // innermost extent: unit stride in the flat offsets computed below
+    const unsigned int src_height = src.shape().y(); // multiplied by src_width in the flat offsets below
+    const unsigned int src_depth  = src.shape().z(); // multiplied by src_height * src_width in the flat offsets below
+    const unsigned int src_batch  = src.shape()[3];  // extent of dimension 3, only reduced by case 3
+    const bool         mean       = op == ReductionOperation::MEAN_SUM; // when set, every sum is divided by the reduced extent
 
-    // Compute reference
-    const int reduce_elems = src.shape()[axis];
-    const int upper_dims   = src.shape().total_size_upper(axis + 1);
-
-    for(int du = 0; du < upper_dims; ++du)
+    switch(axis) // one case per supported reduction dimension (0..3); anything else is an error
     {
-        if(axis == 0)
+        case 0: // reduce along x: one output element per input row
+        {
+            const int          reduce_elems = src.shape()[axis];
+            const unsigned int upper_dims   = src.shape().total_size_upper(1); // used as the row count; presumably the product of extents above dim 0 - confirm
+            for(unsigned int du = 0; du < upper_dims; ++du)
+            {
+                if(std::is_integral<T>::value) // runtime check: both branches are compiled for every T
+                {
+                    uint32_t res = 0; // NOTE(review): op is only consulted through 'mean' here, so SUM_SQUARE yields a plain sum - confirm intended
+                    for(unsigned int x = 0; x < src_width; ++x)
+                    {
+                        res += static_cast<uint32_t>(src[du * src_width + x]);
+                    }
+                    if(mean && src_width > 0) // avoid dividing by a zero extent
+                    {
+                        res /= src_width;
+                    }
+                    dst[du] = static_cast<uint8_t>(res); // NOTE(review): narrows to 8 bits, so a large plain SUM wraps - confirm intended
+                }
+                else
+                {
+                    const T *src_row_ptr = src.data() + du * reduce_elems;
+
+                    auto res = reduce_operation(src_row_ptr, reduce_elems, op); // the only path that dispatches on op
+                    if(mean && src_width > 0)
+                    {
+                        res /= src_width;
+                    }
+                    dst[du] = res;
+                }
+            }
+        }
+        break;
+        case 1: // reduce along y: one output element per (x, du) pair
         {
-            const T *src_row_ptr = src.data() + du * reduce_elems;
-            dst[du]              = reduce_operation(src_row_ptr, reduce_elems, op);
+            const unsigned int upper_dims = src.shape().total_size_upper(2);
+            for(unsigned int du = 0; du < upper_dims; ++du)
+            {
+                for(unsigned int x = 0; x < src_width; ++x)
+                {
+                    if(std::is_integral<T>::value)
+                    {
+                        uint32_t res = 0; // plain sum accumulated in 32 bits
+                        for(unsigned int y = 0; y < src_height; ++y)
+                        {
+                            res += static_cast<uint32_t>(src[du * src_height * src_width + y * src_width + x]);
+                        }
+                        if(mean && src_height > 0)
+                        {
+                            res /= src_height;
+                        }
+                        dst[du * src_width + x] = static_cast<uint8_t>(res);
+                    }
+                    else
+                    {
+                        auto res = T(0); // NOTE(review): plain sum; unlike case 0 this path never calls reduce_operation, so SUM_SQUARE is not honoured
+                        for(unsigned int y = 0; y < src_height; ++y)
+                        {
+                            res += src[du * src_height * src_width + y * src_width + x];
+                        }
+                        if(mean && src_height > 0)
+                        {
+                            res /= src_height;
+                        }
+                        dst[du * src_width + x] = res;
+                    }
+                }
+            }
         }
-        else
+        break;
+        case 2: // reduce along z: one output element per (x, y, du) triple
         {
-            ARM_COMPUTE_ERROR("Unsupported reduction axis");
+            const unsigned int upper_dims = src.shape().total_size_upper(3);
+            for(unsigned int du = 0; du < upper_dims; ++du)
+            {
+                for(unsigned int x = 0; x < src_width; ++x)
+                {
+                    for(unsigned int y = 0; y < src_height; ++y)
+                    {
+                        if(std::is_integral<T>::value)
+                        {
+                            uint32_t res = 0; // plain zero, matching the other integral accumulators in this function
+                            for(unsigned int z = 0; z < src_depth; ++z)
+                            {
+                                res += static_cast<uint32_t>(src[du * src_depth * src_height * src_width + z * src_height * src_width + y * src_width + x]);
+                            }
+                            if(mean && src_depth > 0)
+                            {
+                                res /= src_depth;
+                            }
+                            dst[du * src_width * src_height + y * src_width + x] = static_cast<uint8_t>(res);
+                        }
+                        else
+                        {
+                            auto res = T(0); // plain sum in the element type; op only matters through 'mean'
+                            for(unsigned int z = 0; z < src_depth; ++z)
+                            {
+                                res += src[du * src_depth * src_height * src_width + z * src_height * src_width + y * src_width + x];
+                            }
+                            if(mean && src_depth > 0)
+                            {
+                                res /= src_depth;
+                            }
+                            dst[du * src_width * src_height + y * src_width + x] = res;
+                        }
+                    }
+                }
+            }
         }
+        break;
+        case 3: // reduce along dimension 3: one output element per (x, y, z, du)
+        {
+            const unsigned int upper_dims = src.shape().total_size_upper(4);
+            for(unsigned int du = 0; du < upper_dims; ++du)
+            {
+                for(unsigned int z = 0; z < src_depth; ++z)
+                {
+                    for(unsigned int y = 0; y < src_height; ++y)
+                    {
+                        for(unsigned int x = 0; x < src_width; ++x)
+                        {
+                            if(std::is_integral<T>::value)
+                            {
+                                uint32_t res = 0; // plain sum accumulated in 32 bits
+                                for(unsigned int w = 0; w < src_batch; ++w)
+                                {
+                                    res += static_cast<uint32_t>(src[du * src_batch * src_depth * src_height * src_width + w * src_width * src_height * src_depth + z * src_width * src_height + y * src_width + x]);
+                                }
+                                if(mean && src_batch > 0)
+                                {
+                                    res /= src_batch;
+                                }
+
+                                dst[du * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x] = static_cast<uint8_t>(res);
+                            }
+                            else
+                            {
+                                auto res = T(0); // plain sum in the element type; op only matters through 'mean'
+                                for(unsigned int w = 0; w < src_batch; ++w)
+                                {
+                                    res += src[du * src_batch * src_depth * src_height * src_width + w * src_width * src_height * src_depth + z * src_width * src_height + y * src_width + x];
+                                }
+                                if(mean && src_batch > 0)
+                                {
+                                    res /= src_batch;
+                                }
+
+                                dst[du * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x] = res;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported reduction axis");
     }
 
     return dst;
@@ -88,6 +249,7 @@ SimpleTensor<T> reduction_operation(const SimpleTensor<T> &src, const TensorShap
 
 template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op); // integral T: served by the std::is_integral branches
 } // namespace reference
 } // namespace validation
 } // namespace test
-- 
cgit v1.2.1