author     Michalis Spyrou <michalis.spyrou@arm.com>    2018-06-15 16:15:26 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:54:54 +0000
commit     d1794ebfa10d05af7d2458c5d506152fd38068d3 (patch)
tree       e3f286aaba86b1f0bcda3390ad4d8af96b965fc7
parent     7777b1aa865d3c17dcef31573d44fae421176109 (diff)
download   ComputeLibrary-d1794ebfa10d05af7d2458c5d506152fd38068d3.tar.gz
COMPMID-1226 Extend CLMeanStdDev to support FP32 / FP16
- Extend support for FP16 in CLReduction.
- For F16/F32 MeanStdDev we perform one reduction operation for mean and one
  for stddev and calculate the final result on the host CPU.

Change-Id: Iad2099f26c0ba7969737d22f00c6c275634d875c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/135870
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
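Editor's note: the host-side finalisation described above is the textbook identity stddev = sqrt(E[x^2] - E[x]^2) applied to the per-row reduction outputs. A minimal standalone sketch of that step (names and the use of std::vector are illustrative; the actual implementation is CLMeanStdDev::run_float() further down, which reads the values from mapped CL buffers):

#include <cmath>
#include <numeric>
#include <utility>
#include <vector>

// sums:       per-row results of ReductionOperation::SUM
// sums_sq:    per-row results of ReductionOperation::SUM_SQUARE
// num_pixels: width * height of the input image
static std::pair<float, float> finalize_mean_stddev(const std::vector<float> &sums,
                                                    const std::vector<float> &sums_sq,
                                                    unsigned int num_pixels)
{
    const float mean   = std::accumulate(sums.begin(), sums.end(), 0.f) / num_pixels;
    const float stddev = std::sqrt(std::accumulate(sums_sq.begin(), sums_sq.end(), 0.f) / num_pixels - mean * mean);
    return { mean, stddev };
}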
-rw-r--r--  arm_compute/core/CL/kernels/CLMeanStdDevKernel.h          |  13
-rw-r--r--  arm_compute/core/CL/kernels/CLReductionOperationKernel.h  |   4
-rw-r--r--  arm_compute/core/Validate.h                               |  13
-rw-r--r--  arm_compute/runtime/CL/functions/CLMeanStdDev.h           |  52
-rw-r--r--  arm_compute/runtime/CL/functions/CLReductionOperation.h   |   4
-rw-r--r--  src/core/CL/kernels/CLMeanStdDevKernel.cpp                |  23
-rw-r--r--  src/core/CL/kernels/CLReductionOperationKernel.cpp        |   4
-rw-r--r--  src/core/Validate.cpp                                     |  10
-rw-r--r--  src/runtime/CL/functions/CLMeanStdDev.cpp                 | 134
-rw-r--r--  tests/validation/CL/MeanStdDev.cpp                        |  48
-rw-r--r--  tests/validation/CL/ReductionOperation.cpp                |  27
-rw-r--r--  tests/validation/fixtures/MeanStdDevFixture.h             |  12
-rw-r--r--  tests/validation/fixtures/ReductionOperationFixture.h     |   5
-rw-r--r--  tests/validation/reference/MeanStdDev.cpp                 |   4
-rw-r--r--  tests/validation/reference/ReductionOperation.cpp         |   5
15 files changed, 310 insertions(+), 48 deletions(-)
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
index a6898fde3e..46e266e07d 100644
--- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
+++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -59,6 +59,17 @@ public:
* @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
*/
void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel.
+ *
+ * @param[in] input Input image info. Data types supported: U8.
+ * @param[in] mean Input average pixel value.
+ * @param[in] global_sum Keeps global sum of pixel values.
+ * @param[in] stddev (Optional) Output standard deviation of pixel values.
+ * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
index 56f75e5fb7..60e2f08005 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
@@ -50,7 +50,7 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
@@ -60,7 +60,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
*
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
* Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
index 1646ebe719..918c8e5fc3 100644
--- a/arm_compute/core/Validate.h
+++ b/arm_compute/core/Validate.h
@@ -787,6 +787,19 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const
*/
arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
const ITensor *tensor);
+
+/** Return an error if the tensor info is not 2D.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file Name of the file where the error occurred.
+ * @param[in] line Line on which the error occurred.
+ * @param[in] tensor Tensor info to validate.
+ *
+ * @return Status
+ */
+arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
+ const ITensorInfo *tensor);
+
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t))
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(t) \
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
index 7622138236..2e46563423 100644
--- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,10 @@
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
@@ -36,23 +39,56 @@ class CLMeanStdDev : public IFunction
{
public:
/** Default Constructor. */
- CLMeanStdDev();
+ CLMeanStdDev(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMeanStdDev(const CLMeanStdDev &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMeanStdDev &operator=(const CLMeanStdDev &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMeanStdDev(CLMeanStdDev &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMeanStdDev &operator=(CLMeanStdDev &&) = default;
+ /** Default destructor */
+ ~CLMeanStdDev() = default;
/** Initialise the kernel's inputs and outputs.
*
- * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling)
+ * @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling)
* @param[out] mean Output average pixel value.
- * @param[out] stddev (Optional)Output standard deviation of pixel values.
+ * @param[out] stddev (Optional) Output standard deviation of pixel values.
*/
void configure(ICLImage *input, float *mean, float *stddev = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDev
+ *
+ * @param[in] input Input image. Data types supported: U8/F16/F32.
+ * @param[in] mean Output average pixel value.
+ * @param[in] stddev (Optional) Output standard deviation of pixel values.
+ *
+ * @return a status
+ */
+ static Status validate(ITensorInfo *input, float *mean, float *stddev = nullptr);
// Inherited methods overridden:
void run() override;
private:
- CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */
- CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
- cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
- cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
+ template <typename T>
+ void run_float();
+ void run_int();
+
+ CLMemoryGroup _memory_group; /**< Function's memory group */
+ DataType _data_type; /**< Input data type. */
+ unsigned int _num_pixels; /**< Number of image's pixels. */
+ bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */
+ CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */
+ CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */
+ CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */
+ CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */
+ float *_mean; /**< Pointer that holds the mean value. */
+ float *_stddev; /**< Pointer that holds the standard deviation value. */
+ CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that performs the standard deviation calculation. */
+ CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
+ cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
+ cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
};
}
#endif /*__ARM_COMPUTE_CLMEANSTDDEV_H__ */
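Editor's note: with the new overloads above, a typical F32 use of the function could look like the sketch below (tensor shape, fill step and scheduler setup are illustrative assumptions, not part of this patch):

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"

using namespace arm_compute;

void mean_stddev_f32_example()
{
    CLScheduler::get().default_init(); // create CL context and command queue

    CLTensor src;
    src.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32)); // 2D image

    float mean   = 0.f;
    float stddev = 0.f;

    // Early check via the new static validate()
    const Status status = CLMeanStdDev::validate(src.info(), &mean, &stddev);
    ARM_COMPUTE_ERROR_THROW_ON(status);

    CLMeanStdDev mean_stddev;
    mean_stddev.configure(&src, &mean, &stddev); // F16/F32 path is built on CLReductionOperation

    src.allocator()->allocate();
    // ... fill src ...
    mean_stddev.run();                           // mean/stddev are finalised on the host
}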
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index b8108b507b..d862aff7b7 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -53,7 +53,7 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
* @param[in] op Reduction operation to perform.
@@ -62,7 +62,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation.
*
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
* @param[in] op Reduction operation to perform.
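Editor's note: the reduction function can also be exercised standalone with the newly supported F16 type; a minimal sketch along the lines of the validation tests (shapes are illustrative, and initialising the output TensorInfo explicitly is an assumption of this example):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"

using namespace arm_compute;

void reduction_sum_square_f16_example()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F16));
    dst.allocator()->init(TensorInfo(TensorShape(1U, 64U), 1, DataType::F16)); // axis 0 collapsed to 1

    CLReductionOperation reduction;
    reduction.configure(&src, &dst, 0, ReductionOperation::SUM_SQUARE);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src ...
    reduction.run();
}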
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index fc8764dbfe..bd31131fe5 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,6 +23,7 @@
*/
#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
@@ -49,14 +50,24 @@ BorderSize CLMeanStdDevKernel::border_size() const
return _border_size;
}
+Status CLMeanStdDevKernel::validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
+{
+ ARM_COMPUTE_UNUSED(mean);
+ ARM_COMPUTE_UNUSED(stddev);
+ ARM_COMPUTE_UNUSED(global_sum);
+ ARM_COMPUTE_UNUSED(global_sum_squared);
+ ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED();
+ ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+
+ return Status{};
+}
+
void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
{
- ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED();
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(nullptr == mean);
- ARM_COMPUTE_ERROR_ON(nullptr == global_sum);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, global_sum);
ARM_COMPUTE_ERROR_ON(stddev && nullptr == global_sum_squared);
+ ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevKernel::validate(input->info(), mean, global_sum, stddev, global_sum_squared));
_input = input;
_mean = mean;
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index d64f0d89c5..95967fa974 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -44,7 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
ARM_COMPUTE_UNUSED(op);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
@@ -69,7 +69,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
const unsigned int num_elems_processed_per_iteration = 16;
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- const unsigned int border_width = ((input->dimension(0) % 128) != 0) ? 128 - input->dimension(0) % 128 : 0; // TODO (COMPMID-1143): Fix padding (possible value 127!)
+ const unsigned int border_width = ((input->dimension(0) % num_elems_processed_per_iteration) != 0) ? num_elems_processed_per_iteration - input->dimension(0) % num_elems_processed_per_iteration : 0;
AccessWindowStatic input_access(input, 0, 0, input->dimension(0) + border_width, 1);
AccessWindowHorizontal output_access(output, 0, 1);
diff --git a/src/core/Validate.cpp b/src/core/Validate.cpp
index d4fabd4a0f..60a97dfcc2 100644
--- a/src/core/Validate.cpp
+++ b/src/core/Validate.cpp
@@ -100,6 +100,16 @@ arm_compute::Status arm_compute::error_on_tensor_not_2d(const char *function, co
return arm_compute::Status{};
}
+arm_compute::Status arm_compute::error_on_tensor_not_2d(const char *function, const char *file, const int line,
+ const arm_compute::ITensorInfo *tensor)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor->num_dimensions() != 2,
+ function, file, line,
+ "Only 2D Tensors are supported by this kernel (%d passed)", tensor->num_dimensions());
+ return arm_compute::Status{};
+}
+
arm_compute::Status arm_compute::error_on_channel_not_in_known_format(const char *function, const char *file, const int line,
arm_compute::Format fmt, arm_compute::Channel cn)
{
diff --git a/src/runtime/CL/functions/CLMeanStdDev.cpp b/src/runtime/CL/functions/CLMeanStdDev.cpp
index 838f7e73d2..157f306d0c 100644
--- a/src/runtime/CL/functions/CLMeanStdDev.cpp
+++ b/src/runtime/CL/functions/CLMeanStdDev.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,35 +21,149 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
using namespace arm_compute;
-CLMeanStdDev::CLMeanStdDev()
- : _mean_stddev_kernel(),
+CLMeanStdDev::CLMeanStdDev(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _data_type(),
+ _num_pixels(),
+ _run_stddev(),
+ _reduction_operation_mean(),
+ _reduction_operation_stddev(),
+ _reduction_output_mean(),
+ _reduction_output_stddev(),
+ _mean(nullptr),
+ _stddev(nullptr),
+ _mean_stddev_kernel(),
_fill_border_kernel(),
_global_sum(),
_global_sum_squared()
{
}
+Status CLMeanStdDev::validate(ITensorInfo *input, float *mean, float *stddev)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input);
+ if(is_data_type_float(input->data_type()))
+ {
+ ARM_COMPUTE_UNUSED(mean);
+ ARM_COMPUTE_UNUSED(stddev);
+
+ TensorShape output_shape = TensorShape{ 1, input->dimension(1) };
+ TensorInfo output_shape_info = TensorInfo(output_shape, 1, DataType::U8);
+ return CLReductionOperation::validate(input, &output_shape_info, 0, ReductionOperation::SUM);
+ }
+ else
+ {
+ return CLMeanStdDevKernel::validate(input, mean, nullptr, stddev, nullptr);
+ }
+}
+
void CLMeanStdDev::configure(ICLImage *input, float *mean, float *stddev)
{
- _global_sum = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
+ // In the case of F16/F32 we call reduction operation for calculating CLMeanStdDev
+ _data_type = input->info()->data_type();
- if(stddev != nullptr)
+ if(is_data_type_float(_data_type))
{
- _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
+ _num_pixels = input->info()->dimension(0) * input->info()->dimension(1);
+
+ _memory_group.manage(&_reduction_output_mean);
+ _reduction_operation_mean.configure(input, &_reduction_output_mean, 0, ReductionOperation::SUM);
+ _reduction_output_mean.allocator()->allocate();
+ _mean = mean;
+
+ if(stddev != nullptr)
+ {
+ _memory_group.manage(&_reduction_output_stddev);
+ _reduction_operation_stddev.configure(input, &_reduction_output_stddev, 0, ReductionOperation::SUM_SQUARE);
+ _reduction_output_stddev.allocator()->allocate();
+ _stddev = stddev;
+ _run_stddev = true;
+ }
}
+ else
+ {
+ _global_sum = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
- _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared);
- _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
+ if(stddev != nullptr)
+ {
+ _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
+ }
+
+ _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared);
+ _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
+ }
}
-void CLMeanStdDev::run()
+template <typename T>
+void CLMeanStdDev::run_float()
+{
+ _memory_group.acquire();
+
+ // Perform reduction on x-axis
+ _reduction_operation_mean.run();
+ if(_run_stddev)
+ {
+ _reduction_operation_stddev.run();
+ _reduction_output_stddev.map(true);
+ }
+
+ _reduction_output_mean.map(true);
+
+ auto mean = static_cast<T>(0);
+
+ // Calculate final result for mean
+ for(unsigned int i = 0; i < _reduction_output_mean.info()->dimension(1); ++i)
+ {
+ mean += *reinterpret_cast<T *>(_reduction_output_mean.buffer() + _reduction_output_mean.info()->offset_element_in_bytes(Coordinates(0, i)));
+ }
+
+ mean /= _num_pixels;
+ *_mean = mean;
+
+ if(_run_stddev)
+ {
+ auto stddev = static_cast<T>(0);
+ // Calculate final result for stddev
+ for(unsigned int i = 0; i < _reduction_output_stddev.info()->dimension(1); ++i)
+ {
+ stddev += *reinterpret_cast<T *>(_reduction_output_stddev.buffer() + _reduction_output_stddev.info()->offset_element_in_bytes(Coordinates(0, i)));
+ }
+ *_stddev = std::sqrt((stddev / _num_pixels) - (mean * mean));
+
+ _reduction_output_stddev.unmap();
+ }
+ _reduction_output_mean.unmap();
+
+ _memory_group.release();
+}
+
+void CLMeanStdDev::run_int()
{
CLScheduler::get().enqueue(_fill_border_kernel);
CLScheduler::get().enqueue(_mean_stddev_kernel);
}
+
+void CLMeanStdDev::run()
+{
+ switch(_data_type)
+ {
+ case DataType::F16:
+ run_float<half>();
+ break;
+ case DataType::F32:
+ run_float<float>();
+ break;
+ case DataType::U8:
+ run_int();
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ }
+}
diff --git a/tests/validation/CL/MeanStdDev.cpp b/tests/validation/CL/MeanStdDev.cpp
index 92d87e09f2..8ccb757364 100644
--- a/tests/validation/CL/MeanStdDev.cpp
+++ b/tests/validation/CL/MeanStdDev.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,12 +39,17 @@ namespace
{
RelativeTolerance<float> tolerance_rel_high_error(0.05f);
RelativeTolerance<float> tolerance_rel_low_error(0.0005f);
+RelativeTolerance<float> tolerance_rel_high_error_f32(0.001f);
+RelativeTolerance<float> tolerance_rel_low_error_f32(0.00001f);
+RelativeTolerance<float> tolerance_rel_high_error_f16(0.1f);
+RelativeTolerance<float> tolerance_rel_low_error_f16(0.01f);
} // namespace
TEST_SUITE(CL)
TEST_SUITE(MeanStdDev)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", DataType::U8)), shape, data_type)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", { DataType::U8 })), shape,
+ data_type)
{
// Create tensors
CLTensor src = create_tensor<CLTensor>(shape, data_type);
@@ -71,6 +76,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datase
template <typename T>
using CLMeanStdDevFixture = MeanStdDevValidationFixture<CLTensor, CLAccessor, CLMeanStdDev, T>;
+TEST_SUITE(U8)
FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
DataType::U8)))
{
@@ -89,9 +95,43 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture<uint8_t>, framework::Datase
// Validate std_dev output
validate(_target.second, _reference.second, tolerance_rel_high_error);
}
+TEST_SUITE_END() // U8
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<half>, framework::DatasetMode::ALL, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+ DataType::F16)))
+{
+ // Validate mean output
+ validate(_target.first, _reference.first, tolerance_rel_low_error_f16);
+
+ // Validate std_dev output
+ validate(_target.second, _reference.second, tolerance_rel_high_error_f16);
+}
+TEST_SUITE_END() // F16
+
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+ DataType::F32)))
+{
+ // Validate mean output
+ validate(_target.first, _reference.first, tolerance_rel_low_error_f32);
+
+ // Validate std_dev output
+ validate(_target.second, _reference.second, tolerance_rel_high_error_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
+ DataType::F32)))
+{
+ // Validate mean output
+ validate(_target.first, _reference.first, tolerance_rel_low_error_f32);
+
+ // Validate std_dev output
+ validate(_target.second, _reference.second, tolerance_rel_high_error_f32);
+}
+TEST_SUITE_END() // F32
+
+TEST_SUITE_END() // MeanStdDev
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp
index a48e2f9d5f..ca0988f955 100644
--- a/tests/validation/CL/ReductionOperation.cpp
+++ b/tests/validation/CL/ReductionOperation.cpp
@@ -45,6 +45,7 @@ namespace
{
/** Tolerance for float operations */
RelativeTolerance<float> tolerance_f32(0.00001f);
+RelativeTolerance<float> tolerance_f16(0.1f);
} // namespace
TEST_SUITE(CL)
@@ -55,7 +56,7 @@ TEST_SUITE(ReductionOperation)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("InputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching data type input/output
TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1
- TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32
+ TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F16/F32
TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions
TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 0
TensorInfo(TensorShape(128U, 64U), 1, DataType::F32)
@@ -84,9 +85,23 @@ template <typename T>
using CLReductionOperationFixture = ReductionOperationValidationFixture<CLTensor, CLAccessor, CLReductionOperation, T>;
TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations()))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations()))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // F16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations()))
+ combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations()))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -97,11 +112,11 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<float>, framework::
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // F32
+TEST_SUITE_END() // Float
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // Reduction
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/fixtures/MeanStdDevFixture.h b/tests/validation/fixtures/MeanStdDevFixture.h
index 17dfe78dbd..58d4644069 100644
--- a/tests/validation/fixtures/MeanStdDevFixture.h
+++ b/tests/validation/fixtures/MeanStdDevFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,15 @@ protected:
template <typename U>
void fill(U &&tensor)
{
- library->fill_tensor_uniform(tensor, 0);
+ if(is_data_type_float(tensor.data_type()))
+ {
+ std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, 0);
+ }
+ else
+ {
+ library->fill_tensor_uniform(tensor, 0);
+ }
}
std::pair<float, float> compute_target(const TensorShape &shape, DataType data_type)
diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h
index 6fa5f0c44f..0dee7eb707 100644
--- a/tests/validation/fixtures/ReductionOperationFixture.h
+++ b/tests/validation/fixtures/ReductionOperationFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,7 +56,8 @@ protected:
template <typename U>
void fill(U &&tensor)
{
- library->fill_tensor_uniform(tensor, 0);
+ std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, 0);
}
TensorType compute_target(const TensorShape &src_shape, const TensorShape &dst_shape, DataType data_type, unsigned int axis, ReductionOperation op)
diff --git a/tests/validation/reference/MeanStdDev.cpp b/tests/validation/reference/MeanStdDev.cpp
index 4a39b13d56..f48fcb11d2 100644
--- a/tests/validation/reference/MeanStdDev.cpp
+++ b/tests/validation/reference/MeanStdDev.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,6 +51,8 @@ std::pair<float, float> mean_and_standard_deviation(const SimpleTensor<T> &in)
}
template std::pair<float, float> mean_and_standard_deviation(const SimpleTensor<uint8_t> &in);
+template std::pair<float, float> mean_and_standard_deviation(const SimpleTensor<half> &in);
+template std::pair<float, float> mean_and_standard_deviation(const SimpleTensor<float> &in);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index acfcc09cea..871a761b1a 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,7 +53,7 @@ T reduce_operation(T *ptr, int reduce_elements, ReductionOperation op)
switch(op)
{
case ReductionOperation::SUM_SQUARE:
- return std::accumulate(ptr, ptr + reduce_elements, 0.f, square<T>());
+ return std::accumulate(ptr, ptr + reduce_elements, static_cast<T>(0), square<T>());
default:
ARM_COMPUTE_ERROR("Unsupported reduction operation");
}
@@ -87,6 +87,7 @@ SimpleTensor<T> reduction_operation(const SimpleTensor<T> &src, const TensorShap
}
template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
} // namespace reference
} // namespace validation
} // namespace test
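
Editor's note: the accumulate-seed change in reference/ReductionOperation.cpp leans on a std::accumulate detail worth spelling out: the accumulator has the type of the init argument, so a 0.f seed keeps intermediate sums in float even when T is half, and the likely motivation for static_cast<T>(0) is to keep the reference accumulation in the same type as the data. A standalone illustration of the seed-type rule (not library code):

#include <numeric>
#include <vector>

int main()
{
    const std::vector<double> v{ 0.1, 0.2, 0.3 };
    const auto as_int    = std::accumulate(v.begin(), v.end(), 0);   // int accumulator: truncates to 0
    const auto as_double = std::accumulate(v.begin(), v.end(), 0.0); // double accumulator: 0.6
    return (as_int == 0 && as_double > 0.5) ? 0 : 1;
}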