author     Michalis Spyrou <michalis.spyrou@arm.com>    2018-06-15 16:15:26 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:54:54 +0000
commit     d1794ebfa10d05af7d2458c5d506152fd38068d3 (patch)
tree       e3f286aaba86b1f0bcda3390ad4d8af96b965fc7
parent     7777b1aa865d3c17dcef31573d44fae421176109 (diff)
download   ComputeLibrary-d1794ebfa10d05af7d2458c5d506152fd38068d3.tar.gz
COMPMID-1226 Extend CLMeanStdDev to support FP32 / FP16
- Extend support for FP16 in CLReduction.
- For F16/F32 MeanStdDev we perform one reduction operation for mean and one
  for stddev and calculate the final result on the host CPU.

Change-Id: Iad2099f26c0ba7969737d22f00c6c275634d875c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/135870
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
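Editor's note: the host-side finalisation described above is the textbook identity stddev = sqrt(E[x^2] - E[x]^2) applied to the per-row reduction outputs. A minimal standalone sketch of that step (names and the use of std::vector are illustrative; the actual implementation is CLMeanStdDev::run_float() further down, which reads the values from mapped CL buffers):

#include <cmath>
#include <numeric>
#include <utility>
#include <vector>

// sums:       per-row results of ReductionOperation::SUM
// sums_sq:    per-row results of ReductionOperation::SUM_SQUARE
// num_pixels: width * height of the input image
static std::pair<float, float> finalize_mean_stddev(const std::vector<float> &sums,
                                                    const std::vector<float> &sums_sq,
                                                    unsigned int num_pixels)
{
    const float mean   = std::accumulate(sums.begin(), sums.end(), 0.f) / num_pixels;
    const float stddev = std::sqrt(std::accumulate(sums_sq.begin(), sums_sq.end(), 0.f) / num_pixels - mean * mean);
    return { mean, stddev };
}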
-rw-r--r--  arm_compute/core/CL/kernels/CLMeanStdDevKernel.h          |  13
-rw-r--r--  arm_compute/core/CL/kernels/CLReductionOperationKernel.h  |   4
-rw-r--r--  arm_compute/core/Validate.h                               |  13
-rw-r--r--  arm_compute/runtime/CL/functions/CLMeanStdDev.h           |  52
-rw-r--r--  arm_compute/runtime/CL/functions/CLReductionOperation.h   |   4
-rw-r--r--  src/core/CL/kernels/CLMeanStdDevKernel.cpp                |  23
-rw-r--r--  src/core/CL/kernels/CLReductionOperationKernel.cpp        |   4
-rw-r--r--  src/core/Validate.cpp                                     |  10
-rw-r--r--  src/runtime/CL/functions/CLMeanStdDev.cpp                 | 134
-rw-r--r--  tests/validation/CL/MeanStdDev.cpp                        |  48
-rw-r--r--  tests/validation/CL/ReductionOperation.cpp                |  27
-rw-r--r--  tests/validation/fixtures/MeanStdDevFixture.h             |  12
-rw-r--r--  tests/validation/fixtures/ReductionOperationFixture.h     |   5
-rw-r--r--  tests/validation/reference/MeanStdDev.cpp                 |   4
-rw-r--r--  tests/validation/reference/ReductionOperation.cpp         |   5
15 files changed, 310 insertions(+), 48 deletions(-)
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
index a6898fde3e..46e266e07d 100644
--- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
+++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -59,6 +59,17 @@ public:
* @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
*/
void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel.
+ *
+ * @param[in] input Input image info. Data types supported: U8.
+ * @param[in] mean Input average pixel value.
+ * @param[in] global_sum Keeps global sum of pixel values.
+ * @param[in] stddev (Optional) Output standard deviation of pixel values.
+ * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
index 56f75e5fb7..60e2f08005 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
@@ -50,7 +50,7 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
@@ -60,7 +60,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
*
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
* Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
index 1646ebe719..918c8e5fc3 100644
--- a/arm_compute/core/Validate.h
+++ b/arm_compute/core/Validate.h
@@ -787,6 +787,19 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const
*/
arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
const ITensor *tensor);
+
+/** Return an error if the tensor info is not 2D.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file Name of the file where the error occurred.
+ * @param[in] line Line on which the error occurred.
+ * @param[in] tensor Tensor info to validate.
+ *
+ * @return Status
+ */
+arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
+ const ITensorInfo *tensor);
+
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t))
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(t) \
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
index 7622138236..2e46563423 100644
--- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,10 @@
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
@@ -36,23 +39,56 @@ class CLMeanStdDev : public IFunction
{
public:
/** Default Constructor. */
- CLMeanStdDev();
+ CLMeanStdDev(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMeanStdDev(const CLMeanStdDev &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMeanStdDev &operator=(const CLMeanStdDev &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMeanStdDev(CLMeanStdDev &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMeanStdDev &operator=(CLMeanStdDev &&) = default;
+ /** Default destructor */
+ ~CLMeanStdDev() = default;
/** Initialise the kernel's inputs and outputs.
*
- * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling)
+ * @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling)
* @param[out] mean Output average pixel value.
- * @param[out] stddev (Optional)Output standard deviation of pixel values.
+ * @param[out] stddev (Optional) Output standard deviation of pixel values.
*/
void configure(ICLImage *input, float *mean, float *stddev = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDev
+ *
+ * @param[in] input Input image. Data types supported: U8/F16/F32.
+ * @param[in] mean Output average pixel value.
+ * @param[in] stddev (Optional) Output standard deviation of pixel values.
+ *
+ * @return a status
+ */
+ static Status validate(ITensorInfo *input, float *mean, float *stddev = nullptr);
// Inherited methods overridden:
void run() override;
private:
- CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */
- CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
- cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
- cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
+ template <typename T>
+ void run_float();
+ void run_int();
+
+ CLMemoryGroup _memory_group; /**< Function's memory group */
+ DataType _data_type; /**< Input data type. */
+ unsigned int _num_pixels; /**< Number of image's pixels. */
+ bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */
+ CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */
+ CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */
+ CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */
+ CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */
+ float *_mean; /**< Pointer that holds the mean value. */
+ float *_stddev; /**< Pointer that holds the standard deviation value. */
+ CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that performs the standard deviation calculation. */
+ CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
+ cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
+ cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
};
}
#endif /*__ARM_COMPUTE_CLMEANSTDDEV_H__ */
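Editor's note: with the new overloads above, a typical F32 use of the function could look like the sketch below (tensor shape, fill step and scheduler setup are illustrative assumptions, not part of this patch):

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"

using namespace arm_compute;

void mean_stddev_f32_example()
{
    CLScheduler::get().default_init(); // create CL context and command queue

    CLTensor src;
    src.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32)); // 2D image

    float mean   = 0.f;
    float stddev = 0.f;

    // Early check via the new static validate()
    const Status status = CLMeanStdDev::validate(src.info(), &mean, &stddev);
    ARM_COMPUTE_ERROR_THROW_ON(status);

    CLMeanStdDev mean_stddev;
    mean_stddev.configure(&src, &mean, &stddev); // F16/F32 path is built on CLReductionOperation

    src.allocator()->allocate();
    // ... fill src ...
    mean_stddev.run();                           // mean/stddev are finalised on the host
}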
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index b8108b507b..d862aff7b7 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -53,7 +53,7 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
* @param[in] op Reduction operation to perform.
@@ -62,7 +62,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation.
*
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
* @param[in] op Reduction operation to perform.
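Editor's note: the reduction function can also be exercised standalone with the newly supported F16 type; a minimal sketch along the lines of the validation tests (shapes are illustrative, and initialising the output TensorInfo explicitly is an assumption of this example):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"

using namespace arm_compute;

void reduction_sum_square_f16_example()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F16));
    dst.allocator()->init(TensorInfo(TensorShape(1U, 64U), 1, DataType::F16)); // axis 0 collapsed to 1

    CLReductionOperation reduction;
    reduction.configure(&src, &dst, 0, ReductionOperation::SUM_SQUARE);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src ...
    reduction.run();
}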
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index fc8764dbfe..bd31131fe5 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,6 +23,7 @@
*/
#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
@@ -49,14 +50,24 @@ BorderSize CLMeanStdDevKernel::border_size() const
return _border_size;
}
+Status CLMeanStdDevKernel::validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
+{
+ ARM_COMPUTE_UNUSED(mean);
+ ARM_COMPUTE_UNUSED(stddev);
+ ARM_COMPUTE_UNUSED(global_sum);
+ ARM_COMPUTE_UNUSED(global_sum_squared);
+ ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED();
+ ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+
+ return Status{};
+}
+
void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
{
- ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED();
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(nullptr == mean);
- ARM_COMPUTE_ERROR_ON(nullptr == global_sum);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, global_sum);
ARM_COMPUTE_ERROR_ON(stddev && nullptr == global_sum_squared);
+ ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevKernel::validate(input->info(), mean, global_sum, stddev, global_sum_squared));
_input = input;
_mean = mean;
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index d64f0d89c5..95967fa974 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -44,7 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
ARM_COMPUTE_UNUSED(op);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
@@ -69,7 +69,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
const unsigned int num_elems_processed_per_iteration = 16;
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- const unsigned int border_width = ((input->dimension(0) % 128) != 0) ? 128 - input->dimension(0) % 128 : 0; // TODO (COMPMID-1143): Fix padding (possible value 127!)
+ const unsigned int border_width = ((input->dimension(0) % num_elems_processed_per_iteration) != 0) ? num_elems_processed_per_iteration - input->dimension(0) % num_elems_processed_per_iteration : 0;
AccessWindowStatic input_access(input, 0, 0, input->dimension(0) + border_width, 1);
AccessWindowHorizontal output_access(output, 0, 1);
diff --git a/src/core/Validate.cpp b/src/core/Validate.cpp
index d4fabd4a0f..60a97dfcc2 100644
--- a/src/core/Validate.cpp
+++ b/src/core/Validate.cpp
@@ -100,6 +100,16 @@ arm_compute::Status arm_compute::error_on_tensor_not_2d(const char *function, co
return arm_compute::Status{};
}
+arm_compute::Status arm_compute::error_on_tensor_not_2d(const char *function, const char *file, const int line,
+ const arm_compute::ITensorInfo *tensor)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor->num_dimensions() != 2,
+ function, file, line,
+ "Only 2D Tensors are supported by this kernel (%d passed)", tensor->num_dimensions());
+ return arm_compute::Status{};
+}
+
arm_compute::Status arm_compute::error_on_channel_not_in_known_format(const char *function, const char *file, const int line,
arm_compute::Format fmt, arm_compute::Channel cn)
{
diff --git a/src/runtime/CL/functions/CLMeanStdDev.cpp b/src/runtime/CL/functions/CLMeanStdDev.cpp
index 838f7e73d2..157f306d0c 100644
--- a/src/runtime/CL/functions/CLMeanStdDev.cpp
+++ b/src/runtime/CL/functions/CLMeanStdDev.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,35 +21,149 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
using namespace arm_compute;
-CLMeanStdDev::CLMeanStdDev()
- : _mean_stddev_kernel(),
+CLMeanStdDev::CLMeanStdDev(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _data_type(),
+ _num_pixels(),
+ _run_stddev(),
+ _reduction_operation_mean(),
+ _reduction_operation_stddev(),
+ _reduction_output_mean(),
+ _reduction_output_stddev(),
+ _mean(nullptr),
+ _stddev(nullptr),
+ _mean_stddev_kernel(),
_fill_border_kernel(),
_global_sum(),
_global_sum_squared()
{
}
+Status CLMeanStdDev::validate(ITensorInfo *input, float *mean, float *stddev)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input);
+ if(is_data_type_float(input->data_type()))
+ {
+ ARM_COMPUTE_UNUSED(mean);
+ ARM_COMPUTE_UNUSED(stddev);
+
+ TensorShape output_shape = TensorShape{ 1, input->dimension(1) };
+ TensorInfo output_shape_info = TensorInfo(output_shape, 1, DataType::U8);
+ return CLReductionOperation::validate(input, &output_shape_info, 0, ReductionOperation::SUM);
+ }
+ else
+ {
+ return CLMeanStdDevKernel::validate(input, mean, nullptr, stddev, nullptr);
+ }
+}
+
void CLMeanStdDev::configure(ICLImage *input, float *mean, float *stddev)
{
- _global_sum = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
+ // In the case of F16/F32 we call reduction operation for calculating CLMeanStdDev
+ _data_type = input->info()->data_type();
- if(stddev != nullptr)
+ if(is_data_type_float(_data_type))
{
- _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
+ _num_pixels = input->info()->dimension(0) * input->info()->dimension(1);
+
+ _memory_group.manage(&_reduction_output_mean);
+ _reduction_operation_mean.configure(input, &_reduction_output_mean, 0, ReductionOperation::SUM);
+ _reduction_output_mean.allocator()->allocate();
+ _mean = mean;
+
+ if(stddev != nullptr)
+ {
+ _memory_group.manage(&_reduction_output_stddev);
+ _reduction_operation_stddev.configure(input, &_reduction_output_stddev, 0, ReductionOperation::SUM_SQUARE);
+ _reduction_output_stddev.allocator()->allocate();
+ _stddev = stddev;
+ _run_stddev = true;
+ }
}
+ else
+ {
+ _global_sum = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
- _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared);
- _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
+ if(stddev != nullptr)
+ {
+ _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
+ }
+
+ _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared);
+ _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
+ }
}
-void CLMeanStdDev::run()
+template <typename T>
+void CLMeanStdDev::run_float()
+{
+ _memory_group.acquire();
+
+ // Perform reduction on x-axis
+ _reduction_operation_mean.run();
+ if(_run_stddev)
+ {
+ _reduction_operation_stddev.run();
+ _reduction_output_stddev.map(true);
+ }
+
+ _reduction_output_mean.map(true);
+
+ auto mean = static_cast<T>(0);
+
+ // Calculate final result for mean
+ for(unsigned int i = 0; i < _reduction_output_mean.info()->dimension(1); ++i)
+ {
+ mean += *reinterpret_cast<T *>(_reduction_output_mean.buffer() + _reduction_output_mean.info()->offset_element_in_bytes(Coordinates(0, i)));
+ }
+
+ mean /= _num_pixels;
+ *_mean = mean;
+
+ if(_run_stddev)
+ {
+ auto stddev = static_cast<T>(0);
+ // Calculate final result for stddev
+ for(unsigned int i = 0; i < _reduction_output_stddev.info()->dimension(1); ++i)
+ {
+ stddev += *reinterpret_cast<T *>(_reduction_output_stddev.buffer() + _reduction_output_stddev.info()->offset_element_in_bytes(Coordinates(0, i)));
+ }
+ *_stddev = std::sqrt((stddev / _num_pixels) - (mean * mean));
+
+ _reduction_output_stddev.unmap();
+ }
+ _reduction_output_mean.unmap();
+
+ _memory_group.release();
+}
+
+void CLMeanStdDev::run_int()
{
CLScheduler::get().enqueue(_fill_border_kernel);
CLScheduler::get().enqueue(_mean_stddev_kernel);
}
+
+void CLMeanStdDev::run()
+{
+ switch(_data_type)
+ {
+ case DataType::F16:
+ run_float<half>();
+ break;
+ case DataType::F32:
+ run_float<float>();
+ break;
+ case DataType::U8:
+ run_int();
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ }
+}
diff --git a/tests/validation/CL/MeanStdDev.cpp b/tests/validation/CL/MeanStdDev.cpp
index 92d87e09f2..8ccb757364 100644
--- a/tests/validation/CL/MeanStdDev.cpp
+++ b/tests/validation/CL/MeanStdDev.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,12 +39,17 @@ namespace
{
RelativeTolerance<float> tolerance_rel_high_error(0.05f);
RelativeTolerance<float> tolerance_rel_low_error(0.0005f);
+RelativeTolerance<float> tolerance_rel_high_error_f32(0.001f);
+RelativeTolerance<float> tolerance_rel_low_error_f32(0.00001f);
+RelativeTolerance<float> tolerance_rel_high_error_f16(0.1f);
+RelativeTolerance<float> tolerance_rel_low_error_f16(0.01f);
} // namespace
TEST_SUITE(CL)
TEST_SUITE(MeanStdDev)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", DataType::U8)), shape, data_type)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", { DataType::U8 })), shape,
+ data_type)
{
// Create tensors
CLTensor src = create_tensor<CLTensor>(shape, data_type);
@@ -71,6 +76,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datase
template <typename T>
using CLMeanStdDevFixture = MeanStdDevValidationFixture<CLTensor, CLAccessor, CLMeanStdDev, T>;
+TEST_SUITE(U8)
FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
DataType::U8)))
{
@@ -89,9 +95,43 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture<uint8_t>, framework::Datase
// Validate std_dev output
validate(_target.second, _reference.second, tolerance_rel_high_error);
}
+TEST_SUITE_END() // U8
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<half>, framework::DatasetMode::ALL, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+ DataType::F16)))
+{
+ // Validate mean output
+ validate(_target.first, _reference.first, tolerance_rel_low_error_f16);
+
+ // Validate std_dev output
+ validate(_target.second, _reference.second, tolerance_rel_high_error_f16);
+}
+TEST_SUITE_END() // F16
+
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+ DataType::F32)))
+{
+ // Validate mean output
+ validate(_target.first, _reference.first, tolerance_rel_low_error_f32);
+
+ // Validate std_dev output
+ validate(_target.second, _reference.second, tolerance_rel_high_error_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
+ DataType::F32)))
+{
+ // Validate mean output
+ validate(_target.first, _reference.first, tolerance_rel_low_error_f32);
+
+ // Validate std_dev output
+ validate(_target.second, _reference.second, tolerance_rel_high_error_f32);
+}
+TEST_SUITE_END() // F32
+
+TEST_SUITE_END() // MeanStdDev
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp
index a48e2f9d5f..ca0988f955 100644
--- a/tests/validation/CL/ReductionOperation.cpp
+++ b/tests/validation/CL/ReductionOperation.cpp
@@ -45,6 +45,7 @@ namespace
{
/** Tolerance for float operations */
RelativeTolerance<float> tolerance_f32(0.00001f);
+RelativeTolerance<float> tolerance_f16(0.1f);
} // namespace
TEST_SUITE(CL)
@@ -55,7 +56,7 @@ TEST_SUITE(ReductionOperation)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("InputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching data type input/output
TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1
- TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32
+ TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F16/F32
TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions
TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 0
TensorInfo(TensorShape(128U, 64U), 1, DataType::F32)
@@ -84,9 +85,23 @@ template <typename T>
using CLReductionOperationFixture = ReductionOperationValidationFixture<CLTensor, CLAccessor, CLReductionOperation, T>;
TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations()))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations()))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // F16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations()))
+ combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations()))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -97,11 +112,11 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<float>, framework::
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // F32
+TEST_SUITE_END() // Float
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // Reduction
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/fixtures/MeanStdDevFixture.h b/tests/validation/fixtures/MeanStdDevFixture.h
index 17dfe78dbd..58d4644069 100644
--- a/tests/validation/fixtures/MeanStdDevFixture.h
+++ b/tests/validation/fixtures/MeanStdDevFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,15 @@ protected:
template <typename U>
void fill(U &&tensor)
{
- library->fill_tensor_uniform(tensor, 0);
+ if(is_data_type_float(tensor.data_type()))
+ {
+ std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, 0);
+ }
+ else
+ {
+ library->fill_tensor_uniform(tensor, 0);
+ }
}
std::pair<float, float> compute_target(const TensorShape &shape, DataType data_type)
diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h
index 6fa5f0c44f..0dee7eb707 100644
--- a/tests/validation/fixtures/ReductionOperationFixture.h
+++ b/tests/validation/fixtures/ReductionOperationFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,7 +56,8 @@ protected:
template <typename U>
void fill(U &&tensor)
{
- library->fill_tensor_uniform(tensor, 0);
+ std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, 0);
}
TensorType compute_target(const TensorShape &src_shape, const TensorShape &dst_shape, DataType data_type, unsigned int axis, ReductionOperation op)
diff --git a/tests/validation/reference/MeanStdDev.cpp b/tests/validation/reference/MeanStdDev.cpp
index 4a39b13d56..f48fcb11d2 100644
--- a/tests/validation/reference/MeanStdDev.cpp
+++ b/tests/validation/reference/MeanStdDev.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,6 +51,8 @@ std::pair<float, float> mean_and_standard_deviation(const SimpleTensor<T> &in)
}
template std::pair<float, float> mean_and_standard_deviation(const SimpleTensor<uint8_t> &in);
+template std::pair<float, float> mean_and_standard_deviation(const SimpleTensor<half> &in);
+template std::pair<float, float> mean_and_standard_deviation(const SimpleTensor<float> &in);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index acfcc09cea..871a761b1a 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,7 +53,7 @@ T reduce_operation(T *ptr, int reduce_elements, ReductionOperation op)
switch(op)
{
case ReductionOperation::SUM_SQUARE:
- return std::accumulate(ptr, ptr + reduce_elements, 0.f, square<T>());
+ return std::accumulate(ptr, ptr + reduce_elements, static_cast<T>(0), square<T>());
default:
ARM_COMPUTE_ERROR("Unsupported reduction operation");
}
@@ -87,6 +87,7 @@ SimpleTensor<T> reduction_operation(const SimpleTensor<T> &src, const TensorShap
}
template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
} // namespace reference
} // namespace validation
} // namespace test
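
Editor's note: the accumulate-seed change in reference/ReductionOperation.cpp leans on a std::accumulate detail worth spelling out: the accumulator has the type of the init argument, so a 0.f seed keeps intermediate sums in float even when T is half, and the likely motivation for static_cast<T>(0) is to keep the reference accumulation in the same type as the data. A standalone illustration of the seed-type rule (not library code):

#include <numeric>
#include <vector>

int main()
{
    const std::vector<double> v{ 0.1, 0.2, 0.3 };
    const auto as_int    = std::accumulate(v.begin(), v.end(), 0);   // int accumulator: truncates to 0
    const auto as_double = std::accumulate(v.begin(), v.end(), 0.0); // double accumulator: 0.6
    return (as_int == 0 && as_double > 0.5) ? 0 : 1;
}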