From d1794ebfa10d05af7d2458c5d506152fd38068d3 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Fri, 15 Jun 2018 16:15:26 +0100 Subject: COMPMID-1226 Extend CLMeanStdDev to support FP32 / FP16 - Extend support for FP16 in CLReduction. - For F16/F32 MeanStdDev we perform one reduction operation for mean and one for stddev and we calculate the final result in the host CPU. Change-Id: Iad2099f26c0ba7969737d22f00c6c275634d875c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/135870 Tested-by: Jenkins Reviewed-by: Georgios Pinitas --- arm_compute/core/CL/kernels/CLMeanStdDevKernel.h | 13 +- .../core/CL/kernels/CLReductionOperationKernel.h | 4 +- arm_compute/core/Validate.h | 13 ++ arm_compute/runtime/CL/functions/CLMeanStdDev.h | 52 ++++++-- .../runtime/CL/functions/CLReductionOperation.h | 4 +- src/core/CL/kernels/CLMeanStdDevKernel.cpp | 23 +++- src/core/CL/kernels/CLReductionOperationKernel.cpp | 4 +- src/core/Validate.cpp | 10 ++ src/runtime/CL/functions/CLMeanStdDev.cpp | 134 +++++++++++++++++++-- tests/validation/CL/MeanStdDev.cpp | 48 +++++++- tests/validation/CL/ReductionOperation.cpp | 27 ++++- tests/validation/fixtures/MeanStdDevFixture.h | 12 +- .../fixtures/ReductionOperationFixture.h | 5 +- tests/validation/reference/MeanStdDev.cpp | 4 +- tests/validation/reference/ReductionOperation.cpp | 5 +- 15 files changed, 310 insertions(+), 48 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h index a6898fde3e..46e266e07d 100644 --- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h +++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,17 @@ public: * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). 
*/ void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel. + * + * @param[in] input Input image info. Data types supported: U8. + * @param[in] mean Input average pixel value. + * @param[in] global_sum Keeps global sum of pixel values. + * @param[in] stddev (Optional) Output standard deviation of pixel values. + * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h index 56f75e5fb7..60e2f08005 100644 --- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h +++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h @@ -50,7 +50,7 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * Output will have the same number of dimensions as input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 @@ -60,7 +60,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel. * - * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] input Source tensor info. 
Data types supported: F16/F32. Data layouts supported: NCHW. * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. * Output will have the same number of dimensions as input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h index 1646ebe719..918c8e5fc3 100644 --- a/arm_compute/core/Validate.h +++ b/arm_compute/core/Validate.h @@ -787,6 +787,19 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const */ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensor *tensor); + +/** Return an error if the tensor info is not 2D. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor info to validate. + * + * @return Status + */ +arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line, + const ITensorInfo *tensor); + #define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t)) #define ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(t) \ diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h index 7622138236..2e46563423 100644 --- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h +++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,7 +27,10 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" +#include "arm_compute/runtime/CL/functions/CLReductionOperation.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" namespace arm_compute { @@ -36,23 +39,56 @@ class CLMeanStdDev : public IFunction { public: /** Default Constructor. */ - CLMeanStdDev(); + CLMeanStdDev(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDev(const CLMeanStdDev &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDev &operator=(const CLMeanStdDev &) = delete; + /** Allow instances of this class to be moved */ + CLMeanStdDev(CLMeanStdDev &&) = default; + /** Allow instances of this class to be moved */ + CLMeanStdDev &operator=(CLMeanStdDev &&) = default; + /** Default destructor */ + ~CLMeanStdDev() = default; /** Initialise the kernel's inputs and outputs. * - * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling) + * @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling) * @param[out] mean Output average pixel value. - * @param[out] stddev (Optional)Output standard deviation of pixel values. + * @param[out] stddev (Optional) Output standard deviation of pixel values. */ void configure(ICLImage *input, float *mean, float *stddev = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDev + * + * @param[in] input Input image. Data types supported: U8/F16/F32. + * @param[in] mean Output average pixel value. 
+ * @param[in] stddev (Optional) Output standard deviation of pixel values. + * + * @return a status + */ + static Status validate(ITensorInfo *input, float *mean, float *stddev = nullptr); // Inherited methods overridden: void run() override; private: - CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ - CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */ - cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ - cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ + template + void run_float(); + void run_int(); + + CLMemoryGroup _memory_group; /**< Function's memory group */ + DataType _data_type; /**< Input data type. */ + unsigned int _num_pixels; /**< Number of image's pixels. */ + bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */ + CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */ + CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */ + CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */ + CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */ + float *_mean; /**< Pointer that holds the mean value. */ + float *_stddev; /**< Pointer that holds the standard deviation value. */ + CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ + CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. 
*/ + cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ + cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ }; } #endif /*__ARM_COMPUTE_CLMEANSTDDEV_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h index b8108b507b..d862aff7b7 100644 --- a/arm_compute/runtime/CL/functions/CLReductionOperation.h +++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h @@ -53,7 +53,7 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. @@ -62,7 +62,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation. * - * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW. * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index fc8764dbfe..bd31131fe5 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" +#include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" @@ -49,14 +50,24 @@ BorderSize CLMeanStdDevKernel::border_size() const return _border_size; } +Status CLMeanStdDevKernel::validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared) +{ + ARM_COMPUTE_UNUSED(mean); + ARM_COMPUTE_UNUSED(stddev); + ARM_COMPUTE_UNUSED(global_sum); + ARM_COMPUTE_UNUSED(global_sum_squared); + ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED(); + ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + + return Status{}; +} + void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared) { - ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED(); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(nullptr == mean); - ARM_COMPUTE_ERROR_ON(nullptr == global_sum); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, global_sum); ARM_COMPUTE_ERROR_ON(stddev && nullptr == global_sum_squared); + ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevKernel::validate(input->info(), mean, global_sum, stddev, global_sum_squared)); _input = input; _mean = mean; diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index d64f0d89c5..95967fa974 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -44,7 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u ARM_COMPUTE_UNUSED(op); 
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); @@ -69,7 +69,7 @@ std::tuple validate_and_configure_window(ITensorInfo *input, ITe const unsigned int num_elems_processed_per_iteration = 16; Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - const unsigned int border_width = ((input->dimension(0) % 128) != 0) ? 128 - input->dimension(0) % 128 : 0; // TODO (COMPMID-1143): Fix padding (possible value 127!) + const unsigned int border_width = ((input->dimension(0) % num_elems_processed_per_iteration) != 0) ? num_elems_processed_per_iteration - input->dimension(0) % num_elems_processed_per_iteration : 0; AccessWindowStatic input_access(input, 0, 0, input->dimension(0) + border_width, 1); AccessWindowHorizontal output_access(output, 0, 1); diff --git a/src/core/Validate.cpp b/src/core/Validate.cpp index d4fabd4a0f..60a97dfcc2 100644 --- a/src/core/Validate.cpp +++ b/src/core/Validate.cpp @@ -100,6 +100,16 @@ arm_compute::Status arm_compute::error_on_tensor_not_2d(const char *function, co return arm_compute::Status{}; } +arm_compute::Status arm_compute::error_on_tensor_not_2d(const char *function, const char *file, const int line, + const arm_compute::ITensorInfo *tensor) +{ + ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor->num_dimensions() != 2, + function, file, line, + "Only 2D Tensors are supported by this kernel (%d passed)", tensor->num_dimensions()); + return arm_compute::Status{}; +} + arm_compute::Status arm_compute::error_on_channel_not_in_known_format(const char *function, const 
char *file, const int line, arm_compute::Format fmt, arm_compute::Channel cn) { diff --git a/src/runtime/CL/functions/CLMeanStdDev.cpp b/src/runtime/CL/functions/CLMeanStdDev.cpp index 838f7e73d2..157f306d0c 100644 --- a/src/runtime/CL/functions/CLMeanStdDev.cpp +++ b/src/runtime/CL/functions/CLMeanStdDev.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -21,35 +21,149 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h" +#include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h" using namespace arm_compute; -CLMeanStdDev::CLMeanStdDev() - : _mean_stddev_kernel(), +CLMeanStdDev::CLMeanStdDev(std::shared_ptr memory_manager) // NOLINT + : _memory_group(std::move(memory_manager)), + _data_type(), + _num_pixels(), + _run_stddev(), + _reduction_operation_mean(), + _reduction_operation_stddev(), + _reduction_output_mean(), + _reduction_output_stddev(), + _mean(nullptr), + _stddev(nullptr), + _mean_stddev_kernel(), _fill_border_kernel(), _global_sum(), _global_sum_squared() { } +Status CLMeanStdDev::validate(ITensorInfo *input, float *mean, float *stddev) +{ + ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input); + if(is_data_type_float(input->data_type())) + { + ARM_COMPUTE_UNUSED(mean); + ARM_COMPUTE_UNUSED(stddev); + + TensorShape output_shape = TensorShape{ 1, input->dimension(1) }; + TensorInfo output_shape_info = TensorInfo(output_shape, 1, input->data_type()); + return CLReductionOperation::validate(input, &output_shape_info, 0, ReductionOperation::SUM); + } + else + { + return CLMeanStdDevKernel::validate(input, mean, nullptr, stddev, nullptr); + } +} + void CLMeanStdDev::configure(ICLImage *input, float *mean, float *stddev) { - _global_sum = cl::Buffer(CLScheduler::get().context(), 
CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong)); + // In the case of F16/F32 we call reduction operation for calculating CLMeanStdDev + _data_type = input->info()->data_type(); - if(stddev != nullptr) + if(is_data_type_float(_data_type)) { - _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong)); + _num_pixels = input->info()->dimension(0) * input->info()->dimension(1); + + _memory_group.manage(&_reduction_output_mean); + _reduction_operation_mean.configure(input, &_reduction_output_mean, 0, ReductionOperation::SUM); + _reduction_output_mean.allocator()->allocate(); + _mean = mean; + + if(stddev != nullptr) + { + _memory_group.manage(&_reduction_output_stddev); + _reduction_operation_stddev.configure(input, &_reduction_output_stddev, 0, ReductionOperation::SUM_SQUARE); + _reduction_output_stddev.allocator()->allocate(); + _stddev = stddev; + _run_stddev = true; + } } + else + { + _global_sum = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong)); - _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared); - _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0))); + if(stddev != nullptr) + { + _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong)); + } + + _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared); + _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0))); + } } -void CLMeanStdDev::run() +template +void CLMeanStdDev::run_float() +{ + _memory_group.acquire(); + + // Perform reduction on x-axis + _reduction_operation_mean.run(); + if(_run_stddev) + { + _reduction_operation_stddev.run(); + _reduction_output_stddev.map(true); + } + + _reduction_output_mean.map(true); + + 
auto mean = static_cast(0); + + // Calculate final result for mean + for(unsigned int i = 0; i < _reduction_output_mean.info()->dimension(1); ++i) + { + mean += *reinterpret_cast(_reduction_output_mean.buffer() + _reduction_output_mean.info()->offset_element_in_bytes(Coordinates(0, i))); + } + + mean /= _num_pixels; + *_mean = mean; + + if(_run_stddev) + { + auto stddev = static_cast(0); + // Calculate final result for stddev + for(unsigned int i = 0; i < _reduction_output_stddev.info()->dimension(1); ++i) + { + stddev += *reinterpret_cast(_reduction_output_stddev.buffer() + _reduction_output_stddev.info()->offset_element_in_bytes(Coordinates(0, i))); + } + *_stddev = std::sqrt((stddev / _num_pixels) - (mean * mean)); + + _reduction_output_stddev.unmap(); + } + _reduction_output_mean.unmap(); + + _memory_group.release(); +} + +void CLMeanStdDev::run_int() { CLScheduler::get().enqueue(_fill_border_kernel); CLScheduler::get().enqueue(_mean_stddev_kernel); } + +void CLMeanStdDev::run() +{ + switch(_data_type) + { + case DataType::F16: + run_float(); + break; + case DataType::F32: + run_float(); + break; + case DataType::U8: + run_int(); + break; + default: + ARM_COMPUTE_ERROR("Not supported"); + } +} diff --git a/tests/validation/CL/MeanStdDev.cpp b/tests/validation/CL/MeanStdDev.cpp index 92d87e09f2..8ccb757364 100644 --- a/tests/validation/CL/MeanStdDev.cpp +++ b/tests/validation/CL/MeanStdDev.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -39,12 +39,17 @@ namespace { RelativeTolerance tolerance_rel_high_error(0.05f); RelativeTolerance tolerance_rel_low_error(0.0005f); +RelativeTolerance tolerance_rel_high_error_f32(0.001f); +RelativeTolerance tolerance_rel_low_error_f32(0.00001f); +RelativeTolerance tolerance_rel_high_error_f16(0.1f); +RelativeTolerance tolerance_rel_low_error_f16(0.01f); } // namespace TEST_SUITE(CL) TEST_SUITE(MeanStdDev) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", DataType::U8)), shape, data_type) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", { DataType::U8 })), shape, + data_type) { // Create tensors CLTensor src = create_tensor(shape, data_type); @@ -71,6 +76,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datase template using CLMeanStdDevFixture = MeanStdDevValidationFixture; +TEST_SUITE(U8) FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::U8))) { @@ -89,9 +95,43 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture, framework::Datase // Validate std_dev output validate(_target.second, _reference.second, tolerance_rel_high_error); } +TEST_SUITE_END() // U8 -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture, framework::DatasetMode::ALL, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate mean output + validate(_target.first, _reference.first, tolerance_rel_low_error_f16); + + // Validate std_dev output + validate(_target.second, _reference.second, tolerance_rel_high_error_f16); +} +TEST_SUITE_END() // F16 + +TEST_SUITE(F32) 
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate mean output + validate(_target.first, _reference.first, tolerance_rel_low_error_f32); + + // Validate std_dev output + validate(_target.second, _reference.second, tolerance_rel_high_error_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate mean output + validate(_target.first, _reference.first, tolerance_rel_low_error_f32); + + // Validate std_dev output + validate(_target.second, _reference.second, tolerance_rel_high_error_f32); +} +TEST_SUITE_END() // F32 + +TEST_SUITE_END() // MeanStdDev +TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp index a48e2f9d5f..ca0988f955 100644 --- a/tests/validation/CL/ReductionOperation.cpp +++ b/tests/validation/CL/ReductionOperation.cpp @@ -45,6 +45,7 @@ namespace { /** Tolerance for float operations */ RelativeTolerance tolerance_f32(0.00001f); +RelativeTolerance tolerance_f16(0.1f); } // namespace TEST_SUITE(CL) @@ -55,7 +56,7 @@ TEST_SUITE(ReductionOperation) DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching data type input/output TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1 - TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32 + TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F16/F32 TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 0 
TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) @@ -84,9 +85,23 @@ template using CLReductionOperationFixture = ReductionOperationValidationFixture; TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations())) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations())) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // F16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations())) + combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), datasets::ReductionOperations())) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -97,11 +112,11 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture, framework:: // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // F32 +TEST_SUITE_END() // Float -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // Reduction +TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/fixtures/MeanStdDevFixture.h 
b/tests/validation/fixtures/MeanStdDevFixture.h index 17dfe78dbd..58d4644069 100644 --- a/tests/validation/fixtures/MeanStdDevFixture.h +++ b/tests/validation/fixtures/MeanStdDevFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,7 +50,15 @@ protected: template void fill(U &&tensor) { - library->fill_tensor_uniform(tensor, 0); + if(is_data_type_float(tensor.data_type())) + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, 0); + } + else + { + library->fill_tensor_uniform(tensor, 0); + } } std::pair compute_target(const TensorShape &shape, DataType data_type) diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h index 6fa5f0c44f..0dee7eb707 100644 --- a/tests/validation/fixtures/ReductionOperationFixture.h +++ b/tests/validation/fixtures/ReductionOperationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,7 +56,8 @@ protected: template void fill(U &&tensor) { - library->fill_tensor_uniform(tensor, 0); + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, 0); } TensorType compute_target(const TensorShape &src_shape, const TensorShape &dst_shape, DataType data_type, unsigned int axis, ReductionOperation op) diff --git a/tests/validation/reference/MeanStdDev.cpp b/tests/validation/reference/MeanStdDev.cpp index 4a39b13d56..f48fcb11d2 100644 --- a/tests/validation/reference/MeanStdDev.cpp +++ b/tests/validation/reference/MeanStdDev.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -51,6 +51,8 @@ std::pair mean_and_standard_deviation(const SimpleTensor &in) } template std::pair mean_and_standard_deviation(const SimpleTensor &in); +template std::pair mean_and_standard_deviation(const SimpleTensor &in); +template std::pair mean_and_standard_deviation(const SimpleTensor &in); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp index acfcc09cea..871a761b1a 100644 --- a/tests/validation/reference/ReductionOperation.cpp +++ b/tests/validation/reference/ReductionOperation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,7 +53,7 @@ T reduce_operation(T *ptr, int reduce_elements, ReductionOperation op) switch(op) { case ReductionOperation::SUM_SQUARE: - return std::accumulate(ptr, ptr + reduce_elements, 0.f, square()); + return std::accumulate(ptr, ptr + reduce_elements, static_cast(0), square()); default: ARM_COMPUTE_ERROR("Unsupported reduction operation"); } @@ -87,6 +87,7 @@ SimpleTensor reduction_operation(const SimpleTensor &src, const TensorShap } template SimpleTensor reduction_operation(const SimpleTensor &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op); +template SimpleTensor reduction_operation(const SimpleTensor &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op); } // namespace reference } // namespace validation } // namespace test -- cgit v1.2.1