From b28905010a95044c7a1c0a5665fc886521a56541 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Tue, 21 May 2019 11:48:37 +0100 Subject: COMPMID-2281: Implement REDUCE_MIN operator for CL Change-Id: I60fd3affad0ab5a2f1cef18aaa46dc2dc448caeb Signed-off-by: Usama Arif Reviewed-on: https://review.mlplatform.org/c/1194 Comments-Addressed: Arm Jenkins Reviewed-by: Pablo Marquez Reviewed-by: Michalis Spyrou Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/reduction_operation.cl | 12 ++++++++ src/core/CL/kernels/CLReductionOperationKernel.cpp | 8 ++++-- src/runtime/CL/functions/CLReductionOperation.cpp | 32 ++++++++++++++++++++++ tests/validation/CL/ReductionOperation.cpp | 4 ++- 4 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/core/CL/cl_kernels/reduction_operation.cl b/src/core/CL/cl_kernels/reduction_operation.cl index 749e3cdaa3..86cf37e491 100644 --- a/src/core/CL/cl_kernels/reduction_operation.cl +++ b/src/core/CL/cl_kernels/reduction_operation.cl @@ -197,6 +197,8 @@ __kernel void reduction_operation_non_parallel_x( #elif defined(ARG_MIN) indx = select(indx, x, ISLESS(in, res)); res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE)); +#elif defined(MIN) + res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE)); #else // !(defined(ARG_MAX) || defined(ARG_MIN)) res += in; #endif // defined(ARG_MAX) || defined(ARG_MIN) @@ -209,7 +211,11 @@ __kernel void reduction_operation_non_parallel_x( #if defined(MEAN) res /= WIDTH; #endif // defined(MEAN) +#if defined(MIN) + *((__global DATA_TYPE_PROMOTED *)output.ptr) = res; +#else // defined(MIN) *((__global uchar *)output.ptr) = convert_uchar(res); +#endif // defined(MIN) #endif // defined(ARG_MAX) || defined(ARG_MIN) } #endif // defined(WIDTH) @@ -263,6 +269,8 @@ __kernel void reduction_operation_y( uint16 cond_conv = CONVERT(ISLESS(in, res), uint16); indx = select(indx, y, cond_conv); res = select(res, in, ISLESS(in, res)); +#elif defined(MIN) + res = select(res, in, ISLESS(in, res)); #else // !(defined(ARG_MAX) || defined(ARG_MIN)) #if defined(SUM_SQUARE) in *= in; @@ -350,6 +358,8 @@ __kernel void reduction_operation_z( uint16 cond_conv = CONVERT(ISLESS(in, res), uint16); indx = select(indx, z, cond_conv); res = select(res, in, ISLESS(in, res)); +#elif defined(MIN) + res = select(res, in, ISLESS(in, res)); #else // !(defined(ARG_MAX) || defined(ARG_MIN)) #if defined(SUM_SQUARE) in *= in; @@ -439,6 +449,8 @@ __kernel void reduction_operation_w( uint16 cond_conv = CONVERT(ISLESS(in, res), uint16); indx = select(indx, w, cond_conv); res = select(res, in, ISLESS(in, res)); +#elif defined(MIN) + res = select(res, in, ISLESS(in, res)); #else // !(defined(ARG_MAX) || defined(ARG_MIN)) #if defined(SUM_SQUARE) in *= in; diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index cb57070612..eb76349a02 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -89,7 +89,7 @@ std::tuple validate_and_configure_window(ITensorInfo *input, ITe const unsigned int num_elems_processed_per_iteration = (is_data_type_quantized(input->data_type()) && (axis == 0)) ? 1 : 16; Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); bool window_changed = false; - const bool is_serial_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN || is_data_type_quantized(input->data_type())); + const bool is_serial_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::MIN || is_data_type_quantized(input->data_type())); switch(axis) { @@ -169,6 +169,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou build_opts.add_option_if(op == ReductionOperation::ARG_IDX_MAX, "-DARG_MAX"); build_opts.add_option_if(op == ReductionOperation::ARG_IDX_MIN, "-DARG_MIN"); build_opts.add_option_if(op == ReductionOperation::PROD, "-DPROD"); + build_opts.add_option_if(op == ReductionOperation::MIN, "-DMIN"); build_opts.add_option_if(input->info()->num_channels() == 2, "-DCOMPLEX"); switch(op) @@ -182,6 +183,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou break; case ReductionOperation::ARG_IDX_MAX: case ReductionOperation::ARG_IDX_MIN: + case ReductionOperation::MIN: break; case ReductionOperation::PROD: build_opts.add_option(("-DOPERATION=product")); @@ -193,7 +195,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou // Create kernel cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange(); std::string kernel_axis_name; - const bool is_serial_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN || is_data_type_quantized(input->info()->data_type())); + const bool is_serial_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::MIN || is_data_type_quantized(input->info()->data_type())); switch(axis) { case 0: @@ -258,7 +260,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - const bool is_serial_op = (_op == ReductionOperation::ARG_IDX_MAX || _op == ReductionOperation::ARG_IDX_MIN || is_data_type_quantized(_input->info()->data_type())); + const bool is_serial_op = (_op == ReductionOperation::ARG_IDX_MAX || _op == ReductionOperation::ARG_IDX_MIN || _op == ReductionOperation::MIN || is_data_type_quantized(_input->info()->data_type())); switch(_reduction_axis) { case 0: diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp index 9f99d2db6f..2e48897323 100644 --- a/src/runtime/CL/functions/CLReductionOperation.cpp +++ b/src/runtime/CL/functions/CLReductionOperation.cpp @@ -105,6 +105,11 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf intermediate_kernel_op = ReductionOperation::PROD; last_kernel_op = ReductionOperation::PROD; break; + case ReductionOperation::MIN: + first_kernel_op = ReductionOperation::MIN; + intermediate_kernel_op = ReductionOperation::MIN; + last_kernel_op = ReductionOperation::MIN; + break; default: ARM_COMPUTE_ERROR("Not supported"); } @@ -179,6 +184,33 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign last_kernel_op = ReductionOperation::PROD; pixelValue = PixelValue(1, input->info()->data_type()); break; + case ReductionOperation::MIN: + first_kernel_op = ReductionOperation::MIN; + intermediate_kernel_op = ReductionOperation::MIN; + last_kernel_op = ReductionOperation::MIN; + switch(input->info()->data_type()) + { + case DataType::F32: + { + pixelValue = PixelValue(std::numeric_limits::max()); + break; + } + case DataType::F16: + { + pixelValue = PixelValue(static_cast(65504.0f)); + break; + } + case DataType::QASYMM8: + { + pixelValue = PixelValue(255, input->info()->data_type(), input->info()->quantization_info()); + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported DataType"); + } + } + break; default: ARM_COMPUTE_ERROR("Not supported"); } diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp index 79308c8229..8fc2911a2b 100644 --- a/tests/validation/CL/ReductionOperation.cpp +++ b/tests/validation/CL/ReductionOperation.cpp @@ -51,7 +51,9 @@ RelativeTolerance rel_tolerance_f16(0.2f); const auto ReductionOperations = framework::dataset::make("ReductionOperation", { ReductionOperation::SUM, - ReductionOperation::PROD + ReductionOperation::PROD, + ReductionOperation::MIN, + }); } // namespace -- cgit v1.2.1