From 338ef4699735db16d346e52f05b822f8fd5e3263 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Tue, 18 Jul 2023 15:57:23 +0100 Subject: Optimize CLReduce for Min/Max Axis=0 Resolves: COMPMID-6400 Change-Id: Id9935f9727f77a824afc75c35f044e3f5c173e0d Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10120 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- src/core/CL/cl_kernels/common/reduction_operation.cl | 9 +++++++-- src/core/CL/cl_kernels/helpers.h | 10 ++++++++++ src/core/CL/kernels/CLReductionOperationKernel.cpp | 3 +++ src/core/Utils.cpp | 2 +- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/core/CL/cl_kernels/common/reduction_operation.cl b/src/core/CL/cl_kernels/common/reduction_operation.cl index 9f2c6e23b5..1cb6664078 100644 --- a/src/core/CL/cl_kernels/common/reduction_operation.cl +++ b/src/core/CL/cl_kernels/common/reduction_operation.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,6 +47,8 @@ #define sum(in0, in1, size) (in0 + SUM_REDUCE(in1, size)) #define square_sum(in0, in1, size) (in0 + SUM_REDUCE((in1 * in1), size)) #define product(in0, in1, size) (in0 * PROD_REDUCE(in1, size)) +#define min_(in0, in1, size) (min(in0, MIN_REDUCE(in1, size))) +#define max_(in0, in1, size) (max(in0, MAX_REDUCE(in1, size))) /** This kernel performs parallel reduction given an operation on x-axis. * @@ -79,12 +81,15 @@ __kernel void reduction_operation_x( __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + y * input_stride_y + z * input_stride_z; __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + y * output_stride_y + z * output_stride_z; +#if !defined(MIN) && !defined(MAX) #if defined(PROD) DATA_TYPE res = (DATA_TYPE)1; #else // defined(PROD) DATA_TYPE res = (DATA_TYPE)0; #endif // defined(PROD) - +#else // #if !defined(MIN) && !defined(MAX) + DATA_TYPE res = *((__global DATA_TYPE *)input_addr); +#endif // #if defined(MIN) || defined(MAX) int x = 0; for(; x <= (WIDTH - VEC_SIZE); x += VEC_SIZE) diff --git a/src/core/CL/cl_kernels/helpers.h b/src/core/CL/cl_kernels/helpers.h index e0fd8dc3e3..b2ceaf92f3 100644 --- a/src/core/CL/cl_kernels/helpers.h +++ b/src/core/CL/cl_kernels/helpers.h @@ -824,6 +824,16 @@ #define MAX_REDUCE_STR(x, size) max_reduce_##size(x) #define MAX_REDUCE(x, size) MAX_REDUCE_STR(x, size) +#define min_reduce_1(x) (x) +#define min_reduce_2(x) min(((x).s0), ((x).s1)) +#define min_reduce_3(x) min(min_reduce_2((x).s01), ((x).s2)) +#define min_reduce_4(x) min(min_reduce_2((x).s01), min_reduce_2((x).s23)) +#define min_reduce_8(x) min(min_reduce_4((x).s0123), min_reduce_4((x).s4567)) +#define min_reduce_16(x) min(min_reduce_8((x).s01234567), min_reduce_8((x).s89ABCDEF)) + +#define MIN_REDUCE_STR(x, size) min_reduce_##size(x) +#define MIN_REDUCE(x, size) MIN_REDUCE_STR(x, size) + #define VECTOR_DECLARATION(name) \ __global uchar *name##_ptr, \ uint name##_stride_x, \ diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index ee60b8e1df..e5cfb997ca 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -143,7 +143,10 @@ void CLReductionOperationKernel::configure(const CLCompileContext &compile_conte build_opts.add_option(("-DOPERATION=sum")); break; case ReductionOperation::MIN: + build_opts.add_option(("-DOPERATION=min_")); + break; case ReductionOperation::MAX: + build_opts.add_option(("-DOPERATION=max_")); break; case ReductionOperation::PROD: build_opts.add_option(("-DOPERATION=product")); diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index 0701ee7c90..1ca7adb3a8 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -390,7 +390,7 @@ bool needs_serialized_reduction(ReductionOperation op, DataType dt, unsigned int const bool is_quantized_type = is_data_type_quantized(dt); const bool is_first_dim = (axis == 0); - return !is_first_dim || is_min_max || is_quantized_type; + return !is_first_dim || (is_quantized_type && !is_min_max); } QuantizationInfo get_softmax_output_quantization_info(DataType input_type, bool is_log) -- cgit v1.2.1