diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2023-07-18 15:57:23 +0100 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2023-08-14 09:57:15 +0000 |
commit | 338ef4699735db16d346e52f05b822f8fd5e3263 (patch) | |
tree | 91ffe06a4a3960ee577673a47be0a99629012679 /src/core/CL/cl_kernels/common/reduction_operation.cl | |
parent | 633ebd18d982496a6a626df3b8336f610ab09eb4 (diff) | |
download | ComputeLibrary-338ef4699735db16d346e52f05b822f8fd5e3263.tar.gz |
Optimize CLReduce for Min/Max Axis=0
Resolves: COMPMID-6400
Change-Id: Id9935f9727f77a824afc75c35f044e3f5c173e0d
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10120
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/common/reduction_operation.cl')
-rw-r--r-- | src/core/CL/cl_kernels/common/reduction_operation.cl | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/src/core/CL/cl_kernels/common/reduction_operation.cl b/src/core/CL/cl_kernels/common/reduction_operation.cl
index 9f2c6e23b5..1cb6664078 100644
--- a/src/core/CL/cl_kernels/common/reduction_operation.cl
+++ b/src/core/CL/cl_kernels/common/reduction_operation.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,6 +47,8 @@
 #define sum(in0, in1, size) (in0 + SUM_REDUCE(in1, size))
 #define square_sum(in0, in1, size) (in0 + SUM_REDUCE((in1 * in1), size))
 #define product(in0, in1, size) (in0 * PROD_REDUCE(in1, size))
+#define min_(in0, in1, size) (min(in0, MIN_REDUCE(in1, size)))
+#define max_(in0, in1, size) (max(in0, MAX_REDUCE(in1, size)))
 
 /** This kernel performs parallel reduction given an operation on x-axis.
  *
@@ -79,12 +81,15 @@ __kernel void reduction_operation_x(
     __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + y * input_stride_y + z * input_stride_z;
     __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + y * output_stride_y + z * output_stride_z;
 
+#if !defined(MIN) && !defined(MAX)
 #if defined(PROD)
     DATA_TYPE res = (DATA_TYPE)1;
 #else // defined(PROD)
     DATA_TYPE res = (DATA_TYPE)0;
 #endif // defined(PROD)
-
+#else // #if !defined(MIN) && !defined(MAX)
+    DATA_TYPE res = *((__global DATA_TYPE *)input_addr);
+#endif // #if defined(MIN) || defined(MAX)
 
     int x = 0;
     for(; x <= (WIDTH - VEC_SIZE); x += VEC_SIZE)