aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/helpers.h
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2021-04-28 16:11:51 +0100
committerGiorgio Arena <giorgio.arena@arm.com>2021-04-29 17:06:28 +0000
commit3ecf9fefa6f6299a0736599f150d4791cc8345d9 (patch)
tree749d73c8496d152600f528e442b4e4f11a81621e /src/core/CL/cl_kernels/helpers.h
parent8ed3ac11352d1ebb2c69787432fa5893997f1c50 (diff)
downloadComputeLibrary-3ecf9fefa6f6299a0736599f150d4791cc8345d9.tar.gz
Remove OpenCL padding: CLReductionOperationKernel
Change the parallel implementation across the X, now every thread computes one row Add missing test for MEAN_SUM Make reduction on any axis != 0 work with num_channels > 1 Resolve COMPMID-3917 Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Change-Id: Ib0f99540104e3c253bcd1ea637833db533f5e76e Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5522 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/helpers.h')
-rw-r--r--src/core/CL/cl_kernels/helpers.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/src/core/CL/cl_kernels/helpers.h b/src/core/CL/cl_kernels/helpers.h
index 2eae5ee1c9..6cd76373d2 100644
--- a/src/core/CL/cl_kernels/helpers.h
+++ b/src/core/CL/cl_kernels/helpers.h
@@ -567,6 +567,16 @@
#define SUM_REDUCE_STR(x, size) sum_reduce_##size(x)
#define SUM_REDUCE(x, size) SUM_REDUCE_STR(x, size)
+#define prod_reduce_1(x) (x)
+#define prod_reduce_2(x) ((x).s0) * ((x).s1)
+#define prod_reduce_3(x) prod_reduce_2((x).s01) * ((x).s2)
+#define prod_reduce_4(x) prod_reduce_2((x).s01) * prod_reduce_2((x).s23)
+#define prod_reduce_8(x) prod_reduce_4((x).s0123) * prod_reduce_4((x).s4567)
+#define prod_reduce_16(x) prod_reduce_8((x).s01234567) * prod_reduce_8((x).s89ABCDEF)
+
+#define PROD_REDUCE_STR(x, size) prod_reduce_##size(x)
+#define PROD_REDUCE(x, size) PROD_REDUCE_STR(x, size)
+
#define max_reduce_1(x) (x)
#define max_reduce_2(x) max(((x).s0), ((x).s1))
#define max_reduce_3(x) max(max_reduce_2((x).s01), ((x).s2))