about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/reduction_operation.cl
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2018-10-11 17:33:32 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:55:45 +0000
commit 8aaf93e8c12ce93d3d0082d4f4b70376f15536da (patch)
tree 0922f3dde6fafae181e101df315ef36007801850 /src/core/CL/cl_kernels/reduction_operation.cl
parent c93691717a6e7ca67e32b4dedd233b8c63b6daf2 (diff)
download ComputeLibrary-8aaf93e8c12ce93d3d0082d4f4b70376f15536da.tar.gz
COMPMID-1632 Add CLL2NormalizationLayer for NHWC and FP32
Change-Id: Iae22554d5fe893fd22a000eab5bfd8275ea06eb3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154102
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/reduction_operation.cl')
-rw-r--r-- src/core/CL/cl_kernels/reduction_operation.cl 21
1 files changed, 18 insertions, 3 deletions
diff --git a/src/core/CL/cl_kernels/reduction_operation.cl b/src/core/CL/cl_kernels/reduction_operation.cl
index c1be4472a7..d76e12ac04 100644
--- a/src/core/CL/cl_kernels/reduction_operation.cl
+++ b/src/core/CL/cl_kernels/reduction_operation.cl
@@ -189,7 +189,12 @@ __kernel void reduction_operation_y(
for(unsigned int y = 0; y < HEIGHT; ++y)
{
- res += CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+ VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
+ in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+#if defined(SUM_SQUARE)
+ in *= in;
+#endif // SUM_SQUARE
+ res += in;
}
#if defined(MEAN)
@@ -236,7 +241,12 @@ __kernel void reduction_operation_z(
for(unsigned int z = 0; z < DEPTH; ++z)
{
- res += CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+ VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
+ in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+#if defined(SUM_SQUARE)
+ in *= in;
+#endif // SUM_SQUARE
+ res += in;
}
#if defined(MEAN)
@@ -288,7 +298,12 @@ __kernel void reduction_operation_w(
for(unsigned int w = 0; w < BATCH; ++w)
{
- res += CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+ VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
+ in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+#if defined(SUM_SQUARE)
+ in *= in;
+#endif // SUM_SQUARE
+ res += in;
}
#if defined(MEAN)