diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2018-10-11 17:33:32 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:55:45 +0000 |
commit | 8aaf93e8c12ce93d3d0082d4f4b70376f15536da (patch) | |
tree | 0922f3dde6fafae181e101df315ef36007801850 /src/core/CL/cl_kernels/reduction_operation.cl | |
parent | c93691717a6e7ca67e32b4dedd233b8c63b6daf2 (diff) | |
download | ComputeLibrary-8aaf93e8c12ce93d3d0082d4f4b70376f15536da.tar.gz |
COMPMID-1632 Add CLL2NormalizationLayer for NHWC and FP32
Change-Id: Iae22554d5fe893fd22a000eab5bfd8275ea06eb3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154102
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/reduction_operation.cl')
-rw-r--r-- | src/core/CL/cl_kernels/reduction_operation.cl | 21 |
1 files changed, 18 insertions, 3 deletions
```diff
diff --git a/src/core/CL/cl_kernels/reduction_operation.cl b/src/core/CL/cl_kernels/reduction_operation.cl
index c1be4472a7..d76e12ac04 100644
--- a/src/core/CL/cl_kernels/reduction_operation.cl
+++ b/src/core/CL/cl_kernels/reduction_operation.cl
@@ -189,7 +189,12 @@ __kernel void reduction_operation_y(
 
     for(unsigned int y = 0; y < HEIGHT; ++y)
     {
-        res += CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+        VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
+        in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+#if defined(SUM_SQUARE)
+        in *= in;
+#endif // SQRSUM
+        res += in;
     }
 
 #if defined(MEAN)
@@ -236,7 +241,12 @@ __kernel void reduction_operation_z(
 
     for(unsigned int z = 0; z < DEPTH; ++z)
     {
-        res += CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+        VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
+        in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+#if defined(SUM_SQUARE)
+        in *= in;
+#endif // SQRSUM
+        res += in;
     }
 
 #if defined(MEAN)
@@ -288,7 +298,12 @@ __kernel void reduction_operation_w(
 
     for(unsigned int w = 0; w < BATCH; ++w)
     {
-        res += CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+        VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
+        in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+#if defined(SUM_SQUARE)
+        in *= in;
+#endif // SQRSUM
+        res += in;
     }
 
 #if defined(MEAN)
```