From 36118524d2f387be53dc95e5eebabfcb3ec21f31 Mon Sep 17 00:00:00 2001
From: Pablo Marquez Tello <pablo.tello@arm.com>
Date: Mon, 18 Oct 2021 10:50:31 +0100
Subject: Fix precision issue in ChannelShuffleKernel

* Fixed the issue in NHWC Neon: the reciprocal of K (rK) is now
  computed in double precision instead of float
* Fixed the rounding error in CL: DIV_MOD_UINT now divides x by y
  directly instead of multiplying x by a float reciprocal of y
* Added a new test case to SmallRandomChannelShuffleLayerDataset
  (TensorShape(1U, 1U, 605U, 16U), 5) to reproduce the problem
* Resolves COMPMID-4831

Change-Id: I1613168cad580ca5acefe8ba340130af05cffaff
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6454
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 src/core/CL/cl_kernels/nchw/channel_shuffle.cl        | 4 ++--
 src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp | 2 +-
 tests/datasets/ChannelShuffleLayerDataset.h           | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/core/CL/cl_kernels/nchw/channel_shuffle.cl b/src/core/CL/cl_kernels/nchw/channel_shuffle.cl
index 57d82e1e6f..84396e122f 100644
--- a/src/core/CL/cl_kernels/nchw/channel_shuffle.cl
+++ b/src/core/CL/cl_kernels/nchw/channel_shuffle.cl
@@ -33,7 +33,7 @@
 
 #define DIV_MOD_UINT(x, y, div_res, mod_res)                \
     ({                                                      \
-        div_res = (uint)((x) * (float)(1.0f / (float)(y))); \
+        div_res = (uint)((x)/(y)); \
         uint r  = div_res * (y);                            \
         mod_res = (x)-r;                                    \
     })
@@ -100,4 +100,4 @@ __kernel void channel_shuffle_nchw(TENSOR4D_DECLARATION(src),
     (u1, 0, (__global DATA_TYPE *)(output_ptr + 1 * dst_stride_y));
 }
 
-#endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(NUM_GROUPS) && defined(K) && defined(SRC_DIM_Z)
\ No newline at end of file
+#endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(NUM_GROUPS) && defined(K) && defined(SRC_DIM_Z)
diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp
index 85ae974883..64da1f2262 100644
--- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp
@@ -68,7 +68,7 @@ void channel_shuffle_nhwc(const ITensor *input, ITensor *output, unsigned int nu
 
     const size_t       element_size = input->info()->element_size();
     const unsigned int K            = input->info()->dimension(channel_idx) / num_groups;
-    const float        rK           = 1.f / K;
+    const double       rK           = 1.0 / K;
 
     Iterator in(input, window);
 
diff --git a/tests/datasets/ChannelShuffleLayerDataset.h b/tests/datasets/ChannelShuffleLayerDataset.h
index afab893234..a851480fa1 100644
--- a/tests/datasets/ChannelShuffleLayerDataset.h
+++ b/tests/datasets/ChannelShuffleLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018, 2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -105,6 +105,7 @@ class SmallRandomChannelShuffleLayerDataset final : public ChannelShuffleLayerDa
 public:
     SmallRandomChannelShuffleLayerDataset()
     {
+        add_config(TensorShape(1U, 1U, 605U, 16U), 5);
         add_config(TensorShape(15U, 16U, 4U, 12U), 2);
         add_config(TensorShape(21U, 11U, 12U, 7U), 4);
         add_config(TensorShape(21U, 11U, 12U, 7U), 6);
-- 
cgit v1.2.1