author      Pablo Tello <pablo.tello@arm.com>        2018-01-23 08:31:41 +0000
committer   Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:45:00 +0000
commit      fc1ffe626278e75ba11c803280a6ef46a5bd1ad6 (patch)
tree        4572cd84868fe0322884113b85575bff942ae924 /src/core/NEON/kernels/NERemapKernel.cpp
parent      4df057551194ba079731f097ce840cc320353199 (diff)
download    ComputeLibrary-fc1ffe626278e75ba11c803280a6ef46a5bd1ad6.tar.gz
COMPMID-837: Fixed remap test failures in Valgrind.
Some minor improvements to the test fixture, for example making sure the values in the mapx and mapy tensors are in the range [-5, in_width+5] and [-5, in_height]. Tolerance was changed to 0; no mismatches are expected.

Change-Id: I2fad06defb293bf9fdd1988799b19547c102dee5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118044
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
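For illustration, a minimal sketch of how a fixture could constrain the map values as described above. The helper name fill_maps and its parameters are hypothetical and are not the actual test fixture API:

#include <cstddef>
#include <random>

// Hypothetical helper: fill the coordinate maps with values restricted to
// [-5, in_width + 5] for mapx and [-5, in_height] for mapy, as the commit
// message describes, so out-of-range lookups stay within the padded border.
void fill_maps(float *mapx, float *mapy, std::size_t count, int in_width, int in_height)
{
    std::mt19937                          gen(0); // fixed seed for reproducible tests
    std::uniform_real_distribution<float> dist_x(-5.f, static_cast<float>(in_width) + 5.f);
    std::uniform_real_distribution<float> dist_y(-5.f, static_cast<float>(in_height));

    for(std::size_t i = 0; i < count; ++i)
    {
        mapx[i] = dist_x(gen);
        mapy[i] = dist_y(gen);
    }
}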
Diffstat (limited to 'src/core/NEON/kernels/NERemapKernel.cpp')
-rw-r--r--    src/core/NEON/kernels/NERemapKernel.cpp    50
1 file changed, 23 insertions, 27 deletions
diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp
index 9b8d931b39..66115bb8fc 100644
--- a/src/core/NEON/kernels/NERemapKernel.cpp
+++ b/src/core/NEON/kernels/NERemapKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,7 @@ namespace
{
inline int32x4_t offset_nearest_interpolation(const float *mapx_ptr, const float *mapy_ptr, const float32x4_t &width, const float32x4_t &height, const int32x4_t &stride)
{
- static const float32x4_t lowerxy = vdupq_n_f32(-1.0f);
+ const float32x4_t lowerxy = vdupq_n_f32(-1.f);
float32x4_t x = vld1q_f32(mapx_ptr);
float32x4_t y = vld1q_f32(mapy_ptr);
@@ -113,11 +113,10 @@ void NERemapKernel::configure(const ITensor *input, const ITensor *map_x, const
AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom);
AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal mapx_access(map_x->info(), 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal mapy_access(map_y->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access,
- AccessWindowRectangle(map_x->info(), 0, 0, num_elems_processed_per_iteration, 1),
- AccessWindowRectangle(map_y->info(), 0, 0, num_elems_processed_per_iteration, 1),
- output_access);
+ update_window_and_padding(win, input_access, mapx_access, mapy_access, output_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
@@ -152,27 +151,24 @@ void NERemapKernel::remap_nearest(const Window &window)
const int32x4_t offset2 = offset_nearest_interpolation(mapx_ptr + 8, mapy_ptr + 8, width, height, in_stride);
const int32x4_t offset3 = offset_nearest_interpolation(mapx_ptr + 12, mapy_ptr + 12, width, height, in_stride);
- uint8x8_t tmp0 = vdup_n_u8(0);
- tmp0 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset0, 0)], tmp0, 0);
- tmp0 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset0, 1)], tmp0, 1);
- tmp0 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset0, 2)], tmp0, 2);
- tmp0 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset0, 3)], tmp0, 3);
- tmp0 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset1, 0)], tmp0, 4);
- tmp0 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset1, 1)], tmp0, 5);
- tmp0 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset1, 2)], tmp0, 6);
- tmp0 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset1, 3)], tmp0, 7);
-
- uint8x8_t tmp1 = vdup_n_u8(0);
- tmp1 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset2, 0)], tmp1, 0);
- tmp1 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset2, 1)], tmp1, 1);
- tmp1 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset2, 2)], tmp1, 2);
- tmp1 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset2, 3)], tmp1, 3);
- tmp1 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset3, 0)], tmp1, 4);
- tmp1 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset3, 1)], tmp1, 5);
- tmp1 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset3, 2)], tmp1, 6);
- tmp1 = vset_lane_u8(in_ptr[vgetq_lane_s32(offset3, 3)], tmp1, 7);
-
- vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
+ uint8x16_t tmp = vdupq_n_u8(0);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 0)], tmp, 0);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 1)], tmp, 1);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 2)], tmp, 2);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 3)], tmp, 3);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 0)], tmp, 4);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 1)], tmp, 5);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 2)], tmp, 6);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 3)], tmp, 7);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 0)], tmp, 8);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 1)], tmp, 9);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 2)], tmp, 10);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 3)], tmp, 11);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 0)], tmp, 12);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 1)], tmp, 13);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 2)], tmp, 14);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 3)], tmp, 15);
+ vst1q_u8(out.ptr(), tmp);
},
in, out, mapx, mapy);
}
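For reference, a scalar sketch of the per-element work that offset_nearest_interpolation and the gather above perform: clamp the map coordinates to [-1, width] / [-1, height] (those positions fall on the border pixels the kernel's border handling has already filled) and read the input at x + y * stride. The helper remap_nearest_scalar is an illustrative simplification, not code from the library:

#include <algorithm>
#include <cstdint>

// Illustrative scalar equivalent of one lane of the NEON path above.
inline uint8_t remap_nearest_scalar(const uint8_t *in_ptr, float map_x, float map_y,
                                    float width, float height, int stride)
{
    // Clamp to [-1, width] and [-1, height], mirroring the lowerxy / width /
    // height vectors used by offset_nearest_interpolation.
    const float x = std::max(-1.f, std::min(map_x, width));
    const float y = std::max(-1.f, std::min(map_y, height));

    // Truncate to integer coordinates and compute the linear offset into the input.
    const int offset = static_cast<int>(x) + static_cast<int>(y) * stride;
    return in_ptr[offset];
}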