From daaa1fa506834c9d9ff44e5b38f05781ec416912 Mon Sep 17 00:00:00 2001
From: Pablo Tello
Date: Wed, 25 Oct 2017 11:40:50 +0100
Subject: COMPMID-642: Fixed mismatches in NEDeconvolutionLayer.

Mismatches are caused by an incorrect implementation of round() in
support/ToolchainSupport.h. The current implementation produces
incorrect results on aarch64 Linux targets (e.g. round(0.5) = 0).

Change-Id: I5448d9860f9994745466ba074ade92467508817a
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93032
Tested-by: Kaizen
Reviewed-by: Anthony Barbier
---
 scripts/clang_tidy_rules.py                 | 1 +
 tests/validation/CPP/DeconvolutionLayer.cpp | 9 ++++-----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py
index 72eae6f417..e5e357e59d 100755
--- a/scripts/clang_tidy_rules.py
+++ b/scripts/clang_tidy_rules.py
@@ -89,6 +89,7 @@ def filter_clang_tidy_lines( lines ):
                 ("ConvolutionLayer.cpp" in line and "move constructors should be marked noexcept" in line) or
                 ("parameter 'memory_manager' is unused" in line) or
                 ("parameter 'memory_manager' is copied for each invocation but only used as a const reference" in line) or
+                ("DeconvolutionLayer.cpp" in line and "casting (double + 0.5) to integer leads to incorrect rounding; consider using lround" in line) or
                 "3rdparty" in line):
                 print_context=False
                 continue
diff --git a/tests/validation/CPP/DeconvolutionLayer.cpp b/tests/validation/CPP/DeconvolutionLayer.cpp
index 34f3d10edb..82c2188ade 100644
--- a/tests/validation/CPP/DeconvolutionLayer.cpp
+++ b/tests/validation/CPP/DeconvolutionLayer.cpp
@@ -49,8 +49,8 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
     const int width_scaled  = scaled.shape().x();
     const int height_scaled = scaled.shape().y();
     const int num_2d_slices = src.shape().total_size() / (width_in * height_in);
-    const auto width_ratio  = static_cast<float>(width_in) / static_cast<float>(width_scaled);
-    const auto height_ratio = static_cast<float>(height_in) / static_cast<float>(height_scaled);
+    const float width_ratio  = static_cast<float>(width_in) / static_cast<float>(width_scaled);
+    const float height_ratio = static_cast<float>(height_in) / static_cast<float>(height_scaled);
     const int ax = a.first;  // The number of zeros added to right edge of the input.
     const int ay = a.second; // The number of zeros added to bottom edge of the input.
     const unsigned int kernel_size = weights.shape().x();
@@ -59,7 +59,6 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
     const int transposed_convolution_pady = kernel_size - info.pad().second - 1;
     const int stridex = info.stride().first;
     const int stridey = info.stride().second;
-
     for(int j = 0; j < scaled.num_elements(); ++j)
     {
         scaled[j] = T(0);
     }
@@ -83,8 +82,8 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
         {
             if(in_bounds)
             {
-                const int in_scaled_x = support::cpp11::round(x_src);
-                const int in_scaled_y = support::cpp11::round(y_src);
+                const int in_scaled_x = (x_src < 0.f) ? static_cast<int>(x_src - 0.5f) : static_cast<int>(x_src + 0.5f);
+                const int in_scaled_y = (y_src < 0.f) ? static_cast<int>(y_src - 0.5f) : static_cast<int>(y_src + 0.5f);
                 const T *in = src.data() + offset_slice_in + in_scaled_x + in_scaled_y * width_in;
                 *out = *in;
             }
--
cgit v1.2.1
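
For context (not part of the patch above), here is a minimal standalone C++ sketch of the half-away-from-zero rounding idiom the second hunk switches to, contrasted with the truncating behaviour the commit message reports (round(0.5) returning 0). The helper name round_half_away_from_zero is hypothetical and not taken from the repository.

// Standalone illustration only; not part of the patch above.
#include <cassert>
#include <cstdio>

// Hypothetical helper mirroring the expression used in the patched hunk:
// rounds halfway cases away from zero, e.g. 0.5 -> 1 and -0.5 -> -1.
static int round_half_away_from_zero(float x)
{
    return (x < 0.f) ? static_cast<int>(x - 0.5f) : static_cast<int>(x + 0.5f);
}

int main()
{
    // A plain truncating conversion would give static_cast<int>(0.5f) == 0,
    // which is the kind of mismatch the commit message describes for round(0.5).
    assert(round_half_away_from_zero(0.5f) == 1);
    assert(round_half_away_from_zero(-0.5f) == -1);
    assert(round_half_away_from_zero(2.3f) == 2);
    assert(round_half_away_from_zero(-2.7f) == -3);
    std::printf("round_half_away_from_zero(0.5f) = %d\n", round_half_away_from_zero(0.5f));
    return 0;
}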