COMPMID-1736: Fixed out-of-bound write in CLIm2Col

The issue was related to CLIm2Col when the number of input channels was less than the number of elements processed by each thread. The bug has been fixed in the validate_and_configure_window() function by setting the correct number of elements accessed in the output tensor. Also fixed an issue in GEMM3D when there is a single output channel.

Change-Id: I094292d0c7662599c4a4c3916ec5f5821df5faef
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-11-05 14:26:32 +0000
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-11-08 13:31:33 +0000
commit: 3139f03a74ede3b3bd7cfc6ff219e6c9bc556632 (patch)
tree: cd67b8c9ec1688fcf739c576553b10e435e6938e /src/core
parent: 421405b6a21b124288a750e2da26dc01eb7391cb (diff)
download: ComputeLibrary-3139f03a74ede3b3bd7cfc6ff219e6c9bc556632.tar.gz
4 files changed, 20 insertions, 11 deletions
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
index 73b1d41eb1..b2fb3e0278 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
@@ -112,7 +112,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe
     unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
     unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
     bool          reinterpret_input_as_3d             = reshape_info.reinterpret_input_as_3d();
-    bool          reinterpret_output_as_3d            = (reshape_info.depth_output_gemm3d() != 1);
+    bool          reinterpret_output_as_3d            = (reshape_info.depth_output_gemm3d() != 0);
 
     Window win{};
     Window win_out{};
@@ -227,7 +227,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC
     _input1                   = input1;
     _output                   = output;
     _reinterpret_input_as_3d  = reshape_info.reinterpret_input_as_3d();
-    _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1);
+    _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0);
 
     // In case both input and output have to be reinterpreted as 3D tensors,
     // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index 715edae606..5e02dda9e3 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -110,7 +110,7 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu
     unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
     unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
     bool           reinterpret_input_as_3d             = reshape_info.reinterpret_input_as_3d();
-    bool           reinterpret_output_as_3d            = (reshape_info.depth_output_gemm3d() != 1);
+    bool           reinterpret_output_as_3d            = (reshape_info.depth_output_gemm3d() != 0);
 
     // In case both input and output have to be reinterpreted as 3D tensors,
     // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
@@ -227,7 +227,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
     _input1                   = input1;
     _output                   = output;
     _reinterpret_input_as_3d  = reshape_info.reinterpret_input_as_3d();
-    _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1);
+    _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0);
 
     // In case both input and output have to be reinterpreted as 3D tensors,
     // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 0ba0d0e2f9..54ef23f2a2 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -109,7 +109,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
         const int yin_end   = input->dimension(1);
 
         const int xout_start = 0;
-        const int xout_end   = input->dimension(0) < num_elems_processed_per_iteration ? ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration) : output->dimension(0);
+        const int xout_end   = input->dimension(0) < num_elems_processed_per_iteration ? output->dimension(0) + (num_elems_processed_per_iteration - input->dimension(0)) : output->dimension(0);
         const int yout_start = 0;
         const int yout_end   = output->dimension(1);
 
diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp
index 8bb6d8e173..ea9ba776a9 100644
--- a/src/core/utils/quantization/AsymmHelpers.cpp
+++ b/src/core/utils/quantization/AsymmHelpers.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,21 +30,30 @@
 using namespace arm_compute::quantization;
 
 constexpr int64_t fixed_point_one_Q0 = (1ll << 31);
+constexpr float   epsilon            = 0.00001f;
 
-arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_less_than_one(double multiplier,
+arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_less_than_one(float multiplier,
                                                                                             int   *quant_multiplier,
                                                                                             int   *right_shift)
 {
     ARM_COMPUTE_RETURN_ERROR_ON(quant_multiplier == nullptr);
     ARM_COMPUTE_RETURN_ERROR_ON(right_shift == nullptr);
-    ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 0);
-    ARM_COMPUTE_RETURN_ERROR_ON(multiplier >= 1);
-    if(multiplier == 0)
+    ARM_COMPUTE_RETURN_ERROR_ON(multiplier < -epsilon);
+    ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f + epsilon);
+    if(std::fabs(1.0f - multiplier) < epsilon)
+    {
+        *quant_multiplier = 1;
+        *right_shift      = 0;
+        return arm_compute::Status{};
+    }
+
+    if(std::fabs(0.0f - multiplier) < epsilon)
     {
         *quant_multiplier = 0;
         *right_shift      = 0;
         return arm_compute::Status{};
     }
+
     const double q = std::frexp(multiplier, right_shift);
     *right_shift *= -1;
     auto q_fixed = static_cast<int64_t>(round(q * fixed_point_one_Q0));
@@ -61,7 +70,7 @@ arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_le
     return arm_compute::Status{};
 }
 
-arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(double multiplier,
+arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(float multiplier,
                                                                                                int   *quantized_multiplier,
                                                                                                int   *left_shift)
 {
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2018-11-05 14:26:32 +0000
committer	Gian Marco Iodice <gianmarco.iodice@arm.com>	2018-11-08 13:31:33 +0000
commit	3139f03a74ede3b3bd7cfc6ff219e6c9bc556632 (patch)
tree	cd67b8c9ec1688fcf739c576553b10e435e6938e /src/core
parent	421405b6a21b124288a750e2da26dc01eb7391cb (diff)
download	ComputeLibrary-3139f03a74ede3b3bd7cfc6ff219e6c9bc556632.tar.gz