COMPMID-3531: fix index offset overflows in NEDirectConvolutionLayerKernel

When a large input and kernel are used, the computation of the "max_offset" variable can overflow. Adjust the type of that variable, as well as the type of the variable it is compared with, for consistency. The test that spotted the overflow is added to the nightly suite. Change-Id: I2f114e4b49167889a6d3729c71823c089d6f42e3 Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3527 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Manuel Bottini <manuel.bottini@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Sang-Hoon Park <sang-hoon.park@arm.com> 2020-07-08 11:06:30 +0100
committer: Sang-Hoon Park <sang-hoon.park@arm.com> 2020-07-13 08:03:32 +0000
commit: 38515425707b239a1d02d3a4f480a9d97efbb9ba (patch)
tree: 1e68a4035e934d3e5ab829c0124bf09757e64a38 /src
parent: 3ef9b5fb7c3f393a32977250ce0c4cb5d45ae555 (diff)
download: ComputeLibrary-38515425707b239a1d02d3a4f480a9d97efbb9ba.tar.gz
1 files changed, 6 insertions, 6 deletions
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 91b03687d8..559b67316f 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -979,7 +979,7 @@ public:
         //  |__________________|
         //  |     pad_bottom   |
         //  |******************|
-        const int max_offset = input_stride_z * input_depth - (input->info()->padding().bottom + input->info()->padding().top) * input_stride_y;
+        const int64_t max_offset = input_stride_z * input_depth - (input->info()->padding().bottom + input->info()->padding().top) * input_stride_y;
         execute_window_loop(window_k, [&](const Coordinates & id_k) // loop on the batch size
         {
 
@@ -1002,34 +1002,34 @@ public:
                 for(int x = 0; x < input_width; x += num_elems_read_per_iteration)
                 {
                     // z == 0
-                    auto in_z   = static_cast<int>(id.z() * conv_stride_y - conv_pad_top);
+                    auto in_z   = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top);
                     in_z        = std::min(static_cast<unsigned int>(in_z), static_cast<unsigned int>(input_depth));
                     auto offset = y_offset + in_z * input_stride_z;
                     offset      = std::min(offset, max_offset);
                     convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 0 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
 
                     // z == 1
-                    in_z   = static_cast<int>(id.z() * conv_stride_y - conv_pad_top + 1);
+                    in_z   = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top + 1);
                     in_z   = std::min(static_cast<unsigned int>(in_z), static_cast<unsigned int>(input_depth));
                     offset = y_offset + in_z * input_stride_z;
                     offset = std::min(offset, max_offset);
                     convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 1 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
 
                     // z == 2
-                    in_z   = static_cast<int>(id.z() * conv_stride_y - conv_pad_top + 2);
+                    in_z   = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top + 2);
                     in_z   = std::min(static_cast<unsigned int>(in_z), static_cast<unsigned int>(input_depth));
                     offset = y_offset + in_z * input_stride_z;
                     offset = std::min(offset, max_offset);
                     convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 2 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
 
                     // z == 3
-                    in_z   = static_cast<int>(id.z() * conv_stride_y - conv_pad_top + 3);
+                    in_z   = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top + 3);
                     offset = y_offset + in_z * input_stride_z;
                     offset = std::min(offset, max_offset);
                     convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 3 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
 
                     // z == 4
-                    in_z   = static_cast<int>(id.z() * conv_stride_y - conv_pad_top + 4);
+                    in_z   = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top + 4);
                     offset = y_offset + in_z * input_stride_z;
                     convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 4 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
author	Sang-Hoon Park <sang-hoon.park@arm.com>	2020-07-08 11:06:30 +0100
committer	Sang-Hoon Park <sang-hoon.park@arm.com>	2020-07-13 08:03:32 +0000
commit	38515425707b239a1d02d3a4f480a9d97efbb9ba (patch)
tree	1e68a4035e934d3e5ab829c0124bf09757e64a38 /src
parent	3ef9b5fb7c3f393a32977250ce0c4cb5d45ae555 (diff)
download	ComputeLibrary-38515425707b239a1d02d3a4f480a9d97efbb9ba.tar.gz