author    Sang-Hoon Park <sang-hoon.park@arm.com>  2020-07-08 11:06:30 +0100
committer Sang-Hoon Park <sang-hoon.park@arm.com>  2020-07-13 08:03:32 +0000
commit    38515425707b239a1d02d3a4f480a9d97efbb9ba (patch)
tree      1e68a4035e934d3e5ab829c0124bf09757e64a38
parent    3ef9b5fb7c3f393a32977250ce0c4cb5d45ae555 (diff)
download  ComputeLibrary-38515425707b239a1d02d3a4f480a9d97efbb9ba.tar.gz
COMPMID-3531: fix index offset overflows in NEDirectConvolutionLayerKernel
When a large input and kernel are used, the computation of the "max_offset" variable can overflow. Adjust the type of that variable, as well as of the values it is compared with, for consistency. A test that spotted the overflow has been added to the nightly suite.

Change-Id: I2f114e4b49167889a6d3729c71823c089d6f42e3
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3527
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
 src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp | 12
 tests/validation/NEON/DirectConvolutionLayer.cpp         | 27
 2 files changed, 33 insertions(+), 6 deletions(-)
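For context, the failure mode is ordinary 32-bit signed-integer overflow in the byte-offset arithmetic patched below. A minimal, self-contained sketch of the pattern, using purely illustrative sizes (not the strides of the failing use-case) with variable names that merely mirror the kernel's:

```cpp
#include <cstdint>
#include <iostream>
#include <limits>

int main()
{
    // Illustrative values only (bytes / element counts), chosen so that the
    // product exceeds the range of a 32-bit signed int.
    const int64_t input_stride_y = 12000;
    const int64_t input_stride_z = 12000 * 900LL; // one z-slice, ~10.8 MB
    const int64_t input_depth    = 900;
    const int64_t pad_top        = 4;
    const int64_t pad_bottom     = 4;

    // Same expression the kernel uses to compute max_offset.
    const int64_t max_offset = input_stride_z * input_depth
                             - (pad_bottom + pad_top) * input_stride_y;

    std::cout << "max_offset = " << max_offset << '\n';
    std::cout << "fits in 32-bit int: "
              << (max_offset <= std::numeric_limits<int32_t>::max() ? "yes" : "no")
              << '\n';
    // If it does not fit, evaluating the expression with plain int (as before
    // this patch) wraps around, so the std::min(offset, max_offset) clamps in
    // the kernel no longer bound the per-row offsets correctly.
    return 0;
}
```

Widening max_offset, and the in_z-derived offsets it is compared against, to int64_t keeps the whole comparison chain in 64-bit arithmetic, which is what the hunks below do.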
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 91b03687d8..559b67316f 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -979,7 +979,7 @@ public:
// |__________________|
// | pad_bottom |
// |******************|
- const int max_offset = input_stride_z * input_depth - (input->info()->padding().bottom + input->info()->padding().top) * input_stride_y;
+ const int64_t max_offset = input_stride_z * input_depth - (input->info()->padding().bottom + input->info()->padding().top) * input_stride_y;
execute_window_loop(window_k, [&](const Coordinates & id_k) // loop on the batch size
{
@@ -1002,34 +1002,34 @@ public:
for(int x = 0; x < input_width; x += num_elems_read_per_iteration)
{
// z == 0
- auto in_z = static_cast<int>(id.z() * conv_stride_y - conv_pad_top);
+ auto in_z = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top);
in_z = std::min(static_cast<unsigned int>(in_z), static_cast<unsigned int>(input_depth));
auto offset = y_offset + in_z * input_stride_z;
offset = std::min(offset, max_offset);
convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 0 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
// z == 1
- in_z = static_cast<int>(id.z() * conv_stride_y - conv_pad_top + 1);
+ in_z = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top + 1);
in_z = std::min(static_cast<unsigned int>(in_z), static_cast<unsigned int>(input_depth));
offset = y_offset + in_z * input_stride_z;
offset = std::min(offset, max_offset);
convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 1 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
// z == 2
- in_z = static_cast<int>(id.z() * conv_stride_y - conv_pad_top + 2);
+ in_z = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top + 2);
in_z = std::min(static_cast<unsigned int>(in_z), static_cast<unsigned int>(input_depth));
offset = y_offset + in_z * input_stride_z;
offset = std::min(offset, max_offset);
convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 2 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
// z == 3
- in_z = static_cast<int>(id.z() * conv_stride_y - conv_pad_top + 3);
+ in_z = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top + 3);
offset = y_offset + in_z * input_stride_z;
offset = std::min(offset, max_offset);
convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 3 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
// z == 4
- in_z = static_cast<int>(id.z() * conv_stride_y - conv_pad_top + 4);
+ in_z = static_cast<int64_t>(id.z() * conv_stride_y - conv_pad_top + 4);
offset = y_offset + in_z * input_stride_z;
convolve_row1x9_nhwc(in_ptr + offset + x, weights_ptr + 4 * kernel_stride_z + x, input_stride_y, kernel_stride_y, out0, out1, out2, out3);
diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp
index fe61a9d98a..88578ca586 100644
--- a/tests/validation/NEON/DirectConvolutionLayer.cpp
+++ b/tests/validation/NEON/DirectConvolutionLayer.cpp
@@ -98,6 +98,25 @@ const auto data_f16_nightly = combine(data_f16, framework::dataset::make("NumKer
const auto data_precommit = combine(data, framework::dataset::make("NumKernels", { 1 }));
const auto data_precommit9x9 = combine(data9x9, framework::dataset::make("NumKernels", { 4 }));
+/* The following test is from a real use-case that made DirectConvolution
+ * overflow its tensor indexing. This test case uses a separate tolerance
+ * for the following reasons:
+ * - It has been shown to require a generally larger absolute tolerance
+ * for large numbers, or a larger relative tolerance for small numbers.
+ * - Given the first point, and since this test mainly targets index overflow,
+ * a tolerance with a margin is used to avoid unintended test failures
+ * during nightly runs.
+ */
+constexpr AbsoluteTolerance<float> usecase_tolerance_fp32(0.05f);
+
+const auto data_nightly_usecase = combine(framework::dataset::make("InputShape", { TensorShape{ 3U, 800U, 800U } }),
+ combine(framework::dataset::make("StrideX", { 1 }),
+ combine(framework::dataset::make("StrideY", { 1 }),
+ combine(framework::dataset::make("PadX", { 4 }),
+ combine(framework::dataset::make("PadY", { 4 }),
+ combine(framework::dataset::make("KernelSize", 9),
+ framework::dataset::make("NumKernels", { 16 })))))));
+
/** Activation function Dataset*/
const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{
@@ -227,6 +246,14 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDirectConvolutionLayerFixture<float>, framewo
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32);
}
+FIXTURE_DATA_TEST_CASE(RunLargeUsecase, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_usecase, framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, usecase_tolerance_fp32);
+}
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
TEST_SUITE_END() // DirectConvolutionLayer