From 05187f2521ad50478e389a439fca7186bca6d349 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 25 Nov 2019 18:39:27 +0000 Subject: IVGBENCH-1661: Segfault on FP16 for NEON Failures were caused by integer overflows, as mixed calculations between int32_t and uint32_t were taking place. Change-Id: I72efb331c7b3093a71cf83639eb7e89f1c2c29fc Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/2356 Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins (cherry picked from commit 0922dbbfe403716e79541115ac17a1544022836f) Reviewed-on: https://review.mlplatform.org/c/2358 --- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index 58fa2d6b41..aaeb33f120 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -977,8 +977,8 @@ void NEPoolingLayerKernel::poolingMxN_f16_nchw(const Window &window_input, const int x = 0; for(; x <= (pool_size_x - 8); x += 8) { - const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + - (y - pool_pad_top) * _input->info()->strides_in_bytes().y())); + const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + + (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().y()))); // Get power of 2 in case of l2 pooling and accumulate if(pooling_type == PoolingType::L2) @@ -994,7 +994,8 @@ void NEPoolingLayerKernel::poolingMxN_f16_nchw(const Window &window_input, const // Leftover for loop for(; x < pool_size_x; ++x) { - float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * 
_input->info()->strides_in_bytes().y())); + float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + + (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().y()))); // Get power of 2 in case of l2 pooling if(pooling_type == PoolingType::L2) @@ -1026,16 +1027,17 @@ void NEPoolingLayerKernel::poolingMxN_f16_nchw(const Window &window_input, const int x = 0; for(; x <= (pool_size_x - 8); x += 8) { - const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + - (y - pool_pad_top) * _input->info()->strides_in_bytes().y())); + const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + + (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().y()))); vres = vmaxq_f16(vres, data); } // Leftover for loop for(; x < pool_size_x; ++x) { - const float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y())); - res = std::max(res, data); + const float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + + (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().y()))); + res = std::max(res, data); } } @@ -1111,8 +1113,8 @@ void NEPoolingLayerKernel::poolingMxN_f16_nhwc(const Window &window_input, const { for(int x = pool_start_x; x < pool_end_x; ++x) { - const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() + - (y - pool_pad_top) * _input->info()->strides_in_bytes().z())); + const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().y()) + + (y - pool_pad_top) * 
static_cast<int>(_input->info()->strides_in_bytes().z()))); // Get power of 2 in case of l2 pooling and accumulate if(pooling_type == PoolingType::L2) @@ -1136,8 +1138,8 @@ void NEPoolingLayerKernel::poolingMxN_f16_nhwc(const Window &window_input, const { for(int x = pool_start_x; x < pool_end_x; ++x) { - const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() + - (y - pool_pad_top) * _input->info()->strides_in_bytes().z())); + const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().y()) + + (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().z()))); vres = vmaxq_f16(vres, data); } } -- cgit v1.2.1