From 1a0a4bc78a12e85e1bd6b3207f244c91566ebdce Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Thu, 12 Nov 2020 17:41:32 +0000 Subject: COMPMID-3851: Fix regression on NEDepthwiseConvolutionLayerNativeKernel The exit condition of some for loops in the quantized version of the kernel with depth_multiplier=1 is decided during compilation to fix a performance issue. Change-Id: I849b3d63b2a2cf5eb374ae681898ae1c296fb4fe Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4392 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp index 90a81b30c9..87315909d8 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp @@ -372,7 +372,7 @@ void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *w out_of_bound_vector; const auto weights_vals = wrapper::vload(reinterpret_cast(weights_ptr + w * run_info.weights_stride_y) + x); - for(size_t i = 0; i < run_info.x_step; ++i) + for(size_t i = 0; i < element_per_vector; ++i) { acc.at(i) += input_vals[i] * weights_vals[i]; in_sum.at(i) += input_vals[i]; @@ -387,7 +387,7 @@ void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *w } VectorType out_vals = wrapper::vdup_n(static_cast(0), TagType{}); - for(size_t i = 0; i < run_info.x_step; ++i) + for(size_t i = 0; i < element_per_vector; ++i) { acc.at(i) -= in_sum.at(i) * weights_qoffset; acc.at(i) -= we_sum.at(i) * input_qoffset; -- cgit v1.2.1