diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_conv')
4 files changed, 15 insertions, 12 deletions
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst.hpp index 8d1a8698c3..57fa11151b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst.hpp @@ -275,11 +275,11 @@ class DepthwiseDepthfirst : public DepthwiseCommon<TInput, TWeight, TOutput> for (int start_out_j = 0; start_out_j < static_cast<int>(output_width);) { const int start_in_j = start_out_j * m_strat->get_stride_cols() - this->m_args.padding.left; - const int pad_left = -std::min(0, start_in_j); + int pad_left = std::min(0, start_in_j); // Compute how many output tiles we can compute with the direct kernel. int n_direct_tiles = 0; - if (!pad_top && !pad_bottom && !pad_left) + if (!pad_top && !pad_bottom && !pad_left) { // Determine the maximum number of tiles we could handle. n_direct_tiles = (output_width - start_out_j) / m_strat->get_output_cols(); @@ -323,7 +323,7 @@ class DepthwiseDepthfirst : public DepthwiseCommon<TInput, TWeight, TOutput> end_out_j - start_out_j, static_cast<int>(output_width) - start_out_j ); - + pad_left *= -1; // Construct the input pointer array - fill the array with pointers to // the input buffer and then fill in the required values. for (auto i = pad_top; i < m_strat->get_input_rows() - pad_bottom; i++) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp index f288a4119c..530ee06080 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -86,12 +86,13 @@ void a64_s8q_nhwc_avg_generic_depthfirst_impl( f_rescale_value *= 2.0f; } - rescale_value = static_cast<int32_t>(round(f_rescale_value * static_cast<float>(1ll << 31))); - if (static_cast<int64_t>(rescale_value) == (1ll << 31)) + int64_t large_rescale_value = round(f_rescale_value * static_cast<float>(1ll << 31)); + if (large_rescale_value == (1ll << 31)) { shift_value++; - rescale_value >>= 1; + large_rescale_value >>= 1; } + rescale_value = static_cast<int32_t>(large_rescale_value); } // Combine together the rescale value for the requantization and the scaling diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp index 2c8a29248d..baf23b4a4d 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp @@ -84,12 +84,13 @@ void a64_u8_nhwc_avg_generic_depthfirst_impl( f_rescale_value *= 2.0f; } - rescale_value = static_cast<int32_t>(round(f_rescale_value * static_cast<float>(1ll << 31))); - if (static_cast<int64_t>(rescale_value) == (1ll << 31)) + int64_t large_rescale_value = round(f_rescale_value * static_cast<float>(1ll << 31)); + if (large_rescale_value == (1ll << 31)) { shift_value++; - rescale_value >>= 1; + large_rescale_value >>= 1; } + rescale_value = static_cast<int32_t>(large_rescale_value); } __asm__ __volatile__( diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp index a57fe6df68..11376e0fe2 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -87,12 +87,13 @@ void a64_u8q_nhwc_avg_generic_depthfirst_impl( f_rescale_value *= 2.0f; } - rescale_value = static_cast<int32_t>(round(f_rescale_value * static_cast<float>(1ll << 31))); - if (static_cast<int64_t>(rescale_value) == (1ll << 31)) + int64_t large_rescale_value = round(f_rescale_value * static_cast<float>(1ll << 31)); + if (large_rescale_value == (1ll << 31)) { shift_value++; - rescale_value >>= 1; + large_rescale_value >>= 1; } + rescale_value = static_cast<int32_t>(large_rescale_value); } |