diff options
author | Giuseppe Rossini <giuseppe.rossini@arm.com> | 2019-11-06 14:57:49 +0000 |
---|---|---|
committer | Pablo Marquez <pablo.tello@arm.com> | 2019-12-04 12:45:12 +0000 |
commit | f01201abec0a102f6e7a517971f83fef1eaffd50 (patch) | |
tree | adf844c3c9c8e0e96af9c56de27a094fab515e35 /src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp | |
parent | 6e1791b1bfabc81f08d3117939f6eb5264ed4edf (diff) | |
download | ComputeLibrary-f01201abec0a102f6e7a517971f83fef1eaffd50.tar.gz |
COMPMID-2305: NEDepthwiseConvolution 3x3: support for QUANT8_PER_CHANNEL_SYMM
Change-Id: I9a917cff6a089ce6ae16fb4e6066a4194e2e9487
Signed-off-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2241
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp')
-rw-r--r-- | src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp | 48 |
1 files changed, 0 insertions, 48 deletions
```diff
diff --git a/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp b/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp
index e8f44b6bfd..81eb7b306c 100644
--- a/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp
+++ b/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp
@@ -36,54 +36,6 @@
 #include "impl_base.hpp"
 #include "depthwise_quantized.hpp"
 
-#pragma once
-
-using namespace neon_convolution_kernels;
-using namespace qasymm8;
-
-template <typename T>
-inline T saturating_doubling_high_mul(const T&, const int32_t&);
-
-template <>
-inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32_t& b)
-{
-  return vqrdmulhq_n_s32(a, b);
-}
-
-template <>
-inline int32_t saturating_doubling_high_mul(const int32_t& a, const int32_t& b)
-{
-  return vget_lane_s32(vqrdmulh_n_s32(vdup_n_s32(a), b), 0);
-}
-
-template <typename T>
-inline T rounding_divide_by_exp2(const T& x, const int exponent);
-
-template <>
-inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent)
-{
-  const int32x4_t shift = vdupq_n_s32(-exponent);
-  const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31);
-  const int32x4_t fixed = vqaddq_s32(x, fixup);
-  return vrshlq_s32(fixed, shift);
-}
-
-template <>
-inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent)
-{
-  const int32x2_t shift = vdup_n_s32(-exponent);
-  const int32x2_t fixup = vshr_n_s32(vand_s32(x, shift), 31);
-  const int32x2_t fixed = vqadd_s32(x, fixup);
-  return vrshl_s32(fixed, shift);
-}
-
-template <>
-inline int32_t rounding_divide_by_exp2(const int32_t& x, const int exponent)
-{
-  const int32x2_t xs = vdup_n_s32(x);
-  return vget_lane_s32(rounding_divide_by_exp2(xs, exponent), 0);
-}
-
 namespace depthwise
 {
 template <
```