diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/NEON/wrapper/intrinsics/cvt.h | 20 | ||||
-rw-r--r-- | src/core/utils/ScaleUtils.cpp | 28 | ||||
-rw-r--r-- | src/core/utils/ScaleUtils.h | 6 |
3 files changed, 41 insertions, 13 deletions
diff --git a/src/core/NEON/wrapper/intrinsics/cvt.h b/src/core/NEON/wrapper/intrinsics/cvt.h
index e52e3dd0c4..baad1319b2 100644
--- a/src/core/NEON/wrapper/intrinsics/cvt.h
+++ b/src/core/NEON/wrapper/intrinsics/cvt.h
@@ -59,19 +59,35 @@ VCVT_TO_F16_IMPL(float16x4_t, float32x4_t, vcvt, f16, f32)
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 template <typename T>
-inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint32x4_t>::type
+inline typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, uint32_t>::value, uint32x4_t >::type
 vcvt(const float32x4_t &a)
 {
     return vcvtq_u32_f32(a);
 }
 
 template <typename T>
-inline typename std::enable_if<std::is_same<T, int8_t>::value, int32x4_t>::type
+inline typename std::enable_if < std::is_same<T, int8_t>::value || std::is_same<T, int32_t>::value, int32x4_t >::type
 vcvt(const float32x4_t &a)
 {
     return vcvtq_s32_f32(a);
 }
 
+#ifdef __aarch64__
+template <typename T>
+inline typename std::enable_if<std::is_same<T, uint32_t>::value, uint32x4_t>::type
+vcvta(const float32x4_t &a)
+{
+    return vcvtaq_u32_f32(a);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, int32_t>::value, int32x4_t>::type
+vcvta(const float32x4_t &a)
+{
+    return vcvtaq_s32_f32(a);
+}
+#endif //__aarch64__
+
 #if defined(ARM_COMPUTE_ENABLE_BF16)
 /** Convert 2x128-bit floating point vectors into 1x128-bit bfloat16 vector
  *
diff --git a/src/core/utils/ScaleUtils.cpp b/src/core/utils/ScaleUtils.cpp
index 82c6405e89..ee57a8e7a7 100644
--- a/src/core/utils/ScaleUtils.cpp
+++ b/src/core/utils/ScaleUtils.cpp
@@ -40,12 +40,26 @@ float arm_compute::scale_utils::calculate_resize_ratio(size_t input_size, size_t
     return static_cast<float>(in) / static_cast<float>(out);
 }
 
-bool arm_compute::scale_utils::is_precomputation_required(DataLayout data_layout, DataType data_type, InterpolationPolicy policy)
+bool arm_compute::scale_utils::is_precomputation_required(DataLayout data_layout, DataType data_type,
+                                                          InterpolationPolicy policy, BorderMode border_mode)
 {
-    // whether to precompute indices & weights
-    // The Neon™ kernels (which are preferred over SVE when policy is BILINEAR) do not use
-    // precomputed index and weights when data type is FP32/16.
-    // If policy is nearest_neighbor for SVE, then precompute because it's being used
-    // To be revised in COMPMID-5453/5454
-    return data_layout != DataLayout::NHWC || (data_type != DataType::F32 && data_type != DataType::F16) || (CPUInfo::get().get_isa().sve == true && policy == InterpolationPolicy::NEAREST_NEIGHBOR);
+    // Do not calculate precomputed weights and indices if kernel code doesn't use them
+    if(data_layout == DataLayout::NHWC)
+    {
+        switch(data_type)
+        {
+            case DataType::F32:
+            case DataType::F16:
+                return (CPUInfo::get().get_isa().sve == true && policy == InterpolationPolicy::NEAREST_NEIGHBOR);
+            case DataType::U8:
+            case DataType::S8:
+            case DataType::QASYMM8:
+            case DataType::QASYMM8_SIGNED:
+                return (border_mode != BorderMode::REPLICATE) || (policy == InterpolationPolicy::NEAREST_NEIGHBOR);
+            default:
+                return true;
+        }
+    }
+
+    return true;
 }
\ No newline at end of file
diff --git a/src/core/utils/ScaleUtils.h b/src/core/utils/ScaleUtils.h
index c09509253c..1484824a7f 100644
--- a/src/core/utils/ScaleUtils.h
+++ b/src/core/utils/ScaleUtils.h
@@ -26,9 +26,6 @@
 
 #include "arm_compute/core/Types.h"
 
-#include <cstdint>
-#include <cstdlib>
-
 namespace arm_compute
 {
 namespace scale_utils
@@ -59,10 +56,11 @@ inline bool is_align_corners_allowed_sampling_policy(SamplingPolicy sampling_pol
  * @param[in] data_layout Data layout
  * @param[in] data_type Data type
  * @param[in] policy Interpolation policy
+ * @param[in] border_mode Border Mode
  *
  * @return True if precomputation is required
  */
-bool is_precomputation_required(DataLayout data_layout, DataType data_type, InterpolationPolicy policy);
+bool is_precomputation_required(DataLayout data_layout, DataType data_type, InterpolationPolicy policy, BorderMode border_mode);
 } // namespace scale_utils
 } // namespace arm_compute
 