diff options
Diffstat (limited to 'arm_compute/core')
-rw-r--r-- | arm_compute/core/CL/kernels/CLFloorKernel.h | 14 |
-rw-r--r-- | arm_compute/core/NEON/NEMath.inl | 9 |
-rw-r--r-- | arm_compute/core/NEON/kernels/NEFloorKernel.h | 13 |
3 files changed, 31 insertions, 5 deletions
diff --git a/arm_compute/core/CL/kernels/CLFloorKernel.h b/arm_compute/core/CL/kernels/CLFloorKernel.h index ffe699e280..930d90f903 100644 --- a/arm_compute/core/CL/kernels/CLFloorKernel.h +++ b/arm_compute/core/CL/kernels/CLFloorKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,10 +48,18 @@ public: ~CLFloorKernel() = default; /** Set the source, destination of the kernel * - * @param[in] input Source tensor. Data type supported: F32. - * @param[out] output Destination tensor. Data type supported: F32. + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] output Destination tensor. Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLFloorKernel + * + * @param[in] input Source tensor info. Data type supported: F16/F32. + * @param[in] output Destination tensor info. 
Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl index 1ebc9c10af..2bc1ab7964 100644 --- a/arm_compute/core/NEON/NEMath.inl +++ b/arm_compute/core/NEON/NEMath.inl @@ -175,6 +175,15 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n) /** Exponent polynomial coefficients */ /** Logarithm polynomial coefficients */ #ifndef DOXYGEN_SKIP_THIS +inline float16x8_t vfloorq_f16(float16x8_t val) +{ + static const float16x8_t CONST_1 = vdupq_n_f16(1.f); + + const int16x8_t z = vcvtq_s16_f16(val); + const float16x8_t r = vcvtq_f16_s16(z); + + return vbslq_f16(vcgtq_f16(r, val), vsubq_f16(r, CONST_1), r); +} inline float16x4_t vinvsqrt_f16(float16x4_t x) { float16x4_t sqrt_reciprocal = vrsqrte_f16(x); diff --git a/arm_compute/core/NEON/kernels/NEFloorKernel.h b/arm_compute/core/NEON/kernels/NEFloorKernel.h index b72d0527cc..6269430ddc 100644 --- a/arm_compute/core/NEON/kernels/NEFloorKernel.h +++ b/arm_compute/core/NEON/kernels/NEFloorKernel.h @@ -40,10 +40,19 @@ public: } /** Set the source, destination of the kernel * - * @param[in] input Source tensor. Data type supported: F32. - * @param[out] output Destination tensor. Data type supported: F32. + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] output Destination tensor. Same as @p input */ void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEFloorKernel + * + * @param[in] input Source tensor info. Data type supported: F16/F32. + * @param[in] output Destination tensor info. 
Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; }; |