diff options
-rw-r--r-- | arm_compute/core/NEON/NEMath.h | 20 | ||||
-rw-r--r-- | arm_compute/core/NEON/NEMath.inl | 26 | ||||
-rw-r--r-- | arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h | 1 | ||||
-rw-r--r-- | arm_compute/core/NEON/wrapper/intrinsics/round.h | 48 | ||||
-rw-r--r-- | arm_compute/core/Types.h | 1 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h | 20 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp | 9 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEReductionOperationKernel.cpp | 2 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp | 11 | ||||
-rw-r--r-- | tests/validation/NEON/ElementwiseRound.cpp | 107 | ||||
-rw-r--r-- | tests/validation/fixtures/ElementWiseUnaryFixture.h | 17 | ||||
-rw-r--r-- | tests/validation/reference/ElementWiseUnary.cpp | 3 |
12 files changed, 263 insertions, 2 deletions
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h index 5c60d73de4..46d97f6a0d 100644 --- a/arm_compute/core/NEON/NEMath.h +++ b/arm_compute/core/NEON/NEMath.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,14 @@ namespace arm_compute */ float32x4_t vfloorq_f32(float32x4_t val); +/** Calculate round value of a vector to nearest with ties to even. + * + * @param[in] val Input vector value in F32 format. + * + * @return The calculated round vector. + */ +float32x4_t vroundq_rte_f32(float32x4_t val); + /** Calculate inverse square root. * * @param[in] x Input value. @@ -123,12 +131,20 @@ float32x4_t vpowq_f32(float32x4_t val, float32x4_t n); * * @note We clamp x to [-5,5] to avoid overflowing issues. * - * @param[in] val Input vector value in F32 format. + * @param[in] val Input vector value in F16 format. * * @return The calculated Hyperbolic Tangent. */ float16x8_t vtanhq_f16(float16x8_t val); +/** Calculate round value of a vector to nearest with ties to even. + * + * @param[in] val Input vector value in F16 format. + * + * @return The calculated round vector. + */ +float16x8_t vroundq_rte_f16(float16x8_t val); + /** Calculate reciprocal. * * @param[in] x Input value. 
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl index 27b4fc2c1b..172aaef941 100644 --- a/arm_compute/core/NEON/NEMath.inl +++ b/arm_compute/core/NEON/NEMath.inl @@ -65,6 +65,26 @@ inline float32x4_t vfloorq_f32(float32x4_t val) return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, CONST_1), r); } +inline float32x4_t vroundq_rte_f32(float32x4_t val) +{ +#ifdef __aarch64__ + return vrndnq_f32(val); +#else // __aarch64__ + static const float32x4_t CONST_HALF_FLOAT = vdupq_n_f32(0.5f); + static const float32x4_t CONST_1_FLOAT = vdupq_n_f32(1.f); + static const int32x4_t CONST_1_INT = vdupq_n_s32(1); + const float32x4_t floor_val = vfloorq_f32(val); + const float32x4_t diff = vsubq_f32(val, floor_val); + + /* + * Select the floor value when (diff<0.5 || (diff==0.5 && floor_val%2==0). + * This condition is checked by vorrq_u32(vcltq_f32(diff, CONST_HALF_FLOAT) ,vandq_u32(vceqq_f32(diff, CONST_HALF_FLOAT) , vmvnq_u32(vtstq_s32(vandq_s32(vcvtq_s32_f32(floor_val), CONST_1_INT),CONST_1_INT)))) + */ + + return vbslq_f32(vorrq_u32(vcltq_f32(diff, CONST_HALF_FLOAT) ,vandq_u32(vceqq_f32(diff, CONST_HALF_FLOAT) , vmvnq_u32(vtstq_s32(vandq_s32(vcvtq_s32_f32(floor_val), CONST_1_INT),CONST_1_INT)))), floor_val, vaddq_f32(floor_val, CONST_1_FLOAT)); +#endif // __aarch64__ +} + inline float32x2_t vinvsqrt_f32(float32x2_t x) { float32x2_t sqrt_reciprocal = vrsqrte_f32(x); @@ -184,6 +204,12 @@ inline float16x8_t vfloorq_f16(float16x8_t val) return vbslq_f16(vcgtq_f16(r, val), vsubq_f16(r, CONST_1), r); } + +inline float16x8_t vroundq_rte_f16(float16x8_t val) +{ + return vrndnq_f16(val); +} + inline float16x4_t vinvsqrt_f16(float16x4_t x) { float16x4_t sqrt_reciprocal = vrsqrte_f16(x); diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h index 012f6868d1..c9dbb2fa81 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h +++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h 
@@ -57,6 +57,7 @@ #include "arm_compute/core/NEON/wrapper/intrinsics/pmin.h" #include "arm_compute/core/NEON/wrapper/intrinsics/pow.h" #include "arm_compute/core/NEON/wrapper/intrinsics/rev64.h" +#include "arm_compute/core/NEON/wrapper/intrinsics/round.h" #include "arm_compute/core/NEON/wrapper/intrinsics/setlane.h" #include "arm_compute/core/NEON/wrapper/intrinsics/store.h" #include "arm_compute/core/NEON/wrapper/intrinsics/sub.h" diff --git a/arm_compute/core/NEON/wrapper/intrinsics/round.h b/arm_compute/core/NEON/wrapper/intrinsics/round.h new file mode 100644 index 0000000000..da63bf6cb2 --- /dev/null +++ b/arm_compute/core/NEON/wrapper/intrinsics/round.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_WRAPPER_ROUND_H__ +#define __ARM_COMPUTE_WRAPPER_ROUND_H__ + +#include "arm_compute/core/NEON/NEMath.h" +#include <arm_neon.h> + +namespace arm_compute +{ +namespace wrapper +{ +#define VROUNDQ_IMPL(vtype, postfix) \ + inline vtype vround(const vtype &a) \ + { \ + return vroundq_rte_##postfix(a); \ + } + +VROUNDQ_IMPL(float32x4_t, f32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VROUNDQ_IMPL(float16x8_t, f16) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#undef VROUNDQ_IMPL + +} // namespace wrapper +} // namespace arm_compute +#endif /* __ARM_COMPUTE_WRAPPER_ROUND_H__ */ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 5a2ac51308..972d6ef3c5 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -586,6 +586,7 @@ enum class ElementWiseUnary LOG, /**< Natural Logarithm */ ABS, /**< Absolute value */ SIN, /**< Sine */ + ROUND, /**< Round */ }; /** The normalization type used for the normalization layer */ diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h index bea9145b80..085e42d06d 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h @@ -129,5 +129,25 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); }; + +/** Basic function to compute the round value elementwise of an input tensor. */ +class NERoundLayer : public INESimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NERoundLayer + * + * @param[in] input First tensor input info. Data types supported: F16/F32. 
+ * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); +}; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H__ */ diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp index 437676ddb3..4419169b23 100644 --- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/NEAsymm.h" #include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" @@ -61,6 +62,8 @@ inline ScalarType elementwise_op_scalar(const ScalarType &a) return std::log(a); case ElementWiseUnary::ABS: return std::abs(a); + case ElementWiseUnary::ROUND: + return std::nearbyint(a); default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); } @@ -82,6 +85,8 @@ inline VectorType elementwise_op(const VectorType &a) return wrapper::vlog(a); case ElementWiseUnary::ABS: return wrapper::vabs(a); + case ElementWiseUnary::ROUND: + return wrapper::vround(a); default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); } @@ -206,6 +211,9 @@ void NEElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensor *inp case ElementWiseUnary::ABS: _function = configure_func<ElementWiseUnary::ABS>(input, output); break; + case ElementWiseUnary::ROUND: + _function = configure_func<ElementWiseUnary::ROUND>(input, output); + break; default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); } @@ -219,6 +227,7 @@ Status NEElementwiseUnaryKernel::validate_arguments(ElementWiseUnary op, const I case ElementWiseUnary::EXP: case ElementWiseUnary::RSQRT: case ElementWiseUnary::LOG: + case ElementWiseUnary::ROUND: 
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32); break; case ElementWiseUnary::NEG: diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index e6edf22083..c6e853659c 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -180,6 +180,7 @@ inline uint8x8_t calculate_max(uint8x16_t in) return wrapper::vpmax(pmax, pmax); } +template <> uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, uint8x16_t vec_res_value, ReductionOperation op) { uint32x4x4_t res_idx_mask{ { 0 } }; @@ -272,6 +273,7 @@ inline float16x4_t calculate_max(float16x8_t in) return wrapper::vpmax(pmax, pmax); } +template <> uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, float16x8_t vec_res_value, ReductionOperation op) { uint32x4x2_t res_idx_mask{ 0 }; diff --git a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp index 231b8aa445..e4c9101274 100644 --- a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp +++ b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp @@ -85,4 +85,15 @@ Status NEAbsLayer::validate(const ITensorInfo *input, const ITensorInfo *output) return NEElementwiseUnaryKernel::validate(ElementWiseUnary::ABS, input, output); } +void NERoundLayer::configure(const ITensor *input, ITensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique<NEElementwiseUnaryKernel>(); + k->configure(ElementWiseUnary::ROUND, input, output); + _kernel = std::move(k); +} +Status NERoundLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + return NEElementwiseUnaryKernel::validate(ElementWiseUnary::ROUND, input, output); +} + } // namespace arm_compute diff --git a/tests/validation/NEON/ElementwiseRound.cpp b/tests/validation/NEON/ElementwiseRound.cpp new file mode 100644 index 0000000000..1d8cff6d73 --- /dev/null 
+++ b/tests/validation/NEON/ElementwiseRound.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(NEON) +TEST_SUITE(RoundLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), shape, data_type) +{ + // Create tensors + Tensor src = create_tensor<Tensor>(shape, data_type); + Tensor dst = create_tensor<Tensor>(shape, data_type); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + NERoundLayer round_layer; + round_layer.configure(&src, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); +} + +template <typename T> +using NERoundLayerFixture = RoundValidationFixture<Tensor, Accessor, NERoundLayer, T>; + +TEST_SUITE(Float) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NERoundLayerFixture<half>, framework::DatasetMode::NIGHTLY, 
combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +TEST_SUITE_END() // FP16 +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NERoundLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float +TEST_SUITE_END() // RoundLayer +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/ElementWiseUnaryFixture.h b/tests/validation/fixtures/ElementWiseUnaryFixture.h index 7837b085fa..fd66f630ba 100644 --- a/tests/validation/fixtures/ElementWiseUnaryFixture.h +++ b/tests/validation/fixtures/ElementWiseUnaryFixture.h @@ -104,6 +104,12 @@ protected: library->fill(tensor, distribution, i); break; } + case ElementWiseUnary::ROUND: + { + std::uniform_real_distribution<> distribution(-100.0f, 100.0f); + library->fill(tensor, distribution, i); + break; + } default: ARM_COMPUTE_ERROR("Not implemented"); } @@ -219,6 +225,17 @@ public: ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, ElementWiseUnary::SIN); } }; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class RoundValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + template <typename...> + void setup(const TensorShape &shape, DataType data_type) + { + 
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, ElementWiseUnary::ROUND); + } +}; } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/reference/ElementWiseUnary.cpp b/tests/validation/reference/ElementWiseUnary.cpp index dfd4c0600d..06beb2ac09 100644 --- a/tests/validation/reference/ElementWiseUnary.cpp +++ b/tests/validation/reference/ElementWiseUnary.cpp @@ -58,6 +58,9 @@ SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary o case ElementWiseUnary::SIN: dst[i] = std::sin(src[i]); break; + case ElementWiseUnary::ROUND: + dst[i] = std::nearbyint(src[i]); + break; default: ARM_COMPUTE_ERROR("Not implemented"); } |