From 92fd94336e4b169005d88af401fe57bcbd50521b Mon Sep 17 00:00:00 2001 From: giuros01 Date: Mon, 3 Dec 2018 17:30:00 +0000 Subject: COMPMID-1754: NEON: Implement Maximum, Minumum, SquaredDifference Change-Id: I77e8c6a8af6ad841293ed5e66ed582035cc1424b Reviewed-on: https://review.mlplatform.org/339 Reviewed-by: Michalis Spyrou Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Reviewed-by: Georgios Pinitas --- arm_compute/core/NEON/NEKernels.h | 1 + .../NEON/kernels/NEElementwiseOperationKernel.h | 131 +++++++++++++++++++++ .../core/NEON/wrapper/intrinsics/intrinsics.h | 1 + arm_compute/core/NEON/wrapper/intrinsics/load.h | 1 - arm_compute/core/NEON/wrapper/intrinsics/sub.h | 68 +++++++++++ 5 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h create mode 100644 arm_compute/core/NEON/wrapper/intrinsics/sub.h (limited to 'arm_compute/core/NEON') diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h index 755e68a2e1..57a1d4d52c 100644 --- a/arm_compute/core/NEON/NEKernels.h +++ b/arm_compute/core/NEON/NEKernels.h @@ -57,6 +57,7 @@ #include "arm_compute/core/NEON/kernels/NEDilateKernel.h" #include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" +#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" #include "arm_compute/core/NEON/kernels/NEErodeKernel.h" #include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" #include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h new file mode 100644 index 0000000000..93ad437322 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
 */ +#ifndef __ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H__ +#define __ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for an element-wise operation kernel + * + * Element-wise operation is computed by: + * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] + * + */ +class NEElementwiseOperationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEElementwiseOperationKernel"; + } + /** Default constructor */ + NEElementwiseOperationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default; + /** Default destructor */ + ~NEElementwiseOperationKernel() = default; + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +protected: + /** Validate the argument passed to the kernel + * + * @param[in] input1 First tensor input. Data types supported: S16/F16/S32/F32. + * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. + * @param[in] output Output tensor. Data types supported: Same as @p input1. + */ + virtual Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0; + + /** Common configure function for element-wise operators with no additional options (e.g. 
Min, Max, SquaredDiff) + * + */ + template <ArithmeticOperation op> + void configure_common(const ITensor *input1, const ITensor *input2, ITensor *output); + + ArithmeticOperation _op; // Code of the operation to execute + +private: + /** Common signature for all the specialised element-wise functions + * + * @param[in] input1 An input tensor. Data types supported: S16/F16/S32/F32 + * @param[in] input2 An input tensor. Data types supported: S16/F16/S32/F32 + * @param[out] output The output tensor. Data types supported: S16/F16/S32/F32 + * @param[in] window Region on which to execute the kernel. + */ + using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + /** Function to use for the particular tensor types passed to configure() */ + ElementwiseFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; + +class NEArithmeticOperationKernel : public NEElementwiseOperationKernel +{ +public: + NEArithmeticOperationKernel() + : NEElementwiseOperationKernel() + { + } + + /** Configure kernel for a given list of arguments + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32. + * + * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. + * @param[in] output Output tensor. Data types supported: Same as @p input1. + */ + void configure(ArithmeticOperation op, const ITensor *input1, const ITensor *input2, ITensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * 
 + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a Status + */ + static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h index 77787afcf4..46c8937adc 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h +++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h @@ -44,5 +44,6 @@ #include "arm_compute/core/NEON/wrapper/intrinsics/padd.h" #include "arm_compute/core/NEON/wrapper/intrinsics/pow.h" #include "arm_compute/core/NEON/wrapper/intrinsics/store.h" +#include "arm_compute/core/NEON/wrapper/intrinsics/sub.h" #endif /* __ARM_COMPUTE_WRAPPER_INTRINSICS_H__ */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/load.h b/arm_compute/core/NEON/wrapper/intrinsics/load.h index b5d9ed2a35..500ec78df6 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/load.h +++ b/arm_compute/core/NEON/wrapper/intrinsics/load.h @@ -67,7 +67,6 @@ VLOADQ_IMPL(float, float32x4_t, f32) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC VLOADQ_IMPL(float16_t, float16x8_t, f16) #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - #undef VLOAD_IMPL } // namespace wrapper } // namespace arm_compute diff --git a/arm_compute/core/NEON/wrapper/intrinsics/sub.h b/arm_compute/core/NEON/wrapper/intrinsics/sub.h new file mode 100644 index 0000000000..8119429d43 --- /dev/null +++ b/arm_compute/core/NEON/wrapper/intrinsics/sub.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 
ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
 */ +#ifndef __ARM_COMPUTE_WRAPPER_SUB_H__ +#define __ARM_COMPUTE_WRAPPER_SUB_H__ + +#include <arm_neon.h> + +namespace arm_compute +{ +namespace wrapper +{ +#define VSUB_IMPL(stype, vtype, prefix, postfix) \ + inline vtype vsub(const vtype &a, const vtype &b) \ + { \ + return prefix##_##postfix(a, b); \ + } + +VSUB_IMPL(uint8x8_t, uint8x8_t, vsub, u8) +VSUB_IMPL(int8x8_t, int8x8_t, vsub, s8) +VSUB_IMPL(uint16x4_t, uint16x4_t, vsub, u16) +VSUB_IMPL(int16x4_t, int16x4_t, vsub, s16) +VSUB_IMPL(uint32x2_t, uint32x2_t, vsub, u32) +VSUB_IMPL(int32x2_t, int32x2_t, vsub, s32) +VSUB_IMPL(uint64x1_t, uint64x1_t, vsub, u64) +VSUB_IMPL(int64x1_t, int64x1_t, vsub, s64) +VSUB_IMPL(float32x2_t, float32x2_t, vsub, f32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VSUB_IMPL(float16x4_t, float16x4_t, vsub, f16) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +VSUB_IMPL(uint8x16_t, uint8x16_t, vsubq, u8) +VSUB_IMPL(int8x16_t, int8x16_t, vsubq, s8) +VSUB_IMPL(uint16x8_t, uint16x8_t, vsubq, u16) +VSUB_IMPL(int16x8_t, int16x8_t, vsubq, s16) +VSUB_IMPL(uint32x4_t, uint32x4_t, vsubq, u32) +VSUB_IMPL(int32x4_t, int32x4_t, vsubq, s32) +VSUB_IMPL(uint64x2_t, uint64x2_t, vsubq, u64) +VSUB_IMPL(int64x2_t, int64x2_t, vsubq, s64) +VSUB_IMPL(float32x4_t, float32x4_t, vsubq, f32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VSUB_IMPL(float16x8_t, float16x8_t, vsubq, f16) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +#undef VSUB_IMPL +} // namespace wrapper +} // namespace arm_compute +#endif /* __ARM_COMPUTE_WRAPPER_SUB_H__ */ -- cgit v1.2.1