aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorgiuros01 <giuseppe.rossini@arm.com>2018-12-03 17:30:00 +0000
committerGiuseppe Rossini <giuseppe.rossini@arm.com>2018-12-17 16:43:51 +0000
commit92fd94336e4b169005d88af401fe57bcbd50521b (patch)
treed43bc672d0f250e85c72ab310168baea3eb982dc /arm_compute
parentcc6129c06af98616a0e4d68475cfa3d92aaf63b3 (diff)
downloadComputeLibrary-92fd94336e4b169005d88af401fe57bcbd50521b.tar.gz
COMPMID-1754: NEON: Implement Maximum, Minumum, SquaredDifference
Change-Id: I77e8c6a8af6ad841293ed5e66ed582035cc1424b Reviewed-on: https://review.mlplatform.org/339 Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/NEON/NEKernels.h1
-rw-r--r--arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h131
-rw-r--r--arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h1
-rw-r--r--arm_compute/core/NEON/wrapper/intrinsics/load.h1
-rw-r--r--arm_compute/core/NEON/wrapper/intrinsics/sub.h68
-rw-r--r--arm_compute/core/Rounding.h4
-rw-r--r--arm_compute/runtime/NEON/NEFunctions.h1
-rw-r--r--arm_compute/runtime/NEON/functions/NEElementwiseOperations.h112
8 files changed, 316 insertions, 3 deletions
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index 755e68a2e1..57a1d4d52c 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -57,6 +57,7 @@
#include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
+#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
#include "arm_compute/core/NEON/kernels/NEErodeKernel.h"
#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h"
diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
new file mode 100644
index 0000000000..93ad437322
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INNEUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY NEAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H__
+#define __ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for an element-wise operation kernel
+ *
+ * Element-wise operation is computed by:
+ * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f]
+ *
+ */
+class NEElementwiseOperationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEElementwiseOperationKernel";
+ }
+ /** Default constructor */
+ NEElementwiseOperationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default;
+ /** Default destructor */
+ ~NEElementwiseOperationKernel() = default;
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+protected:
+ /** Validate the argument passed to the kernel
+ *
+ * @param[in] input1 First tensor input. Data types supported: S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor. Data types supported: Same as @p input1.
+ */
+ virtual Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0;
+
+ /** Commmon configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff)
+ *
+ */
+ template <ArithmeticOperation op>
+ void configure_common(const ITensor *input1, const ITensor *input2, ITensor *output);
+
+ ArithmeticOperation _op; // Code of the operation to execute
+
+private:
+ /** Common signature for all the specialised add functions
+ *
+ * @param[in] input1 An input tensor. Data types supported: S16/F16/S32/F32
+ * @param[in] input2 An input tensor. Data types supported: S16/F16/S32/F32
+ * @param[out] output The output tensor. Data types supported: S16/F16/S32/F32
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
+ /** Add function to use for the particular tensor types passed to configure() */
+ ElementwiseFunction *_func;
+ const ITensor *_input1;
+ const ITensor *_input2;
+ ITensor *_output;
+};
+
+class NEArithmeticOperationKernel : public NEElementwiseOperationKernel
+{
+public:
+ NEArithmeticOperationKernel()
+ : NEElementwiseOperationKernel()
+ {
+ }
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] input1 First tensor input. Data types supported: Same as @p input1.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(ArithmeticOperation op, const ITensor *input1, const ITensor *input2, ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] input1 First tensor input info. Data types supported: Same as @p input1
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a Status
+ */
+ static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+protected:
+ // Inherited methods overridden:
+ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
index 77787afcf4..46c8937adc 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
@@ -44,5 +44,6 @@
#include "arm_compute/core/NEON/wrapper/intrinsics/padd.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/pow.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/store.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/sub.h"
#endif /* __ARM_COMPUTE_WRAPPER_INTRINSICS_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/load.h b/arm_compute/core/NEON/wrapper/intrinsics/load.h
index b5d9ed2a35..500ec78df6 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/load.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/load.h
@@ -67,7 +67,6 @@ VLOADQ_IMPL(float, float32x4_t, f32)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
VLOADQ_IMPL(float16_t, float16x8_t, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
#undef VLOAD_IMPL
} // namespace wrapper
} // namespace arm_compute
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/sub.h b/arm_compute/core/NEON/wrapper/intrinsics/sub.h
new file mode 100644
index 0000000000..8119429d43
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/sub.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_SUB_H__
+#define __ARM_COMPUTE_WRAPPER_SUB_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VSUB_IMPL(stype, vtype, prefix, postfix) \
+ inline vtype vsub(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VSUB_IMPL(uint8x8_t, uint8x8_t, vsub, u8)
+VSUB_IMPL(int8x8_t, int8x8_t, vsub, s8)
+VSUB_IMPL(uint16x4_t, uint16x4_t, vsub, u16)
+VSUB_IMPL(int16x4_t, int16x4_t, vsub, s16)
+VSUB_IMPL(uint32x2_t, uint32x2_t, vsub, u32)
+VSUB_IMPL(int32x2_t, int32x2_t, vsub, s32)
+VSUB_IMPL(uint64x1_t, uint64x1_t, vsub, u64)
+VSUB_IMPL(int64x1_t, int64x1_t, vsub, s64)
+VSUB_IMPL(float32x2_t, float32x2_t, vsub, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VSUB_IMPL(float16x4_t, float16x4_t, vsub, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+VSUB_IMPL(uint8x16_t, uint8x16_t, vsubq, u8)
+VSUB_IMPL(int8x16_t, int8x16_t, vsubq, s8)
+VSUB_IMPL(uint16x8_t, uint16x8_t, vsubq, u16)
+VSUB_IMPL(int16x8_t, int16x8_t, vsubq, s16)
+VSUB_IMPL(uint32x4_t, uint32x4_t, vsubq, u32)
+VSUB_IMPL(int32x4_t, int32x4_t, vsubq, s32)
+VSUB_IMPL(uint64x2_t, uint64x2_t, vsubq, u64)
+VSUB_IMPL(int64x2_t, int64x2_t, vsubq, s64)
+VSUB_IMPL(float32x4_t, float32x4_t, vsubq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VSUB_IMPL(float16x8_t, float16x8_t, vsubq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef vsub_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_SUB_H__ */
diff --git a/arm_compute/core/Rounding.h b/arm_compute/core/Rounding.h
index f95058c567..a9e9a713e4 100644
--- a/arm_compute/core/Rounding.h
+++ b/arm_compute/core/Rounding.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,7 +29,7 @@ namespace arm_compute
/** Rounding method */
enum class RoundingPolicy
{
- TO_ZERO, /**< Truncates the least significand values that are lost in operations. */
+ TO_ZERO, /**< Truncates the least significant values that are lost in operations. */
TO_NEAREST_UP, /**< Rounds to nearest value; half rounds away from zero */
TO_NEAREST_EVEN, /**< Rounds to nearest value; half rounds to nearest even */
};
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 9a023ec51f..2ffd92d541 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -56,6 +56,7 @@
#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
#include "arm_compute/runtime/NEON/functions/NEDilate.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h"
#include "arm_compute/runtime/NEON/functions/NEErode.h"
#include "arm_compute/runtime/NEON/functions/NEFastCorners.h"
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
new file mode 100644
index 0000000000..5cbf1237e4
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INNEUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY NEAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARI SING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H__
+#define __ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEArithmeticOperationKernel for max
+ *
+ * @note The tensor data type for the inputs must be S16/F16/S32/F32.
+ * @note The function performs a max operation between two tensors.
+ */
+class NEElementwiseMax : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for max
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+
+/** Basic function to run @ref NEArithmeticOperationKernel for min
+ *
+ * @note The tensor data type for the inputs must be S16/F16/S32/F32.
+ * @note The function performs a max operation between two tensors.
+ */
+class NEElementwiseMin : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for min
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+
+/** Basic function to run @ref NEArithmeticOperationKernel for squared difference
+ *
+ * @note The tensor data type for the inputs must be S16/F16/S32/F32.
+ * @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2
+ */
+class NEElementwiseSquaredDiff : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for squared difference
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H__ */