aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorGeorge Wort <george.wort@arm.com>2019-01-15 11:00:29 +0000
committerGeorge Wort <george.wort@arm.com>2019-01-21 16:37:31 +0000
commita1e7e2818ab282e4d3b707feb5783b4bd4fbe45b (patch)
tree30c4395d0b18e90f5e1cbbf5ee1612c2ea679774 /arm_compute
parentd88590f4022bfb6eda3bad4fa599727bab723667 (diff)
downloadComputeLibrary-a1e7e2818ab282e4d3b707feb5783b4bd4fbe45b.tar.gz
COMPMID-1768: NEON: Implement RealDiv
Change-Id: I0868669f7b733df141794fba1d79436e7581bd3a Reviewed-on: https://review.mlplatform.org/426 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h 41
-rw-r--r-- arm_compute/core/NEON/wrapper/intrinsics/div.h 73
-rw-r--r-- arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h 1
-rw-r--r-- arm_compute/runtime/NEON/functions/NEElementwiseOperations.h 28
4 files changed, 134 insertions, 9 deletions
diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
index f02f71b50e..1271da75a5 100644
--- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
@@ -94,10 +94,8 @@ protected:
class NEArithmeticOperationKernel : public NEElementwiseOperationKernel
{
public:
- NEArithmeticOperationKernel()
- : NEElementwiseOperationKernel()
- {
- }
+ /** Default constructor */
+ NEArithmeticOperationKernel() = default;
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
*
@@ -126,13 +124,40 @@ protected:
static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
};
+class NEDivisionOperationKernel : public NEArithmeticOperationKernel
+{
+public:
+ /** Default constructor */
+ NEDivisionOperationKernel() = default;
+
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input1 First tensor input. Data types supported: F16/F32.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+protected:
+ // Inherited methods overridden:
+ static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+};
+
class NEComparisonOperationKernel : public NEElementwiseOperationKernel
{
public:
- NEComparisonOperationKernel()
- : NEElementwiseOperationKernel()
- {
- }
+ /** Default constructor */
+ NEComparisonOperationKernel() = default;
/** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
*
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/div.h b/arm_compute/core/NEON/wrapper/intrinsics/div.h
new file mode 100644
index 0000000000..d9f80d061f
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/div.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_DIV_H__
+#define __ARM_COMPUTE_WRAPPER_DIV_H__
+
+#include "arm_compute/core/NEON/NEMath.h"
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#ifdef __aarch64__
+
+#define VDIV_IMPL(stype, vtype, prefix, postfix) \
+ inline vtype vdiv(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+VDIV_IMPL(float32x2_t, float32x2_t, vdiv, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VDIV_IMPL(float16x4_t, float16x4_t, vdiv, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+VDIV_IMPL(float32x4_t, float32x4_t, vdivq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VDIV_IMPL(float16x8_t, float16x8_t, vdivq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#else // __aarch64__
+
+#define VDIV_IMPL(stype, vtype, mul_prefix, inv_prefix, postfix) \
+ inline vtype vdiv(const vtype &a, const vtype &b) \
+ { \
+ return mul_prefix##_##postfix(a, inv_prefix##_##postfix(b)); \
+ }
+VDIV_IMPL(float32x2_t, float32x2_t, vmul, vinv, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VDIV_IMPL(float16x4_t, float16x4_t, vmul, vinv, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+VDIV_IMPL(float32x4_t, float32x4_t, vmulq, vinvq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VDIV_IMPL(float16x8_t, float16x8_t, vmulq, vinvq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#endif // __aarch64__
+
+#undef VDIV_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_DIV_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
index c8f4a6e041..012f6868d1 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
@@ -33,6 +33,7 @@
#include "arm_compute/core/NEON/wrapper/intrinsics/cgt.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/clt.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/combine.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/div.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/dup_n.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/exp.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/gethigh.h"
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index cd9ed24bee..ca3717a709 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -109,7 +109,33 @@ public:
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
};
-/** Basic function to run @ref NEComparisonOperationKernel
+/** Basic function to run @ref NEArithmeticOperationKernel for division
+ *
+ * @note The tensor data type for the inputs must be F16/F32.
+ * @note The function performs an element-wise division between two tensors (i.e., out[i] = in1[i] / in2[i])
+ */
+class NEElementwiseDivision : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division
+ *
+ * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+
+/** Basic function to run @ref NEComparisonOperationKernel.
*
* @note The tensor data type for the inputs must be QASYMM8/S16/F16/S32/F32.
* @note The function performs a comparison operation between two tensors.