about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/NEON/wrapper
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2018-11-22 11:22:18 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-23 17:02:27 +0000
commit 0c71d0ba75a11720e39e2a7163e993d51350683d (patch)
tree 089f7b293802944a7672c85f637141aad0b55c75 /arm_compute/core/NEON/wrapper
parent aaa27189e0e75c3ebad57854ac8901d0140677ac (diff)
download ComputeLibrary-0c71d0ba75a11720e39e2a7163e993d51350683d.tar.gz
COMPMID-1647 NENormalizationLayer IN_MAP_2D support for NHWC for FP32/FP16
Change-Id: Id74cc7ba8e5cabee6acd3798d4779f88b1f00a9b
Diffstat (limited to 'arm_compute/core/NEON/wrapper')
-rw-r--r-- arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h | 1
-rw-r--r-- arm_compute/core/NEON/wrapper/intrinsics/mla.h | 13
-rw-r--r-- arm_compute/core/NEON/wrapper/intrinsics/pow.h | 48
3 files changed, 62 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
index 7ea0aba565..77787afcf4 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
@@ -42,6 +42,7 @@
#include "arm_compute/core/NEON/wrapper/intrinsics/mul.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/neg.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/padd.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/pow.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/store.h"
#endif /* __ARM_COMPUTE_WRAPPER_INTRINSICS_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/mla.h b/arm_compute/core/NEON/wrapper/intrinsics/mla.h
index 32a650b57f..db6d7b957a 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/mla.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/mla.h
@@ -35,6 +35,13 @@ namespace wrapper
{ \
return prefix##_##postfix(a, b, c); \
}
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // VMLA_IMPL2 exists only when this feature macro is defined. NOTE(review): the visible `#undef VMLA_IMPL` below does not cover VMLA_IMPL2 - confirm whether it is meant to stay defined for includers.
+#define VMLA_IMPL2(stype, vtype, prefix1, prefix2, postfix) /* Generates an inline vmla(a, b, c) overload for vtype; stype is accepted but not used in the expansion. */ \
+ inline vtype vmla(const vtype &a, const vtype &b, const vtype &c) \
+ { \
+ return prefix1##_##postfix(a, prefix2##_##postfix(b, c)); /* composes two intrinsics, outer(a, inner(b, c)), instead of the single three-operand call used by VMLA_IMPL */ \
+ }
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
VMLA_IMPL(uint8x8_t, uint8x8_t, vmla, u8)
VMLA_IMPL(int8x8_t, int8x8_t, vmla, s8)
@@ -43,6 +50,9 @@ VMLA_IMPL(int16x4_t, int16x4_t, vmla, s16)
VMLA_IMPL(uint32x2_t, uint32x2_t, vmla, u32)
VMLA_IMPL(int32x2_t, int32x2_t, vmla, s32)
VMLA_IMPL(float32x2_t, float32x2_t, vmla, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMLA_IMPL2(float16x4_t, float16x4_t, vadd, vmul, f16) // expands to vmla(a, b, c) returning vadd_f16(a, vmul_f16(b, c)) for float16x4_t
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
VMLA_IMPL(uint8x16_t, uint8x16_t, vmlaq, u8)
VMLA_IMPL(int8x16_t, int8x16_t, vmlaq, s8)
@@ -51,6 +61,9 @@ VMLA_IMPL(int16x8_t, int16x8_t, vmlaq, s16)
VMLA_IMPL(uint32x4_t, uint32x4_t, vmlaq, u32)
VMLA_IMPL(int32x4_t, int32x4_t, vmlaq, s32)
VMLA_IMPL(float32x4_t, float32x4_t, vmlaq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMLA_IMPL2(float16x8_t, float16x8_t, vaddq, vmulq, f16) // expands to vmla(a, b, c) returning vaddq_f16(a, vmulq_f16(b, c)) for float16x8_t
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VMLA_IMPL
} // namespace wrapper
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pow.h b/arm_compute/core/NEON/wrapper/intrinsics/pow.h
new file mode 100644
index 0000000000..865df416ee
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/pow.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_POW_H__
+#define __ARM_COMPUTE_WRAPPER_POW_H__
+
+#include "arm_compute/core/NEON/NEMath.h"
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VPOW_IMPL(vtype, prefix, postfix) /* Generates an inline vpow(a, b) overload for vtype that returns prefix##_##postfix(a, b). */ \
+ inline vtype vpow(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VPOW_IMPL(float32x4_t, vpowq, f32) // vpow for float32x4_t, forwarding to vpowq_f32 (presumably declared in the included NEMath.h - confirm)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VPOW_IMPL(float16x8_t, vpowq, f16) // vpow for float16x8_t, forwarding to vpowq_f16; generated only when the FP16 feature macro is defined
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VPOW_IMPL // the generator macro is removed again here, so it is not visible after this point
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_POW_H__ */