Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r--  arm_compute/core/NEON/kernels/NEReductionOperationKernel.h    4
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/gethigh.h            53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/getlane.h            209
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/getlow.h             53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h         6
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/load.h               6
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/movl.h               49
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/movn.h               63
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/mul.h                6
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/padd.h               53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/store.h              6
11 files changed, 506 insertions, 2 deletions
diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
index a20cd46434..a4cb330445 100644
--- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
@@ -53,7 +53,7 @@ public:
/** Set the source, destination of the kernel
*
- * @param[in] input Source tensor. Data type supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW.
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
*                    Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis: 0
@@ -63,7 +63,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel.
*
- * @param[in] input Source tensor info. Data type supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW.
* @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
*                   Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis: 0
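
For context, a minimal sketch of exercising the newly documented type support. This is not part of the patch; it assumes the kernel's usual static validate() signature and ReductionOperation::SUM_SQUARE from Types.h, with Status and ITensorInfo resolved through the kernel header's own includes.

    #include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    // Check that an F16 reduction along axis 0 is a valid configuration.
    Status check_f16_reduction(const ITensorInfo *src, const ITensorInfo *dst)
    {
        return NEReductionOperationKernel::validate(src, dst, 0, ReductionOperation::SUM_SQUARE);
    }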
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/gethigh.h b/arm_compute/core/NEON/wrapper/intrinsics/gethigh.h
new file mode 100644
index 0000000000..47b0116b84
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/gethigh.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_GET_HIGH_H__
+#define __ARM_COMPUTE_WRAPPER_GET_HIGH_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VGETHIGH_IMPL(half_vtype, vtype, postfix) \
+ inline half_vtype vgethigh(const vtype val) \
+ { \
+ return vget_high_##postfix(val); \
+ }
+
+VGETHIGH_IMPL(uint8x8_t, uint8x16_t, u8)
+VGETHIGH_IMPL(int8x8_t, int8x16_t, s8)
+VGETHIGH_IMPL(uint16x4_t, uint16x8_t, u16)
+VGETHIGH_IMPL(int16x4_t, int16x8_t, s16)
+VGETHIGH_IMPL(uint32x2_t, uint32x4_t, u32)
+VGETHIGH_IMPL(int32x2_t, int32x4_t, s32)
+VGETHIGH_IMPL(float32x2_t, float32x4_t, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VGETHIGH_IMPL(float16x4_t, float16x8_t, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VGETHIGH_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_GET_HIGH_H__ */
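
A minimal usage sketch for the wrapper above (not part of the patch; assumes a NEON-enabled toolchain). The overload set picks the right vget_high_* variant from the argument type, so call sites need no type postfix:

    #include "arm_compute/core/NEON/wrapper/intrinsics/gethigh.h"

    // Extract the upper 8 lanes of a 16-lane u8 vector; resolves to vget_high_u8.
    uint8x8_t upper_half(uint8x16_t v)
    {
        return arm_compute::wrapper::vgethigh(v);
    }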
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/getlane.h b/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
new file mode 100644
index 0000000000..107ce44e0c
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_GET_LANE_H__
+#define __ARM_COMPUTE_WRAPPER_GET_LANE_H__
+
+#include "arm_compute/core/Error.h"
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VGETLANE_IMPL_8(stype, vtype, postfix) \
+ inline stype vgetlane(const vtype vector, const int lane) \
+ { \
+ switch(lane) \
+ { \
+ case 0: \
+ return vget_lane_##postfix(vector, 0); \
+ case 1: \
+ return vget_lane_##postfix(vector, 1); \
+ case 2: \
+ return vget_lane_##postfix(vector, 2); \
+ case 3: \
+ return vget_lane_##postfix(vector, 3); \
+ case 4: \
+ return vget_lane_##postfix(vector, 4); \
+ case 5: \
+ return vget_lane_##postfix(vector, 5); \
+ case 6: \
+ return vget_lane_##postfix(vector, 6); \
+ case 7: \
+ return vget_lane_##postfix(vector, 7); \
+ default: \
+ ARM_COMPUTE_ERROR("Invalid lane"); \
+ } \
+ }
+
+#define VGETLANE_IMPL_4(stype, vtype, postfix) \
+ inline stype vgetlane(const vtype vector, const int lane) \
+ { \
+ switch(lane) \
+ { \
+ case 0: \
+ return vget_lane_##postfix(vector, 0); \
+ case 1: \
+ return vget_lane_##postfix(vector, 1); \
+ case 2: \
+ return vget_lane_##postfix(vector, 2); \
+ case 3: \
+ return vget_lane_##postfix(vector, 3); \
+ default: \
+ ARM_COMPUTE_ERROR("Invalid lane"); \
+ } \
+ }
+
+#define VGETLANE_IMPL_2(stype, vtype, postfix) \
+ inline stype vgetlane(const vtype vector, const int lane) \
+ { \
+ switch(lane) \
+ { \
+ case 0: \
+ return vget_lane_##postfix(vector, 0); \
+ case 1: \
+ return vget_lane_##postfix(vector, 1); \
+ default: \
+ ARM_COMPUTE_ERROR("Invalid lane"); \
+ } \
+ }
+
+VGETLANE_IMPL_8(uint8_t, uint8x8_t, u8)
+VGETLANE_IMPL_8(int8_t, int8x8_t, s8)
+VGETLANE_IMPL_4(uint16_t, uint16x4_t, u16)
+VGETLANE_IMPL_4(int16_t, int16x4_t, s16)
+VGETLANE_IMPL_2(uint32_t, uint32x2_t, u32)
+VGETLANE_IMPL_2(int32_t, int32x2_t, s32)
+VGETLANE_IMPL_2(float, float32x2_t, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VGETLANE_IMPL_4(float16_t, float16x4_t, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#define VGETQLANE_IMPL_16(stype, vtype, postfix) \
+ inline stype vgetqlane(const vtype vector, const int lane) \
+ { \
+ switch(lane) \
+ { \
+ case 0: \
+ return vgetq_lane_##postfix(vector, 0); \
+ case 1: \
+ return vgetq_lane_##postfix(vector, 1); \
+ case 2: \
+ return vgetq_lane_##postfix(vector, 2); \
+ case 3: \
+ return vgetq_lane_##postfix(vector, 3); \
+ case 4: \
+ return vgetq_lane_##postfix(vector, 4); \
+ case 5: \
+ return vgetq_lane_##postfix(vector, 5); \
+ case 6: \
+ return vgetq_lane_##postfix(vector, 6); \
+ case 7: \
+ return vgetq_lane_##postfix(vector, 7); \
+ case 8: \
+ return vgetq_lane_##postfix(vector, 8); \
+ case 9: \
+ return vgetq_lane_##postfix(vector, 9); \
+ case 10: \
+ return vgetq_lane_##postfix(vector, 10); \
+ case 11: \
+ return vgetq_lane_##postfix(vector, 11); \
+ case 12: \
+ return vgetq_lane_##postfix(vector, 12); \
+ case 13: \
+ return vgetq_lane_##postfix(vector, 13); \
+ case 14: \
+ return vgetq_lane_##postfix(vector, 14); \
+ case 15: \
+ return vgetq_lane_##postfix(vector, 15); \
+ default: \
+ ARM_COMPUTE_ERROR("Invalid lane"); \
+ } \
+ }
+
+#define VGETQLANE_IMPL_8(stype, vtype, postfix) \
+ inline stype vgetqlane(const vtype vector, const int lane) \
+ { \
+ switch(lane) \
+ { \
+ case 0: \
+ return vgetq_lane_##postfix(vector, 0); \
+ case 1: \
+ return vgetq_lane_##postfix(vector, 1); \
+ case 2: \
+ return vgetq_lane_##postfix(vector, 2); \
+ case 3: \
+ return vgetq_lane_##postfix(vector, 3); \
+ case 4: \
+ return vgetq_lane_##postfix(vector, 4); \
+ case 5: \
+ return vgetq_lane_##postfix(vector, 5); \
+ case 6: \
+ return vgetq_lane_##postfix(vector, 6); \
+ case 7: \
+ return vgetq_lane_##postfix(vector, 7); \
+ default: \
+ ARM_COMPUTE_ERROR("Invalid lane"); \
+ } \
+ }
+
+#define VGETQLANE_IMPL_4(stype, vtype, postfix) \
+ inline stype vgetqlane(const vtype vector, const int lane) \
+ { \
+ switch(lane) \
+ { \
+ case 0: \
+ return vgetq_lane_##postfix(vector, 0); \
+ case 1: \
+ return vgetq_lane_##postfix(vector, 1); \
+ case 2: \
+ return vgetq_lane_##postfix(vector, 2); \
+ case 3: \
+ return vgetq_lane_##postfix(vector, 3); \
+ default: \
+ ARM_COMPUTE_ERROR("Invalid lane"); \
+ } \
+ }
+
+VGETQLANE_IMPL_16(uint8_t, uint8x16_t, u8)
+VGETQLANE_IMPL_16(int8_t, int8x16_t, s8)
+VGETQLANE_IMPL_8(uint16_t, uint16x8_t, u16)
+VGETQLANE_IMPL_8(int16_t, int16x8_t, s16)
+VGETQLANE_IMPL_4(uint32_t, uint32x4_t, u32)
+VGETQLANE_IMPL_4(int32_t, int32x4_t, s32)
+VGETQLANE_IMPL_4(float, float32x4_t, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VGETQLANE_IMPL_8(float16_t, float16x8_t, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VGETLANE_IMPL_8
+#undef VGETLANE_IMPL_4
+#undef VGETLANE_IMPL_2
+#undef VGETQLANE_IMPL_16
+#undef VGETQLANE_IMPL_8
+#undef VGETQLANE_IMPL_4
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_GET_LANE_H__ */
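
The switch ladders above exist because vget_lane/vgetq_lane require a compile-time constant lane index; the wrapper trades a branch for a runtime index. A usage sketch (not part of the patch):

    #include "arm_compute/core/NEON/wrapper/intrinsics/getlane.h"

    // Sum all four lanes of a float32x4_t using a runtime lane index,
    // which the raw vgetq_lane_f32 intrinsic would reject.
    float horizontal_sum(float32x4_t v)
    {
        float sum = 0.f;
        for(int lane = 0; lane < 4; ++lane)
        {
            sum += arm_compute::wrapper::vgetqlane(v, lane);
        }
        return sum;
    }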
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/getlow.h b/arm_compute/core/NEON/wrapper/intrinsics/getlow.h
new file mode 100644
index 0000000000..cc5d8bb2f2
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/getlow.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_GET_LOW_H__
+#define __ARM_COMPUTE_WRAPPER_GET_LOW_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VGETLOW_IMPL(half_vtype, vtype, postfix) \
+ inline half_vtype vgetlow(const vtype val) \
+ { \
+ return vget_low_##postfix(val); \
+ }
+
+VGETLOW_IMPL(uint8x8_t, uint8x16_t, u8)
+VGETLOW_IMPL(int8x8_t, int8x16_t, s8)
+VGETLOW_IMPL(uint16x4_t, uint16x8_t, u16)
+VGETLOW_IMPL(int16x4_t, int16x8_t, s16)
+VGETLOW_IMPL(uint32x2_t, uint32x4_t, u32)
+VGETLOW_IMPL(int32x2_t, int32x4_t, s32)
+VGETLOW_IMPL(float32x2_t, float32x4_t, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VGETLOW_IMPL(float16x4_t, float16x8_t, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VGETLOW_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_GET_LOW_H__ */
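
vgetlow pairs with vgethigh for the common split-and-fold pattern; a sketch under the same assumptions (not part of the patch):

    #include "arm_compute/core/NEON/wrapper/intrinsics/gethigh.h"
    #include "arm_compute/core/NEON/wrapper/intrinsics/getlow.h"

    // Fold the upper half of a u32 q-register onto the lower half,
    // the usual first step of a horizontal reduction.
    uint32x2_t fold(uint32x4_t v)
    {
        using namespace arm_compute::wrapper;
        return vadd_u32(vgetlow(v), vgethigh(v));
    }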
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
index 58bfba9645..2e6fd75005 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
@@ -28,13 +28,19 @@
#include "arm_compute/core/NEON/wrapper/intrinsics/and.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/dup_n.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/exp.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/gethigh.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/getlane.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/getlow.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/inv.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/load.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/max.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/min.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/mla.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/movl.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/movn.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/mul.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/neg.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/padd.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/store.h"
#endif /* __ARM_COMPUTE_WRAPPER_INTRINSICS_H__ */
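
With the umbrella header pulling in the new wrappers, kernels can be written once over the overload set rather than per NEON type. A hedged sketch of the pattern these wrappers enable (not part of the patch):

    #include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

    // Pairwise-add the two halves of any q-register type for which
    // vgetlow/vgethigh/vpadd overloads exist (u8/s8/.../f32, plus f16
    // under __ARM_FEATURE_FP16_VECTOR_ARITHMETIC).
    template <typename QVec>
    auto fold_halves(QVec v) -> decltype(arm_compute::wrapper::vgetlow(v))
    {
        using namespace arm_compute::wrapper;
        return vpadd(vgetlow(v), vgethigh(v));
    }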
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/load.h b/arm_compute/core/NEON/wrapper/intrinsics/load.h
index 442d857497..b5d9ed2a35 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/load.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/load.h
@@ -45,6 +45,9 @@ VLOAD_IMPL(int32_t, int32x2_t, s32)
//VLOAD_IMPL(uint64_t, uint64x1_t, u64)
//VLOAD_IMPL(int64_t, int64x1_t, s64)
VLOAD_IMPL(float, float32x2_t, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VLOAD_IMPL(float16_t, float16x4_t, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#define VLOADQ_IMPL(stype, vtype, postfix) \
inline vtype vloadq(const stype *ptr) \
@@ -61,6 +64,9 @@ VLOADQ_IMPL(int32_t, int32x4_t, s32)
//VLOAD_IMPL(uint64_t, uint64x1_t, u64)
//VLOAD_IMPL(int64_t, int64x1_t, s64)
VLOADQ_IMPL(float, float32x4_t, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VLOADQ_IMPL(float16_t, float16x8_t, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VLOAD_IMPL
} // namespace wrapper
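
A sketch of the new FP16 load path, guarded exactly like the wrapper itself (not part of the patch):

    #include "arm_compute/core/NEON/wrapper/intrinsics/load.h"

    #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    // Resolves to vld1q_f16; compiled only when the toolchain provides
    // FP16 vector arithmetic.
    float16x8_t load_eight_halves(const float16_t *ptr)
    {
        return arm_compute::wrapper::vloadq(ptr);
    }
    #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC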
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/movl.h b/arm_compute/core/NEON/wrapper/intrinsics/movl.h
new file mode 100644
index 0000000000..728fe4e097
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/movl.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_MOVL_H__
+#define __ARM_COMPUTE_WRAPPER_MOVL_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VMOVL_IMPL(ptype, vtype, prefix, postfix) \
+ inline ptype vmovl(const vtype &a) \
+ { \
+ return prefix##_##postfix(a); \
+ }
+
+VMOVL_IMPL(uint16x8_t, uint8x8_t, vmovl, u8)
+VMOVL_IMPL(int16x8_t, int8x8_t, vmovl, s8)
+VMOVL_IMPL(uint32x4_t, uint16x4_t, vmovl, u16)
+VMOVL_IMPL(int32x4_t, int16x4_t, vmovl, s16)
+VMOVL_IMPL(uint64x2_t, uint32x2_t, vmovl, u32)
+VMOVL_IMPL(int64x2_t, int32x2_t, vmovl, s32)
+
+#undef VMOVL_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_MOVL_H__ */
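
Usage sketch for the widening wrapper (not part of the patch):

    #include "arm_compute/core/NEON/wrapper/intrinsics/movl.h"

    // Zero-extend each u8 lane to u16; resolves to vmovl_u8.
    uint16x8_t widen(uint8x8_t v)
    {
        return arm_compute::wrapper::vmovl(v);
    }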
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/movn.h b/arm_compute/core/NEON/wrapper/intrinsics/movn.h
new file mode 100644
index 0000000000..6ed262edb6
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/movn.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_MOVN_H__
+#define __ARM_COMPUTE_WRAPPER_MOVN_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VMOVN_IMPL(dtype, vtype, prefix, postfix) \
+ inline dtype vmovn(const vtype &a) \
+ { \
+ return prefix##_##postfix(a); \
+ }
+
+VMOVN_IMPL(uint32x2_t, uint64x2_t, vmovn, u64)
+VMOVN_IMPL(int32x2_t, int64x2_t, vmovn, s64)
+VMOVN_IMPL(uint16x4_t, uint32x4_t, vmovn, u32)
+VMOVN_IMPL(int16x4_t, int32x4_t, vmovn, s32)
+VMOVN_IMPL(uint8x8_t, uint16x8_t, vmovn, u16)
+VMOVN_IMPL(int8x8_t, int16x8_t, vmovn, s16)
+
+#define VQMOVN_IMPL(dtype, vtype, prefix, postfix) \
+ inline dtype vqmovn(const vtype &a) \
+ { \
+ return prefix##_##postfix(a); \
+ }
+
+VQMOVN_IMPL(uint32x2_t, uint64x2_t, vqmovn, u64)
+VQMOVN_IMPL(int32x2_t, int64x2_t, vqmovn, s64)
+VQMOVN_IMPL(uint16x4_t, uint32x4_t, vqmovn, u32)
+VQMOVN_IMPL(int16x4_t, int32x4_t, vqmovn, s32)
+VQMOVN_IMPL(uint8x8_t, uint16x8_t, vqmovn, u16)
+VQMOVN_IMPL(int8x8_t, int16x8_t, vqmovn, s16)
+
+#undef VMOVN_IMPL
+#undef VQMOVN_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_MOVN_H__ */
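
Usage sketch contrasting the two narrowing flavours (not part of the patch): vmovn truncates, vqmovn saturates.

    #include "arm_compute/core/NEON/wrapper/intrinsics/movn.h"

    // Narrow u16 lanes back to u8, clamping to [0, 255] instead of
    // truncating; resolves to vqmovn_u16.
    uint8x8_t narrow_saturating(uint16x8_t v)
    {
        return arm_compute::wrapper::vqmovn(v);
    }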
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/mul.h b/arm_compute/core/NEON/wrapper/intrinsics/mul.h
index c1908fc7b3..932b746965 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/mul.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/mul.h
@@ -43,6 +43,9 @@ VMUL_IMPL(int16x4_t, int16x4_t, vmul, s16)
VMUL_IMPL(uint32x2_t, uint32x2_t, vmul, u32)
VMUL_IMPL(int32x2_t, int32x2_t, vmul, s32)
VMUL_IMPL(float32x2_t, float32x2_t, vmul, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMUL_IMPL(float16_t, float16x4_t, vmul, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
VMUL_IMPL(uint8_t, uint8x16_t, vmulq, u8)
VMUL_IMPL(int8_t, int8x16_t, vmulq, s8)
@@ -51,6 +54,9 @@ VMUL_IMPL(int16_t, int16x8_t, vmulq, s16)
VMUL_IMPL(uint32_t, uint32x4_t, vmulq, u32)
VMUL_IMPL(int32_t, int32x4_t, vmulq, s32)
VMUL_IMPL(float32x4_t, float32x4_t, vmulq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMUL_IMPL(float16_t, float16x8_t, vmulq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VMUL_IMPL
} // namespace wrapper
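
A sketch of the new FP16 multiply overload, guarded like the wrapper (not part of the patch):

    #include "arm_compute/core/NEON/wrapper/intrinsics/mul.h"

    #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    // Element-wise square of four half-precision lanes; resolves to vmul_f16.
    float16x4_t square(float16x4_t v)
    {
        return arm_compute::wrapper::vmul(v, v);
    }
    #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC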
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/padd.h b/arm_compute/core/NEON/wrapper/intrinsics/padd.h
new file mode 100644
index 0000000000..5ee2173df8
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/padd.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_PADD_H__
+#define __ARM_COMPUTE_WRAPPER_PADD_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VPADD_IMPL(stype, vtype, prefix, postfix) \
+ inline vtype vpadd(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VPADD_IMPL(uint8x8_t, uint8x8_t, vpadd, u8)
+VPADD_IMPL(int8x8_t, int8x8_t, vpadd, s8)
+VPADD_IMPL(uint16x4_t, uint16x4_t, vpadd, u16)
+VPADD_IMPL(int16x4_t, int16x4_t, vpadd, s16)
+VPADD_IMPL(uint32x2_t, uint32x2_t, vpadd, u32)
+VPADD_IMPL(int32x2_t, int32x2_t, vpadd, s32)
+VPADD_IMPL(float32x2_t, float32x2_t, vpadd, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VPADD_IMPL(float16x4_t, float16x4_t, vpadd, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VPADD_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_PADD_H__ */
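
vpadd adds adjacent lane pairs, which makes two-lane reductions a one-liner; a sketch (not part of the patch):

    #include "arm_compute/core/NEON/wrapper/intrinsics/getlane.h"
    #include "arm_compute/core/NEON/wrapper/intrinsics/padd.h"

    // vpadd(v, v) leaves a0+a1 in both lanes; extract lane 0 for the sum.
    float sum_two_lanes(float32x2_t v)
    {
        using namespace arm_compute::wrapper;
        return vgetlane(vpadd(v, v), 0);
    }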
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/store.h b/arm_compute/core/NEON/wrapper/intrinsics/store.h
index be89602c09..35c427902e 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/store.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/store.h
@@ -45,6 +45,9 @@ VSTORE_IMPL(int32_t, int32x2_t, vst1, s32)
//VSTORE_IMPL(uint64_t, 1, vst1, u64)
//VSTORE_IMPL(int64_t, 1, vst1, s64)
VSTORE_IMPL(float, float32x2_t, vst1, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VSTORE_IMPL(float16_t, float16x4_t, vst1, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
VSTORE_IMPL(uint8_t, uint8x16_t, vst1q, u8)
VSTORE_IMPL(int8_t, int8x16_t, vst1q, s8)
@@ -55,6 +58,9 @@ VSTORE_IMPL(int32_t, int32x4_t, vst1q, s32)
//VSTORE_IMPL(uint64_t, 2, vst1q, u64)
//VSTORE_IMPL(int64_t, 2, vst1q, s64)
VSTORE_IMPL(float, float32x4_t, vst1q, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VSTORE_IMPL(float16_t, float16x8_t, vst1q, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VSTORE_IMPL
} // namespace wrapper
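
Taken together, the FP16 load, multiply, and store overloads let a kernel express a full roundtrip without type postfixes. A closing sketch under the same feature-macro assumption (not part of the patch):

    #include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

    #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    // Load eight half-precision values, scale them, and store the result:
    // vld1q_f16 -> vmulq_f16 -> vst1q_f16 via the wrapper overloads.
    void scale_f16(const float16_t *src, float16_t *dst, float16x8_t scale)
    {
        using namespace arm_compute::wrapper;
        vstore(dst, vmul(vloadq(src), scale));
    }
    #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC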