aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
diff options
context:
space:
mode:
authorSang-Hoon Park <sang-hoon.park@arm.com>2020-03-13 14:56:05 +0000
committerSang-Hoon Park <sang-hoon.park@arm.com>2020-04-07 09:00:09 +0000
commit0d008f77b0085619c446d0ab5dc1228a80776706 (patch)
treee1f6e91bf8da63e8ef98e11ab8eb6a6972a284f2 /arm_compute/core/NEON/wrapper/intrinsics/getlane.h
parent4df2cf3177129d10500d30056bf8404418f703d6 (diff)
downloadComputeLibrary-0d008f77b0085619c446d0ab5dc1228a80776706.tar.gz
COMPMID-3281: Implement QSYMM16 Layer Normalization for NEON QLSTM
- Reference kernel is modified to use the same algorithm as NEON kernel.
- NEON kernel is implemented.
- Tests for validation and run are added.

Change-Id: I3533bc2bd12c6e9cc75d837ecf193f74ceddf796
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2948
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/wrapper/intrinsics/getlane.h')
-rw-r--r--arm_compute/core/NEON/wrapper/intrinsics/getlane.h17
1 files changed, 16 insertions, 1 deletions
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/getlane.h b/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
index 5cd390fee4..533bf63603 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -185,6 +185,20 @@ VGETLANE_IMPL_4(float16_t, float16x4_t, f16)
} \
}
+#define VGETQLANE_IMPL_2(stype, vtype, postfix) /* Generates an inline vgetlane() that returns lane 0 or 1 of a `vtype` vector as `stype`, using vgetq_lane_<postfix>. */ \
+ inline stype vgetlane(const vtype vector, const unsigned int lane) \
+ { \
+ switch(lane) /* map the runtime lane onto a literal index; NOTE(review): presumably because the intrinsic needs a compile-time constant lane - confirm against ACLE */ \
+ { \
+ case 0: \
+ return vgetq_lane_##postfix(vector, 0); \
+ case 1: \
+ return vgetq_lane_##postfix(vector, 1); \
+ default: /* any lane other than 0 or 1 is reported as an error */ \
+ ARM_COMPUTE_ERROR("Invalid lane"); \
+ } \
+ }
+
VGETQLANE_IMPL_16(uint8_t, uint8x16_t, u8)
VGETQLANE_IMPL_16(int8_t, int8x16_t, s8)
VGETQLANE_IMPL_8(uint16_t, uint16x8_t, u16)
@@ -192,6 +206,7 @@ VGETQLANE_IMPL_8(int16_t, int16x8_t, s16)
VGETQLANE_IMPL_4(uint32_t, uint32x4_t, u32)
VGETQLANE_IMPL_4(int32_t, int32x4_t, s32)
VGETQLANE_IMPL_4(float, float32x4_t, f32)
+VGETQLANE_IMPL_2(int64_t, int64x2_t, s64) // vgetlane() for int64x2_t via vgetq_lane_s64; accepts lanes 0 and 1 only
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
VGETQLANE_IMPL_8(float16_t, float16x8_t, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC