diff options
author | Sang-Hoon Park <sang-hoon.park@arm.com> | 2020-03-13 14:56:05 +0000 |
---|---|---|
committer | Sang-Hoon Park <sang-hoon.park@arm.com> | 2020-04-07 09:00:09 +0000 |
commit | 0d008f77b0085619c446d0ab5dc1228a80776706 (patch) | |
tree | e1f6e91bf8da63e8ef98e11ab8eb6a6972a284f2 /arm_compute/core/NEON/wrapper/intrinsics/getlane.h | |
parent | 4df2cf3177129d10500d30056bf8404418f703d6 (diff) | |
download | ComputeLibrary-0d008f77b0085619c446d0ab5dc1228a80776706.tar.gz |
COMPMID-3281: Implement QSYMM16 Layer Normalization for NEON QLSTM
- Reference kernel is modified to use the same algorithm as NEON kernel.
- NEON kernel is implemented.
- Tests for validation and run are added.
Change-Id: I3533bc2bd12c6e9cc75d837ecf193f74ceddf796
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2948
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/wrapper/intrinsics/getlane.h')
-rw-r--r-- | arm_compute/core/NEON/wrapper/intrinsics/getlane.h | 17 |
1 files changed, 16 insertions, 1 deletions
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/getlane.h b/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
index 5cd390fee4..533bf63603 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/getlane.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -185,6 +185,20 @@ VGETLANE_IMPL_4(float16_t, float16x4_t, f16)
         }                                                              \
     }
 
+#define VGETQLANE_IMPL_2(stype, vtype, postfix)                        \
+    inline stype vgetlane(const vtype vector, const unsigned int lane) \
+    {                                                                  \
+        switch(lane)                                                   \
+        {                                                              \
+            case 0:                                                    \
+                return vgetq_lane_##postfix(vector, 0);                \
+            case 1:                                                    \
+                return vgetq_lane_##postfix(vector, 1);                \
+            default:                                                   \
+                ARM_COMPUTE_ERROR("Invalid lane");                     \
+        }                                                              \
+    }
+
 VGETQLANE_IMPL_16(uint8_t, uint8x16_t, u8)
 VGETQLANE_IMPL_16(int8_t, int8x16_t, s8)
 VGETQLANE_IMPL_8(uint16_t, uint16x8_t, u16)
@@ -192,6 +206,7 @@ VGETQLANE_IMPL_8(int16_t, int16x8_t, s16)
 VGETQLANE_IMPL_4(uint32_t, uint32x4_t, u32)
 VGETQLANE_IMPL_4(int32_t, int32x4_t, s32)
 VGETQLANE_IMPL_4(float, float32x4_t, f32)
+VGETQLANE_IMPL_2(int64_t, int64x2_t, s64)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 VGETQLANE_IMPL_8(float16_t, float16x8_t, f16)
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC |