diff options
author | Murray Kornelsen <murray.kornelsen@mail.mcgill.ca> | 2022-07-13 21:40:26 -0400 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2022-09-14 06:48:39 +0000 |
commit | 6e09e1404c635d948cf20eb6b4b5747dfb6656f2 (patch) | |
tree | 006199bd21b8a1330e1f1c86be60084bfb466706 /src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp | |
parent | a4814e8394ffdd7e268614d54cc22e30648f48ff (diff) | |
download | ComputeLibrary-6e09e1404c635d948cf20eb6b4b5747dfb6656f2.tar.gz |
INT8 Quantized MeanStdDevNorm (LayerNorm)
Implements LayerNorm for qasymm8 tensors.
Uses uint8x16 loads and stores.
Summation is performed in integer arithmetic (vpaddl).
Normalization is performed in float32 before requantizing back to int8.
Signed-off-by: Murray Kornelsen <murray.kornelsen@mail.mcgill.ca>
Change-Id: I2407c8b34717fb47adab98791bd76fb8a3c62f4a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7922
Comments-Addressed: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp | 9 |
1 file changed, 7 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp index 7d8fc7ec7f..37e88a8565 100644 --- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp @@ -55,7 +55,7 @@ struct MeanStdDevNormKernel MeanStdDevNormUKernelPtr ukernel; }; -static const MeanStdDevNormKernel available_kernels[] = +static const std::vector<MeanStdDevNormKernel> available_kernels = { { "fp32_neon_meanstddevnorm", @@ -69,6 +69,11 @@ static const MeanStdDevNormKernel available_kernels[] = REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_meanstddevnorm) }, #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + { + "qasymm8_neon_meanstddevnorm", + [](const MeanStdDevNormSelectorData & data) { return data.dt == DataType::QASYMM8; }, + REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_meanstddevnorm) + }, }; /** Micro-kernel selector @@ -95,7 +100,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, f ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() > 2, "Input tensor cannot have more than 2 dimensions"); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QASYMM8); // Checks performed when output is configured if((output != nullptr) && (output->total_size() != 0)) |