From 6e09e1404c635d948cf20eb6b4b5747dfb6656f2 Mon Sep 17 00:00:00 2001 From: Murray Kornelsen Date: Wed, 13 Jul 2022 21:40:26 -0400 Subject: INT8 Quantized MeanStdDevNorm (LayerNorm) Implements LayerNorm for qasymm8 tensors. Uses uint8x16 loads and stores. Summation is performed in integer arithmetic (vpaddl). Normalization is performed in float32 before requantizing back to int8. Signed-off-by: Murray Kornelsen Change-Id: I2407c8b34717fb47adab98791bd76fb8a3c62f4a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7922 Comments-Addressed: Pablo Marquez Tello Comments-Addressed: Arm Jenkins Reviewed-by: Viet-Hoa Do Reviewed-by: Pablo Marquez Tello Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/core/NEON') diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp index 7d8fc7ec7f..37e88a8565 100644 --- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp @@ -55,7 +55,7 @@ struct MeanStdDevNormKernel MeanStdDevNormUKernelPtr ukernel; }; -static const MeanStdDevNormKernel available_kernels[] = +static const std::vector<MeanStdDevNormKernel> available_kernels = { { "fp32_neon_meanstddevnorm", @@ -69,6 +69,11 @@ static const MeanStdDevNormKernel available_kernels[] = REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_meanstddevnorm) }, #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + { + "qasymm8_neon_meanstddevnorm", + [](const MeanStdDevNormSelectorData & data) { return data.dt == DataType::QASYMM8; }, + REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_meanstddevnorm) + }, }; /** Micro-kernel selector @@ -95,7 +100,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, f ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); 
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() > 2, "Input tensor cannot have more than 2 dimensions"); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QASYMM8); // Checks performed when output is configured if((output != nullptr) && (output->total_size() != 0)) -- cgit v1.2.1