From 1fe48cafde21a316011fff32a5b0f98a74fbe2b9 Mon Sep 17 00:00:00 2001 From: Ethan Doe Date: Wed, 1 Mar 2023 23:19:26 +0000 Subject: NEGEMMLowpMatrixMultiplyCore should be configured for optimized int8 kernel. Currently the validation routine incorrectly prevents the optimized INT8 GEMM kernel from being used when the input is QASYMM8 and the output type is S32. This change allows QASYMM8 input with S32 output to leverage the optimized kernel. Signed-off-by: Ethan Doe Change-Id: I65b060f522795db07d6d4df86fb7c6ddd1c626d4 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9250 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Reviewed-by: Pablo Marquez Tello Benchmark: Arm Jenkins --- src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp index bf3ec5a1ac..9af98be41d 100644 --- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp +++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -752,7 +752,8 @@ Status CpuGemmAssemblyDispatch::validate(const ITensorInfo *a, const ITensorInfo ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::BFLOAT16 && d->data_type() != DataType::F32, "Only F32 output supported for BFLOAT16 input"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::U8 && d->data_type() != DataType::U32, "Only U32 output supported for U8 input"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::S8 && d->data_type() != DataType::S32, "Only S32 output supported for S8 input"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::QASYMM8 && d->data_type() != DataType::QASYMM8, "Only QASYMM8 output supported for QASYMM8 input"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::QASYMM8 && (d->data_type() != DataType::QASYMM8 && d->data_type() != DataType::S32), + "Only QASYMM8/S32 output 
supported for QASYMM8 input"); arm_compute::WeightFormat expected_weight_format; const Status ret = CpuGemmAssemblyDispatch::has_opt_impl(expected_weight_format, a, b, c, d, info); if((bool)ret && expected_weight_format != arm_compute::WeightFormat::ANY) -- cgit v1.2.1