From 40b441905760846e9fdaca283a4a4de038a6ef0d Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do
Date: Thu, 22 Sep 2022 10:24:23 +0100
Subject: Optimize CPU add layer on quantized data

* Use fixed-point arithmetic where possible.
* Various optimizations for the FP32-based implementation. This
  implementation is kept as the fall-back solution in case of
  unrealistic quantization parameters that exceed the range of the
  fixed-point solution.

Resolves: COMPMID-5458
Signed-off-by: Viet-Hoa Do
Change-Id: I221d2d3801ecaae4fe0b7cf6ae8ef00ca3743665
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8317
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 tests/validation/NEON/ArithmeticAddition.cpp | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'tests/validation/NEON')

diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index f94e329c9c..322cd7bea5 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -89,7 +89,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
 }
 
 DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
-    combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
+    combine(combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
                     framework::dataset::make("DataType", { DataType::F32,
                                                            DataType::F16,
                                                            DataType::U8,
@@ -100,21 +100,24 @@ DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
                                                            DataType::QSYMM16 })),
                     framework::dataset::make("CanInterpretAs1D", {true, false})),
-    combine(combine(framework::dataset::make("CpuExt", std::string("SVE")),
+                    framework::dataset::make("CanUseFixedpoint", {true, false})),
+    combine(combine(combine(framework::dataset::make("CpuExt", std::string("SVE")),
                     framework::dataset::make("DataType", { DataType::F32,
                                                            DataType::F16,
                                                            DataType::U8,
                                                            DataType::S16,
                                                            DataType::S32
                                                          })),
-                    framework::dataset::make("CanInterpretAs1D", {true, false}))),
-    combine(combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+                    framework::dataset::make("CanInterpretAs1D", {true, false})),
+                    framework::dataset::make("CanUseFixedpoint", {true, false}))),
+    combine(combine(combine(framework::dataset::make("CpuExt", std::string("SVE2")),
                     framework::dataset::make("DataType", { DataType::QASYMM8,
                                                            DataType::QASYMM8_SIGNED,
                                                            DataType::QSYMM16
                                                          })),
-                    framework::dataset::make("CanInterpretAs1D", {false}))),
-    cpu_ext, data_type, can_interpret_inputs_as_1d_array)
+                    framework::dataset::make("CanInterpretAs1D", {false})),
+                    framework::dataset::make("CanUseFixedpoint", {true, false}))),
+    cpu_ext, data_type, can_interpret_inputs_as_1d_array, can_use_fixedpoint)
 {
     using namespace cpu::kernels;
 
@@ -124,18 +127,23 @@ DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
     cpu_isa.sve2 = (cpu_ext == "SVE2");
     cpu_isa.fp16 = (data_type == DataType::F16);
 
-    const auto *selected_impl = CpuAddKernel::get_implementation(CpuAddKernelDataTypeISASelectorData{data_type, cpu_isa, can_interpret_inputs_as_1d_array}, cpu::KernelSelectionType::Preferred);
+    const auto *selected_impl = CpuAddKernel::get_implementation(CpuAddKernelDataTypeISASelectorData{data_type, cpu_isa, can_interpret_inputs_as_1d_array, can_use_fixedpoint}, cpu::KernelSelectionType::Preferred);
 
     ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
 
     bool float_or_integer = (data_type == DataType::F32 || data_type == DataType::F16 ||
                              data_type == DataType::U8 || data_type == DataType::S16 || data_type == DataType::S32);
+    bool qasymm8_any      = (data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED);
 
     std::string expected;
    if(can_interpret_inputs_as_1d_array && float_or_integer)
     {
         expected = "neon_" + cpu_impl_dt(data_type) + "_add_as_1d_array";
     }
+    else if(qasymm8_any && can_use_fixedpoint)
+    {
+        expected = "neon_" + cpu_impl_dt(data_type) + "_add_fixedpoint";
+    }
     else
     {
         expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_add";
-- 
cgit v1.2.1
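
As background for the kernel selection exercised above: a quantized add can be
lowered to integer-only arithmetic by folding the input/output scales into
precomputed fixed-point multipliers. The scalar sketch below is a minimal
illustration of that idea for uniform asymmetric QASYMM8 data; the names
(UniformQuantInfo, add_qasymm8_fixedpoint, the 16-bit shift) are hypothetical
and do not mirror the real CpuAddKernel internals, which vectorize the loop
and, as the commit message notes, fall back to the FP32-based path when the
quantization parameters exceed the range of the fixed-point representation.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Hypothetical, simplified types for illustration only.
struct UniformQuantInfo
{
    float   scale;
    int32_t offset;
};

// Reference semantics: out = quantize(dequantize(in0) + dequantize(in1)).
// With m0 = round((s0 / so) * 2^shift) and m1 = round((s1 / so) * 2^shift),
// the computation collapses to two integer multiplies, an add and a rounding
// shift. If s0/so or s1/so is too large for the multiplier format
// (unrealistic quantization parameters), a kernel must fall back to FP32.
uint8_t add_qasymm8_fixedpoint(uint8_t in0, uint8_t in1,
                               UniformQuantInfo q0, UniformQuantInfo q1,
                               UniformQuantInfo qo)
{
    constexpr int shift = 16; // fractional bits of the fixed-point multipliers

    // Per-tensor constants; in a real kernel these are hoisted out of the loop.
    const int32_t m0   = static_cast<int32_t>(std::lround(q0.scale / qo.scale * (1 << shift)));
    const int32_t m1   = static_cast<int32_t>(std::lround(q1.scale / qo.scale * (1 << shift)));
    const int64_t bias = (static_cast<int64_t>(qo.offset) << shift)
                         - static_cast<int64_t>(m0) * q0.offset
                         - static_cast<int64_t>(m1) * q1.offset;

    // Integer-only inner computation, round-to-nearest on the final
    // (arithmetic) shift, then clamp to the output type's range.
    const int64_t acc = static_cast<int64_t>(m0) * in0
                        + static_cast<int64_t>(m1) * in1
                        + bias + (1 << (shift - 1));
    const int32_t out = static_cast<int32_t>(acc >> shift);

    return static_cast<uint8_t>(std::clamp(out, 0, 255));
}

The new CanUseFixedpoint axis in the test above exercises exactly this split:
when fixed-point is usable for QASYMM8/QASYMM8_SIGNED the test expects the
neon_*_add_fixedpoint kernel, otherwise the generic *_add implementation.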