diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp | 39 |
1 files changed, 34 insertions, 5 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp
index 8bde3a6943..b747a1cf84 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,7 @@

 #ifdef __aarch64__

+#include "../performance_parameters.hpp"
 #include "../std_transforms_fixed.hpp"

 namespace arm_gemm {
@@ -32,7 +33,7 @@ namespace arm_gemm {
 // Kernel definition
 void a64_gemm_u8_4x4(const uint8_t *Apanel, const uint8_t *Bpanel, uint32_t *Cpanel, int ablocks, int bblocks, int K);

-class gemm_u8_4x4 {
+class cls_a64_gemm_u8_4x4 {
 public:
     typedef uint8_t operand_type;
     typedef uint32_t result_type;
@@ -64,12 +65,40 @@ public:

     // Use the standard fixed size transforms.
     StdTransformsFixed<operand_type, result_type, 4, 4, 16> transforms = {};
+    StdTransformsFixed<operand_type, result_type, 4, 4, 16, true> transforms_quantized = {};
+
+    template<typename T>
+    static PerformanceParameters get_performance_parameters(const CPUInfo *ci) {
+        if (std::is_same<T, uint32_t>::value) {
+            switch (ci->get_cpu_model()) {
+                case CPUModel::A55r0:
+                case CPUModel::A55r1:
+                    return { 2.25, 2.92, 1.84 };
+                case CPUModel::A510:
+                    return { 2.64, 2.72, 2.64 };
+                default:
+                    return { 7.95, 3.76, 7.27 };
+            }
+        }
+
+        if (std::is_same<T, uint8_t>::value) {
+            switch(ci->get_cpu_model()) {
+                case CPUModel::A55r0:
+                case CPUModel::A55r1:
+                    return { 2.25, 2.18, 0.09 };
+                case CPUModel::A510:
+                    return { 2.64, 1.79, 0.10 };
+                default:
+                    return { 7.95, 4.09, 0.33 };
+            }
+        }
+
+        return { 0.0 };
+    }

     kern_type kernel = a64_gemm_u8_4x4;

-    gemm_u8_4x4(const CPUInfo *ci) {
-        UNUSED(ci);
-    }
+    cls_a64_gemm_u8_4x4(const CPUInfo *) { }
 };

 } // namespace arm_gemm