aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp')
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp35
1 files changed, 32 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp
index fb21bfc863..1363b939ab 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#ifdef __aarch64__
#include "../std_transforms_fixed.hpp"
+#include "../performance_parameters.hpp"
namespace arm_gemm {
@@ -34,7 +35,7 @@ void a64_gemm_s8_4x4(const int8_t *, const int8_t *, int32_t *, int, int, int);
#include "arm_gemm.hpp"
-class gemm_s8_4x4 {
+class cls_a64_gemm_s8_4x4 {
public:
typedef int8_t operand_type;
typedef int32_t result_type;
@@ -56,10 +57,38 @@ public:
// Use the standard fixed size transforms.
StdTransformsFixed<operand_type, result_type, 4, 4, 16> transforms = {};
+ StdTransformsFixed<operand_type, result_type, 4, 4, 16, true> transforms_quantized = {};
+
+ // Return the performance figures for this kernel, selected by the type T
+ // and by the CPU model reported through `ci`.
+ //
+ // T  - type used to pick a table: int32_t and int8_t each have their own
+ //      set of figures; any other type falls through to the final return.
+ // ci - CPU description; only get_cpu_model() is queried. It is dereferenced
+ //      without a null check, so callers must pass a valid pointer.
+ //
+ // NOTE(review): the three numbers in each initializer are presumably the
+ // kernel/prepare/merge throughput figures declared in
+ // performance_parameters.hpp - confirm the field order there.
+ template<typename T>
+ static PerformanceParameters get_performance_parameters(const CPUInfo *ci) {
+ // Figures used when T is int32_t.
+ if (std::is_same<T, int32_t>::value) {
+ switch (ci->get_cpu_model()) {
+ case CPUModel::A55r0:
+ case CPUModel::A55r1:
+ return { 3.12, 2.93, 1.84 };
+ case CPUModel::A510:
+ return { 3.32, 2.56, 2.63 };
+ default:
+ return { 7.97, 3.72, 7.31 };
+ }
+ }
+
+ // Figures used when T is int8_t.
+ if (std::is_same<T, int8_t>::value) {
+ switch(ci->get_cpu_model()) {
+ case CPUModel::A55r0:
+ case CPUModel::A55r1:
+ return { 3.12, 2.18, 0.09 };
+ case CPUModel::A510:
+ return { 3.33, 2.89, 0.09 };
+ default:
+ return { 7.97, 3.74, 0.34 };
+ }
+ }
+
+ // No figures exist for any other T. Without an explicit return here,
+ // control would flow off the end of a non-void function, which is
+ // undefined behaviour. NOTE(review): all-zero figures are a placeholder
+ // for "no data" - confirm the preferred value with the kernel selection code.
+ return { 0.0, 0.0, 0.0 };
+ }
kern_type kernel=a64_gemm_s8_4x4;
- gemm_s8_4x4(const CPUInfo *ci) { UNUSED(ci); }
+ // Constructor: the CPUInfo pointer is accepted but left unnamed and unused; the body is empty.
+ cls_a64_gemm_s8_4x4(const CPUInfo *) { }
};
} // namespace arm_gemm