aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2021-01-14 13:43:40 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2021-01-18 12:39:30 +0000
commit: 33e03074c36d85de87e9032a2583b04ce8ddcd6b (patch)
tree: 4442ec5b5022fa5681d689e6ccf3a6423efa8a93 /src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp
parent: 8d5337ef18901f1b54d0c062ae7486bc5a4c6610 (diff)
download: ComputeLibrary-33e03074c36d85de87e9032a2583b04ce8ddcd6b.tar.gz
Cycle estimate-based kernel selection for dot product quantized s8/u8 kernels
Resolves: COMPMID-3990
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: If840c79209940535450f4ea1cbf6b0ec646a168e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4866
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp 19
1 files changed, 9 insertions, 10 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp
index 876b63c811..ca2696bebd 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,14 +25,15 @@
#if defined(__aarch64__) && (defined(FP16_KERNELS) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC))
#include "../std_transforms_fixed.hpp"
+#include "../performance_parameters.hpp"
#define ARGLIST \
- unsigned int, const unsigned int *, \
- IndirectInputArg<__fp16>, \
- size_t, size_t, \
- const __fp16 *, \
- IndirectOutputArg<__fp16>, \
- const __fp16 *, Activation, bool
+ unsigned int, const unsigned int *, \
+ IndirectInputArg<__fp16>, \
+ size_t, size_t, \
+ const __fp16 *, \
+ IndirectOutputArg<__fp16>, \
+ const __fp16 *, Activation, bool
namespace arm_gemm
{
@@ -71,12 +72,10 @@ public:
StdTransformsFixed<operand_type, result_type, 6, 32, 1> transforms = {};
- static PerformanceParameters get_performance_parameters(const CPUInfo *ci)
- {
+ static PerformanceParameters get_performance_parameters(const CPUInfo *ci) {
switch (ci->get_cpu_model()) {
case CPUModel::A55r1:
return { 5.22 };
-
default:
return { 14.53 };
}