From c67bb3d23ca7aa0e36f8c7c3c4eacbc0e2dbb36a Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Thu, 1 Feb 2018 12:11:55 +0000 Subject: COMPMID-878: Integrate AArch32 SGEMM Arm Cortex-A55r1 kernel from RSH Change-Id: If766dd0e6bbfe6209da6e630b8aba42d2e40ea87 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118556 Tested-by: Jenkins Reviewed-by: Pablo Tello --- .../kernels/assembly/kernels/a32_sgemm_8x6.hpp | 16 +- .../assembly/kernels/a32_sgemm_8x6/a55r1.hpp | 413 +++++++++++++++++++++ 2 files changed, 426 insertions(+), 3 deletions(-) create mode 100644 arm_compute/core/NEON/kernels/assembly/kernels/a32_sgemm_8x6/a55r1.hpp diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a32_sgemm_8x6.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a32_sgemm_8x6.hpp index 65830641b6..d78d33c647 100644 --- a/arm_compute/core/NEON/kernels/assembly/kernels/a32_sgemm_8x6.hpp +++ b/arm_compute/core/NEON/kernels/assembly/kernels/a32_sgemm_8x6.hpp @@ -27,6 +27,7 @@ // Actual kernel implementations #include "a32_sgemm_8x6/a53.hpp" +#include "a32_sgemm_8x6/a55r1.hpp" #include "a32_sgemm_8x6/generic.hpp" // 8x6 SGEMM "strategy" class. @@ -62,9 +63,18 @@ public: kern_type kernel = nullptr; sgemm_8x6(const CPUInfo *ci) { - kernel = a32_sgemm_8x6; - if(ci->CPU == CPUTarget::A53) { - kernel = a32_sgemm_8x6_a53; + switch(ci->CPU) { + case CPUTarget::A53: + kernel = a32_sgemm_8x6_a53; + break; + + case CPUTarget::A55_DOT: + kernel = a32_sgemm_8x6_a55r1; + break; + + default: + kernel = a32_sgemm_8x6; + break; } } }; diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a32_sgemm_8x6/a55r1.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a32_sgemm_8x6/a55r1.hpp new file mode 100644 index 0000000000..4f0ef7cd21 --- /dev/null +++ b/arm_compute/core/NEON/kernels/assembly/kernels/a32_sgemm_8x6/a55r1.hpp @@ -0,0 +1,413 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +#ifdef __arm__ + +#include + +#include "../../asmlib.hpp" + +// Kernel implementation. +// +// Assume that "Apanel" points to a chunk of A blocks (each size 6xK) in read-order. +// Assume that "Bpanel" points to a chunk of B blocks (each size 8xK) in read-order. +// Assume that "Cpanel" points to a chunk of C output blocks (each size +// 8x6), the chunks being arranged in a row major fashion. +// +// Note that the intent of this is that either ablocks or bblocks will be 1 +// - this construction allows the output loop to proceed in either order. + +inline void a32_sgemm_8x6_a55r1(const float *Apanel, const float *Bpanel, float *Cpanel, int ablocks, int bblocks, int K) { + const float *a_ptr = Apanel; + float *c_ptr = Cpanel; + + /* Work out starting values for "k" and "tails" in the inner loop. */ + int tails_initial = (K & 3); + if (tails_initial == 0) { + tails_initial = 4; + } + + int k_initial = ((K+3)/4) - 1; + + for (int yb=0; yb