From cc441cb2321adefb0fb244b592d32af43766695d Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Wed, 22 Nov 2017 13:37:19 +0000 Subject: COMPMID-696: Integrated assembly SGEMM for Arm Cortex-A55 and Arm Cortex-A55r1. Change-Id: I9c8fa13d53c7310cacf3446faa42026fdf3396fa Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110209 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Michalis Spyrou --- .../kernels/assembly/kernels/a64_sgemm_12x8.hpp | 9 + .../assembly/kernels/a64_sgemm_12x8/a55.hpp | 368 +++++++++++++++++++++ .../assembly/kernels/a64_sgemm_12x8/a55r1.hpp | 360 ++++++++++++++++++++ 3 files changed, 737 insertions(+) create mode 100644 arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a55.hpp create mode 100644 arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a55r1.hpp (limited to 'arm_compute/core/NEON/kernels/assembly') diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8.hpp index e229e215ef..603ad8dc0a 100644 --- a/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8.hpp +++ b/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8.hpp @@ -28,6 +28,9 @@ // Actual kernel implementations #include "a64_sgemm_12x8/generic.hpp" #include "a64_sgemm_12x8/a53.hpp" +#include "a64_sgemm_12x8/a55.hpp" +#include "a64_sgemm_12x8/a55r1.hpp" + // 12x8 SGEMM "strategy" class. // @@ -66,6 +69,12 @@ public: if (ci->CPU == CPUTarget::A53) { kernel = a64_sgemm_asimd_12x8_a53; } + else if (ci->CPU == CPUTarget::A55) { + kernel = a64_sgemm_asimd_12x8_a55; + } + else if (ci->CPU == CPUTarget::A55_DOT) { + kernel = a64_sgemm_asimd_12x8_a55r1; + } } }; diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a55.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a55.hpp new file mode 100644 index 0000000000..85d8a502f8 --- /dev/null +++ b/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a55.hpp @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +inline void a64_sgemm_asimd_12x8_a55(const float *Apanel, const float *Bpanel, float *Cpanel, int ablocks, int bblocks, int K) { + const float *a_ptr = Apanel; + float *c_ptr = Cpanel; + + for (int yb=0; yb