aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
diff options
context:
space:
mode:
author Pablo Tello <pablo.tello@arm.com> 2017-11-23 11:01:10 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:41:06 +0000
commit 27066c2bed8fb88843308a70f375fd49835edd55 (patch)
tree 4ef72c1bd6e11446ad3e185e9e8c8562a3322ccd /src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
parent 53d8c5c0185b2ee177857d4a008e4e3de218472c (diff)
download ComputeLibrary-27066c2bed8fb88843308a70f375fd49835edd55.tar.gz
COMPMID-632: Integrated Assembly kernel GEMM S8 for Arm Cortex-A53.
Change-Id: I07faa0c984759a1b5db1e5de71f4ab3eef5888d8
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110334
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp')
-rw-r--r-- src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp 17
1 files changed, 15 insertions, 2 deletions
diff --git a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
index 1bf437eb5f..0423777217 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
@@ -29,6 +29,7 @@
#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.h"
#include "arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.h"
#include "arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h"
#include "arm_compute/core/TensorInfo.h"
@@ -41,10 +42,10 @@
namespace arm_compute
{
#include "arm_compute/core/NEON/kernels/assembly/gemm_interleaved.hpp"
+#include "arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s16_12x8.hpp"
#include "arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s8_12x8.hpp"
#include "arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s8_4x4.hpp"
#include "arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_4x4.hpp"
-
} // namespace arm_compute
using namespace arm_compute;
@@ -91,7 +92,19 @@ void NEGEMMLowpAssemblyMatrixMultiplyCore::configure(const ITensor *a, const ITe
}
else
#elif defined(ARM_COMPUTE_AARCH64_V8A)
- if(1)
+ if(ci.CPU == CPUTarget::A53)
+ {
+ // Configure matrix multiply kernel
+ GemmInterleaved<gemm_s16_12x8, int8_t, int32_t> gemm(&ci, M, N, K, false, false);
+ _workspace.allocator()->init(TensorInfo(TensorShape{ (gemm.get_working_size() + workspace_alignment - 1) * NEScheduler::get().num_threads() }, 1, DataType::U8));
+ _memory_group.manage(&_workspace);
+ // Configure matrix multiplication kernel
+ auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpAArch64A53Kernel>();
+ k->configure(a, b, output, &_workspace, 1.f, 1.f);
+ _mm_kernel = std::move(k);
+ _workspace.allocator()->allocate();
+ }
+ else if(1) // Generic v8a kernel
{
switch(a->info()->data_type())
{