diff options
author | Pablo Tello <pablo.tello@arm.com> | 2017-11-23 11:01:10 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:41:06 +0000 |
commit | 27066c2bed8fb88843308a70f375fd49835edd55 (patch) | |
tree | 4ef72c1bd6e11446ad3e185e9e8c8562a3322ccd /src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp | |
parent | 53d8c5c0185b2ee177857d4a008e4e3de218472c (diff) | |
download | ComputeLibrary-27066c2bed8fb88843308a70f375fd49835edd55.tar.gz |
COMPMID-632: Integrated Assembly kernel GEMM S8 for Arm Cortex-A53.
Change-Id: I07faa0c984759a1b5db1e5de71f4ab3eef5888d8
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110334
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp')
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp | 17 |
1 file changed, 15 insertions, 2 deletions
```diff
diff --git a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
index 1bf437eb5f..0423777217 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
@@ -29,6 +29,7 @@
 #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.h"
 #include "arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.h"
 #include "arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h"
 #include "arm_compute/core/TensorInfo.h"
@@ -41,10 +42,10 @@ namespace arm_compute
 {
 #include "arm_compute/core/NEON/kernels/assembly/gemm_interleaved.hpp"
+#include "arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s16_12x8.hpp"
 #include "arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s8_12x8.hpp"
 #include "arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s8_4x4.hpp"
 #include "arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_4x4.hpp"
-
 } // namespace arm_compute
 
 using namespace arm_compute;
 
@@ -91,7 +92,19 @@ void NEGEMMLowpAssemblyMatrixMultiplyCore::configure(const ITensor *a, const ITe
     }
     else
 #elif defined(ARM_COMPUTE_AARCH64_V8A)
-    if(1)
+    if(ci.CPU == CPUTarget::A53)
+    {
+        // Configure matrix multiply kernel
+        GemmInterleaved<gemm_s16_12x8, int8_t, int32_t> gemm(&ci, M, N, K, false, false);
+        _workspace.allocator()->init(TensorInfo(TensorShape{ (gemm.get_working_size() + workspace_alignment - 1) * NEScheduler::get().num_threads() }, 1, DataType::U8));
+        _memory_group.manage(&_workspace);
+        // Configure matrix multiplication kernel
+        auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpAArch64A53Kernel>();
+        k->configure(a, b, output, &_workspace, 1.f, 1.f);
+        _mm_kernel = std::move(k);
+        _workspace.allocator()->allocate();
+    }
+    else if(1) // Generic v8a kernel
     {
         switch(a->info()->data_type())
         {
```