diff options
author | David Mansell <David.Mansell@arm.com> | 2018-07-06 14:52:52 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:10 +0000 |
commit | d93991e290618a685b67506c78090350e6aee43f (patch) | |
tree | 1d5c3b3017cfccd3f0ec3f24e8e11334cf977ce3 /src/core/NEON/kernels/arm_gemm/kernels | |
parent | dec32a9edd4b3c6dc55c60d7436e79af6be58c3d (diff) | |
download | ComputeLibrary-d93991e290618a685b67506c78090350e6aee43f.tar.gz |
COMPMID-1380: Pre-work for SVE support.
This patch makes the infrastructure changes needed so that SVE
kernels can be added later.
Change-Id: Ide5bccac2f47278e93fff3d648231aee2d5f8c2e
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139070
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/kernels')
9 files changed, 138 insertions, 89 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp index 01bf1f9297..06e62456dc 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp @@ -25,6 +25,8 @@ #ifdef __arm__ +#include "../std_transforms_fixed.hpp" + namespace arm_gemm { // Actual kernel implementations @@ -47,20 +49,21 @@ public: typedef void (*kern_type)(const float *, const float *, float *, int, int, int); - /* Describes the data layout for A input */ - static const int A_interleave = 6; - static const int A_block = 1; - static const int A_transpose = 0; + /* Kernel blocking parameters */ + static int out_width() { + return 8; + } - /* Same for B input */ - static const int B_interleave = 8; - static const int B_block = 1; - static const int B_transpose = 1; + static int out_height() { + return 6; + } - /* Kernel blocking parameters */ - static const int out_width = 8; - static const int out_height = 6; - static const int k_unroll = 1; + static int k_unroll() { + return 1; + } + + // Use the standard fixed size transforms. 
+ StdTransformsFixed<operand_type, result_type, 6, 8> transforms = {}; kern_type kernel = a32_sgemm_8x6; diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8.hpp index 27700b47d1..95a2bc2fbc 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8.hpp @@ -25,6 +25,8 @@ #ifdef __aarch64__ +#include "../std_transforms_fixed.hpp" + namespace arm_gemm { // Actual kernel implementations @@ -45,20 +47,21 @@ public: typedef void (*kern_type)(const int16_t *, const int16_t *, int32_t *, int, int, int); - /* Describes the data layout for A input */ - static const int A_interleave = 8; - static const int A_block = 1; - static const int A_transpose = 0; + /* Kernel blocking parameters */ + static int out_width() { + return 12; + } + + static int out_height() { + return 8; + } - /* Same for B input */ - static const int B_interleave = 12; - static const int B_block = 1; - static const int B_transpose = 1; + static int k_unroll() { + return 1; + } - /* Kernel blocking parameters */ - static const int out_width = 12; - static const int out_height = 8; - static const int k_unroll = 1; + // Use the standard fixed size transforms. 
+ StdTransformsFixed<operand_type, result_type, 8, 12> transforms = {}; kern_type kernel = a64_gemm_s16_asimd_12x8; diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8.hpp index cb97270c24..fdc0200435 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8.hpp @@ -27,6 +27,8 @@ #include "arm_gemm.hpp" +#include "../std_transforms_fixed.hpp" + namespace arm_gemm { // Load the actual kernel @@ -40,20 +42,21 @@ public: typedef void (*kern_type)(const int8_t *, const int8_t *, int32_t *, int, int, int); - /* Describes the data layout for A input */ - static const int A_interleave = 8; - static const int A_block = 4; - static const bool A_transpose = false; + /* Kernel blocking parameters */ + static int out_width() { + return 12; + } - /* Same for B input */ - static const int B_interleave = 12; - static const int B_block = 4; - static const bool B_transpose = true; + static int out_height() { + return 8; + } - /* Kernel blocking parameters */ - static const int out_width = 12; - static const int out_height = 8; - static const int k_unroll = 4; + static int k_unroll() { + return 4; + } + + // Use the standard fixed size transforms. 
+ StdTransformsFixed<operand_type, result_type, 8, 12, 4> transforms = {}; kern_type kernel = a64_gemm_s8_12x8; diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp index b5b07b2c56..be7ead9f48 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp @@ -25,6 +25,8 @@ #ifdef __aarch64__ +#include "../std_transforms_fixed.hpp" + namespace arm_gemm { // Load the actual kernel @@ -39,20 +41,21 @@ public: typedef void (*kern_type)(const int8_t *, const int8_t *, int32_t *, int, int, int); - /* Describes the data layout for A input */ - static const int A_interleave = 4; - static const int A_block = 16; - static const bool A_transpose = false; + /* Kernel blocking parameters */ + static int out_width() { + return 4; + } + + static int out_height() { + return 4; + } - /* Same for B input */ - static const int B_interleave = 4; - static const int B_block = 16; - static const bool B_transpose = true; + static int k_unroll() { + return 16; + } - /* Kernel blocking parameters */ - static const int out_width = 4; - static const int out_height = 4; - static const int k_unroll = 16; + // Use the standard fixed size transforms. 
+ StdTransformsFixed<operand_type, result_type, 4, 4, 16> transforms = {}; kern_type kernel=a64_gemm_s8_4x4; diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8.hpp index 13dd570677..d2692ba77f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8.hpp @@ -25,6 +25,8 @@ #ifdef __aarch64__ +#include "../std_transforms_fixed.hpp" + namespace arm_gemm { // Actual kernel implementations @@ -45,20 +47,21 @@ public: typedef void (*kern_type)(const uint16_t *, const uint16_t *, uint32_t *, int, int, int); - /* Describes the data layout for A input */ - static const int A_interleave = 8; - static const int A_block = 1; - static const int A_transpose = 0; + /* Kernel blocking parameters */ + static int out_width() { + return 12; + } + + static int out_height() { + return 8; + } - /* Same for B input */ - static const int B_interleave = 12; - static const int B_block = 1; - static const int B_transpose = 1; + static int k_unroll() { + return 1; + } - /* Kernel blocking parameters */ - static const int out_width = 12; - static const int out_height = 8; - static const int k_unroll = 1; + // Use the standard fixed size transforms. 
+ StdTransformsFixed<operand_type, result_type, 8, 12> transforms = {}; kern_type kernel = a64_gemm_u16_asimd_12x8; diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8.hpp index c67aed7275..a252abfd3e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8.hpp @@ -27,6 +27,8 @@ #include "arm_gemm.hpp" +#include "../std_transforms_fixed.hpp" + namespace arm_gemm { // Load the actual kernel @@ -51,9 +53,20 @@ public: static const bool B_transpose = true; /* Kernel blocking parameters */ - static const int out_width = 12; - static const int out_height = 8; - static const int k_unroll = 4; + static int out_width() { + return 12; + } + + static int out_height() { + return 8; + } + + static int k_unroll() { + return 4; + } + + // Use the standard fixed sized transforms. + StdTransformsFixed<operand_type, result_type, 8, 12, 4> transforms = {}; kern_type kernel = a64_gemm_u8_12x8; diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp index 23f4c1d84f..2da3ecd4f8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp @@ -25,6 +25,8 @@ #ifdef __aarch64__ +#include "../std_transforms_fixed.hpp" + namespace arm_gemm { // Kernel definition @@ -48,14 +50,24 @@ public: static const bool B_transpose = true; /* Kernel blocking parameters */ - static const int out_width = 4; - static const int out_height = 4; - static const int k_unroll = 16; + static int out_width() { + return 4; + } + + static int out_height() { + return 4; + } + + static int k_unroll() { + return 16; + } + + // Use the standard fixed size transforms. 
+ StdTransformsFixed<operand_type, result_type, 4, 4, 16> transforms = {}; - kern_type kernel = nullptr; + kern_type kernel = a64_gemm_u8_4x4; gemm_u8_4x4(const CPUInfo *ci) { - kernel = a64_gemm_u8_4x4; } }; diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8.hpp index fe74b994f5..911a4ebb01 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8.hpp @@ -27,6 +27,8 @@ #include "arm_gemm.hpp" +#include "../std_transforms_fixed.hpp" + namespace arm_gemm { // Actual kernel implementations @@ -44,17 +46,21 @@ public: typedef void (*kern_type)(const __fp16 *, const __fp16 *, __fp16 *, int, int, int); - static const int A_block = 1; - static const int A_interleave = 8; - static const bool A_transpose = false; + /* Kernel blocking parameters */ + static int out_width() { + return 24; + } - static const int B_block = 1; - static const int B_interleave = 24; - static const bool B_transpose = true; + static int out_height() { + return 8; + } + + static int k_unroll() { + return 1; + } - static const int out_width = 24; - static const int out_height = 8; - static const int k_unroll = 1; + // Use the standard fixed size transforms. 
+ StdTransformsFixed<operand_type, result_type, 8, 24> transforms = {}; // Default to the generic kernel kern_type kernel = a64_hgemm_asimd_24x8; diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8.hpp index c91d50469f..10d1069417 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8.hpp @@ -25,6 +25,8 @@ #ifdef __aarch64__ +#include "../std_transforms_fixed.hpp" + namespace arm_gemm { // Actual kernel implementations @@ -48,20 +50,21 @@ public: typedef void (*kern_type)(const float *, const float *, float *, int, int, int); - /* Describes the data layout for A input */ - static const int A_interleave = 8; - static const int A_block = 1; - static const int A_transpose = 0; + /* Kernel blocking parameters */ + static int out_width() { + return 12; + } - /* Same for B input */ - static const int B_interleave = 12; - static const int B_block = 1; - static const int B_transpose = 1; + static int out_height() { + return 8; + } - /* Kernel blocking parameters */ - static const int out_width = 12; - static const int out_height = 8; - static const int k_unroll = 1; + static int k_unroll() { + return 1; + } + + // Use the standard fixed size transforms. + StdTransformsFixed<operand_type, result_type, 8, 12> transforms = {}; kern_type kernel=a64_sgemm_asimd_12x8; |