From c0b6f76561580414f08633a804fc548ccad65659 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 2 Nov 2020 01:37:17 +0000 Subject: COMPMID-3776: Indirect GEMM Signed-off-by: Georgios Pinitas Change-Id: I51a1b0f098bc3a8c408c50c92221e4df3061e12c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4343 Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/core/NEON/kernels/arm_gemm/utils.hpp | 93 +++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) (limited to 'src/core/NEON/kernels/arm_gemm/utils.hpp') diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp index 6e47a97c78..6d483a3b9d 100644 --- a/src/core/NEON/kernels/arm_gemm/utils.hpp +++ b/src/core/NEON/kernels/arm_gemm/utils.hpp @@ -24,6 +24,8 @@ #pragma once +#include "arm_gemm.hpp" + #include <cstddef> // Macro for unreachable code (e.g. impossible default cases on switch) @@ -32,6 +34,8 @@ // Paranoid option for the above with assert // #define UNREACHABLE(why) assert(0 && why) +namespace arm_gemm { + template<typename T> inline T iceildiv(const T a, const T b) { return (a + b - 1) / b; @@ -48,7 +52,94 @@ inline T roundup(const T a, const T b) { } } -namespace arm_gemm { +enum class VLType { + None, + SVE, +}; + +template<typename T> +struct IndirectOutputArg { + struct { + T *base; + size_t stride; + } direct = {}; + struct { + T * const *ptr; + size_t offset; + } indirect = {}; + bool is_indirect; + + // Direct + IndirectOutputArg(T *base, size_t stride) : is_indirect(false) { + direct.base = base; + direct.stride = stride; + } + + // Indirect + IndirectOutputArg(T * const * ptr, size_t offset) : is_indirect(true) { + indirect.ptr = ptr; + indirect.offset = offset; + } + + IndirectOutputArg() : is_indirect(false) { + direct.base = nullptr; + direct.stride = 0; + } +}; + +// Check that the provided Requantize32 doesn't have a left shift. 
+inline bool quant_no_left_shift(const Requantize32 &qp) { + if (qp.per_channel_requant) { + return (qp.per_channel_left_shifts == nullptr); + } else { + return (qp.per_layer_left_shift == 0); + } +} + +// Check that the provided Requantize32 is compatible with the "symmetric" hybrid kernels. These don't include row +// sums, so the 'b_offset' has to be zero. +inline bool quant_hybrid_symmetric(const Requantize32 &qp) { + return quant_no_left_shift(qp) && qp.b_offset == 0; +} + +// Check that the provided Requantize32 is compatible with the "asymmetric" hybrid kernels. These don't support per +// channel quantization. Technically b_offset==0 cases would work, but it is a waste to sum and then multiply by 0... +inline bool quant_hybrid_asymmetric(const Requantize32 &qp) { + return quant_no_left_shift(qp) /* && qp.b_offset != 0 */ && qp.per_channel_requant==false; +} + +template<typename T> +struct IndirectInputArg { + struct { + const T *base; + size_t stride; + } direct = {}; + struct { + const T * const * const * ptr; + unsigned int start_row; + unsigned int start_col; + } indirect = {}; + bool is_indirect; + + // Direct + IndirectInputArg(const T *base, size_t stride) : is_indirect(false) { + direct.base = base; + direct.stride = stride; + } + + // Indirect + IndirectInputArg(const T * const * const *ptr, unsigned int start_row, unsigned int start_col) : is_indirect(true) { + indirect.ptr = ptr; + indirect.start_row = start_row; + indirect.start_col = start_col; + } + + IndirectInputArg() : is_indirect(false) { + direct.base = nullptr; + direct.stride = 0; + } +}; + namespace utils { namespace { -- cgit v1.2.1