From 061eefd1b9935a9be158657416dc2e0c88e2f532 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Thu, 23 Apr 2020 13:40:00 +0100 Subject: COMPMID-3405: Fixed issue in gemmlowp_mm_native - OpenCL The issue was related to the creation of the transpose macro with N0 = 1 Change-Id: I1006bee583b99302d29346ffe7a54361f32d1ede Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3086 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/core/CL/cl_kernels/gemm_helpers.h | 44 +++++++++++++++++++++++++++++++++-- src/core/CL/cl_kernels/gemmlowp.cl | 4 ++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/core/CL/cl_kernels/gemm_helpers.h b/src/core/CL/cl_kernels/gemm_helpers.h index 79a9f094df..af43477bd4 100644 --- a/src/core/CL/cl_kernels/gemm_helpers.h +++ b/src/core/CL/cl_kernels/gemm_helpers.h @@ -693,6 +693,34 @@ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL, (X##8).s##IDX_COL, (X##9).s##IDX_COL, (X##A).s##IDX_COL, (X##B).s##IDX_COL, (X##C).s##IDX_COL, (X##D).s##IDX_COL, (X##E).s##IDX_COL, (X##F).s##IDX_COL); /** @} */ // end of group COLUMN_VECTORn +/** Create a new vector containing the values at the given index. 
Utility macros for transposing a column-vector + * @name COLUMN_VECTOR_SCALARn + * + * @param[in] IDX_COL The index value + * @param[in] BASENAME The basename of the destination vectors + * @param[in] X The basename of the source vectors + * @param[in] TYPE The data type of the destination vectors + * @{ + */ +#define COLUMN_VECTOR_SCALAR1(IDX_COL, BASENAME, X, TYPE) \ + TYPE BASENAME##IDX_COL = (TYPE)((X##0)); +#define COLUMN_VECTOR_SCALAR2(IDX_COL, BASENAME, X, TYPE) \ + VEC_DATA_TYPE(TYPE, 2) \ + BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0), (X##1)); +#define COLUMN_VECTOR_SCALAR3(IDX_COL, BASENAME, X, TYPE) \ + VEC_DATA_TYPE(TYPE, 3) \ + BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0), (X##1), (X##2)); +#define COLUMN_VECTOR_SCALAR4(IDX_COL, BASENAME, X, TYPE) \ + VEC_DATA_TYPE(TYPE, 4) \ + BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0), (X##1), (X##2), (X##3)); +#define COLUMN_VECTOR_SCALAR8(IDX_COL, BASENAME, X, TYPE) \ + VEC_DATA_TYPE(TYPE, 8) \ + BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7)); +#define COLUMN_VECTOR_SCALAR16(IDX_COL, BASENAME, X, TYPE) \ + VEC_DATA_TYPE(TYPE, 16) \ + BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7), (X##8), (X##9), (X##A), (X##B), (X##C), (X##D), (X##E), (X##F)); +/** @} */ // end of group COLUMN_VECTOR_SCALARn + /** Create transposed vectors of the given vectors * @name TRANSPOSE_K0Xn * @@ -703,9 +731,9 @@ * @{ */ #define TRANSPOSE_K0X1(K0, BASENAME, B, TYPE) \ - COLUMN_VECTOR(K0, 0, BASENAME, B, TYPE); + COLUMN_VECTOR_SCALAR(K0, 0, BASENAME, B, TYPE); #define TRANSPOSE_K0X2(K0, BASENAME, B, TYPE) \ - TRANSPOSE_K0X1(K0, BASENAME, B, TYPE); \ + COLUMN_VECTOR(K0, 0, BASENAME, B, TYPE); \ COLUMN_VECTOR(K0, 1, BASENAME, B, TYPE); #define TRANSPOSE_K0X3(K0, BASENAME, B, TYPE) \ TRANSPOSE_K0X2(K0, BASENAME, B, TYPE); \ @@ -744,6 +772,18 @@ CONCAT(COLUMN_VECTOR, K0) \ (IDX_COL, BASENAME, B, TYPE); +/** 
Create column vectors to contain the values at the given index. Utility macro for transposing a column-vector + * + * @param[in] K0 The number of source vectors + * @param[in] IDX_COL The index value + * @param[in] BASENAME The basename of the destination vectors + * @param[in] B The basename of the source vectors + * @param[in] TYPE The data type of the destination vectors + */ +#define COLUMN_VECTOR_SCALAR(K0, IDX_COL, BASENAME, B, TYPE) \ + CONCAT(COLUMN_VECTOR_SCALAR, K0) \ + (IDX_COL, BASENAME, B, TYPE); + /** Create transposed vectors form the given source vectors * * @param[in] K0 The size of source vectors diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index d9625e7117..080a6409eb 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -100,6 +100,10 @@ #endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8) /** Specialized macros to perform a broadcast dot product operation between one vector "a" and N0 vectors "b" of size K0 [1,16] */ +#define ARM_DOT_K0X1(k0, a, b, c) \ + ({ \ + ARM_DOT_K0(k0, (a), (b##0), (c)); \ + }) #define ARM_DOT_K0X2(k0, a, b, c) \ ({ \ ARM_DOT_K0(k0, (a), (b##0), (c.s0)); \ -- cgit v1.2.1