From a2bb80ea7111509c24caad8629533089decef430 Mon Sep 17 00:00:00 2001
From: Mohammed Suhail Munshi
Date: Mon, 19 Jun 2023 14:57:57 +0100
Subject: Use MatMul in fully connected layer with dynamic weights when supported

- Use MatMul kernels in FC layer when using dynamic weights without broadcasting or bias.
- Fix minor typo in IClMatMulNativeKernelConfig.h

Partially Resolves : [COMPMID-6193]

Signed-off-by: Mohammed Suhail Munshi
Change-Id: Id494062b5b4f4e75ff9714c202dde941955afa52
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9797
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
Reviewed-by: Gunes Bayir
Benchmark: Arm Jenkins
---
 src/core/CL/cl_kernels/common/mat_mul_quantized.cl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/core')

diff --git a/src/core/CL/cl_kernels/common/mat_mul_quantized.cl b/src/core/CL/cl_kernels/common/mat_mul_quantized.cl
index bd415bb4a7..8cf857dd84 100644
--- a/src/core/CL/cl_kernels/common/mat_mul_quantized.cl
+++ b/src/core/CL/cl_kernels/common/mat_mul_quantized.cl
@@ -21,9 +21,9 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "activation_float_helpers.h"
 #include "helpers.h"
 #include "tile_helpers.h"
-#include "activation_float_helpers.h"
 
 #if defined(MAT_MUL_NATIVE_QUANTIZED_NT_NT)
 /** This OpenCL kernel performs the batch matrix multiplication (BatchMatMul): LHS non-transposed, RHS non-transposed - buffer only
@@ -189,7 +189,7 @@ __kernel void mat_mul_native_quantized_nt_nt(
     {
         LOOP_UNROLLING(int, j, 0, 1, N0,
         {
-            acc[i].s[j] += ((int)RHS_OFFSET) * a_sum[0].s[i] + ((int)(LHS_OFFSET)) * b_sum[0].s[j];
+            acc[i].s[j] -= ((int)RHS_OFFSET) * a_sum[0].s[i] + ((int)(LHS_OFFSET)) * b_sum[0].s[j];
         })
     })
 
@@ -368,7 +368,7 @@ __kernel void mat_mul_native_quantized_nt_t(
     {
         LOOP_UNROLLING(int, j, 0, 1, N0,
         {
-            acc[i].s[j] += ((int)(RHS_OFFSET)) * a_sum[0].s[i] + ((int)(LHS_OFFSET)) * b_sum[0].s[j];
+            acc[i].s[j] -= ((int)(RHS_OFFSET)) * a_sum[0].s[i] + ((int)(LHS_OFFSET)) * b_sum[0].s[j];
         })
     })
 
@@ -549,7 +549,7 @@ __kernel void mat_mul_native_quantized_t_nt(
     {
         LOOP_UNROLLING(int, j, 0, 1, N0,
         {
-            acc[i].s[j] += ((int)(RHS_OFFSET)) * a_sum[0].s[i] + ((int)(LHS_OFFSET)) * b_sum[0].s[j];
+            acc[i].s[j] -= ((int)(RHS_OFFSET)) * a_sum[0].s[i] + ((int)(LHS_OFFSET)) * b_sum[0].s[j];
         })
     })
 
@@ -734,7 +734,7 @@ __kernel void mat_mul_native_quantized_t_t(
     {
         LOOP_UNROLLING(int, j, 0, 1, N0,
         {
-            acc[i].s[j] += ((int)RHS_OFFSET) * a_sum[0].s[i] + ((int)(LHS_OFFSET)) * b_sum[0].s[j];
+            acc[i].s[j] -= ((int)RHS_OFFSET) * a_sum[0].s[i] + ((int)(LHS_OFFSET)) * b_sum[0].s[j];
         })
     })
 
--
cgit v1.2.1