aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h
diff options
context:
space:
mode:
author: Freddie Liardet <frederick.liardet@arm.com> 2022-05-16 14:09:10 +0100
committer: Gunes Bayir <gunes.bayir@arm.com> 2022-07-22 10:18:41 +0000
commit: e572dff7adc334a98ac4a0326d66037451d5d079 (patch)
tree: 9c4db3d743078de9bda67dfed674e3f371a4e238 /src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h
parent: e87120731ca65c54b082734af07f748ac9651427 (diff)
download: ComputeLibrary-e572dff7adc334a98ac4a0326d66037451d5d079.tar.gz
Add GemmLowp MMUL Reshaped Only Rhs Support for QASYMM8/QASYMM8_SIGNED
This patch introduces a GEMMLowp routine that is optimized for Arm(R) Mali(TM)-G715 and Arm(R) Mali(TM)-G615.

Resolves: COMPMID-5398
Signed-off-by: Freddie Liardet <frederick.liardet@arm.com>
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I8d06453645688f3658b6c7c06f1ebc25a2505661
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7932
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h')
-rw-r--r-- src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h | 38
1 files changed, 20 insertions, 18 deletions
diff --git a/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h b/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h
index 1965e3f97b..6fa4352bf8 100644
--- a/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h
+++ b/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,7 @@ namespace kernels
class ClCastKernel;
class ClGemmLowpMatrixMultiplyNativeKernel;
class ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel;
+class ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel;
class ClGemmReshapeRhsMatrixKernel;
class ClGemmLowpMatrixAReductionKernel;
class ClGemmLowpMatrixBReductionKernel;
@@ -120,14 +121,15 @@ private:
private:
// Kernels used
- std::unique_ptr<kernels::ClCastKernel> _weights_to_qasymm8;
- std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyNativeKernel> _mm_native_kernel;
- std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel;
- std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel> _mtx_b_reshape_kernel;
- std::unique_ptr<kernels::ClGemmLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
- std::unique_ptr<kernels::ClGemmLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
- std::unique_ptr<kernels::ClGemmLowpOffsetContributionKernel> _offset_contribution_kernel;
- std::unique_ptr<kernels::ClGemmLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
+ std::unique_ptr<kernels::ClCastKernel> _weights_to_qasymm8;
+ std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyNativeKernel> _mm_native_kernel;
+ std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel;
+ std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel> _mm_reshaped_only_rhs_mmul_kernel;
+ std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel> _mtx_b_reshape_kernel;
+ std::unique_ptr<kernels::ClGemmLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
+ std::unique_ptr<kernels::ClGemmLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
+ std::unique_ptr<kernels::ClGemmLowpOffsetContributionKernel> _offset_contribution_kernel;
+ std::unique_ptr<kernels::ClGemmLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
// Temporary tensors
TensorInfo _qasymm8_weights{};
@@ -138,15 +140,15 @@ private:
TensorInfo _gemm_output_stage_multipliers{};
TensorInfo _gemm_output_stage_shifts{};
- int32_t _a_offset{ 0 };
- int32_t _b_offset{ 0 };
- bool _is_gemm_reshaped{ true };
- bool _reshape_b_only_on_first_run{ false };
- bool _run_output_stage{ false };
- bool _convert_to_qasymm8{ false };
- bool _run_offset_contribution{ false };
- bool _is_prepared{ false };
- GEMMInfo _gemm_info{};
+ int32_t _a_offset{ 0 };
+ int32_t _b_offset{ 0 };
+ bool _reshape_b_only_on_first_run{ false };
+ bool _run_output_stage{ false };
+ bool _convert_to_qasymm8{ false };
+ bool _run_offset_contribution{ false };
+ bool _is_prepared{ false };
+ GEMMInfo _gemm_info{};
+ CLGEMMKernelType _gemm_kernel_type{};
experimental::MemoryRequirements _aux_mem{};
};