diff options
Diffstat (limited to 'src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h')
-rw-r--r-- | src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h | 38 |
1 files changed, 20 insertions, 18 deletions
diff --git a/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h b/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h index 1965e3f97b..6fa4352bf8 100644 --- a/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h +++ b/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,7 @@ namespace kernels class ClCastKernel; class ClGemmLowpMatrixMultiplyNativeKernel; class ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel; +class ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel; class ClGemmReshapeRhsMatrixKernel; class ClGemmLowpMatrixAReductionKernel; class ClGemmLowpMatrixBReductionKernel; @@ -120,14 +121,15 @@ private: private: // Kernels used - std::unique_ptr<kernels::ClCastKernel> _weights_to_qasymm8; - std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyNativeKernel> _mm_native_kernel; - std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel; - std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel> _mtx_b_reshape_kernel; - std::unique_ptr<kernels::ClGemmLowpMatrixAReductionKernel> _mtx_a_reduction_kernel; - std::unique_ptr<kernels::ClGemmLowpMatrixBReductionKernel> _mtx_b_reduction_kernel; - std::unique_ptr<kernels::ClGemmLowpOffsetContributionKernel> _offset_contribution_kernel; - std::unique_ptr<kernels::ClGemmLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel; + std::unique_ptr<kernels::ClCastKernel> _weights_to_qasymm8; + std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyNativeKernel> _mm_native_kernel; + std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel; + std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel> _mm_reshaped_only_rhs_mmul_kernel; + std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel> _mtx_b_reshape_kernel; + std::unique_ptr<kernels::ClGemmLowpMatrixAReductionKernel> _mtx_a_reduction_kernel; + std::unique_ptr<kernels::ClGemmLowpMatrixBReductionKernel> _mtx_b_reduction_kernel; + std::unique_ptr<kernels::ClGemmLowpOffsetContributionKernel> _offset_contribution_kernel; + std::unique_ptr<kernels::ClGemmLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel; // Temporary tensors TensorInfo _qasymm8_weights{}; @@ -138,15 +140,15 @@ private: TensorInfo _gemm_output_stage_multipliers{}; TensorInfo _gemm_output_stage_shifts{}; - int32_t _a_offset{ 0 }; - int32_t _b_offset{ 0 }; - bool _is_gemm_reshaped{ true }; - bool _reshape_b_only_on_first_run{ false }; - bool _run_output_stage{ false }; - bool _convert_to_qasymm8{ false }; - bool _run_offset_contribution{ false }; - bool _is_prepared{ false }; - GEMMInfo _gemm_info{}; + int32_t _a_offset{ 0 }; + int32_t _b_offset{ 0 }; + bool _reshape_b_only_on_first_run{ false }; + bool _run_output_stage{ false }; + bool _convert_to_qasymm8{ false }; + bool _run_offset_contribution{ false }; + bool _is_prepared{ false }; + GEMMInfo _gemm_info{}; + CLGEMMKernelType _gemm_kernel_type{}; experimental::MemoryRequirements _aux_mem{}; }; |