From 1c9efebf4344e8db97e6d9282b2bf48b52090b58 Mon Sep 17 00:00:00 2001 From: giuros01 Date: Fri, 11 Jan 2019 14:04:43 +0000 Subject: Issue COMPMID-1835: Remove CLGEMMInterleave4x4Kernel and replace with CLGEMMReshapeLHSMatrixKernel Change-Id: Id6a1bd78f9b1698b64a004e4adebc41002b15745 Reviewed-on: https://review.mlplatform.org/496 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice --- src/runtime/CL/functions/CLGEMM.cpp | 39 +++++++++++++--------- .../CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 25 +++++++++++--- 2 files changed, 43 insertions(+), 21 deletions(-) (limited to 'src/runtime/CL') diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp index 9048b85114..a3612f3b5d 100644 --- a/src/runtime/CL/functions/CLGEMM.cpp +++ b/src/runtime/CL/functions/CLGEMM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -35,7 +35,8 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/ITensorAllocator.h" -using namespace arm_compute; +namespace arm_compute +{ using namespace arm_compute::misc::shape_calculator; namespace @@ -117,7 +118,6 @@ inline void select_gemm_configuration(unsigned int m, unsigned int n, GEMMLHSMat CLGEMM::CLGEMM(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), - _interleave_kernel(), _mm_kernel(), _ma_kernel(), _reshape_lhs_kernel(), @@ -153,7 +153,7 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor * const GPUTarget gpu_target = CLScheduler::get().target(); // Set the target for the kernels - _interleave_kernel.set_target(gpu_target); + _reshape_lhs_kernel.set_target(gpu_target); _mm_kernel.set_target(gpu_target); // Arguments used by GEMMReshapeInfo @@ -180,6 +180,13 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor * rhs_info.interleave = false; rhs_info.transpose = false; + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = 4; + lhs_info.k0 = 4; + lhs_info.v0 = mult_interleave4x4_height; + lhs_info.interleave = true; + lhs_info.transpose = true; + // Check if we need to reshape the matrix A and matrix B _is_interleaved_transposed = is_interleaved_transposed(m, n, k, a->info()->data_type(), _reshape_b_only_on_first_run, gpu_target); @@ -219,8 +226,7 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor * else { // Configure interleave kernel - _interleave_kernel.configure(a, &_tmp_a, mult_interleave4x4_height, gemm_info.reinterpret_input_as_3d()); - + _reshape_lhs_kernel.configure(a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d()); // Configure transpose kernel _reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info); } @@ -296,6 +302,13 @@ Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso rhs_info.interleave = false; rhs_info.transpose = false; + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = 4; + lhs_info.k0 = 4; + lhs_info.v0 = mult_interleave4x4_height; + lhs_info.interleave = true; + lhs_info.transpose = true; + // Check if we need to reshape the matrix A and matrix B const bool run_interleave_transpose = is_interleaved_transposed(m, n, k, a->data_type(), reshape_b_only_on_first_run, gpu_target); @@ -335,8 +348,8 @@ Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso else { // Validate interleave kernel - auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_interleaved_shape(*a, mult_interleave4x4_height, gemm_info.reinterpret_input_as_3d()))); - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMInterleave4x4Kernel::validate(a, &tmp_a_info, mult_interleave4x4_height, gemm_info.reinterpret_input_as_3d())); + auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d()))); + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeLHSMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d())); // Validate transpose kernel auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info)); @@ -367,14 +380,7 @@ void CLGEMM::run() if(_is_interleaved_transposed) { // Run interleave kernel - if(_is_G76_path) - { - CLScheduler::get().enqueue(_reshape_lhs_kernel, false); - } - else - { - CLScheduler::get().enqueue(_interleave_kernel, false); - } + CLScheduler::get().enqueue(_reshape_lhs_kernel, false); if(!_reshape_b_only_on_first_run) { @@ -417,3 +423,4 @@ void CLGEMM::prepare() _is_prepared = true; } } +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp index cf20bc6a7a..edb3107173 100644 --- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -32,7 +32,8 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" -using namespace arm_compute; +namespace arm_compute +{ using namespace arm_compute::misc::shape_calculator; namespace @@ -109,6 +110,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor const ICLTensor *matrix_a = a; const ICLTensor *matrix_b = b; GEMMRHSMatrixInfo rhs_info; + GEMMLHSMatrixInfo lhs_info; // Arguments used by GEMMReshapeInfo // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m, n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo @@ -126,6 +128,11 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor rhs_info.h0 = mult_transpose1xW_width; rhs_info.interleave = false; rhs_info.transpose = false; + lhs_info.m0 = 4; + lhs_info.k0 = 4; + lhs_info.v0 = mult_interleave4x4_height; + lhs_info.interleave = true; + lhs_info.transpose = unroll_block; // Check if we need to reshape the matrix A and matrix B _is_interleaved_transposed = is_interleaved_transposed(m, n, k, _reshape_b_only_on_first_run, gpu_target); @@ -145,7 +152,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor } // Configure interleave kernel - _mtx_a_reshape_kernel.configure(a, &_tmp_a, mult_interleave4x4_height, gemm_info.reinterpret_input_as_3d(), unroll_block); + _mtx_a_reshape_kernel.configure(a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d()); // Configure transpose kernel _mtx_b_reshape_kernel.configure(b, &_tmp_b, rhs_info); @@ -242,8 +249,10 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso TensorInfo tmp_a_info{}; TensorInfo tmp_b_info{}; GEMMRHSMatrixInfo rhs_info; + GEMMLHSMatrixInfo lhs_info; bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); + const bool unroll_block = dot8_supported(CLKernelLibrary::get().get_device()); const int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); const int n = b->dimension(0); const int k = a->dimension(0); @@ -255,6 +264,11 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso rhs_info.h0 = mult_transpose1xW_width; rhs_info.interleave = false; rhs_info.transpose = false; + lhs_info.m0 = 4; + lhs_info.k0 = 4; + lhs_info.v0 = mult_interleave4x4_height; + lhs_info.interleave = true; + lhs_info.transpose = unroll_block; bool reshape_matrices = is_interleaved_transposed(m, n, k, gemm_info.reshape_b_only_on_first_run(), CLScheduler::get().target()); @@ -272,8 +286,8 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso matrix_b_info = &tmp_b_info; // Validate interleave kernel - auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_interleaved_shape(*a, mult_interleave4x4_height, gemm_info.reinterpret_input_as_3d()))); - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMInterleave4x4Kernel::validate(a, &tmp_a_info, mult_interleave4x4_height, gemm_info.reinterpret_input_as_3d())); + auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d()))); + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeLHSMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d())); // Validate transpose kernel @@ -408,3 +422,4 @@ void CLGEMMLowpMatrixMultiplyCore::prepare() _is_prepared = true; } } +} // namespace arm_compute -- cgit v1.2.1