author     Georgios Pinitas <georgios.pinitas@arm.com>  2021-04-22 21:13:21 +0100
committer  Georgios Pinitas <georgios.pinitas@arm.com>  2021-05-18 14:48:39 +0000
commit     856f66e6c61b77d03f754cd0fa8439891f0e4aca (patch)
tree       f9379cd0853ac407109e54c3d53b385ceee066c2 /src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
parent     37f4b2ef1ea225a90ccb563fcb2c08f8fb0fb5d5 (diff)
download   ComputeLibrary-856f66e6c61b77d03f754cd0fa8439891f0e4aca.tar.gz
Port CLGEMM to memory injecting interface
Moves the following kernels:
- CLGEMMMatrixMultiplyKernel
- CLGEMMMatrixMultiplyNativeKernel
- CLGEMMMatrixMultiplyReshapedKernel
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel

Moves the following functions:
- CLGEMM

Introduces facilities to ease the handling of auxiliary temporary buffers under the new run interface. These are:
- CLAuxTensorHandler: allows wrapping of workspace buffer memory into CLBuffer objects
- Ability to inject a TensorInfo into an allocator without transferring ownership, which reduces copy overhead where needed

Resolves: COMPMID-4188

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I7055435d831b05b749b26302082e4ac45f26dfb0
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5498
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
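For context, a minimal, hypothetical sketch (not part of this patch) of the memory-injecting pattern the diff below adopts: the ported kernel is configured on ITensorInfo objects only, and the concrete tensors are supplied at run time through an ITensorPack handed to CLScheduler::enqueue_op. The reshape_rhs helper, its tensor names, and the use of CLKernelLibrary::get().get_compile_context() are illustrative assumptions, not code from the patch.

#include <memory>

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"

using namespace arm_compute;

// Hypothetical helper: reshape an RHS matrix with the ported, stateless kernel.
void reshape_rhs(CLTensor &weights, CLTensor &reshaped_weights, const GEMMRHSMatrixInfo &rhs_info)
{
    auto kernel = std::make_unique<opencl::kernels::ClGemmReshapeRhsMatrixKernel>();

    // The kernel is configured on ITensorInfo objects only; it captures no tensor memory.
    kernel->configure(CLKernelLibrary::get().get_compile_context(),
                      weights.info(), reshaped_weights.info(), rhs_info);

    // Concrete tensors are injected at run time through an ITensorPack.
    ITensorPack pack;
    pack.add_const_tensor(TensorType::ACL_SRC, &weights);
    pack.add_tensor(TensorType::ACL_DST, &reshaped_weights);
    CLScheduler::get().enqueue_op(*kernel, pack, false);
}

This mirrors the pattern the patch applies in configure() and prepare() below: the function owns the tensors and builds the pack, so the kernel stays stateless and its workspace can be injected from outside.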
Diffstat (limited to 'src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp')
-rw-r--r--   src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp   18
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 5a9ff7990f..099a2c980f 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -40,7 +40,7 @@
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
#include "utils/TypePrinter.h"
@@ -127,7 +127,7 @@ inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs
TensorInfo tmp_b_info{};
// Validate reshape RHS kernel
auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
- if(!bool(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
+ if(!bool(opencl::kernels::ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
{
return false;
}
@@ -192,7 +192,7 @@ CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemo
_weights_to_qasymm8(std::make_unique<CLDepthConvertLayerKernel>()),
_mm_native_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
_mm_reshaped_only_rhs_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
- _mtx_b_reshape_kernel(std::make_unique<CLGEMMReshapeRHSMatrixKernel>()),
+ _mtx_b_reshape_kernel(std::make_unique<opencl::kernels::ClGemmReshapeRhsMatrixKernel>()),
_mtx_a_reduction_kernel(std::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
_mtx_b_reduction_kernel(std::make_unique<CLGEMMLowpMatrixBReductionKernel>()),
_offset_contribution_kernel(std::make_unique<CLGEMMLowpOffsetContributionKernel>()),
@@ -292,7 +292,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), output->info());
// Configure reshape RHS kernel
- _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
+ _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), _tmp_b.info(), rhs_info);
}
// Using default reduction info
@@ -496,7 +496,7 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
// Validate reshape RHS kernel
auto_init_if_empty(tmp_b_info, weights_info.clone()->set_tensor_shape(compute_rhs_reshaped_shape(weights_info, rhs_info)));
- ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(&weights_info, &tmp_b_info, rhs_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClGemmReshapeRhsMatrixKernel::validate(&weights_info, &tmp_b_info, rhs_info));
}
TensorInfo info_vector_sum_col{};
@@ -634,6 +634,9 @@ void CLGEMMLowpMatrixMultiplyCore::run()
if(!_reshape_b_only_on_first_run)
{
// Run reshape matrix B
+ ITensorPack mtx_b_pack;
+ mtx_b_pack.add_const_tensor(TensorType::ACL_SRC, _convert_to_qasymm8 ? &_qasymm8_weights : _original_b);
+ mtx_b_pack.add_tensor(TensorType::ACL_DST, &_tmp_b);
CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
}
}
@@ -687,7 +690,10 @@ void CLGEMMLowpMatrixMultiplyCore::prepare()
// Run reshape kernel and mark original weights tensor as unused
_tmp_b.allocator()->allocate();
- CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
+ ITensorPack mtx_b_pack;
+ mtx_b_pack.add_const_tensor(TensorType::ACL_SRC, _convert_to_qasymm8 ? &_qasymm8_weights : _original_b);
+ mtx_b_pack.add_tensor(TensorType::ACL_DST, &_tmp_b);
+ CLScheduler::get().enqueue_op(*_mtx_b_reshape_kernel, mtx_b_pack, false);
_original_b->mark_as_unused();
}