From db4a6c15e55aaffbe555c33f3e10795d822701e7 Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Fri, 5 Feb 2021 09:30:57 +0000 Subject: Integrate MLGO into CLGEMM and CLGEMMLowpMatrixMultiplyCore: Part2 * Associate CLScheduler with CLGEMMHeuristicsHandle * Add option in arm_compute_validation for mlgo file path * Extend logging for the selection of gemm configurations Resolves: COMPMID-3843, COMPMID-3844 Signed-off-by: SiCong Li Change-Id: I869c3a4122414ae6a7bbd721966c1da37621ca11 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5002 Tested-by: Arm Jenkins Reviewed-by: Manuel Bottini Reviewed-by: Gian Marco Iodice Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- arm_compute/runtime/CL/CLScheduler.h | 19 ++++++++++++---- arm_compute/runtime/CL/CLTypes.h | 3 ++- src/runtime/CL/CLScheduler.cpp | 28 ++++++++++++++---------- src/runtime/CL/Utils.cpp | 4 +++- tests/main.cpp | 12 ++++++++--- utils/TypePrinter.h | 42 ++++++++++++++++++++++++++++++++++-- 6 files changed, 86 insertions(+), 22 deletions(-) diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h index 8a22832792..d3a91da751 100644 --- a/arm_compute/runtime/CL/CLScheduler.h +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/experimental/Types.h" +#include "arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h" #include "arm_compute/runtime/CL/ICLTuner.h" namespace arm_compute @@ -57,15 +58,17 @@ public: * and sets a default device and kernel path for the @ref CLKernelLibrary. 
* * @param[in] cl_tuner (Optional) Pointer to ICLTuner (default=nullptr) + * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) */ - void default_init(ICLTuner *cl_tuner = nullptr); + void default_init(ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr); /** Initialises the scheduler with context and device provided by the user * * @param[in] device OpenCL device to be used * @param[in] ctx OpenCL ctx to be used * @param[in] cl_tuner (Optional) Pointer to ICLTuner (default=nullptr) + * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) */ - void default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner = nullptr); + void default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr); /** Schedule the execution of the passed kernel if possible. * @@ -88,8 +91,9 @@ public: * @param[in] device A CL device. * @param[in] cl_tuner (Optional) Pointer to OpenCL tuner (default=nullptr) * Note: It is caller's responsibility to release the allocated memory for CLTuner + * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) */ - void init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner = nullptr); + void init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr); /** Accessor for the associated CL context. * @@ -109,6 +113,12 @@ public: */ GPUTarget target() const; + /** Accessor for the associated CLGEMMHeuristicsHandle + * + * @return Pointer to CLGEMMHeuristicsHandle + */ + CLGEMMHeuristicsHandle *gemm_heuristics() const; + /** Accessor to set the CL context to be used by the scheduler. * * @param[in] context A CL context. 
@@ -161,6 +171,7 @@ private: bool _is_initialised; ICLTuner *_cl_tuner; std::unique_ptr<ICLTuner> _cl_default_static_tuner; + CLGEMMHeuristicsHandle *_gemm_heuristics; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLSCHEDULER_H */ diff --git a/arm_compute/runtime/CL/CLTypes.h b/arm_compute/runtime/CL/CLTypes.h index 19095a5589..ab973f973c 100644 --- a/arm_compute/runtime/CL/CLTypes.h +++ b/arm_compute/runtime/CL/CLTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #ifndef ARM_COMPUTE_RUNTIME_CLTYPES_H #define ARM_COMPUTE_RUNTIME_CLTYPES_H +#include "arm_compute/core/Types.h" namespace arm_compute { diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index 6fc7baed63..ef5cb03b32 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,6 +49,11 @@ GPUTarget CLScheduler::target() const return _target; } +CLGEMMHeuristicsHandle *CLScheduler::gemm_heuristics() const +{ + return _gemm_heuristics; +} + void CLScheduler::set_queue(cl::CommandQueue queue) { _queue = std::move(queue); @@ -92,7 +97,7 @@ bool CLScheduler::is_initialised() const std::once_flag CLScheduler::_initialize_symbols; CLScheduler::CLScheduler() - : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _cl_default_static_tuner(nullptr) + : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _cl_default_static_tuner(nullptr), _gemm_heuristics(nullptr) { } @@ -103,20 +108,20 @@ CLScheduler &CLScheduler::get() return scheduler; } -void CLScheduler::default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner) +void CLScheduler::default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner, 
CLGEMMHeuristicsHandle *gemm_h) { if(!_is_initialised) { const std::string cl_kernels_folder("./cl_kernels/"); cl::CommandQueue queue = cl::CommandQueue(ctx, device); CLKernelLibrary::get().init(cl_kernels_folder, ctx, device); - init(ctx, queue, device, cl_tuner); + init(ctx, queue, device, cl_tuner, gemm_h); _cl_default_static_tuner = tuners::TunerFactory::create_tuner(_target); _cl_tuner = (cl_tuner == nullptr) ? _cl_default_static_tuner.get() : cl_tuner; } } -void CLScheduler::default_init(ICLTuner *cl_tuner) +void CLScheduler::default_init(ICLTuner *cl_tuner, CLGEMMHeuristicsHandle *gemm_h) { if(!_is_initialised) { @@ -127,7 +132,7 @@ void CLScheduler::default_init(ICLTuner *cl_tuner) ARM_COMPUTE_ERROR_ON_MSG(err != CL_SUCCESS, "Failed to create OpenCL context"); cl::CommandQueue queue = cl::CommandQueue(ctx, dev); CLKernelLibrary::get().init("./cl_kernels/", ctx, dev); - init(ctx, queue, dev, cl_tuner); + init(ctx, queue, dev, cl_tuner, gemm_h); // Create a default static tuner and set if none was provided _cl_default_static_tuner = tuners::TunerFactory::create_tuner(_target); } @@ -142,13 +147,14 @@ void CLScheduler::set_context(cl::Context context) CLKernelLibrary::get().set_context(_context); } -void CLScheduler::init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner) +void CLScheduler::init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner, CLGEMMHeuristicsHandle *gemm_h) { set_context(std::move(context)); - _queue = std::move(queue); - _target = get_target_from_device(device); - _is_initialised = true; - _cl_tuner = cl_tuner; + _queue = std::move(queue); + _target = get_target_from_device(device); + _is_initialised = true; + _cl_tuner = cl_tuner; + _gemm_heuristics = gemm_h; } void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush) diff --git a/src/runtime/CL/Utils.cpp b/src/runtime/CL/Utils.cpp index e04ce84e49..da3d4850bf 100644 --- 
a/src/runtime/CL/Utils.cpp +++ b/src/runtime/CL/Utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,6 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/runtime/CL/Utils.h" + #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/runtime/CL/CLScheduler.h" diff --git a/tests/main.cpp b/tests/main.cpp index 46a081b6c8..ff641c0762 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,7 @@ #ifdef ARM_COMPUTE_CL #include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h" #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" @@ -143,6 +144,9 @@ int main(int argc, char **argv) auto tuner_file = parser.add_option<utils::SimpleOption<std::string>>("tuner-file", ""); tuner_file->set_help("File to load/save CLTuner values"); + + auto mlgo_file = parser.add_option<utils::SimpleOption<std::string>>("mlgo-file", ""); + mlgo_file->set_help("File to load MLGO heuristics"); #endif /* ARM_COMPUTE_CL */ auto threads = parser.add_option<utils::SimpleOption<int>>("threads", 1); threads->set_help("Number of threads to use"); @@ -190,12 +194,14 @@ int main(int argc, char **argv) #endif /* ARM_COMPUTE_GC */ #ifdef ARM_COMPUTE_CL - CLTuner cl_tuner(false); + CLTuner cl_tuner(false); + CLGEMMHeuristicsHandle gemm_heuristics; if(opencl_is_available()) { auto ctx_dev_err = create_opencl_context_and_device(); ARM_COMPUTE_ERROR_ON_MSG(std::get<2>(ctx_dev_err) != CL_SUCCESS, "Failed to create OpenCL context"); - CLScheduler::get().default_init_with_context(std::get<1>(ctx_dev_err), std::get<0>(ctx_dev_err), &cl_tuner); + gemm_heuristics.reload_from_file(mlgo_file->value()); + 
CLScheduler::get().default_init_with_context(std::get<1>(ctx_dev_err), std::get<0>(ctx_dev_err), &cl_tuner, &gemm_heuristics); } if(enable_tuner->is_set()) diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index 3996d0ddce..bf5b64a1eb 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,6 +35,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTunerTypes.h" +#include "arm_compute/runtime/CL/CLTypes.h" #include "support/StringSupport.h" #include <ostream> @@ -282,7 +283,8 @@ inline ::std::ostream &operator<<(::std::ostream &os, const GEMMLHSMatrixInfo &g */ inline ::std::ostream &operator<<(::std::ostream &os, const GEMMRHSMatrixInfo &gemm_info) { - os << "( n0= " << (unsigned int)gemm_info.n0 << " k0= " << gemm_info.k0 << " h0= " << gemm_info.h0 << " trans= " << gemm_info.transpose << " inter= " << gemm_info.interleave << "})"; + os << "( n0= " << (unsigned int)gemm_info.n0 << " k0= " << gemm_info.k0 << " h0= " << gemm_info.h0 << " trans= " << gemm_info.transpose << " inter= " << gemm_info.interleave << " exp_img=" << + gemm_info.export_to_cl_image << "})"; return os; } @@ -2441,6 +2443,42 @@ inline std::string to_string(const CLTunerMode val) } } } +/** Converts a @ref CLGEMMKernelType to string + * + * @param[in] val CLGEMMKernelType value to be converted + * + * @return String representing the corresponding CLGEMMKernelType + */ +inline std::string to_string(CLGEMMKernelType val) +{ + switch(val) + { + case CLGEMMKernelType::NATIVE_V1: + { + return "Native_V1"; + } + case CLGEMMKernelType::RESHAPED_V1: + { + return "Reshaped_V1"; + } + case CLGEMMKernelType::NATIVE: + { + return "Native"; + } + case CLGEMMKernelType::RESHAPED_ONLY_RHS: + { + return "Reshaped_Only_RHS"; + } + case CLGEMMKernelType::RESHAPED: + { + return "Reshaped"; + } + default: + { + 
return "Unknown"; + } + } +} /** [Print CLTunerMode type] **/ /** Formatted output of the CLTunerMode type. * -- cgit v1.2.1