about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author SiCong Li <sicong.li@arm.com> 2021-02-05 09:30:57 +0000
committer SiCong Li <sicong.li@arm.com> 2021-02-09 11:59:33 +0000
commit db4a6c15e55aaffbe555c33f3e10795d822701e7 (patch)
tree 8c2aeddf0e71a92263735a26c44d0a0377f1381c
parent f5aad5166e430819ce8592e0e322d084c4b69d91 (diff)
download ComputeLibrary-db4a6c15e55aaffbe555c33f3e10795d822701e7.tar.gz
Integrate MLGO into CLGEMM and CLGEMMLowpMatrixMultiplyCore: Part2
* Associate CLScheduler with CLGEMMHeuristicsHandle
* Add option in arm_compute_validation for mlgo file path
* Extend logging for the selection of gemm configurations

Resolves: COMPMID-3843, COMPMID-3844

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I869c3a4122414ae6a7bbd721966c1da37621ca11
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5002
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- arm_compute/runtime/CL/CLScheduler.h 19
-rw-r--r-- arm_compute/runtime/CL/CLTypes.h 3
-rw-r--r-- src/runtime/CL/CLScheduler.cpp 28
-rw-r--r-- src/runtime/CL/Utils.cpp 4
-rw-r--r-- tests/main.cpp 12
-rw-r--r-- utils/TypePrinter.h 42
6 files changed, 86 insertions, 22 deletions
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
index 8a22832792..d3a91da751 100644
--- a/arm_compute/runtime/CL/CLScheduler.h
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h"
#include "arm_compute/runtime/CL/ICLTuner.h"
namespace arm_compute
@@ -57,15 +58,17 @@ public:
* and sets a default device and kernel path for the @ref CLKernelLibrary.
*
* @param[in] cl_tuner (Optional) Pointer to ICLTuner (default=nullptr)
+ * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr)
*/
- void default_init(ICLTuner *cl_tuner = nullptr);
+ void default_init(ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr);
/** Initialises the scheduler with context and device provided by the user
*
* @param[in] device OpenCL device to be used
* @param[in] ctx OpenCL ctx to be used
* @param[in] cl_tuner (Optional) Pointer to ICLTuner (default=nullptr)
+ * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr)
*/
- void default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner = nullptr);
+ void default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr);
/** Schedule the execution of the passed kernel if possible.
*
@@ -88,8 +91,9 @@ public:
* @param[in] device A CL device.
* @param[in] cl_tuner (Optional) Pointer to OpenCL tuner (default=nullptr)
* Note: It is caller's responsibility to release the allocated memory for CLTuner
+ * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr)
*/
- void init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner = nullptr);
+ void init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr);
/** Accessor for the associated CL context.
*
@@ -109,6 +113,12 @@ public:
*/
GPUTarget target() const;
+ /** Accessor for the associated CLGEMMHeuristicsHandle
+ *
+ * @return Pointer to CLGEMMHeuristicsHandle
+ */
+ CLGEMMHeuristicsHandle *gemm_heuristics() const;
+
/** Accessor to set the CL context to be used by the scheduler.
*
* @param[in] context A CL context.
@@ -161,6 +171,7 @@ private:
bool _is_initialised;
ICLTuner *_cl_tuner;
std::unique_ptr<ICLTuner> _cl_default_static_tuner;
+ CLGEMMHeuristicsHandle *_gemm_heuristics;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSCHEDULER_H */
diff --git a/arm_compute/runtime/CL/CLTypes.h b/arm_compute/runtime/CL/CLTypes.h
index 19095a5589..ab973f973c 100644
--- a/arm_compute/runtime/CL/CLTypes.h
+++ b/arm_compute/runtime/CL/CLTypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,6 +23,7 @@
*/
#ifndef ARM_COMPUTE_RUNTIME_CLTYPES_H
#define ARM_COMPUTE_RUNTIME_CLTYPES_H
+#include "arm_compute/core/Types.h"
namespace arm_compute
{
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index 6fc7baed63..ef5cb03b32 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,6 +49,11 @@ GPUTarget CLScheduler::target() const
return _target;
}
+CLGEMMHeuristicsHandle *CLScheduler::gemm_heuristics() const
+{
+ return _gemm_heuristics;
+}
+
void CLScheduler::set_queue(cl::CommandQueue queue)
{
_queue = std::move(queue);
@@ -92,7 +97,7 @@ bool CLScheduler::is_initialised() const
std::once_flag CLScheduler::_initialize_symbols;
CLScheduler::CLScheduler()
- : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _cl_default_static_tuner(nullptr)
+ : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _cl_default_static_tuner(nullptr), _gemm_heuristics(nullptr)
{
}
@@ -103,20 +108,20 @@ CLScheduler &CLScheduler::get()
return scheduler;
}
-void CLScheduler::default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner)
+void CLScheduler::default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner, CLGEMMHeuristicsHandle *gemm_h)
{
if(!_is_initialised)
{
const std::string cl_kernels_folder("./cl_kernels/");
cl::CommandQueue queue = cl::CommandQueue(ctx, device);
CLKernelLibrary::get().init(cl_kernels_folder, ctx, device);
- init(ctx, queue, device, cl_tuner);
+ init(ctx, queue, device, cl_tuner, gemm_h);
_cl_default_static_tuner = tuners::TunerFactory::create_tuner(_target);
_cl_tuner = (cl_tuner == nullptr) ? _cl_default_static_tuner.get() : cl_tuner;
}
}
-void CLScheduler::default_init(ICLTuner *cl_tuner)
+void CLScheduler::default_init(ICLTuner *cl_tuner, CLGEMMHeuristicsHandle *gemm_h)
{
if(!_is_initialised)
{
@@ -127,7 +132,7 @@ void CLScheduler::default_init(ICLTuner *cl_tuner)
ARM_COMPUTE_ERROR_ON_MSG(err != CL_SUCCESS, "Failed to create OpenCL context");
cl::CommandQueue queue = cl::CommandQueue(ctx, dev);
CLKernelLibrary::get().init("./cl_kernels/", ctx, dev);
- init(ctx, queue, dev, cl_tuner);
+ init(ctx, queue, dev, cl_tuner, gemm_h);
// Create a default static tuner and set if none was provided
_cl_default_static_tuner = tuners::TunerFactory::create_tuner(_target);
}
@@ -142,13 +147,14 @@ void CLScheduler::set_context(cl::Context context)
CLKernelLibrary::get().set_context(_context);
}
-void CLScheduler::init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner)
+void CLScheduler::init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner, CLGEMMHeuristicsHandle *gemm_h)
{
set_context(std::move(context));
- _queue = std::move(queue);
- _target = get_target_from_device(device);
- _is_initialised = true;
- _cl_tuner = cl_tuner;
+ _queue = std::move(queue);
+ _target = get_target_from_device(device);
+ _is_initialised = true;
+ _cl_tuner = cl_tuner;
+ _gemm_heuristics = gemm_h;
}
void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush)
diff --git a/src/runtime/CL/Utils.cpp b/src/runtime/CL/Utils.cpp
index e04ce84e49..da3d4850bf 100644
--- a/src/runtime/CL/Utils.cpp
+++ b/src/runtime/CL/Utils.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,6 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "arm_compute/runtime/CL/Utils.h"
+
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
diff --git a/tests/main.cpp b/tests/main.cpp
index 46a081b6c8..ff641c0762 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,6 +38,7 @@
#ifdef ARM_COMPUTE_CL
#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h"
#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
@@ -143,6 +144,9 @@ int main(int argc, char **argv)
auto tuner_file = parser.add_option<utils::SimpleOption<std::string>>("tuner-file", "");
tuner_file->set_help("File to load/save CLTuner values");
+
+ auto mlgo_file = parser.add_option<utils::SimpleOption<std::string>>("mlgo-file", "");
+ mlgo_file->set_help("File to load MLGO heuristics");
#endif /* ARM_COMPUTE_CL */
auto threads = parser.add_option<utils::SimpleOption<int>>("threads", 1);
threads->set_help("Number of threads to use");
@@ -190,12 +194,14 @@ int main(int argc, char **argv)
#endif /* ARM_COMPUTE_GC */
#ifdef ARM_COMPUTE_CL
- CLTuner cl_tuner(false);
+ CLTuner cl_tuner(false);
+ CLGEMMHeuristicsHandle gemm_heuristics;
if(opencl_is_available())
{
auto ctx_dev_err = create_opencl_context_and_device();
ARM_COMPUTE_ERROR_ON_MSG(std::get<2>(ctx_dev_err) != CL_SUCCESS, "Failed to create OpenCL context");
- CLScheduler::get().default_init_with_context(std::get<1>(ctx_dev_err), std::get<0>(ctx_dev_err), &cl_tuner);
+ gemm_heuristics.reload_from_file(mlgo_file->value());
+ CLScheduler::get().default_init_with_context(std::get<1>(ctx_dev_err), std::get<0>(ctx_dev_err), &cl_tuner, &gemm_heuristics);
}
if(enable_tuner->is_set())
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 3996d0ddce..bf5b64a1eb 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,6 +35,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTunerTypes.h"
+#include "arm_compute/runtime/CL/CLTypes.h"
#include "support/StringSupport.h"
#include <ostream>
@@ -282,7 +283,8 @@ inline ::std::ostream &operator<<(::std::ostream &os, const GEMMLHSMatrixInfo &g
*/
inline ::std::ostream &operator<<(::std::ostream &os, const GEMMRHSMatrixInfo &gemm_info)
{
- os << "( n0= " << (unsigned int)gemm_info.n0 << " k0= " << gemm_info.k0 << " h0= " << gemm_info.h0 << " trans= " << gemm_info.transpose << " inter= " << gemm_info.interleave << "})";
+ os << "( n0= " << (unsigned int)gemm_info.n0 << " k0= " << gemm_info.k0 << " h0= " << gemm_info.h0 << " trans= " << gemm_info.transpose << " inter= " << gemm_info.interleave << " exp_img=" <<
+ gemm_info.export_to_cl_image << "})";
return os;
}
@@ -2441,6 +2443,42 @@ inline std::string to_string(const CLTunerMode val)
}
}
}
+/** Converts a @ref CLGEMMKernelType to string
+ *
+ * @param[in] val CLGEMMKernelType value to be converted
+ *
+ * @return String representing the corresponding CLGEMMKernelType
+ */
+inline std::string to_string(CLGEMMKernelType val)
+{
+ switch(val)
+ {
+ case CLGEMMKernelType::NATIVE_V1:
+ {
+ return "Native_V1";
+ }
+ case CLGEMMKernelType::RESHAPED_V1:
+ {
+ return "Reshaped_V1";
+ }
+ case CLGEMMKernelType::NATIVE:
+ {
+ return "Native";
+ }
+ case CLGEMMKernelType::RESHAPED_ONLY_RHS:
+ {
+ return "Reshaped_Only_RHS";
+ }
+ case CLGEMMKernelType::RESHAPED:
+ {
+ return "Reshaped";
+ }
+ default:
+ {
+ return "Unknown";
+ }
+ }
+}
/** [Print CLTunerMode type] **/
/** Formatted output of the CLTunerMode type.
*