From db9116ff15170ff734aad0300b46c48abc2a3b7b Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Thu, 11 Jul 2019 16:50:37 +0100 Subject: COMPMID-2443: CL example use program cache by default. Change-Id: I9db5cf4ce98e86f7488f4041f0d0247d3d0cd663 Signed-off-by: Pablo Tello Reviewed-on: https://review.mlplatform.org/c/1528 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: VidhyaSudhan Loganathan --- utils/CommonGraphOptions.cpp | 4 +++ utils/CommonGraphOptions.h | 3 ++ utils/Utils.cpp | 78 +++++++++++++++++++++++++++++++++++++++++++- utils/Utils.h | 12 +++++++ 4 files changed, 96 insertions(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/CommonGraphOptions.cpp b/utils/CommonGraphOptions.cpp index e2ca98a7dd..280ad4ee89 100644 --- a/utils/CommonGraphOptions.cpp +++ b/utils/CommonGraphOptions.cpp @@ -83,6 +83,7 @@ namespace utils os << "Data type : " << common_params.data_type << std::endl; os << "Data layout : " << common_params.data_layout << std::endl; os << "Tuner enabled? : " << (common_params.enable_tuner ? true_str : false_str) << std::endl; + os << "Cache enabled? : " << (common_params.enable_cl_cache ? true_str : false_str) << std::endl; os << "Tuner mode : " << common_params.tuner_mode << std::endl; os << "Tuner file : " << common_params.tuner_file << std::endl; os << "Fast math enabled? : " << (common_params.fast_math_hint == FastMathHint::Enabled ? true_str : false_str) << std::endl; @@ -118,6 +119,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser) data_type(), data_layout(), enable_tuner(parser.add_option("enable-tuner")), + enable_cl_cache(parser.add_option("enable-cl-cache")), tuner_mode(), fast_math_hint(parser.add_option("fast-math")), data_path(parser.add_option>("data")), @@ -166,6 +168,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser) data_type->set_help("Data type to use"); data_layout->set_help("Data layout to use"); enable_tuner->set_help("Enable OpenCL dynamic tuner"); + enable_cl_cache->set_help("Enable OpenCL program caches"); tuner_mode->set_help("Configures the time taken by the tuner to tune. Slow tuner produces the most performant LWS configuration"); fast_math_hint->set_help("Enable fast math"); data_path->set_help("Path where graph parameters reside"); @@ -192,6 +195,7 @@ CommonGraphParams consume_common_graph_parameters(CommonGraphOptions &options) common_params.data_layout = options.data_layout->value(); } common_params.enable_tuner = options.enable_tuner->is_set() ? options.enable_tuner->value() : false; + common_params.enable_cl_cache = common_params.target == arm_compute::graph::Target::CL ? (options.enable_cl_cache->is_set() ? options.enable_cl_cache->value() : true) : false; common_params.tuner_mode = options.tuner_mode->value(); common_params.fast_math_hint = options.fast_math_hint->is_set() ? fast_math_hint_value : FastMathHint::Disabled; common_params.data_path = options.data_path->value(); diff --git a/utils/CommonGraphOptions.h b/utils/CommonGraphOptions.h index 826cca1be9..3666462643 100644 --- a/utils/CommonGraphOptions.h +++ b/utils/CommonGraphOptions.h @@ -44,6 +44,7 @@ namespace utils * --type : Data type to be used by the examples. Supported data type options: QASYMM8, F16, F32. * --layout : Data layout to be used by the examples. Supported data layout options : NCHW, NHWC. * --enable-tuner : Toggle option to enable the OpenCL dynamic tuner. + * --enable-cl-cache : Toggle option to load the prebuilt opencl kernels from a cache file. * --fast-math : Toggle option to enable the fast math option. * --data : Path that contains the trainable parameter files of graph layers. * --image : Image to load and operate on. Image types supported: PPM, JPEG, NPY. @@ -94,6 +95,7 @@ struct CommonGraphParams arm_compute::DataType data_type{ DataType::F32 }; arm_compute::DataLayout data_layout{ DataLayout::NHWC }; bool enable_tuner{ false }; + bool enable_cl_cache{ false }; arm_compute::CLTunerMode tuner_mode{ CLTunerMode::NORMAL }; arm_compute::graph::FastMathHint fast_math_hint{ arm_compute::graph::FastMathHint::Disabled }; std::string data_path{}; @@ -149,6 +151,7 @@ public: EnumOption *data_type; /**< Graph data type */ EnumOption *data_layout; /**< Graph data layout */ ToggleOption *enable_tuner; /**< Enable tuner */ + ToggleOption *enable_cl_cache; /**< Enable opencl kernels cache */ SimpleOption *tuner_mode; /**< Tuner mode */ ToggleOption *fast_math_hint; /**< Fast math hint */ SimpleOption *data_path; /**< Trainable parameters path */ diff --git a/utils/Utils.cpp b/utils/Utils.cpp index 1d086765b3..47ec25963b 100644 --- a/utils/Utils.cpp +++ b/utils/Utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -23,6 +23,8 @@ */ #include "Utils.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + #include #include #include @@ -267,5 +269,79 @@ uint64_t get_mem_free_from_meminfo() // Nothing found or an error during opening the file return 0; } + +/** This function loads prebuilt opencl kernels from a file + * + * @param[in] filename Name of the file to be used to load the kernels + */ +void restore_program_cache_from_file(const std::string &filename) +{ + std::ifstream cache_file(filename, std::ios::binary); + if(cache_file.is_open()) + { + if(!CLScheduler::get().is_initialised()) + { + arm_compute::CLScheduler::get().default_init(); + } + + while(!cache_file.eof()) + { + size_t name_len = 0; + size_t binary_len = 0; + cache_file.read(reinterpret_cast(&name_len), sizeof(size_t)); + cache_file.read(reinterpret_cast(&binary_len), sizeof(size_t)); + if(name_len == 0 || binary_len == 0) + { + break; + } + std::vector tmp(name_len); + std::vector binary(binary_len); + std::string name; + cache_file.read(tmp.data(), name_len); + name.assign(tmp.data(), name_len); + tmp.resize(binary_len); + cache_file.read(reinterpret_cast(binary.data()), binary_len); + cl::Context context = arm_compute::CLScheduler::get().context(); + cl::Program::Binaries binaries{ binary }; + std::vector devices = context.getInfo(); + cl::Program program(context, devices, binaries); + program.build(); + CLKernelLibrary::get().add_built_program(name, program); + } + cache_file.close(); + } +} + +/** This function saves opencl kernels library to a file + * + * @param[in] filename Name of the file to be used to save the library + */ +void save_program_cache_to_file(const std::string &filename) +{ + if(CLScheduler::get().is_initialised()) + { + std::ofstream cache_file(filename, std::ios::binary); + if(cache_file.is_open()) + { + for(const auto &it : CLKernelLibrary::get().get_built_programs()) + { + std::vector> binaries = it.second.getInfo(); + ARM_COMPUTE_ERROR_ON(binaries.size() != 1); + const std::string kernel_name = it.first; + size_t kernel_name_size = kernel_name.length(); + size_t binary_size = binaries[0].size(); + cache_file.write(reinterpret_cast(&kernel_name_size), sizeof(size_t)); + cache_file.write(reinterpret_cast(&binary_size), sizeof(size_t)); + cache_file.write(kernel_name.c_str(), kernel_name_size); + cache_file.write(reinterpret_cast(binaries[0].data()), binaries[0].size()); + } + cache_file.close(); + } + else + { + ARM_COMPUTE_ERROR("Cannot open cache file"); + } + } +} } // namespace utils } // namespace arm_compute diff --git a/utils/Utils.h b/utils/Utils.h index eec6972470..ba10d7c803 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -823,6 +823,18 @@ int compare_tensor(ITensor &tensor1, ITensor &tensor2, T tolerance) return num_mismatches; } + +/** This function saves opencl kernels library to a file + * + * @param[in] filename Name of the file to be used to save the library + */ +void save_program_cache_to_file(const std::string &filename = "cache.bin"); + +/** This function loads prebuilt opencl kernels from a file + * + * @param[in] filename Name of the file to be used to load the kernels + */ +void restore_program_cache_from_file(const std::string &filename = "cache.bin"); } // namespace utils } // namespace arm_compute #endif /* __UTILS_UTILS_H__*/ -- cgit v1.2.1