aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPablo Tello <pablo.tello@arm.com>2019-07-11 16:50:37 +0100
committerPablo Marquez <pablo.tello@arm.com>2019-07-18 14:55:49 +0000
commitdb9116ff15170ff734aad0300b46c48abc2a3b7b (patch)
tree5846ab774aa5e6725e9144841b745e1ad2641583
parent06be6f8d2a316a307fa623150f8adf8f9c3416c5 (diff)
downloadComputeLibrary-db9116ff15170ff734aad0300b46c48abc2a3b7b.tar.gz
COMPMID-2443: CL example use program cache by default.
Change-Id: I9db5cf4ce98e86f7488f4041f0d0247d3d0cd663 Signed-off-by: Pablo Tello <pablo.tello@arm.com> Reviewed-on: https://review.mlplatform.org/c/1528 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: VidhyaSudhan Loganathan <vidhyasudhan.loganathan@arm.com>
-rw-r--r--examples/cl_cache.cpp63
-rw-r--r--examples/graph_alexnet.cpp23
-rw-r--r--examples/graph_inception_v4.cpp22
-rw-r--r--utils/CommonGraphOptions.cpp4
-rw-r--r--utils/CommonGraphOptions.h3
-rw-r--r--utils/Utils.cpp78
-rw-r--r--utils/Utils.h12
7 files changed, 141 insertions, 64 deletions
diff --git a/examples/cl_cache.cpp b/examples/cl_cache.cpp
index 87a3058956..998c4682ba 100644
--- a/examples/cl_cache.cpp
+++ b/examples/cl_cache.cpp
@@ -35,69 +35,6 @@ using namespace utils;
namespace
{
-/** This function loads prebuilt opencl kernels from a file
- *
- * @param[in] filename Name of the file to be used to load the kernels
- */
-void restore_program_cache_from_file(const std::string &filename = "cache.bin")
-{
- std::cout << "Loading kernels from file " << filename << std::endl;
- std::ifstream cache_file(filename, std::ios::binary);
- if(cache_file.is_open())
- {
- while(!cache_file.eof())
- {
- size_t name_len = 0;
- size_t binary_len = 0;
- cache_file.read(reinterpret_cast<char *>(&name_len), sizeof(size_t));
- cache_file.read(reinterpret_cast<char *>(&binary_len), sizeof(size_t));
- if(name_len == 0 || binary_len == 0)
- {
- break;
- }
- std::vector<char> tmp(name_len);
- std::vector<unsigned char> binary(binary_len);
- std::string name;
- cache_file.read(tmp.data(), name_len);
- name.assign(tmp.data(), name_len);
- tmp.resize(binary_len);
- cache_file.read(reinterpret_cast<char *>(binary.data()), binary_len);
- cl::Context context = arm_compute::CLScheduler::get().context();
- cl::Program::Binaries binaries{ binary };
- std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
- cl::Program program(context, devices, binaries);
- program.build();
- CLKernelLibrary::get().add_built_program(name, program);
- }
- cache_file.close();
- }
-}
-
-/** This function saves opencl kernels library to a file
- *
- * @param[in] filename Name of the file to be used to save the library
- */
-void save_program_cache_to_file(const std::string &filename = "cache.bin")
-{
- std::cout << "Saving opencl kernels to " << filename << std::endl;
- std::ofstream cache_file(filename, std::ios::binary);
- if(cache_file.is_open())
- {
- for(const auto &it : CLKernelLibrary::get().get_built_programs())
- {
- std::vector<std::vector<unsigned char>> binaries = it.second.getInfo<CL_PROGRAM_BINARIES>();
- ARM_COMPUTE_ERROR_ON(binaries.size() != 1);
- const std::string kernel_name = it.first;
- size_t kernel_name_size = kernel_name.length();
- size_t binary_size = binaries[0].size();
- cache_file.write(reinterpret_cast<char *>(&kernel_name_size), sizeof(size_t));
- cache_file.write(reinterpret_cast<char *>(&binary_size), sizeof(size_t));
- cache_file.write(kernel_name.c_str(), kernel_name_size);
- cache_file.write(reinterpret_cast<const char *>(binaries[0].data()), binaries[0].size());
- }
- cache_file.close();
- }
-}
} // namespace
class CLCacheExample : public Example
diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
index a785dea78d..f8b25a103a 100644
--- a/examples/graph_alexnet.cpp
+++ b/examples/graph_alexnet.cpp
@@ -27,6 +27,8 @@
#include "utils/GraphUtils.h"
#include "utils/Utils.h"
+#include <chrono>
+
using namespace arm_compute::utils;
using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;
@@ -148,13 +150,34 @@ public:
// Finalize graph
GraphConfig config;
+
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
+ const auto config_start_time = std::chrono::high_resolution_clock::now();
+
+ // Load the precompiled kernels from a file into the kernel library, in this way the next time they are needed
+ // compilation won't be required.
+ if(common_params.enable_cl_cache)
+ {
+ restore_program_cache_from_file();
+ }
+
graph.finalize(common_params.target, config);
+ const auto config_end_time = std::chrono::high_resolution_clock::now();
+ const auto time_elapsed = config_end_time - config_start_time;
+ const auto time_elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(time_elapsed).count();
+ std::cout << "Configuration time " << time_elapsed_ms << " ms " << std::endl;
+
+ // Save the opencl kernels to a file; gate on the parsed flag, exactly as the restore above does
+ if(common_params.enable_cl_cache)
+ {
+ save_program_cache_to_file();
+ }
+
return true;
}
void do_run() override
diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp
index 3ea2b2fd1c..15fd049fa3 100644
--- a/examples/graph_inception_v4.cpp
+++ b/examples/graph_inception_v4.cpp
@@ -27,6 +27,8 @@
#include "utils/GraphUtils.h"
#include "utils/Utils.h"
+#include <chrono>
+
using namespace arm_compute::utils;
using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;
@@ -154,8 +156,28 @@ public:
config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
+ const auto config_start_time = std::chrono::high_resolution_clock::now();
+
+ // Load the precompiled kernels from a file into the kernel library, in this way the next time they are needed
+ // compilation won't be required.
+ if(common_params.enable_cl_cache)
+ {
+ restore_program_cache_from_file();
+ }
+
graph.finalize(common_params.target, config);
+ const auto config_end_time = std::chrono::high_resolution_clock::now();
+ const auto time_elapsed = config_end_time - config_start_time;
+ const auto time_elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(time_elapsed).count();
+ std::cout << "Configuration time " << time_elapsed_ms << " ms " << std::endl;
+
+ // Save the opencl kernels to a file; gate on the parsed flag, exactly as the restore above does
+ if(common_params.enable_cl_cache)
+ {
+ save_program_cache_to_file();
+ }
+
return true;
}
diff --git a/utils/CommonGraphOptions.cpp b/utils/CommonGraphOptions.cpp
index e2ca98a7dd..280ad4ee89 100644
--- a/utils/CommonGraphOptions.cpp
+++ b/utils/CommonGraphOptions.cpp
@@ -83,6 +83,7 @@ namespace utils
os << "Data type : " << common_params.data_type << std::endl;
os << "Data layout : " << common_params.data_layout << std::endl;
os << "Tuner enabled? : " << (common_params.enable_tuner ? true_str : false_str) << std::endl;
+ os << "Cache enabled? : " << (common_params.enable_cl_cache ? true_str : false_str) << std::endl;
os << "Tuner mode : " << common_params.tuner_mode << std::endl;
os << "Tuner file : " << common_params.tuner_file << std::endl;
os << "Fast math enabled? : " << (common_params.fast_math_hint == FastMathHint::Enabled ? true_str : false_str) << std::endl;
@@ -118,6 +119,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser)
data_type(),
data_layout(),
enable_tuner(parser.add_option<ToggleOption>("enable-tuner")),
+ enable_cl_cache(parser.add_option<ToggleOption>("enable-cl-cache")),
tuner_mode(),
fast_math_hint(parser.add_option<ToggleOption>("fast-math")),
data_path(parser.add_option<SimpleOption<std::string>>("data")),
@@ -166,6 +168,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser)
data_type->set_help("Data type to use");
data_layout->set_help("Data layout to use");
enable_tuner->set_help("Enable OpenCL dynamic tuner");
+ enable_cl_cache->set_help("Enable OpenCL program caches");
tuner_mode->set_help("Configures the time taken by the tuner to tune. Slow tuner produces the most performant LWS configuration");
fast_math_hint->set_help("Enable fast math");
data_path->set_help("Path where graph parameters reside");
@@ -192,6 +195,7 @@ CommonGraphParams consume_common_graph_parameters(CommonGraphOptions &options)
common_params.data_layout = options.data_layout->value();
}
common_params.enable_tuner = options.enable_tuner->is_set() ? options.enable_tuner->value() : false;
+ common_params.enable_cl_cache = common_params.target == arm_compute::graph::Target::CL ? (options.enable_cl_cache->is_set() ? options.enable_cl_cache->value() : true) : false;
common_params.tuner_mode = options.tuner_mode->value();
common_params.fast_math_hint = options.fast_math_hint->is_set() ? fast_math_hint_value : FastMathHint::Disabled;
common_params.data_path = options.data_path->value();
diff --git a/utils/CommonGraphOptions.h b/utils/CommonGraphOptions.h
index 826cca1be9..3666462643 100644
--- a/utils/CommonGraphOptions.h
+++ b/utils/CommonGraphOptions.h
@@ -44,6 +44,7 @@ namespace utils
* --type : Data type to be used by the examples. Supported data type options: QASYMM8, F16, F32.
* --layout : Data layout to be used by the examples. Supported data layout options : NCHW, NHWC.
* --enable-tuner : Toggle option to enable the OpenCL dynamic tuner.
+ * --enable-cl-cache : Toggle option to load the prebuilt opencl kernels from a cache file and save them back after configuration (on by default for the CL target).
* --fast-math : Toggle option to enable the fast math option.
* --data : Path that contains the trainable parameter files of graph layers.
* --image : Image to load and operate on. Image types supported: PPM, JPEG, NPY.
@@ -94,6 +95,7 @@ struct CommonGraphParams
arm_compute::DataType data_type{ DataType::F32 };
arm_compute::DataLayout data_layout{ DataLayout::NHWC };
bool enable_tuner{ false };
+ bool enable_cl_cache{ false };
arm_compute::CLTunerMode tuner_mode{ CLTunerMode::NORMAL };
arm_compute::graph::FastMathHint fast_math_hint{ arm_compute::graph::FastMathHint::Disabled };
std::string data_path{};
@@ -149,6 +151,7 @@ public:
EnumOption<arm_compute::DataType> *data_type; /**< Graph data type */
EnumOption<arm_compute::DataLayout> *data_layout; /**< Graph data layout */
ToggleOption *enable_tuner; /**< Enable tuner */
+ ToggleOption *enable_cl_cache; /**< Enable opencl kernels cache */
SimpleOption<arm_compute::CLTunerMode> *tuner_mode; /**< Tuner mode */
ToggleOption *fast_math_hint; /**< Fast math hint */
SimpleOption<std::string> *data_path; /**< Trainable parameters path */
diff --git a/utils/Utils.cpp b/utils/Utils.cpp
index 1d086765b3..47ec25963b 100644
--- a/utils/Utils.cpp
+++ b/utils/Utils.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,6 +23,8 @@
*/
#include "Utils.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
#include <cctype>
#include <cerrno>
#include <iomanip>
@@ -267,5 +269,79 @@ uint64_t get_mem_free_from_meminfo()
// Nothing found or an error during opening the file
return 0;
}
+
+/** This function loads prebuilt opencl kernels from a file
+ *
+ * Each record holds: name length (size_t), binary length (size_t), name bytes, binary bytes.
+ * Loading stops at the first empty or truncated record; a file that cannot be opened is ignored.
+ *
+ * @param[in] filename Name of the file to be used to load the kernels
+ */
+void restore_program_cache_from_file(const std::string &filename)
+{
+    std::ifstream cache_file(filename, std::ios::binary);
+    if(!cache_file.is_open())
+    {
+        return;
+    }
+    if(!CLScheduler::get().is_initialised())
+    {
+        arm_compute::CLScheduler::get().default_init();
+    }
+
+    // The context and its devices are the same for every record, so query them once
+    cl::Context context = arm_compute::CLScheduler::get().context();
+    std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
+    size_t name_len = 0;
+    size_t binary_len = 0;
+    // Test each read itself: eof() only turns true after a read has already failed
+    while(cache_file.read(reinterpret_cast<char *>(&name_len), sizeof(size_t))
+          && cache_file.read(reinterpret_cast<char *>(&binary_len), sizeof(size_t))
+          && name_len != 0 && binary_len != 0)
+    {
+        std::string name(name_len, '\0');
+        std::vector<unsigned char> binary(binary_len);
+        if(!cache_file.read(&name[0], name_len) || !cache_file.read(reinterpret_cast<char *>(binary.data()), binary_len))
+        {
+            break; // Truncated record: never build a program from a partially read binary
+        }
+        cl::Program::Binaries binaries{ binary };
+        cl::Program program(context, devices, binaries);
+        program.build();
+        CLKernelLibrary::get().add_built_program(name, program);
+    }
+}
+
+/** Write every program built by the CL kernel library to a binary cache file
+ *
+ * @param[in] filename Name of the file to be used to save the library
+ */
+void save_program_cache_to_file(const std::string &filename)
+{
+    if(!CLScheduler::get().is_initialised())
+    {
+        return; // No scheduler, nothing to save
+    }
+    std::ofstream out(filename, std::ios::binary);
+    if(!out.is_open())
+    {
+        ARM_COMPUTE_ERROR("Cannot open cache file");
+    }
+    else
+    {
+        for(const auto &entry : CLKernelLibrary::get().get_built_programs())
+        {
+            std::vector<std::vector<unsigned char>> program_bins = entry.second.getInfo<CL_PROGRAM_BINARIES>();
+            ARM_COMPUTE_ERROR_ON(program_bins.size() != 1);
+            size_t name_len = entry.first.length();
+            size_t binary_len = program_bins[0].size();
+            out.write(reinterpret_cast<char *>(&name_len), sizeof(size_t)); // Record: both lengths, then name, then binary
+            out.write(reinterpret_cast<char *>(&binary_len), sizeof(size_t));
+            out.write(entry.first.c_str(), name_len);
+            out.write(reinterpret_cast<const char *>(program_bins[0].data()), program_bins[0].size());
+        }
+        out.close();
+    }
+}
} // namespace utils
} // namespace arm_compute
diff --git a/utils/Utils.h b/utils/Utils.h
index eec6972470..ba10d7c803 100644
--- a/utils/Utils.h
+++ b/utils/Utils.h
@@ -823,6 +823,18 @@ int compare_tensor(ITensor &tensor1, ITensor &tensor2, T tolerance)
return num_mismatches;
}
+
+/** This function saves the opencl kernels library to a file; it does nothing if the CLScheduler is not initialised
+ *
+ * @param[in] filename Name of the file to be used to save the library (default: "cache.bin")
+ */
+void save_program_cache_to_file(const std::string &filename = "cache.bin");
+
+/** This function loads prebuilt opencl kernels from a file; a file that cannot be opened is silently ignored
+ *
+ * @param[in] filename Name of the file to be used to load the kernels (default: "cache.bin")
+ */
+void restore_program_cache_from_file(const std::string &filename = "cache.bin");
} // namespace utils
} // namespace arm_compute
#endif /* __UTILS_UTILS_H__*/