From 8db8318e2292d79f1971ae72198a3031f8ea65bd Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Tue, 27 Feb 2018 13:08:00 +0000 Subject: COMPMID-978 Load/Store tuning data from file Change-Id: I1d1f402df3a58704c021b9866d489844fb5e7d7a Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/122395 Tested-by: Jenkins Reviewed-by: Georgios Pinitas --- arm_compute/runtime/CL/CLTuner.h | 67 ++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 24 deletions(-) (limited to 'arm_compute/runtime/CL/CLTuner.h') diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index 386994682d..251848814d 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -37,12 +37,27 @@ class ICLKernel; class CLTuner : public ICLTuner { public: - /** Constructor */ - CLTuner(); + /** Constructor + * + * @param[in] tune_new_kernels Find the optimal local workgroup size for kernels which are not present in the table ? + * + */ + CLTuner(bool tune_new_kernels = true); /** Destructor */ ~CLTuner() = default; + /* Setter for tune_new_kernels option + * + * @param[in] tune_new_kernels Find the optimal local workgroup size for kernels which are not present in the table ? + */ + void set_tune_new_kernels(bool tune_new_kernels); + /** Manually add a LWS for a kernel + * + * @param[in] kernel_id Unique identifiant of the kernel + * @param[in] optimal_lws Optimal local workgroup size to use for the given kernel + */ + void add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal_lws); /** Import LWS table * * @param[in] lws_table The unordered_map container to import @@ -53,10 +68,7 @@ public: * * return The lws table as unordered_map container */ - const std::unordered_map &export_lws_table(); - - // Inherited methods overridden: - void tune_kernel(ICLKernel &kernel) override; + const std::unordered_map &export_lws_table() const; /** Set the OpenCL kernel event * @@ -66,7 +78,10 @@ public: */ void set_cl_kernel_event(cl_event kernel_event); - std::function real_function; + std::function real_clEnqueueNDRangeKernel; + + // Inherited methods overridden: + void tune_kernel(ICLKernel &kernel) override; private: /** Find optimal LWS using brute-force approach @@ -81,33 +96,37 @@ private: cl::CommandQueue _queue; cl::CommandQueue _queue_profiler; cl::Event _kernel_event; + bool _tune_new_kernels; }; -/* Function to be used to intercept kernel enqueues and store their OpenCL Event */ -class Interceptor +class CLFileTuner : public CLTuner { public: - explicit Interceptor(CLTuner &tuner); + /** Constructor + * + * @param[in] file_path File to load/store the tuning information from + * @param[in] update_file If true, save the new LWS table to the file on exit. + * @param[in] tune_new_kernels Find the optimal local workgroup size for kernels which are not present in the table ? + */ + CLFileTuner(std::string file_path = "acl_tuner.csv", bool update_file = false, bool tune_new_kernels = false); - /** clEnqueueNDRangeKernel interface + /** Save the content of the LWS table to file + */ + void save_to_file() const; + /* Setter for update_file option * - * @param[in] command_queue A valid command-queue. The kernel will be queued for execution on the device associated with command_queue. - * @param[in] kernel A valid kernel object. The OpenCL context associated with kernel and command_queue must be the same. - * @param[in] work_dim The number of dimensions used to specify the global work-items and work-items in the work-group. work_dim must be greater than zero and less than or equal to CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS. - * @param[in] gwo Global-Workgroup-Offset. It can be used to specify an array of work_dim unsigned values that describe the offset used to calculate the global ID of a work-item. If global_work_offset is NULL, the global IDs start at offset (0, 0, ... 0). - * @param[in] gws Global-Workgroup-Size. Points to an array of work_dim unsigned values that describe the number of global work-items in work_dim dimensions that will execute the kernel function. - * @param[in] lws Local-Workgroup-Size. Points to an array of work_dim unsigned values that describe the number of work-items that make up a work-group - * @param[in] num_events_in_wait_list Number of events in the waiting list - * @param[in] event_wait_list Event waiting list - * @param[in] event OpenCL kernel event + * @param[in] update_file If true, save the new LWS table to the file on exit. + */ + void set_update_file(bool update_file); + /** Destructor * - * @return the OpenCL status + * Will save the LWS table to the file if the CLFileTuner was created with update_file enabled. */ - cl_int operator()(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); + ~CLFileTuner(); + const std::string filename; private: - CLTuner &_tuner; + bool _update_file; }; } #endif /*__ARM_COMPUTE_CLTUNER_H__ */ -- cgit v1.2.1