diff options
author | SiCong Li <sicong.li@arm.com> | 2022-04-07 17:41:51 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2022-04-14 12:59:27 +0000 |
commit | 0a486cf66c70b4bd9b0ea8ba9dc5b42f52ed16c3 (patch) | |
tree | 88eea0a182ea6dd8ec45b6aca7843d98cfd73764 /arm_compute/runtime | |
parent | ca364dfd87cab4cdb9179b68c42f10ff16e55002 (diff) | |
download | ComputeLibrary-0a486cf66c70b4bd9b0ea8ba9dc5b42f52ed16c3.tar.gz |
Enable dynamic CL tuning for dynamically fused kernels
* Add new tune_kernel_dynamic interface
* Add generate_config_id
Resolves: COMPMID-5154
Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I39870e59fceda875487970061ceb2048995c5a45
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7400
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r-- | arm_compute/runtime/CL/CLScheduler.h | 5 | ||||
-rw-r--r-- | arm_compute/runtime/CL/CLTuner.h | 22 | ||||
-rw-r--r-- | arm_compute/runtime/CL/ICLTuner.h | 21 |
3 files changed, 43 insertions, 5 deletions
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h index 362d2ba137..5bfaaf4b5d 100644 --- a/arm_compute/runtime/CL/CLScheduler.h +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -211,6 +211,11 @@ public: private: void enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush); + /** If job chain is disabled, then flush the command queue according to @p flush. Otherwise @p flush is ignored and the queue is only flushed when job chain count exceeds allocated job chain size + * + * @param[in] flush Flush the command queue. Ignored when job chain is enabled. + */ + void flush_queue(bool flush); #if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) void enqueue_common(ICLKernel &kernel, experimental::dynamic_fusion::TensorBinding &tensors, const experimental::dynamic_fusion::ClExecutionDescriptor &exec_desc, bool flush); diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index f96edc962b..e595f8f34b 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -124,6 +124,9 @@ public: void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override; +#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) + void tune_kernel_dynamic(ICLKernel &kernel, experimental::dynamic_fusion::TensorBinding &tensors, const experimental::dynamic_fusion::ClExecutionDescriptor &exec_desc) override; +#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) /** Is the kernel_event set ? 
* @@ -131,15 +134,26 @@ public: */ bool kernel_event_is_set() const; + /** A wrapper wrapping tensors and other objects needed for running the kernel + */ + struct IKernelData; + private: + /** Perform tune_kernel_dynamic + * + * @param[in] kernel OpenCL kernel to be tuned with tuning parameters + * @param[in,out] data IKernelData object wrapping tensors and other objects needed for running the kernel + * + */ + void do_tune_kernel_dynamic(ICLKernel &kernel, IKernelData *data); /** Find optimal tuning parameters using brute-force approach * - * @param[in] kernel OpenCL kernel to be tuned with tuning parameters - * @param[in,out] tensors Tensors for the kernel to operate on + * @param[in] kernel OpenCL kernel to be tuned with tuning parameters + * @param[in,out] data IKernelData object wrapping tensors and other objects needed for running the kernel * * @return The optimal tuning parameters to use */ - CLTuningParams find_optimal_tuning_params(ICLKernel &kernel, ITensorPack &tensors); + CLTuningParams find_optimal_tuning_params(ICLKernel &kernel, IKernelData *data); std::unordered_map<std::string, CLTuningParams> _tuning_params_table; std::unordered_map<std::string, cl::NDRange> _lws_table; diff --git a/arm_compute/runtime/CL/ICLTuner.h b/arm_compute/runtime/CL/ICLTuner.h index 0f951c384e..a327497255 100644 --- a/arm_compute/runtime/CL/ICLTuner.h +++ b/arm_compute/runtime/CL/ICLTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,16 @@ namespace arm_compute { class ICLKernel; +#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) +namespace experimental +{ +namespace dynamic_fusion +{ +struct TensorBinding; +struct ClExecutionDescriptor; +} // namespace dynamic_fusion +} // namespace experimental +#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) /** Basic interface for tuning the OpenCL kernels */ class ICLTuner { @@ -57,6 +67,15 @@ public: * @param[in, out] tensors Tensors for the kernel to use */ virtual void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) = 0; +#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) + /** Tune OpenCL kernel dynamically for dynamic fusion interface + * + * @param[in] kernel Kernel to tune + * @param[in, out] tensors Tensors for the kernel to use + * @param[in] exec_desc Execution descriptor + */ + virtual void tune_kernel_dynamic(ICLKernel &kernel, experimental::dynamic_fusion::TensorBinding &tensors, const experimental::dynamic_fusion::ClExecutionDescriptor &exec_desc) = 0; +#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) }; } // namespace arm_compute #endif /*ARM_COMPUTE_ICLTUNER_H */ |