From 050471e40fc58cb5ea745701a43ec5b2b9586b81 Mon Sep 17 00:00:00 2001 From: Vidhya Sudhan Loganathan Date: Thu, 25 Apr 2019 09:27:24 +0100 Subject: COMPMID-1974 : Extend CLTuner to support different of level of tuning Change-Id: I52e4a00a25e7f7a17050038cee7c30e508553722 Signed-off-by: Vidhya Sudhan Loganathan Reviewed-on: https://review.mlplatform.org/c/977 Comments-Addressed: Pablo Marquez Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- arm_compute/graph/Types.h | 4 +- arm_compute/graph/backends/CL/CLDeviceBackend.h | 7 +- arm_compute/runtime/CL/CLTuner.h | 20 ++- arm_compute/runtime/CL/CLTunerTypes.h | 86 ++++++++++ arm_compute/runtime/CL/tuners/CLLWSList.h | 213 ++++++++++++++++++++++++ 5 files changed, 326 insertions(+), 4 deletions(-) create mode 100644 arm_compute/runtime/CL/CLTunerTypes.h create mode 100644 arm_compute/runtime/CL/tuners/CLLWSList.h (limited to 'arm_compute') diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index 582e6f6434..4d9e031b91 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTunerTypes.h" #include #include @@ -34,6 +35,7 @@ namespace arm_compute { namespace graph { +using arm_compute::CLTunerMode; using arm_compute::Status; using arm_compute::Coordinates; @@ -71,13 +73,13 @@ constexpr EdgeID EmptyEdgeID = std::numeric_limits::max(); // Forward declarations class TensorDescriptor; - /** Graph configuration structure */ struct GraphConfig { bool use_function_memory_manager{ true }; /**< Use a memory manager to manage per-funcion auxilary memory */ bool use_transition_memory_manager{ true }; /**< Use a memory manager to manager transition buffer memory */ bool use_tuner{ false }; /**< Use a tuner in tunable backends */ + CLTunerMode tuner_mode{ CLTunerMode::EXHAUSTIVE }; /**< Tuner mode to be used by 
the CL tuner */ int num_threads{ -1 }; /**< Number of threads to use (thread capable backends), if 0 the backend will auto-initialize, if -1 the backend will stay as it is. */ std::string tuner_file{ "acl_tuner.csv" }; /**< File to load/store tuning values from */ }; diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h index 49e7596d58..afe01fff70 100644 --- a/arm_compute/graph/backends/CL/CLDeviceBackend.h +++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,11 @@ public: * @param[in] enable_tuning Enables tuning if false else true */ void set_kernel_tuning(bool enable_tuning); + /** Set kernel tuning mode + * + * @param[in] tuning_mode Indicates how exhaustive the search for the optimal LWS should be while tuning + */ + void set_kernel_tuning_mode(CLTunerMode tuning_mode); // Inherited overridden methods void initialize_backend() override; diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index ee83f6933c..3f3df5f236 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -25,6 +25,7 @@ #define __ARM_COMPUTE_CLTUNER_H__ #include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/CL/CLTunerTypes.h" #include "arm_compute/runtime/CL/ICLTuner.h" #include @@ -57,12 +58,26 @@ public: * @return True if tuning of new kernels is enabled. */ bool tune_new_kernels() const; + + /** Set OpenCL tuner mode + * + * @param[in] mode Indicates how exhaustive the search for the optimal LWS should be while tuning. 
Default is Exhaustive mode + */ + void set_tuner_mode(CLTunerMode mode); + + /** Get the current OpenCL tuner mode + * + * @return tuner_mode Indicates how exhaustive the search for the optimal LWS should be while tuning + */ + CLTunerMode get_tuner_mode() const; + /** Manually add a LWS for a kernel * * @param[in] kernel_id Unique identifiant of the kernel * @param[in] optimal_lws Optimal local workgroup size to use for the given kernel */ void add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal_lws); + /** Import LWS table * * @param[in] lws_table The unordered_map container to import @@ -118,8 +133,9 @@ private: cl::NDRange find_optimal_lws(ICLKernel &kernel); std::unordered_map _lws_table; - cl::Event _kernel_event; - bool _tune_new_kernels; + cl::Event _kernel_event; + bool _tune_new_kernels; + CLTunerMode _tuner_mode; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_CLTUNER_H__ */ diff --git a/arm_compute/runtime/CL/CLTunerTypes.h b/arm_compute/runtime/CL/CLTunerTypes.h new file mode 100644 index 0000000000..7d13b6d3fa --- /dev/null +++ b/arm_compute/runtime/CL/CLTunerTypes.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTUNER_TYPES_H__ +#define __ARM_COMPUTE_CLTUNER_TYPES_H__ + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/utils/misc/Utility.h" +#include <map> + +namespace arm_compute +{ +/** OpenCL tuner modes */ +enum class CLTunerMode +{ + EXHAUSTIVE, /**< Searches all possible LWS configurations while tuning */ + NORMAL, /**< Searches a subset of LWS configurations while tuning */ + RAPID /**< Searches a minimal subset of LWS configurations while tuning */ +}; + +/** Converts a string to a strongly typed enumeration @ref CLTunerMode + * + * @param[in] name String to convert + * + * @return Converted CLTunerMode enumeration + */ +inline CLTunerMode tuner_mode_from_name(const std::string &name) +{ + static const std::map<std::string, CLTunerMode> tuner_modes = + { + { "exhaustive", CLTunerMode::EXHAUSTIVE }, + { "normal", CLTunerMode::NORMAL }, + { "rapid", CLTunerMode::RAPID }, + }; + +#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED + try + { +#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */ + return tuner_modes.at(arm_compute::utility::tolower(name)); + +#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED + } + catch(const std::out_of_range &) + { + throw std::invalid_argument(name); + } +#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */ +} + +/** Input Stream operator for @ref CLTunerMode + * + * @param[in] stream Stream to parse + * @param[out] tuner_mode Output tuner mode + * + * @return Updated stream + */ +inline ::std::istream &operator>>(::std::istream &stream, CLTunerMode &tuner_mode) +{ + 
std::string value; + stream >> value; + tuner_mode = tuner_mode_from_name(value); + return stream; +} +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLTUNER_TYPES_H__ */ diff --git a/arm_compute/runtime/CL/tuners/CLLWSList.h b/arm_compute/runtime/CL/tuners/CLLWSList.h new file mode 100644 index 0000000000..d623834208 --- /dev/null +++ b/arm_compute/runtime/CL/tuners/CLLWSList.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CL_LWS_LIST_H__ +#define __ARM_COMPUTE_CL_LWS_LIST_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/CL/CLTunerTypes.h" +#include "support/ToolchainSupport.h" +#include + +namespace arm_compute +{ +namespace cl_tuner +{ +constexpr unsigned int max_lws_supported_x{ 64u }; +constexpr unsigned int max_lws_supported_y{ 32u }; +constexpr unsigned int max_lws_supported_z{ 32u }; + +/** Interface for LWS lists */ +class ICLLWSList +{ +public: + /** Constructor */ + ICLLWSList() = default; + /** Copy Constructor */ + ICLLWSList(const ICLLWSList &) = default; + /** Move Constructor */ + ICLLWSList(ICLLWSList &&) noexcept(true) = default; + /** Assignment */ + ICLLWSList &operator=(const ICLLWSList &) = default; + /** Move Assignment */ + ICLLWSList &operator=(ICLLWSList &&) noexcept(true) = default; + /** Destructor */ + virtual ~ICLLWSList() = default; + + /** Return the LWS value at the given index. + * + * @return LWS value at the given index + */ + virtual cl::NDRange operator[](size_t) = 0; + + /** LWS list size. 
+ * + * @return LWS list size + */ + virtual size_t size() = 0; +}; + +/** Non-instantiable base class for LWS combinations that use Index2Coord mapping */ +class CLLWSList : public ICLLWSList +{ +protected: + /* Shape of 3-D search space */ + TensorShape search_space_shape{ 0, 0, 0 }; + + /** Constructor */ + CLLWSList() = default; + /** Copy Constructor */ + CLLWSList(const CLLWSList &) = default; + /** Move Constructor */ + CLLWSList(CLLWSList &&) noexcept(true) = default; + /** Assignment */ + CLLWSList &operator=(const CLLWSList &) = default; + /** Move Assignment */ + CLLWSList &operator=(CLLWSList &&) noexcept(true) = default; + /** Destructor */ + virtual ~CLLWSList() = default; + + // Inherited methods overridden: + virtual size_t size() override; +}; + +/** Exhaustive list of all possible LWS values */ +class CLLWSListExhaustive : public CLLWSList +{ +public: + /** Prevent default constructor calls */ + CLLWSListExhaustive() = delete; + /** Constructor */ + CLLWSListExhaustive(const cl::NDRange &gws); + /** Copy Constructor */ + CLLWSListExhaustive(const CLLWSListExhaustive &) = default; + /** Move Constructor */ + CLLWSListExhaustive(CLLWSListExhaustive &&) noexcept(true) = default; + /** Assignment */ + CLLWSListExhaustive &operator=(const CLLWSListExhaustive &) = default; + /** Move Assignment */ + CLLWSListExhaustive &operator=(CLLWSListExhaustive &&) noexcept(true) = default; + /** Destructor */ + ~CLLWSListExhaustive() = default; + + // Inherited methods overridden: + cl::NDRange operator[](size_t) override; +}; + +/** A subset of LWS values that are either factors of gws when gws[2] < 16 or power of 2 */ +class CLLWSListNormal : public CLLWSList +{ +public: + /** Constructor */ + CLLWSListNormal(const cl::NDRange &gws); + /** Copy Constructor */ + CLLWSListNormal(const CLLWSListNormal &) = default; + /** Move Constructor */ + CLLWSListNormal(CLLWSListNormal &&) noexcept(true) = default; + /** Assignment */ + CLLWSListNormal &operator=(const 
CLLWSListNormal &) = default; + /** Move Assignment */ + CLLWSListNormal &operator=(CLLWSListNormal &&) noexcept(true) = default; + /** Destructor */ + ~CLLWSListNormal() = default; + + // Inherited methods overridden: + cl::NDRange operator[](size_t) override; + +protected: + std::vector<unsigned int> _lws_x{}; + std::vector<unsigned int> _lws_y{}; + std::vector<unsigned int> _lws_z{}; + + /** Default constructor, reachable only from derived classes */ + CLLWSListNormal() = default; + +private: + /** Utility function used to initialize the LWS values to test. + * Only the LWS values which are power of 2 or satisfy the modulo conditions with GWS are taken into account by the CLTuner + * + * @param[in, out] lws Vector of LWS to test + * @param[in] gws Size of the specific GWS + * @param[in] lws_max Max LWS value allowed to be tested + * @param[in] mod_let_one True if the results of the modulo operation between gws and the lws can be less than one. + */ + void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int gws, unsigned int lws_max, bool mod_let_one); +}; + +/** A minimal subset of LWS values that only have 1,2 and 4/8 */ +class CLLWSListRapid : public CLLWSListNormal +{ +public: + /** Prevent default constructor calls */ + CLLWSListRapid() = delete; + /** Constructor */ + CLLWSListRapid(const cl::NDRange &gws); + /** Copy Constructor */ + CLLWSListRapid(const CLLWSListRapid &) = default; + /** Move Constructor */ + CLLWSListRapid(CLLWSListRapid &&) noexcept(true) = default; + /** Assignment */ + CLLWSListRapid &operator=(const CLLWSListRapid &) = default; + /** Move Assignment */ + CLLWSListRapid &operator=(CLLWSListRapid &&) noexcept(true) = default; + /** Destructor */ + virtual ~CLLWSListRapid() = default; + +private: + /** Utility function used to initialize the LWS values to test. 
+ * Only the LWS values that have 1,2 and 4/8 for each dimension are taken into account by the CLTuner + * + * @param[in, out] lws Vector of LWS to test + * @param[in] lws_max Max LWS value allowed to be tested + */ + void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int lws_max); +}; + +/** Factory to construct an ICLLWSList object based on the CL tuner mode */ +class CLLWSListFactory final +{ +public: + /** Construct an ICLLWSList object for the given tuner mode and gws configuration. + * + * @return unique_ptr to the requested ICLLWSList implementation. + */ + static std::unique_ptr<ICLLWSList> get_lws_list(CLTunerMode mode, const cl::NDRange &gws) + { + switch(mode) + { + case CLTunerMode::EXHAUSTIVE: + return arm_compute::support::cpp14::make_unique<CLLWSListExhaustive>(gws); + case CLTunerMode::NORMAL: + return arm_compute::support::cpp14::make_unique<CLLWSListNormal>(gws); + case CLTunerMode::RAPID: + return arm_compute::support::cpp14::make_unique<CLLWSListRapid>(gws); + default: + return nullptr; + } + } +}; +} // namespace cl_tuner +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CL_LWS_LIST_H__ */ -- cgit v1.2.1