From 050471e40fc58cb5ea745701a43ec5b2b9586b81 Mon Sep 17 00:00:00 2001 From: Vidhya Sudhan Loganathan Date: Thu, 25 Apr 2019 09:27:24 +0100 Subject: COMPMID-1974 : Extend CLTuner to support different of level of tuning Change-Id: I52e4a00a25e7f7a17050038cee7c30e508553722 Signed-off-by: Vidhya Sudhan Loganathan Reviewed-on: https://review.mlplatform.org/c/977 Comments-Addressed: Pablo Marquez Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- arm_compute/graph/Types.h | 4 +- arm_compute/graph/backends/CL/CLDeviceBackend.h | 7 +- arm_compute/runtime/CL/CLTuner.h | 20 ++- arm_compute/runtime/CL/CLTunerTypes.h | 86 ++++++++++ arm_compute/runtime/CL/tuners/CLLWSList.h | 213 ++++++++++++++++++++++++ examples/graph_alexnet.cpp | 3 +- examples/graph_googlenet.cpp | 1 + examples/graph_inception_resnet_v1.cpp | 3 +- examples/graph_inception_resnet_v2.cpp | 3 +- examples/graph_inception_v3.cpp | 3 +- examples/graph_inception_v4.cpp | 1 + examples/graph_lenet.cpp | 3 +- examples/graph_mobilenet.cpp | 1 + examples/graph_mobilenet_v2.cpp | 1 + examples/graph_resnet12.cpp | 1 + examples/graph_resnet50.cpp | 1 + examples/graph_resnet_v2_50.cpp | 1 + examples/graph_resnext50.cpp | 3 +- examples/graph_shufflenet.cpp | 1 + examples/graph_squeezenet.cpp | 1 + examples/graph_squeezenet_v1_1.cpp | 1 + examples/graph_srcnn955.cpp | 1 + examples/graph_ssd_mobilenet.cpp | 1 + examples/graph_vgg16.cpp | 3 +- examples/graph_vgg19.cpp | 3 +- examples/graph_vgg_vdsr.cpp | 3 +- examples/graph_yolov3.cpp | 1 + src/graph/backends/CL/CLDeviceBackend.cpp | 8 +- src/runtime/CL/CLTuner.cpp | 122 ++++++-------- src/runtime/CL/tuners/CLLWSList.cpp | 112 +++++++++++++ tests/main.cpp | 16 +- utils/CommonGraphOptions.cpp | 14 +- utils/CommonGraphOptions.h | 5 +- utils/TypePrinter.h | 44 +++++ 34 files changed, 599 insertions(+), 92 deletions(-) create mode 100644 arm_compute/runtime/CL/CLTunerTypes.h create mode 100644 arm_compute/runtime/CL/tuners/CLLWSList.h create mode 100644 src/runtime/CL/tuners/CLLWSList.cpp diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index 582e6f6434..4d9e031b91 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTunerTypes.h" #include #include @@ -34,6 +35,7 @@ namespace arm_compute { namespace graph { +using arm_compute::CLTunerMode; using arm_compute::Status; using arm_compute::Coordinates; @@ -71,13 +73,13 @@ constexpr EdgeID EmptyEdgeID = std::numeric_limits::max(); // Forward declarations class TensorDescriptor; - /** Graph configuration structure */ struct GraphConfig { bool use_function_memory_manager{ true }; /**< Use a memory manager to manage per-funcion auxilary memory */ bool use_transition_memory_manager{ true }; /**< Use a memory manager to manager transition buffer memory */ bool use_tuner{ false }; /**< Use a tuner in tunable backends */ + CLTunerMode tuner_mode{ CLTunerMode::EXHAUSTIVE }; /**< Tuner mode to be used by the CL tuner */ int num_threads{ -1 }; /**< Number of threads to use (thread capable backends), if 0 the backend will auto-initialize, if -1 the backend will stay as it is. */ std::string tuner_file{ "acl_tuner.csv" }; /**< File to load/store tuning values from */ }; diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h index 49e7596d58..afe01fff70 100644 --- a/arm_compute/graph/backends/CL/CLDeviceBackend.h +++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,11 @@ public: * @param[in] enable_tuning Enables tuning if false else true */ void set_kernel_tuning(bool enable_tuning); + /** Set kernel tuning mode + * + * @param[in] tuning_mode Indicates how exhaustive the search for the optimal LWS should be while tuning + */ + void set_kernel_tuning_mode(CLTunerMode tuning_mode); // Inherited overridden methods void initialize_backend() override; diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index ee83f6933c..3f3df5f236 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -25,6 +25,7 @@ #define __ARM_COMPUTE_CLTUNER_H__ #include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/CL/CLTunerTypes.h" #include "arm_compute/runtime/CL/ICLTuner.h" #include @@ -57,12 +58,26 @@ public: * @return True if tuning of new kernels is enabled. */ bool tune_new_kernels() const; + + /** Set OpenCL tuner mode + * + * @param[in] mode Indicates how exhaustive the search for the optimal LWS should be while tuning. Default is Exhaustive mode + */ + void set_tuner_mode(CLTunerMode mode); + + /** Get the current OpenCL tuner mode + * + * @return tuner_mode Indicates how exhaustive the search for the optimal LWS should be while tuning + */ + CLTunerMode get_tuner_mode() const; + /** Manually add a LWS for a kernel * * @param[in] kernel_id Unique identifiant of the kernel * @param[in] optimal_lws Optimal local workgroup size to use for the given kernel */ void add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal_lws); + /** Import LWS table * * @param[in] lws_table The unordered_map container to import @@ -118,8 +133,9 @@ private: cl::NDRange find_optimal_lws(ICLKernel &kernel); std::unordered_map _lws_table; - cl::Event _kernel_event; - bool _tune_new_kernels; + cl::Event _kernel_event; + bool _tune_new_kernels; + CLTunerMode _tuner_mode; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_CLTUNER_H__ */ diff --git a/arm_compute/runtime/CL/CLTunerTypes.h b/arm_compute/runtime/CL/CLTunerTypes.h new file mode 100644 index 0000000000..7d13b6d3fa --- /dev/null +++ b/arm_compute/runtime/CL/CLTunerTypes.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTUNER_TYPES_H__ +#define __ARM_COMPUTE_CLTUNER_TYPES_H__ + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/utils/misc/Utility.h" +#include + +namespace arm_compute +{ +/**< OpenCL tuner modes */ +enum class CLTunerMode +{ + EXHAUSTIVE, /**< Searches all possible LWS configurations while tuning */ + NORMAL, /**< Searches a subset of LWS configurations while tuning */ + RAPID /**< Searches a minimal subset of LWS configurations while tuning */ +}; + +/** Converts a string to a strong types enumeration @ref CLTunerMode + * + * @param[in] name String to convert + * + * @return Converted CLTunerMode enumeration + */ +inline CLTunerMode tuner_mode_from_name(const std::string &name) +{ + static const std::map tuner_modes = + { + { "exhaustive", CLTunerMode::EXHAUSTIVE }, + { "normal", CLTunerMode::NORMAL }, + { "rapid", CLTunerMode::RAPID }, + }; + +#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED + try + { +#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */ + return tuner_modes.at(arm_compute::utility::tolower(name)); + +#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED + } + catch(const std::out_of_range &) + { + throw std::invalid_argument(name); + } +#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */ +} + +/** Input Stream operator for @ref CLTunerMode + * + * @param[in] stream Stream to parse + * @param[out] tuner_mode Output tuner mode + * + * @return Updated stream + */ +inline ::std::istream &operator>>(::std::istream &stream, CLTunerMode &tuner_mode) +{ + std::string value; + stream >> value; + tuner_mode = tuner_mode_from_name(value); + return stream; +} +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLTUNER_TYPES_H__ */ diff --git a/arm_compute/runtime/CL/tuners/CLLWSList.h b/arm_compute/runtime/CL/tuners/CLLWSList.h new file mode 100644 index 0000000000..d623834208 --- /dev/null +++ b/arm_compute/runtime/CL/tuners/CLLWSList.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CL_LWS_LIST_H__ +#define __ARM_COMPUTE_CL_LWS_LIST_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/CL/CLTunerTypes.h" +#include "support/ToolchainSupport.h" +#include + +namespace arm_compute +{ +namespace cl_tuner +{ +constexpr unsigned int max_lws_supported_x{ 64u }; +constexpr unsigned int max_lws_supported_y{ 32u }; +constexpr unsigned int max_lws_supported_z{ 32u }; + +/** Interface for LWS lists */ +class ICLLWSList +{ +public: + /** Constructor */ + ICLLWSList() = default; + /** Copy Constructor */ + ICLLWSList(const ICLLWSList &) = default; + /** Move Constructor */ + ICLLWSList(ICLLWSList &&) noexcept(true) = default; + /** Assignment */ + ICLLWSList &operator=(const ICLLWSList &) = default; + /** Move Assignment */ + ICLLWSList &operator=(ICLLWSList &&) noexcept(true) = default; + /** Destructor */ + virtual ~ICLLWSList() = default; + + /** Return the LWS value at the given index. + * + * @return LWS value at the given index + */ + virtual cl::NDRange operator[](size_t) = 0; + + /** LWS list size. + * + * @return LWS list size + */ + virtual size_t size() = 0; +}; + +/** Non instantiable base class for LWS combinations that use Index2Cooard mapping */ +class CLLWSList : public ICLLWSList +{ +protected: + /* Shape of 3-D search space */ + TensorShape search_space_shape{ 0, 0, 0 }; + + /** Constructor */ + CLLWSList() = default; + /** Copy Constructor */ + CLLWSList(const CLLWSList &) = default; + /** Move Constructor */ + CLLWSList(CLLWSList &&) noexcept(true) = default; + /** Assignment */ + CLLWSList &operator=(const CLLWSList &) = default; + /** Move Assignment */ + CLLWSList &operator=(CLLWSList &&) noexcept(true) = default; + /** Destructor */ + virtual ~CLLWSList() = default; + + // Inherited methods overridden: + virtual size_t size() override; +}; + +/** Exhaustive list of all possible LWS values */ +class CLLWSListExhaustive : public CLLWSList +{ +public: + /** Prevent default constructor calls */ + CLLWSListExhaustive() = delete; + /** Constructor */ + CLLWSListExhaustive(const cl::NDRange &gws); + /** Copy Constructor */ + CLLWSListExhaustive(const CLLWSListExhaustive &) = default; + /** Move Constructor */ + CLLWSListExhaustive(CLLWSListExhaustive &&) noexcept(true) = default; + /** Assignment */ + CLLWSListExhaustive &operator=(const CLLWSListExhaustive &) = default; + /** Move Assignment */ + CLLWSListExhaustive &operator=(CLLWSListExhaustive &&) noexcept(true) = default; + /** Destructor */ + ~CLLWSListExhaustive() = default; + + // Inherited methods overridden: + cl::NDRange operator[](size_t) override; +}; + +/** A subset of LWS values that are either factors of gws when gws[2] < 16 or power of 2 */ +class CLLWSListNormal : public CLLWSList +{ +public: + /** Constructor */ + CLLWSListNormal(const cl::NDRange &gws); + /** Copy Constructor */ + CLLWSListNormal(const CLLWSListNormal &) = default; + /** Move Constructor */ + CLLWSListNormal(CLLWSListNormal &&) noexcept(true) = default; + /** Assignment */ + CLLWSListNormal &operator=(const CLLWSListNormal &) = default; + /** Move Assignment */ + CLLWSListNormal &operator=(CLLWSListNormal &&) noexcept(true) = default; + /** Destructor */ + ~CLLWSListNormal() = default; + + // Inherited methods overridden: + cl::NDRange operator[](size_t) override; + +protected: + std::vector _lws_x{}; + std::vector _lws_y{}; + std::vector _lws_z{}; + + /** Prevent default constructor calls */ + CLLWSListNormal() = default; + +private: + /** Utility function used to initialize the LWS values to test. + * Only the LWS values which are power of 2 or satisfy the modulo conditions with GWS are taken into account by the CLTuner + * + * @param[in, out] lws Vector of LWS to test + * @param[in] gws Size of the specific GWS + * @param[in] lws_max Max LWS value allowed to be tested + * @param[in] mod_let_one True if the results of the modulo operation between gws and the lws can be less than one. + */ + void initialize_lws_values(std::vector &lws, unsigned int gws, unsigned int lws_max, bool mod_let_one); +}; + +/** A minimal subset of LWS values that only have 1,2 and 4/8 */ +class CLLWSListRapid : public CLLWSListNormal +{ +public: + /** Prevent default constructor calls */ + CLLWSListRapid() = delete; + /** Constructor */ + CLLWSListRapid(const cl::NDRange &gws); + /** Copy Constructor */ + CLLWSListRapid(const CLLWSListRapid &) = default; + /** Move Constructor */ + CLLWSListRapid(CLLWSListRapid &&) noexcept(true) = default; + /** Assignment */ + CLLWSListRapid &operator=(const CLLWSListRapid &) = default; + /** Move Assignment */ + CLLWSListRapid &operator=(CLLWSListRapid &&) noexcept(true) = default; + /** Destructor */ + virtual ~CLLWSListRapid() = default; + +private: + /** Utility function used to initialize the LWS values to test. + * Only the LWS values that have 1,2 and 4/8 for each dimension are taken into account by the CLTuner + * + * @param[in, out] lws Vector of LWS to test + * @param[in] lws_max Max LWS value allowed to be tested + */ + void initialize_lws_values(std::vector &lws, unsigned int lws_max); +}; + +/** Factory to construct an ICLLWSList object based on the CL tuner mode */ +class CLLWSListFactory final +{ +public: + /** Construct an ICLLWSList object for the given tuner mode and gws configuration. + * + * @return unique_ptr to the requested ICLLWSList implementation. + */ + static std::unique_ptr get_lws_list(CLTunerMode mode, const cl::NDRange &gws) + { + switch(mode) + { + case CLTunerMode::EXHAUSTIVE: + return arm_compute::support::cpp14::make_unique(gws); + case CLTunerMode::NORMAL: + return arm_compute::support::cpp14::make_unique(gws); + case CLTunerMode::RAPID: + return arm_compute::support::cpp14::make_unique(gws); + default: + return nullptr; + } + } +}; +} // namespace cl_tuner +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CL_LWS_LIST_H__ */ diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp index 989e23266f..a785dea78d 100644 --- a/examples/graph_alexnet.cpp +++ b/examples/graph_alexnet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -150,6 +150,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp index 583ca2cade..185680acf9 100644 --- a/examples/graph_googlenet.cpp +++ b/examples/graph_googlenet.cpp @@ -126,6 +126,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_inception_resnet_v1.cpp b/examples/graph_inception_resnet_v1.cpp index e99f688319..64c35e1178 100644 --- a/examples/graph_inception_resnet_v1.cpp +++ b/examples/graph_inception_resnet_v1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -213,6 +213,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_inception_resnet_v2.cpp b/examples/graph_inception_resnet_v2.cpp index 8e799783f2..921fadac4f 100644 --- a/examples/graph_inception_resnet_v2.cpp +++ b/examples/graph_inception_resnet_v2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -192,6 +192,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp index 517e4920cb..0a1e312c1f 100644 --- a/examples/graph_inception_v3.cpp +++ b/examples/graph_inception_v3.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -200,6 +200,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp index 0b0360acfb..3ea2b2fd1c 100644 --- a/examples/graph_inception_v4.cpp +++ b/examples/graph_inception_v4.cpp @@ -151,6 +151,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp index 79cf12233e..c75a2f8526 100644 --- a/examples/graph_lenet.cpp +++ b/examples/graph_lenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -107,6 +107,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index a3c77fea26..e2e5eb972d 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -100,6 +100,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_mobilenet_v2.cpp b/examples/graph_mobilenet_v2.cpp index 9138e540a8..25690aa6fc 100644 --- a/examples/graph_mobilenet_v2.cpp +++ b/examples/graph_mobilenet_v2.cpp @@ -91,6 +91,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_resnet12.cpp b/examples/graph_resnet12.cpp index 59128630b2..db70b53d00 100644 --- a/examples/graph_resnet12.cpp +++ b/examples/graph_resnet12.cpp @@ -135,6 +135,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp index b6e20d610b..7c9b95e47e 100644 --- a/examples/graph_resnet50.cpp +++ b/examples/graph_resnet50.cpp @@ -114,6 +114,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_resnet_v2_50.cpp b/examples/graph_resnet_v2_50.cpp index 77807b83b4..78845a8f6f 100644 --- a/examples/graph_resnet_v2_50.cpp +++ b/examples/graph_resnet_v2_50.cpp @@ -117,6 +117,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_resnext50.cpp b/examples/graph_resnext50.cpp index 8b33f90bc8..766b8ff5fb 100644 --- a/examples/graph_resnext50.cpp +++ b/examples/graph_resnext50.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -98,6 +98,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_shufflenet.cpp b/examples/graph_shufflenet.cpp index e6016f0451..3704be7ac1 100644 --- a/examples/graph_shufflenet.cpp +++ b/examples/graph_shufflenet.cpp @@ -144,6 +144,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp index f78fe5d506..4796dd3ff1 100644 --- a/examples/graph_squeezenet.cpp +++ b/examples/graph_squeezenet.cpp @@ -167,6 +167,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp index 22a15df450..fd4561f49a 100644 --- a/examples/graph_squeezenet_v1_1.cpp +++ b/examples/graph_squeezenet_v1_1.cpp @@ -167,6 +167,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_srcnn955.cpp b/examples/graph_srcnn955.cpp index a8976a1beb..066f16eb81 100644 --- a/examples/graph_srcnn955.cpp +++ b/examples/graph_srcnn955.cpp @@ -121,6 +121,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_ssd_mobilenet.cpp b/examples/graph_ssd_mobilenet.cpp index 7fcc2804a2..55c9d75b7f 100644 --- a/examples/graph_ssd_mobilenet.cpp +++ b/examples/graph_ssd_mobilenet.cpp @@ -246,6 +246,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp index 290d1e7e98..e8055d4eff 100644 --- a/examples/graph_vgg16.cpp +++ b/examples/graph_vgg16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -225,6 +225,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp index 298ffa06ef..63051fb056 100644 --- a/examples/graph_vgg19.cpp +++ b/examples/graph_vgg19.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -236,6 +236,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_vgg_vdsr.cpp b/examples/graph_vgg_vdsr.cpp index ca7d10f4a0..9f0b357a9a 100644 --- a/examples/graph_vgg_vdsr.cpp +++ b/examples/graph_vgg_vdsr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -139,6 +139,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/examples/graph_yolov3.cpp b/examples/graph_yolov3.cpp index 6d0f67e1f5..c0a97da060 100644 --- a/examples/graph_yolov3.cpp +++ b/examples/graph_yolov3.cpp @@ -398,6 +398,7 @@ public: GraphConfig config; config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; graph.finalize(common_params.target, config); diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp index ae7f0a50b3..0666ec0ccb 100644 --- a/src/graph/backends/CL/CLDeviceBackend.cpp +++ b/src/graph/backends/CL/CLDeviceBackend.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -81,6 +81,11 @@ void CLDeviceBackend::set_kernel_tuning(bool enable_tuning) _tuner.set_tune_new_kernels(enable_tuning); } +void CLDeviceBackend::set_kernel_tuning_mode(CLTunerMode tuning_mode) +{ + _tuner.set_tuner_mode(tuning_mode); +} + void CLDeviceBackend::initialize_backend() { // Setup Scheduler @@ -118,6 +123,7 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx) } set_kernel_tuning(ctx.config().use_tuner); + set_kernel_tuning_mode(ctx.config().tuner_mode); // Setup a management backend if(ctx.memory_management_ctx(Target::CL) == nullptr) diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index a262d6b95c..8f8d3e7c3a 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/runtime/CL/CLTuner.h" +#include "arm_compute/runtime/CL/tuners/CLLWSList.h" #include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/Error.h" @@ -31,42 +32,13 @@ #include #include #include +#include #include namespace arm_compute { -namespace -{ -/** Utility function used to initialize the LWS values to test. - * Only the LWS values which are power of 2 or satisfy the modulo conditions with GWS are taken into account by the CLTuner - * - * @param[in, out] lws Vector of LWS to test for a specific dimension - * @param[in] gws Size of the GWS - * @param[in] lws_max Max LKWS value allowed to be tested - * @param[in] mod_let_one True if the results of the modulo operation between gws and the lws can be less than one. - */ -void initialize_lws_values(std::vector &lws, unsigned int gws, unsigned int lws_max, bool mod_let_one) -{ - lws.push_back(1); - - for(unsigned int i = 2; i <= lws_max; ++i) - { - // Power of two condition - const bool is_power_of_two = (i & (i - 1)) == 0; - - // Condition for the module accordingly with the mod_let_one flag - const bool mod_cond = mod_let_one ? (gws % i) <= 1 : (gws % i) == 0; - - if(mod_cond || is_power_of_two) - { - lws.push_back(i); - } - } -} -} // namespace - CLTuner::CLTuner(bool tune_new_kernels) - : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels) + : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels), _tuner_mode(CLTunerMode::EXHAUSTIVE) { } @@ -88,6 +60,15 @@ bool CLTuner::tune_new_kernels() const return _tune_new_kernels; } +void CLTuner::set_tuner_mode(CLTunerMode mode) +{ + _tuner_mode = mode; +} +CLTunerMode CLTuner::get_tuner_mode() const +{ + return _tuner_mode; +} + void CLTuner::tune_kernel_static(ICLKernel &kernel) { ARM_COMPUTE_UNUSED(kernel); @@ -182,61 +163,54 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) }; CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor; - cl_ulong min_exec_time = std::numeric_limits::max(); + cl::NDRange gws = ICLKernel::gws_from_window(kernel.window()); - cl::NDRange gws = ICLKernel::gws_from_window(kernel.window()); - cl::NDRange opt_lws = cl::NullRange; + // Run the kernel with default lws to be used as baseline + kernel.run(kernel.window(), queue_profiler); - const unsigned int lws_x_max = std::min(static_cast(gws[0]), 64u); - const unsigned int lws_y_max = std::min(static_cast(gws[1]), 32u); - const unsigned int lws_z_max = std::min(static_cast(gws[2]), 32u); + queue_profiler.finish(); - std::vector lws_x; - std::vector lws_y; - std::vector lws_z; + const cl_ulong start = _kernel_event.getProfilingInfo(); + const cl_ulong end = _kernel_event.getProfilingInfo(); + cl_ulong min_exec_time = end - start; + _kernel_event = nullptr; - // Initialize the LWS values to test - initialize_lws_values(lws_x, gws[0], lws_x_max, gws[2] > 16); - initialize_lws_values(lws_y, gws[1], lws_y_max, gws[2] > 16); - initialize_lws_values(lws_z, gws[2], lws_z_max, false); + cl::NDRange opt_lws = cl::NullRange; - for(const auto &z : lws_z) + //Construct the list of LWS values to be tested based on the tuner mode. + auto lws_list = cl_tuner::CLLWSListFactory::get_lws_list(_tuner_mode, gws); + for(size_t i = 0; i < lws_list->size(); ++i) { - for(const auto &y : lws_y) - { - for(const auto &x : lws_x) - { - cl::NDRange lws_test = cl::NDRange(x, y, z); - - bool invalid_lws = (x * y * z > kernel.get_max_workgroup_size()) || (x == 1 && y == 1 && z == 1); + cl::NDRange lws_test = (*lws_list)[i]; + auto x = lws_test[0]; + auto y = lws_test[1]; + auto z = lws_test[2]; + bool invalid_lws = (x * y * z > kernel.get_max_workgroup_size()) || (x == 1 && y == 1 && z == 1); - invalid_lws = invalid_lws || (x > gws[0]) || (y > gws[1]) || (z > gws[2]); + invalid_lws = invalid_lws || (x > gws[0]) || (y > gws[1]) || (z > gws[2]); - if(invalid_lws) - { - continue; - } - - //Set the Local-Workgroup-Size - kernel.set_lws_hint(lws_test); + if(invalid_lws) + { + continue; + } - // Run the kernel - kernel.run(kernel.window(), queue_profiler); + //Set the Local-Workgroup-Size + kernel.set_lws_hint(lws_test); - queue_profiler.finish(); + // Run the kernel + kernel.run(kernel.window(), queue_profiler); - const cl_ulong start = _kernel_event.getProfilingInfo(); - const cl_ulong end = _kernel_event.getProfilingInfo(); - const cl_ulong diff = end - start; - _kernel_event = nullptr; + queue_profiler.finish(); - // Check the execution time - if(diff < min_exec_time) - { - min_exec_time = diff; - opt_lws = cl::NDRange(x, y, z); - } - } + const cl_ulong start = _kernel_event.getProfilingInfo(); + const cl_ulong end = _kernel_event.getProfilingInfo(); + const cl_ulong diff = end - start; + _kernel_event = nullptr; + // Check the execution time + if(diff < min_exec_time) + { + min_exec_time = diff; + opt_lws = cl::NDRange(x, y, z); } } diff --git a/src/runtime/CL/tuners/CLLWSList.cpp b/src/runtime/CL/tuners/CLLWSList.cpp new file mode 100644 index 0000000000..97134b1b2c --- /dev/null +++ b/src/runtime/CL/tuners/CLLWSList.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/tuners/CLLWSList.h" + +namespace arm_compute +{ +namespace cl_tuner +{ +size_t CLLWSList::size() +{ + return search_space_shape.total_size(); +} + +cl::NDRange CLLWSListExhaustive::operator[](size_t index) +{ + ARM_COMPUTE_ERROR_ON(index >= size()); + auto coords = index2coords(search_space_shape, index); + return cl::NDRange(coords[0] + 1, coords[1] + 1, coords[2] + 1); +} + +CLLWSListExhaustive::CLLWSListExhaustive(const cl::NDRange &gws) +{ + search_space_shape = TensorShape(std::min(static_cast(gws[0]), max_lws_supported_x), std::min(static_cast(gws[1]), max_lws_supported_y), + std::min(static_cast(gws[2]), max_lws_supported_z)); +} + +cl::NDRange CLLWSListNormal::operator[](size_t index) +{ + ARM_COMPUTE_ERROR_ON(index >= size()); + auto coords = index2coords(search_space_shape, index); + return cl::NDRange(_lws_x[coords[0]], _lws_y[coords[1]], _lws_z[coords[2]]); +} + +CLLWSListNormal::CLLWSListNormal(const cl::NDRange &gws) +{ + auto lws_x_max = std::min(static_cast(gws[0]), max_lws_supported_x); + auto lws_y_max = std::min(static_cast(gws[1]), max_lws_supported_y); + auto lws_z_max = std::min(static_cast(gws[2]), max_lws_supported_z); + + // Initialize the LWS values to test + initialize_lws_values(_lws_x, gws[0], lws_x_max, gws[2] > 16); // Explore lws that are not factors of gws only when gws[2] > 16 + initialize_lws_values(_lws_y, gws[1], lws_y_max, gws[2] > 16); // Explore lws that are not factors of gws only when gws[2] > 16 + initialize_lws_values(_lws_z, gws[2], lws_z_max, false); + + search_space_shape = TensorShape(_lws_x.size(), _lws_y.size(), _lws_z.size()); +} + +void CLLWSListNormal::initialize_lws_values(std::vector &lws, unsigned int gws, unsigned int lws_max, bool mod_let_one) +{ + lws.push_back(1); + + for(unsigned int i = 2; i <= lws_max; ++i) + { + // Power of two condition + const bool is_power_of_two = (i & (i - 1)) == 0; + + // Condition for the module accordingly with the mod_let_one flag + const bool mod_cond = mod_let_one ? (gws % i) <= 1 : (gws % i) == 0; + + if(mod_cond || is_power_of_two) + { + lws.push_back(i); + } + } +} + +CLLWSListRapid::CLLWSListRapid(const cl::NDRange &gws) +{ + auto lws_x_max = std::min(static_cast(gws[0]), 8u); // Limit exploration to 1 - 8 + auto lws_y_max = std::min(static_cast(gws[1]), 4u); // Limit exploration to 1 - 4 + auto lws_z_max = std::min(static_cast(gws[2]), 4u); // Limit exploration to 1 - 4 + + // Initialize the LWS values to test + initialize_lws_values(_lws_x, lws_x_max); + initialize_lws_values(_lws_y, lws_y_max); + initialize_lws_values(_lws_z, lws_z_max); + + search_space_shape = TensorShape(_lws_x.size(), _lws_y.size(), _lws_z.size()); +} + +void CLLWSListRapid::initialize_lws_values(std::vector &lws, unsigned int lws_max) +{ + lws.push_back(1); + + for(unsigned int i = 2; i <= lws_max; i *= 4) + { + lws.push_back(i); + } +} +} // namespace cl_tuner +} // namespace arm_compute diff --git a/tests/main.cpp b/tests/main.cpp index 0eca0c8f1b..bb35d0af98 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -39,7 +39,7 @@ #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" - +#include "utils/TypePrinter.h" #endif /* ARM_COMPUTE_CL */ #ifdef ARM_COMPUTE_GC #include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" @@ -139,6 +139,16 @@ int main(int argc, char **argv) #ifdef ARM_COMPUTE_CL auto enable_tuner = parser.add_option("enable-tuner"); enable_tuner->set_help("Enable OpenCL dynamic tuner"); + + const std::set supported_tuner_modes + { + CLTunerMode::EXHAUSTIVE, + CLTunerMode::NORMAL, + CLTunerMode::RAPID + }; + auto tuner_mode = parser.add_option>("tuner-mode", supported_tuner_modes, CLTunerMode::EXHAUSTIVE); + tuner_mode->set_help("Configures the time taken by the tuner to tune. Slow tuner produces the most performant LWS configuration"); + auto tuner_file = parser.add_option>("tuner-file", ""); tuner_file->set_help("File to load/save CLTuner values"); #endif /* ARM_COMPUTE_CL */ @@ -161,7 +171,9 @@ int main(int argc, char **argv) #ifdef ARM_COMPUTE_CL if(enable_tuner->is_set()) { - cl_tuner.set_tune_new_kernels(enable_tuner->value()); + //set tuner mode + cl_tuner.set_tuner_mode(tuner_mode->value()); + // If that's the first run then the file won't exist yet if(file_exists(tuner_file->value())) { diff --git a/utils/CommonGraphOptions.cpp b/utils/CommonGraphOptions.cpp index 4247b2d3e5..7334d038ba 100644 --- a/utils/CommonGraphOptions.cpp +++ b/utils/CommonGraphOptions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -83,6 +83,7 @@ namespace utils os << "Data type : " << common_params.data_type << std::endl; os << "Data layout : " << common_params.data_layout << std::endl; os << "Tuner enabled? : " << (common_params.enable_tuner ? true_str : false_str) << std::endl; + os << "Tuner mode : " << common_params.tuner_mode << std::endl; os << "Tuner file : " << common_params.tuner_file << std::endl; os << "Fast math enabled? : " << (common_params.fast_math_hint == FastMathHint::Enabled ? true_str : false_str) << std::endl; if(!common_params.data_path.empty()) @@ -117,6 +118,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser) data_type(), data_layout(), enable_tuner(parser.add_option("enable-tuner")), + tuner_mode(), fast_math_hint(parser.add_option("fast-math")), data_path(parser.add_option>("data")), image(parser.add_option>("image")), @@ -146,9 +148,17 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser) DataLayout::NCHW, }; + const std::set supported_tuner_modes + { + CLTunerMode::EXHAUSTIVE, + CLTunerMode::NORMAL, + CLTunerMode::RAPID + }; + target = parser.add_option>("target", supported_targets, Target::NEON); data_type = parser.add_option>("type", supported_data_types, DataType::F32); data_layout = parser.add_option>("layout", supported_data_layouts); + tuner_mode = parser.add_option>("tuner-mode", supported_tuner_modes, CLTunerMode::EXHAUSTIVE); help->set_help("Show this help message"); threads->set_help("Number of threads to use"); @@ -156,6 +166,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser) data_type->set_help("Data type to use"); data_layout->set_help("Data layout to use"); enable_tuner->set_help("Enable OpenCL dynamic tuner"); + tuner_mode->set_help("Configures the time taken by the tuner to tune. Slow tuner produces the most performant LWS configuration"); fast_math_hint->set_help("Enable fast math"); data_path->set_help("Path where graph parameters reside"); image->set_help("Input image for the graph"); @@ -181,6 +192,7 @@ CommonGraphParams consume_common_graph_parameters(CommonGraphOptions &options) common_params.data_layout = options.data_layout->value(); } common_params.enable_tuner = options.enable_tuner->is_set() ? options.enable_tuner->value() : false; + common_params.tuner_mode = options.tuner_mode->value(); common_params.fast_math_hint = options.fast_math_hint->is_set() ? fast_math_hint_value : FastMathHint::Disabled; common_params.data_path = options.data_path->value(); common_params.image = options.image->value(); diff --git a/utils/CommonGraphOptions.h b/utils/CommonGraphOptions.h index 921889d7c7..d81cd8fbdb 100644 --- a/utils/CommonGraphOptions.h +++ b/utils/CommonGraphOptions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ #include "arm_compute/graph/TypeLoader.h" #include "arm_compute/graph/TypePrinter.h" +#include "arm_compute/runtime/CL/CLTunerTypes.h" namespace arm_compute { @@ -93,6 +94,7 @@ struct CommonGraphParams arm_compute::DataType data_type{ DataType::F32 }; arm_compute::DataLayout data_layout{ DataLayout::NHWC }; bool enable_tuner{ false }; + arm_compute::CLTunerMode tuner_mode{ CLTunerMode::EXHAUSTIVE }; arm_compute::graph::FastMathHint fast_math_hint{ arm_compute::graph::FastMathHint::Disabled }; std::string data_path{}; std::string image{}; @@ -147,6 +149,7 @@ public: EnumOption *data_type; /**< Graph data type */ EnumOption *data_layout; /**< Graph data layout */ ToggleOption *enable_tuner; /**< Enable tuner */ + SimpleOption *tuner_mode; /**< Tuner mode */ ToggleOption *fast_math_hint; /**< Fast math hint */ SimpleOption *data_path; /**< Trainable parameters path */ SimpleOption *image; /**< Image */ diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index 70196882de..a71e03696a 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -34,6 +34,7 @@ #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTunerTypes.h" #include #include @@ -2218,6 +2219,49 @@ inline std::string to_string(const T &val) return support::cpp11::to_string(val); } +/** Convert a CLTunerMode value to a string + * + * @param val CLTunerMode value to be converted + * + * @return String representing the corresponding CLTunerMode. + */ +inline std::string to_string(const CLTunerMode val) +{ + switch(val) + { + case CLTunerMode::EXHAUSTIVE: + { + return std::string("Exhaustive"); + } + case CLTunerMode::NORMAL: + { + return std::string("Normal"); + } + case CLTunerMode::RAPID: + { + return std::string("Rapid"); + } + default: + { + ARM_COMPUTE_ERROR("Invalid tuner mode."); + return std::string("UNDEFINED"); + } + } +} +/** [Print CLTunerMode type] **/ +/** Formatted output of the CLTunerMode type. + * + * @param[out] os Output stream. + * @param[in] val CLTunerMode to output. + * + * @return Modified output stream. + */ +inline ::std::ostream &operator<<(::std::ostream &os, const CLTunerMode &val) +{ + os << to_string(val); + return os; +} + } // namespace arm_compute #endif /* __ARM_COMPUTE_TYPE_PRINTER_H__ */ -- cgit v1.2.1