aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>2019-04-25 09:27:24 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2019-04-25 16:18:58 +0000
commit050471e40fc58cb5ea745701a43ec5b2b9586b81 (patch)
tree0ee684bcc93fae693686c391e42a2b824705aeb1
parentd038dafe3810d22c8664ceef4fe49aad77abdbd1 (diff)
downloadComputeLibrary-050471e40fc58cb5ea745701a43ec5b2b9586b81.tar.gz
COMPMID-1974 : Extend CLTuner to support different levels of tuning
Change-Id: I52e4a00a25e7f7a17050038cee7c30e508553722 Signed-off-by: Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com> Reviewed-on: https://review.mlplatform.org/c/977 Comments-Addressed: Pablo Marquez <pablo.tello@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/graph/Types.h4
-rw-r--r--arm_compute/graph/backends/CL/CLDeviceBackend.h7
-rw-r--r--arm_compute/runtime/CL/CLTuner.h20
-rw-r--r--arm_compute/runtime/CL/CLTunerTypes.h86
-rw-r--r--arm_compute/runtime/CL/tuners/CLLWSList.h213
-rw-r--r--examples/graph_alexnet.cpp3
-rw-r--r--examples/graph_googlenet.cpp1
-rw-r--r--examples/graph_inception_resnet_v1.cpp3
-rw-r--r--examples/graph_inception_resnet_v2.cpp3
-rw-r--r--examples/graph_inception_v3.cpp3
-rw-r--r--examples/graph_inception_v4.cpp1
-rw-r--r--examples/graph_lenet.cpp3
-rw-r--r--examples/graph_mobilenet.cpp1
-rw-r--r--examples/graph_mobilenet_v2.cpp1
-rw-r--r--examples/graph_resnet12.cpp1
-rw-r--r--examples/graph_resnet50.cpp1
-rw-r--r--examples/graph_resnet_v2_50.cpp1
-rw-r--r--examples/graph_resnext50.cpp3
-rw-r--r--examples/graph_shufflenet.cpp1
-rw-r--r--examples/graph_squeezenet.cpp1
-rw-r--r--examples/graph_squeezenet_v1_1.cpp1
-rw-r--r--examples/graph_srcnn955.cpp1
-rw-r--r--examples/graph_ssd_mobilenet.cpp1
-rw-r--r--examples/graph_vgg16.cpp3
-rw-r--r--examples/graph_vgg19.cpp3
-rw-r--r--examples/graph_vgg_vdsr.cpp3
-rw-r--r--examples/graph_yolov3.cpp1
-rw-r--r--src/graph/backends/CL/CLDeviceBackend.cpp8
-rw-r--r--src/runtime/CL/CLTuner.cpp122
-rw-r--r--src/runtime/CL/tuners/CLLWSList.cpp112
-rw-r--r--tests/main.cpp16
-rw-r--r--utils/CommonGraphOptions.cpp14
-rw-r--r--utils/CommonGraphOptions.h5
-rw-r--r--utils/TypePrinter.h44
34 files changed, 599 insertions, 92 deletions
diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h
index 582e6f6434..4d9e031b91 100644
--- a/arm_compute/graph/Types.h
+++ b/arm_compute/graph/Types.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
#include <limits>
#include <string>
@@ -34,6 +35,7 @@ namespace arm_compute
{
namespace graph
{
+using arm_compute::CLTunerMode;
using arm_compute::Status;
using arm_compute::Coordinates;
@@ -71,13 +73,13 @@ constexpr EdgeID EmptyEdgeID = std::numeric_limits<EdgeID>::max();
// Forward declarations
class TensorDescriptor;
-
/** Graph configuration structure */
struct GraphConfig
{
bool use_function_memory_manager{ true }; /**< Use a memory manager to manage per-function auxiliary memory */
bool use_transition_memory_manager{ true }; /**< Use a memory manager to manage transition buffer memory */
bool use_tuner{ false }; /**< Use a tuner in tunable backends */
+ CLTunerMode tuner_mode{ CLTunerMode::EXHAUSTIVE }; /**< Tuner mode to be used by the CL tuner */
int num_threads{ -1 }; /**< Number of threads to use (thread capable backends), if 0 the backend will auto-initialize, if -1 the backend will stay as it is. */
std::string tuner_file{ "acl_tuner.csv" }; /**< File to load/store tuning values from */
};
diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h
index 49e7596d58..afe01fff70 100644
--- a/arm_compute/graph/backends/CL/CLDeviceBackend.h
+++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,6 +50,11 @@ public:
* @param[in] enable_tuning Enables tuning if true, disables it if false
*/
void set_kernel_tuning(bool enable_tuning);
+ /** Set kernel tuning mode
+ *
+ * @param[in] tuning_mode Indicates how exhaustive the search for the optimal LWS (local workgroup size) should be while tuning
+ */
+ void set_kernel_tuning_mode(CLTunerMode tuning_mode);
// Inherited overridden methods
void initialize_backend() override;
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h
index ee83f6933c..3f3df5f236 100644
--- a/arm_compute/runtime/CL/CLTuner.h
+++ b/arm_compute/runtime/CL/CLTuner.h
@@ -25,6 +25,7 @@
#define __ARM_COMPUTE_CLTUNER_H__
#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
#include "arm_compute/runtime/CL/ICLTuner.h"
#include <unordered_map>
@@ -57,12 +58,26 @@ public:
* @return True if tuning of new kernels is enabled.
*/
bool tune_new_kernels() const;
+
+ /** Set OpenCL tuner mode
+ *
+ * @param[in] mode Indicates how exhaustive the search for the optimal LWS (local workgroup size) should be while tuning.
+ * A newly constructed tuner starts in CLTunerMode::EXHAUSTIVE.
+ */
+ void set_tuner_mode(CLTunerMode mode);
+
+ /** Get the current OpenCL tuner mode
+ *
+ * @return The tuner mode currently set, i.e. how exhaustive the search for the optimal LWS is while tuning
+ */
+ CLTunerMode get_tuner_mode() const;
+
/** Manually add a LWS for a kernel
*
* @param[in] kernel_id Unique identifier of the kernel
* @param[in] optimal_lws Optimal local workgroup size to use for the given kernel
*/
void add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal_lws);
+
/** Import LWS table
*
* @param[in] lws_table The unordered_map container to import
@@ -118,8 +133,9 @@ private:
cl::NDRange find_optimal_lws(ICLKernel &kernel);
std::unordered_map<std::string, cl::NDRange> _lws_table;
- cl::Event _kernel_event;
- bool _tune_new_kernels;
+ cl::Event _kernel_event;
+ bool _tune_new_kernels;
+ CLTunerMode _tuner_mode;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLTUNER_H__ */
diff --git a/arm_compute/runtime/CL/CLTunerTypes.h b/arm_compute/runtime/CL/CLTunerTypes.h
new file mode 100644
index 0000000000..7d13b6d3fa
--- /dev/null
+++ b/arm_compute/runtime/CL/CLTunerTypes.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTUNER_TYPES_H__
+#define __ARM_COMPUTE_CLTUNER_TYPES_H__
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/misc/Utility.h"
+#include <map>
+
+namespace arm_compute
+{
+/** OpenCL tuner modes, listed from the most to the least exhaustive LWS (local workgroup size) search */
+enum class CLTunerMode
+{
+ EXHAUSTIVE, /**< Searches all possible LWS configurations while tuning */
+ NORMAL, /**< Searches a subset of LWS configurations while tuning */
+ RAPID /**< Searches a minimal subset of LWS configurations while tuning */
+};
+
+/** Converts a string to a strongly typed enumeration @ref CLTunerMode
+ *
+ * The name is passed through arm_compute::utility::tolower before the lookup.
+ * The recognised names are "exhaustive", "normal" and "rapid".
+ *
+ * @param[in] name String to convert
+ *
+ * @return Converted CLTunerMode enumeration
+ *
+ * @throws std::invalid_argument carrying @p name when the name is not recognised.
+ * This translation is only compiled in when ARM_COMPUTE_EXCEPTIONS_DISABLED is not defined;
+ * otherwise the failed std::map::at lookup is left unhandled.
+ */
+inline CLTunerMode tuner_mode_from_name(const std::string &name)
+{
+ static const std::map<std::string, CLTunerMode> tuner_modes =
+ {
+ { "exhaustive", CLTunerMode::EXHAUSTIVE },
+ { "normal", CLTunerMode::NORMAL },
+ { "rapid", CLTunerMode::RAPID },
+ };
+
+#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
+ try
+ {
+#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
+ return tuner_modes.at(arm_compute::utility::tolower(name)); // std::map::at throws std::out_of_range for an unknown key
+
+#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
+ }
+ catch(const std::out_of_range &)
+ {
+ throw std::invalid_argument(name);
+ }
+#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
+}
+
+/** Input Stream operator for @ref CLTunerMode
+ *
+ * Extracts one token from the stream into a std::string and converts it with @ref tuner_mode_from_name.
+ *
+ * @note The result of the extraction is not checked before the conversion, so a failed
+ * extraction hands the still-empty string to tuner_mode_from_name.
+ *
+ * @param[in] stream Stream to parse
+ * @param[out] tuner_mode Output tuner mode
+ *
+ * @return Updated stream
+ */
+inline ::std::istream &operator>>(::std::istream &stream, CLTunerMode &tuner_mode)
+{
+ std::string value;
+ stream >> value;
+ tuner_mode = tuner_mode_from_name(value);
+ return stream;
+}
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLTUNER_TYPES_H__ */
diff --git a/arm_compute/runtime/CL/tuners/CLLWSList.h b/arm_compute/runtime/CL/tuners/CLLWSList.h
new file mode 100644
index 0000000000..d623834208
--- /dev/null
+++ b/arm_compute/runtime/CL/tuners/CLLWSList.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CL_LWS_LIST_H__
+#define __ARM_COMPUTE_CL_LWS_LIST_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
+#include "support/ToolchainSupport.h"
+#include <memory>
+
+namespace arm_compute
+{
+namespace cl_tuner
+{
+constexpr unsigned int max_lws_supported_x{ 64u }; /**< Maximum supported LWS (local workgroup size) value along the x dimension */
+constexpr unsigned int max_lws_supported_y{ 32u }; /**< Maximum supported LWS (local workgroup size) value along the y dimension */
+constexpr unsigned int max_lws_supported_z{ 32u }; /**< Maximum supported LWS (local workgroup size) value along the z dimension */
+
+/** Interface for LWS (local workgroup size) lists: an indexable collection of cl::NDRange values */
+class ICLLWSList
+{
+public:
+ /** Constructor */
+ ICLLWSList() = default;
+ /** Copy Constructor */
+ ICLLWSList(const ICLLWSList &) = default;
+ /** Move Constructor */
+ ICLLWSList(ICLLWSList &&) noexcept(true) = default;
+ /** Assignment */
+ ICLLWSList &operator=(const ICLLWSList &) = default;
+ /** Move Assignment */
+ ICLLWSList &operator=(ICLLWSList &&) noexcept(true) = default;
+ /** Destructor */
+ virtual ~ICLLWSList() = default;
+
+ /** Return the LWS value at the given index.
+ *
+ * The unnamed size_t parameter is the index into the list.
+ *
+ * @return LWS value at the given index
+ */
+ virtual cl::NDRange operator[](size_t) = 0;
+
+ /** LWS list size.
+ *
+ * @return LWS list size, i.e. the number of entries reachable through operator[]
+ */
+ virtual size_t size() = 0;
+};
+
+/** Non-instantiable base class (its constructors and destructor are protected) for LWS combinations that use an index-to-coordinate mapping */
+class CLLWSList : public ICLLWSList
+{
+protected:
+ /* Shape of the 3-D search space; default-initialised to { 0, 0, 0 } */
+ TensorShape search_space_shape{ 0, 0, 0 };
+
+ /** Constructor */
+ CLLWSList() = default;
+ /** Copy Constructor */
+ CLLWSList(const CLLWSList &) = default;
+ /** Move Constructor */
+ CLLWSList(CLLWSList &&) noexcept(true) = default;
+ /** Assignment */
+ CLLWSList &operator=(const CLLWSList &) = default;
+ /** Move Assignment */
+ CLLWSList &operator=(CLLWSList &&) noexcept(true) = default;
+ /** Destructor */
+ virtual ~CLLWSList() = default;
+
+ // Inherited methods overridden (declared here, defined outside this header):
+ virtual size_t size() override;
+};
+
+/** Exhaustive list of all possible LWS values */
+class CLLWSListExhaustive : public CLLWSList
+{
+public:
+ /** Prevent default constructor calls */
+ CLLWSListExhaustive() = delete;
+ /** Constructor
+ *
+ * @param[in] gws GWS configuration the list is built for
+ */
+ CLLWSListExhaustive(const cl::NDRange &gws);
+ /** Copy Constructor */
+ CLLWSListExhaustive(const CLLWSListExhaustive &) = default;
+ /** Move Constructor */
+ CLLWSListExhaustive(CLLWSListExhaustive &&) noexcept(true) = default;
+ /** Assignment */
+ CLLWSListExhaustive &operator=(const CLLWSListExhaustive &) = default;
+ /** Move Assignment */
+ CLLWSListExhaustive &operator=(CLLWSListExhaustive &&) noexcept(true) = default;
+ /** Destructor */
+ ~CLLWSListExhaustive() = default;
+
+ // Inherited methods overridden:
+ cl::NDRange operator[](size_t) override;
+};
+
+/** A subset of LWS values that are either factors of gws when gws[2] < 16 or power of 2 */
+class CLLWSListNormal : public CLLWSList
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gws GWS configuration the list is built for
+ */
+ CLLWSListNormal(const cl::NDRange &gws);
+ /** Copy Constructor */
+ CLLWSListNormal(const CLLWSListNormal &) = default;
+ /** Move Constructor */
+ CLLWSListNormal(CLLWSListNormal &&) noexcept(true) = default;
+ /** Assignment */
+ CLLWSListNormal &operator=(const CLLWSListNormal &) = default;
+ /** Move Assignment */
+ CLLWSListNormal &operator=(CLLWSListNormal &&) noexcept(true) = default;
+ /** Destructor */
+ ~CLLWSListNormal() = default;
+
+ // Inherited methods overridden:
+ cl::NDRange operator[](size_t) override;
+
+protected:
+ std::vector<unsigned int> _lws_x{}; /**< LWS values to test along x */
+ std::vector<unsigned int> _lws_y{}; /**< LWS values to test along y */
+ std::vector<unsigned int> _lws_z{}; /**< LWS values to test along z */
+
+ /** Default constructor, protected so that only derived classes can call it */
+ CLLWSListNormal() = default;
+
+private:
+ /** Utility function used to initialize the LWS values to test.
+ * Only the LWS values which are power of 2 or satisfy the modulo conditions with GWS are taken into account by the CLTuner
+ *
+ * @param[in, out] lws Vector of LWS to test
+ * @param[in] gws Size of the specific GWS
+ * @param[in] lws_max Max LWS value allowed to be tested
+ * @param[in] mod_let_one True if the results of the modulo operation between gws and the lws can be less than one.
+ */
+ void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int gws, unsigned int lws_max, bool mod_let_one);
+};
+
+/** A minimal subset of LWS values that only have 1,2 and 4/8 */
+class CLLWSListRapid : public CLLWSListNormal
+{
+public:
+ /** Prevent default constructor calls */
+ CLLWSListRapid() = delete;
+ /** Constructor
+ *
+ * @param[in] gws GWS configuration the list is built for
+ */
+ CLLWSListRapid(const cl::NDRange &gws);
+ /** Copy Constructor */
+ CLLWSListRapid(const CLLWSListRapid &) = default;
+ /** Move Constructor */
+ CLLWSListRapid(CLLWSListRapid &&) noexcept(true) = default;
+ /** Assignment */
+ CLLWSListRapid &operator=(const CLLWSListRapid &) = default;
+ /** Move Assignment */
+ CLLWSListRapid &operator=(CLLWSListRapid &&) noexcept(true) = default;
+ /** Destructor */
+ virtual ~CLLWSListRapid() = default;
+
+private:
+ /** Utility function used to initialize the LWS values to test.
+ * Only the LWS values that have 1,2 and 4/8 for each dimension are taken into account by the CLTuner
+ *
+ * @param[in, out] lws Vector of LWS to test
+ * @param[in] lws_max Max LWS value allowed to be tested
+ */
+ void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int lws_max);
+};
+
+/** Factory to construct an ICLLWSList object based on the CL tuner mode */
+class CLLWSListFactory final
+{
+public:
+ /** Construct an ICLLWSList object for the given tuner mode and gws configuration.
+ *
+ * @param[in] mode Tuner mode that selects the list implementation
+ * @param[in] gws GWS configuration forwarded to the constructor of the selected list
+ *
+ * @return unique_ptr to the requested ICLLWSList implementation, or nullptr if @p mode is none of
+ * EXHAUSTIVE, NORMAL or RAPID. Callers must not dereference the result without that in mind.
+ */
+ static std::unique_ptr<ICLLWSList> get_lws_list(CLTunerMode mode, const cl::NDRange &gws)
+ {
+ switch(mode)
+ {
+ case CLTunerMode::EXHAUSTIVE:
+ return arm_compute::support::cpp14::make_unique<CLLWSListExhaustive>(gws);
+ case CLTunerMode::NORMAL:
+ return arm_compute::support::cpp14::make_unique<CLLWSListNormal>(gws);
+ case CLTunerMode::RAPID:
+ return arm_compute::support::cpp14::make_unique<CLLWSListRapid>(gws);
+ default:
+ return nullptr;
+ }
+ }
+};
+} // namespace cl_tuner
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CL_LWS_LIST_H__ */
diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
index 989e23266f..a785dea78d 100644
--- a/examples/graph_alexnet.cpp
+++ b/examples/graph_alexnet.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -150,6 +150,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp
index 583ca2cade..185680acf9 100644
--- a/examples/graph_googlenet.cpp
+++ b/examples/graph_googlenet.cpp
@@ -126,6 +126,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_inception_resnet_v1.cpp b/examples/graph_inception_resnet_v1.cpp
index e99f688319..64c35e1178 100644
--- a/examples/graph_inception_resnet_v1.cpp
+++ b/examples/graph_inception_resnet_v1.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -213,6 +213,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_inception_resnet_v2.cpp b/examples/graph_inception_resnet_v2.cpp
index 8e799783f2..921fadac4f 100644
--- a/examples/graph_inception_resnet_v2.cpp
+++ b/examples/graph_inception_resnet_v2.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -192,6 +192,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp
index 517e4920cb..0a1e312c1f 100644
--- a/examples/graph_inception_v3.cpp
+++ b/examples/graph_inception_v3.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -200,6 +200,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp
index 0b0360acfb..3ea2b2fd1c 100644
--- a/examples/graph_inception_v4.cpp
+++ b/examples/graph_inception_v4.cpp
@@ -151,6 +151,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp
index 79cf12233e..c75a2f8526 100644
--- a/examples/graph_lenet.cpp
+++ b/examples/graph_lenet.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -107,6 +107,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp
index a3c77fea26..e2e5eb972d 100644
--- a/examples/graph_mobilenet.cpp
+++ b/examples/graph_mobilenet.cpp
@@ -100,6 +100,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_mobilenet_v2.cpp b/examples/graph_mobilenet_v2.cpp
index 9138e540a8..25690aa6fc 100644
--- a/examples/graph_mobilenet_v2.cpp
+++ b/examples/graph_mobilenet_v2.cpp
@@ -91,6 +91,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_resnet12.cpp b/examples/graph_resnet12.cpp
index 59128630b2..db70b53d00 100644
--- a/examples/graph_resnet12.cpp
+++ b/examples/graph_resnet12.cpp
@@ -135,6 +135,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp
index b6e20d610b..7c9b95e47e 100644
--- a/examples/graph_resnet50.cpp
+++ b/examples/graph_resnet50.cpp
@@ -114,6 +114,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_resnet_v2_50.cpp b/examples/graph_resnet_v2_50.cpp
index 77807b83b4..78845a8f6f 100644
--- a/examples/graph_resnet_v2_50.cpp
+++ b/examples/graph_resnet_v2_50.cpp
@@ -117,6 +117,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_resnext50.cpp b/examples/graph_resnext50.cpp
index 8b33f90bc8..766b8ff5fb 100644
--- a/examples/graph_resnext50.cpp
+++ b/examples/graph_resnext50.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -98,6 +98,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_shufflenet.cpp b/examples/graph_shufflenet.cpp
index e6016f0451..3704be7ac1 100644
--- a/examples/graph_shufflenet.cpp
+++ b/examples/graph_shufflenet.cpp
@@ -144,6 +144,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp
index f78fe5d506..4796dd3ff1 100644
--- a/examples/graph_squeezenet.cpp
+++ b/examples/graph_squeezenet.cpp
@@ -167,6 +167,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp
index 22a15df450..fd4561f49a 100644
--- a/examples/graph_squeezenet_v1_1.cpp
+++ b/examples/graph_squeezenet_v1_1.cpp
@@ -167,6 +167,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_srcnn955.cpp b/examples/graph_srcnn955.cpp
index a8976a1beb..066f16eb81 100644
--- a/examples/graph_srcnn955.cpp
+++ b/examples/graph_srcnn955.cpp
@@ -121,6 +121,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_ssd_mobilenet.cpp b/examples/graph_ssd_mobilenet.cpp
index 7fcc2804a2..55c9d75b7f 100644
--- a/examples/graph_ssd_mobilenet.cpp
+++ b/examples/graph_ssd_mobilenet.cpp
@@ -246,6 +246,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp
index 290d1e7e98..e8055d4eff 100644
--- a/examples/graph_vgg16.cpp
+++ b/examples/graph_vgg16.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -225,6 +225,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp
index 298ffa06ef..63051fb056 100644
--- a/examples/graph_vgg19.cpp
+++ b/examples/graph_vgg19.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -236,6 +236,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_vgg_vdsr.cpp b/examples/graph_vgg_vdsr.cpp
index ca7d10f4a0..9f0b357a9a 100644
--- a/examples/graph_vgg_vdsr.cpp
+++ b/examples/graph_vgg_vdsr.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -139,6 +139,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/examples/graph_yolov3.cpp b/examples/graph_yolov3.cpp
index 6d0f67e1f5..c0a97da060 100644
--- a/examples/graph_yolov3.cpp
+++ b/examples/graph_yolov3.cpp
@@ -398,6 +398,7 @@ public:
GraphConfig config;
config.num_threads = common_params.threads;
config.use_tuner = common_params.enable_tuner;
+ config.tuner_mode = common_params.tuner_mode;
config.tuner_file = common_params.tuner_file;
graph.finalize(common_params.target, config);
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index ae7f0a50b3..0666ec0ccb 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -81,6 +81,11 @@ void CLDeviceBackend::set_kernel_tuning(bool enable_tuning)
_tuner.set_tune_new_kernels(enable_tuning);
}
+void CLDeviceBackend::set_kernel_tuning_mode(CLTunerMode tuning_mode)
+{
+ _tuner.set_tuner_mode(tuning_mode); // Forward the requested tuning mode to the CL tuner (_tuner)
+}
+
void CLDeviceBackend::initialize_backend()
{
// Setup Scheduler
@@ -118,6 +123,7 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
}
set_kernel_tuning(ctx.config().use_tuner);
+ set_kernel_tuning_mode(ctx.config().tuner_mode);
// Setup a management backend
if(ctx.memory_management_ctx(Target::CL) == nullptr)
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index a262d6b95c..8f8d3e7c3a 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/CLTuner.h"
+#include "arm_compute/runtime/CL/tuners/CLLWSList.h"
#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Error.h"
@@ -31,42 +32,13 @@
#include <fstream>
#include <iostream>
#include <limits>
+#include <memory>
#include <string>
namespace arm_compute
{
-namespace
-{
-/** Utility function used to initialize the LWS values to test.
- * Only the LWS values which are power of 2 or satisfy the modulo conditions with GWS are taken into account by the CLTuner
- *
- * @param[in, out] lws Vector of LWS to test for a specific dimension
- * @param[in] gws Size of the GWS
- * @param[in] lws_max Max LKWS value allowed to be tested
- * @param[in] mod_let_one True if the results of the modulo operation between gws and the lws can be less than one.
- */
-void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int gws, unsigned int lws_max, bool mod_let_one)
-{
- lws.push_back(1);
-
- for(unsigned int i = 2; i <= lws_max; ++i)
- {
- // Power of two condition
- const bool is_power_of_two = (i & (i - 1)) == 0;
-
- // Condition for the module accordingly with the mod_let_one flag
- const bool mod_cond = mod_let_one ? (gws % i) <= 1 : (gws % i) == 0;
-
- if(mod_cond || is_power_of_two)
- {
- lws.push_back(i);
- }
- }
-}
-} // namespace
-
CLTuner::CLTuner(bool tune_new_kernels)
- : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels)
+ : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels), _tuner_mode(CLTunerMode::EXHAUSTIVE)
{
}
@@ -88,6 +60,15 @@ bool CLTuner::tune_new_kernels() const
return _tune_new_kernels;
}
+void CLTuner::set_tuner_mode(CLTunerMode mode)
+{
+ _tuner_mode = mode; // Consulted by find_optimal_lws() when it builds the list of LWS values to test
+}
+CLTunerMode CLTuner::get_tuner_mode() const
+{
+ return _tuner_mode; // Starts as CLTunerMode::EXHAUSTIVE (set in the constructor)
+}
+
void CLTuner::tune_kernel_static(ICLKernel &kernel)
{
ARM_COMPUTE_UNUSED(kernel);
@@ -182,61 +163,54 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
};
CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
- cl_ulong min_exec_time = std::numeric_limits<cl_ulong>::max();
+ cl::NDRange gws = ICLKernel::gws_from_window(kernel.window());
- cl::NDRange gws = ICLKernel::gws_from_window(kernel.window());
- cl::NDRange opt_lws = cl::NullRange;
+ // Run the kernel with default lws to be used as baseline
+ kernel.run(kernel.window(), queue_profiler);
- const unsigned int lws_x_max = std::min(static_cast<unsigned int>(gws[0]), 64u);
- const unsigned int lws_y_max = std::min(static_cast<unsigned int>(gws[1]), 32u);
- const unsigned int lws_z_max = std::min(static_cast<unsigned int>(gws[2]), 32u);
+ queue_profiler.finish();
- std::vector<unsigned int> lws_x;
- std::vector<unsigned int> lws_y;
- std::vector<unsigned int> lws_z;
+ const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
+ const cl_ulong end = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
+ cl_ulong min_exec_time = end - start;
+ _kernel_event = nullptr;
- // Initialize the LWS values to test
- initialize_lws_values(lws_x, gws[0], lws_x_max, gws[2] > 16);
- initialize_lws_values(lws_y, gws[1], lws_y_max, gws[2] > 16);
- initialize_lws_values(lws_z, gws[2], lws_z_max, false);
+ cl::NDRange opt_lws = cl::NullRange;
- for(const auto &z : lws_z)
+ //Construct the list of LWS values to be tested based on the tuner mode.
+ auto lws_list = cl_tuner::CLLWSListFactory::get_lws_list(_tuner_mode, gws);
+ for(size_t i = 0; i < lws_list->size(); ++i)
{
- for(const auto &y : lws_y)
- {
- for(const auto &x : lws_x)
- {
- cl::NDRange lws_test = cl::NDRange(x, y, z);
-
- bool invalid_lws = (x * y * z > kernel.get_max_workgroup_size()) || (x == 1 && y == 1 && z == 1);
+ cl::NDRange lws_test = (*lws_list)[i];
+ auto x = lws_test[0];
+ auto y = lws_test[1];
+ auto z = lws_test[2];
+ bool invalid_lws = (x * y * z > kernel.get_max_workgroup_size()) || (x == 1 && y == 1 && z == 1);
- invalid_lws = invalid_lws || (x > gws[0]) || (y > gws[1]) || (z > gws[2]);
+ invalid_lws = invalid_lws || (x > gws[0]) || (y > gws[1]) || (z > gws[2]);
- if(invalid_lws)
- {
- continue;
- }
-
- //Set the Local-Workgroup-Size
- kernel.set_lws_hint(lws_test);
+ if(invalid_lws)
+ {
+ continue;
+ }
- // Run the kernel
- kernel.run(kernel.window(), queue_profiler);
+ //Set the Local-Workgroup-Size
+ kernel.set_lws_hint(lws_test);
- queue_profiler.finish();
+ // Run the kernel
+ kernel.run(kernel.window(), queue_profiler);
- const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
- const cl_ulong end = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
- const cl_ulong diff = end - start;
- _kernel_event = nullptr;
+ queue_profiler.finish();
- // Check the execution time
- if(diff < min_exec_time)
- {
- min_exec_time = diff;
- opt_lws = cl::NDRange(x, y, z);
- }
- }
+ const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
+ const cl_ulong end = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
+ const cl_ulong diff = end - start;
+ _kernel_event = nullptr;
+ // Check the execution time
+ if(diff < min_exec_time)
+ {
+ min_exec_time = diff;
+ opt_lws = cl::NDRange(x, y, z);
}
}
diff --git a/src/runtime/CL/tuners/CLLWSList.cpp b/src/runtime/CL/tuners/CLLWSList.cpp
new file mode 100644
index 0000000000..97134b1b2c
--- /dev/null
+++ b/src/runtime/CL/tuners/CLLWSList.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/tuners/CLLWSList.h"
+
+namespace arm_compute
+{
+namespace cl_tuner
+{
+size_t CLLWSList::size()
+{
+ return search_space_shape.total_size(); // The operator[] implementations in this file assert (ARM_COMPUTE_ERROR_ON) that their index is below this value
+}
+
+cl::NDRange CLLWSListExhaustive::operator[](size_t index)
+{
+ ARM_COMPUTE_ERROR_ON(index >= size());
+ auto coords = index2coords(search_space_shape, index); // Turn the flat index into one coordinate per search-space dimension
+ return cl::NDRange(coords[0] + 1, coords[1] + 1, coords[2] + 1); // NOTE(review): +1 presumably because coordinates start at 0 while the smallest LWS is 1 - confirm against index2coords
+}
+
+CLLWSListExhaustive::CLLWSListExhaustive(const cl::NDRange &gws)
+{
+ search_space_shape = TensorShape(std::min(static_cast<unsigned int>(gws[0]), max_lws_supported_x), std::min(static_cast<unsigned int>(gws[1]), max_lws_supported_y),
+ std::min(static_cast<unsigned int>(gws[2]), max_lws_supported_z)); // Each dimension is capped by both the GWS and the matching max_lws_supported_* limit
+}
+
+cl::NDRange CLLWSListNormal::operator[](size_t index)
+{
+ ARM_COMPUTE_ERROR_ON(index >= size());
+ auto coords = index2coords(search_space_shape, index); // Flat index -> one position in each of the three candidate vectors
+ return cl::NDRange(_lws_x[coords[0]], _lws_y[coords[1]], _lws_z[coords[2]]); // Look the actual LWS values up in the per-dimension candidate vectors
+}
+
+CLLWSListNormal::CLLWSListNormal(const cl::NDRange &gws)
+{
+ auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), max_lws_supported_x);
+ auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), max_lws_supported_y);
+ auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), max_lws_supported_z);
+
+ // Initialize the LWS values to test
+ initialize_lws_values(_lws_x, gws[0], lws_x_max, gws[2] > 16); // Explore lws that are not factors of gws only when gws[2] > 16
+ initialize_lws_values(_lws_y, gws[1], lws_y_max, gws[2] > 16); // Explore lws that are not factors of gws only when gws[2] > 16
+ initialize_lws_values(_lws_z, gws[2], lws_z_max, false); // z keeps only exact divisors of gws[2] and powers of two
+
+ search_space_shape = TensorShape(_lws_x.size(), _lws_y.size(), _lws_z.size()); // One search-space dimension per candidate vector
+}
+
+void CLLWSListNormal::initialize_lws_values(std::vector<unsigned int> &lws, unsigned int gws, unsigned int lws_max, bool mod_let_one)
+{
+ lws.push_back(1); // 1 is always a candidate
+
+ for(unsigned int i = 2; i <= lws_max; ++i)
+ {
+ // Power of two condition
+ const bool is_power_of_two = (i & (i - 1)) == 0;
+
+ // Modulo condition: gws % i may be 0 or 1 when mod_let_one is true, otherwise it must be exactly 0
+ const bool mod_cond = mod_let_one ? (gws % i) <= 1 : (gws % i) == 0;
+
+ if(mod_cond || is_power_of_two)
+ {
+ lws.push_back(i);
+ }
+ }
+}
+
+CLLWSListRapid::CLLWSListRapid(const cl::NDRange &gws)
+{
+ auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), 8u); // Limit exploration to 1 - 8 (initialize_lws_values generates at most 1, 2 and 8)
+ auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), 4u); // Limit exploration to 1 - 4 (initialize_lws_values generates at most 1 and 2)
+ auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), 4u); // Limit exploration to 1 - 4 (initialize_lws_values generates at most 1 and 2)
+
+ // Initialize the LWS values to test
+ initialize_lws_values(_lws_x, lws_x_max);
+ initialize_lws_values(_lws_y, lws_y_max);
+ initialize_lws_values(_lws_z, lws_z_max);
+
+ search_space_shape = TensorShape(_lws_x.size(), _lws_y.size(), _lws_z.size()); // One search-space dimension per candidate vector
+}
+
+void CLLWSListRapid::initialize_lws_values(std::vector<unsigned int> &lws, unsigned int lws_max)
+{
+ lws.push_back(1); // 1 is always a candidate
+
+ for(unsigned int i = 2; i <= lws_max; i *= 4) // Starts at 2 and multiplies by 4: 2, 8, 32, ... (4 and 16 are never generated)
+ {
+ lws.push_back(i);
+ }
+}
+} // namespace cl_tuner
+} // namespace arm_compute
diff --git a/tests/main.cpp b/tests/main.cpp
index 0eca0c8f1b..bb35d0af98 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -39,7 +39,7 @@
#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
-
+#include "utils/TypePrinter.h"
#endif /* ARM_COMPUTE_CL */
#ifdef ARM_COMPUTE_GC
#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
@@ -139,6 +139,16 @@ int main(int argc, char **argv)
#ifdef ARM_COMPUTE_CL
auto enable_tuner = parser.add_option<utils::ToggleOption>("enable-tuner");
enable_tuner->set_help("Enable OpenCL dynamic tuner");
+
+ const std::set<CLTunerMode> supported_tuner_modes
+ {
+ CLTunerMode::EXHAUSTIVE,
+ CLTunerMode::NORMAL,
+ CLTunerMode::RAPID
+ };
+ auto tuner_mode = parser.add_option<utils::EnumOption<CLTunerMode>>("tuner-mode", supported_tuner_modes, CLTunerMode::EXHAUSTIVE);
+ tuner_mode->set_help("Configures the time taken by the tuner to tune. Slow tuner produces the most performant LWS configuration");
+
auto tuner_file = parser.add_option<utils::SimpleOption<std::string>>("tuner-file", "");
tuner_file->set_help("File to load/save CLTuner values");
#endif /* ARM_COMPUTE_CL */
@@ -161,7 +171,10 @@ int main(int argc, char **argv)
#ifdef ARM_COMPUTE_CL
if(enable_tuner->is_set())
{
cl_tuner.set_tune_new_kernels(enable_tuner->value());
+ // Apply the tuner mode selected with the --tuner-mode option
+ cl_tuner.set_tuner_mode(tuner_mode->value());
+
// If that's the first run then the file won't exist yet
if(file_exists(tuner_file->value()))
{
diff --git a/utils/CommonGraphOptions.cpp b/utils/CommonGraphOptions.cpp
index 4247b2d3e5..7334d038ba 100644
--- a/utils/CommonGraphOptions.cpp
+++ b/utils/CommonGraphOptions.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -83,6 +83,7 @@ namespace utils
os << "Data type : " << common_params.data_type << std::endl;
os << "Data layout : " << common_params.data_layout << std::endl;
os << "Tuner enabled? : " << (common_params.enable_tuner ? true_str : false_str) << std::endl;
+ os << "Tuner mode : " << common_params.tuner_mode << std::endl;
os << "Tuner file : " << common_params.tuner_file << std::endl;
os << "Fast math enabled? : " << (common_params.fast_math_hint == FastMathHint::Enabled ? true_str : false_str) << std::endl;
if(!common_params.data_path.empty())
@@ -117,6 +118,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser)
data_type(),
data_layout(),
enable_tuner(parser.add_option<ToggleOption>("enable-tuner")),
+ tuner_mode(),
fast_math_hint(parser.add_option<ToggleOption>("fast-math")),
data_path(parser.add_option<SimpleOption<std::string>>("data")),
image(parser.add_option<SimpleOption<std::string>>("image")),
@@ -146,9 +148,17 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser)
DataLayout::NCHW,
};
+ const std::set<CLTunerMode> supported_tuner_modes
+ {
+ CLTunerMode::EXHAUSTIVE,
+ CLTunerMode::NORMAL,
+ CLTunerMode::RAPID
+ };
+
target = parser.add_option<EnumOption<Target>>("target", supported_targets, Target::NEON);
data_type = parser.add_option<EnumOption<DataType>>("type", supported_data_types, DataType::F32);
data_layout = parser.add_option<EnumOption<DataLayout>>("layout", supported_data_layouts);
+ tuner_mode = parser.add_option<EnumOption<CLTunerMode>>("tuner-mode", supported_tuner_modes, CLTunerMode::EXHAUSTIVE);
help->set_help("Show this help message");
threads->set_help("Number of threads to use");
@@ -156,6 +166,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser)
data_type->set_help("Data type to use");
data_layout->set_help("Data layout to use");
enable_tuner->set_help("Enable OpenCL dynamic tuner");
+ tuner_mode->set_help("Configures the time taken by the tuner to tune. Slow tuner produces the most performant LWS configuration");
fast_math_hint->set_help("Enable fast math");
data_path->set_help("Path where graph parameters reside");
image->set_help("Input image for the graph");
@@ -181,6 +192,7 @@ CommonGraphParams consume_common_graph_parameters(CommonGraphOptions &options)
common_params.data_layout = options.data_layout->value();
}
common_params.enable_tuner = options.enable_tuner->is_set() ? options.enable_tuner->value() : false;
+ common_params.tuner_mode = options.tuner_mode->value();
common_params.fast_math_hint = options.fast_math_hint->is_set() ? fast_math_hint_value : FastMathHint::Disabled;
common_params.data_path = options.data_path->value();
common_params.image = options.image->value();
diff --git a/utils/CommonGraphOptions.h b/utils/CommonGraphOptions.h
index 921889d7c7..d81cd8fbdb 100644
--- a/utils/CommonGraphOptions.h
+++ b/utils/CommonGraphOptions.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,7 @@
#include "arm_compute/graph/TypeLoader.h"
#include "arm_compute/graph/TypePrinter.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
namespace arm_compute
{
@@ -93,6 +94,7 @@ struct CommonGraphParams
arm_compute::DataType data_type{ DataType::F32 };
arm_compute::DataLayout data_layout{ DataLayout::NHWC };
bool enable_tuner{ false };
+ arm_compute::CLTunerMode tuner_mode{ CLTunerMode::EXHAUSTIVE };
arm_compute::graph::FastMathHint fast_math_hint{ arm_compute::graph::FastMathHint::Disabled };
std::string data_path{};
std::string image{};
@@ -147,6 +149,7 @@ public:
EnumOption<arm_compute::DataType> *data_type; /**< Graph data type */
EnumOption<arm_compute::DataLayout> *data_layout; /**< Graph data layout */
ToggleOption *enable_tuner; /**< Enable tuner */
+ SimpleOption<arm_compute::CLTunerMode> *tuner_mode; /**< Tuner mode */
ToggleOption *fast_math_hint; /**< Fast math hint */
SimpleOption<std::string> *data_path; /**< Trainable parameters path */
SimpleOption<std::string> *image; /**< Image */
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 70196882de..a71e03696a 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -34,6 +34,7 @@
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
#include <ostream>
#include <sstream>
@@ -2218,6 +2219,49 @@ inline std::string to_string(const T &val)
return support::cpp11::to_string(val);
}
+/** Convert a CLTunerMode value to a string
+ *
+ * @param[in] val CLTunerMode value to be converted
+ *
+ * @return "Exhaustive", "Normal" or "Rapid" for the corresponding CLTunerMode.
+ */
+inline std::string to_string(const CLTunerMode val)
+{
+ switch(val)
+ {
+ case CLTunerMode::EXHAUSTIVE:
+ {
+ return std::string("Exhaustive");
+ }
+ case CLTunerMode::NORMAL:
+ {
+ return std::string("Normal");
+ }
+ case CLTunerMode::RAPID:
+ {
+ return std::string("Rapid");
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Invalid tuner mode.");
+ return std::string("UNDEFINED"); // NOTE(review): only reached if ARM_COMPUTE_ERROR returns - confirm how the macro behaves in this build
+ }
+ }
+}
+/** [Print CLTunerMode type] **/
+/** Formatted output of the CLTunerMode type.
+ *
+ * @param[out] os Output stream.
+ * @param[in] val CLTunerMode to output.
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const CLTunerMode &val)
+{
+ os << to_string(val); // Streams the same name that to_string(CLTunerMode) returns
+ return os;
+}
+
} // namespace arm_compute
#endif /* __ARM_COMPUTE_TYPE_PRINTER_H__ */