aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorVidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>2019-04-25 09:27:24 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2019-04-25 16:18:58 +0000
commit050471e40fc58cb5ea745701a43ec5b2b9586b81 (patch)
tree0ee684bcc93fae693686c391e42a2b824705aeb1 /arm_compute
parentd038dafe3810d22c8664ceef4fe49aad77abdbd1 (diff)
downloadComputeLibrary-050471e40fc58cb5ea745701a43ec5b2b9586b81.tar.gz
COMPMID-1974 : Extend CLTuner to support different of level of tuning
Change-Id: I52e4a00a25e7f7a17050038cee7c30e508553722 Signed-off-by: Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com> Reviewed-on: https://review.mlplatform.org/c/977 Comments-Addressed: Pablo Marquez <pablo.tello@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/graph/Types.h4
-rw-r--r--arm_compute/graph/backends/CL/CLDeviceBackend.h7
-rw-r--r--arm_compute/runtime/CL/CLTuner.h20
-rw-r--r--arm_compute/runtime/CL/CLTunerTypes.h86
-rw-r--r--arm_compute/runtime/CL/tuners/CLLWSList.h213
5 files changed, 326 insertions, 4 deletions
diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h
index 582e6f6434..4d9e031b91 100644
--- a/arm_compute/graph/Types.h
+++ b/arm_compute/graph/Types.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
#include <limits>
#include <string>
@@ -34,6 +35,7 @@ namespace arm_compute
{
namespace graph
{
+using arm_compute::CLTunerMode;
using arm_compute::Status;
using arm_compute::Coordinates;
@@ -71,13 +73,13 @@ constexpr EdgeID EmptyEdgeID = std::numeric_limits<EdgeID>::max();
// Forward declarations
class TensorDescriptor;
-
/** Graph configuration structure */
struct GraphConfig
{
bool use_function_memory_manager{ true }; /**< Use a memory manager to manage per-funcion auxilary memory */
bool use_transition_memory_manager{ true }; /**< Use a memory manager to manager transition buffer memory */
bool use_tuner{ false }; /**< Use a tuner in tunable backends */
+ CLTunerMode tuner_mode{ CLTunerMode::EXHAUSTIVE }; /**< Tuner mode to be used by the CL tuner; initialised to CLTunerMode::EXHAUSTIVE */
int num_threads{ -1 }; /**< Number of threads to use (thread capable backends), if 0 the backend will auto-initialize, if -1 the backend will stay as it is. */
std::string tuner_file{ "acl_tuner.csv" }; /**< File to load/store tuning values from */
};
diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h
index 49e7596d58..afe01fff70 100644
--- a/arm_compute/graph/backends/CL/CLDeviceBackend.h
+++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,6 +50,11 @@ public:
* @param[in] enable_tuning Enables tuning if false else true
*/
void set_kernel_tuning(bool enable_tuning);
+ /** Set the kernel tuning mode
+ *
+ * @param[in] tuning_mode Tuner mode (@ref CLTunerMode) indicating how exhaustive the search for the optimal LWS should be while tuning
+ */
+ void set_kernel_tuning_mode(CLTunerMode tuning_mode);
// Inherited overridden methods
void initialize_backend() override;
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h
index ee83f6933c..3f3df5f236 100644
--- a/arm_compute/runtime/CL/CLTuner.h
+++ b/arm_compute/runtime/CL/CLTuner.h
@@ -25,6 +25,7 @@
#define __ARM_COMPUTE_CLTUNER_H__
#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
#include "arm_compute/runtime/CL/ICLTuner.h"
#include <unordered_map>
@@ -57,12 +58,26 @@ public:
* @return True if tuning of new kernels is enabled.
*/
bool tune_new_kernels() const;
+
+ /** Set the OpenCL tuner mode
+ *
+ * @param[in] mode Tuner mode (@ref CLTunerMode) indicating how exhaustive the search for the optimal LWS should be while tuning. Default is stated to be Exhaustive mode (set outside this header - TODO confirm)
+ */
+ void set_tuner_mode(CLTunerMode mode);
+
+ /** Get the current OpenCL tuner mode
+ *
+ * @return The current tuner mode, which indicates how exhaustive the search for the optimal LWS should be while tuning
+ */
+ CLTunerMode get_tuner_mode() const;
+
/** Manually add a LWS for a kernel
*
* @param[in] kernel_id Unique identifiant of the kernel
* @param[in] optimal_lws Optimal local workgroup size to use for the given kernel
*/
void add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal_lws);
+
/** Import LWS table
*
* @param[in] lws_table The unordered_map container to import
@@ -118,8 +133,9 @@ private:
cl::NDRange find_optimal_lws(ICLKernel &kernel);
std::unordered_map<std::string, cl::NDRange> _lws_table;
- cl::Event _kernel_event;
- bool _tune_new_kernels;
+ cl::Event _kernel_event;
+ bool _tune_new_kernels;
+ CLTunerMode _tuner_mode;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLTUNER_H__ */
diff --git a/arm_compute/runtime/CL/CLTunerTypes.h b/arm_compute/runtime/CL/CLTunerTypes.h
new file mode 100644
index 0000000000..7d13b6d3fa
--- /dev/null
+++ b/arm_compute/runtime/CL/CLTunerTypes.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTUNER_TYPES_H__
+#define __ARM_COMPUTE_CLTUNER_TYPES_H__
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/misc/Utility.h"
+#include <map>
+
+namespace arm_compute
+{
+/** OpenCL tuner modes, ordered from the widest LWS search to the narrowest */
+enum class CLTunerMode
+{
+ EXHAUSTIVE, /**< Searches all possible LWS configurations while tuning */
+ NORMAL, /**< Searches a subset of LWS configurations while tuning */
+ RAPID /**< Searches a minimal subset of LWS configurations while tuning */
+};
+
+/** Converts a string to the strongly typed enumeration @ref CLTunerMode
+ *
+ * @param[in] name String to convert; it is passed through arm_compute::utility::tolower before the table lookup
+ *
+ * @return Converted CLTunerMode enumeration; unless ARM_COMPUTE_EXCEPTIONS_DISABLED is defined, an unknown name raises std::invalid_argument
+ */
+inline CLTunerMode tuner_mode_from_name(const std::string &name)
+{
+ static const std::map<std::string, CLTunerMode> tuner_modes = // keys are the lower-case mode names
+ {
+ { "exhaustive", CLTunerMode::EXHAUSTIVE },
+ { "normal", CLTunerMode::NORMAL },
+ { "rapid", CLTunerMode::RAPID },
+ };
+ // NOTE(review): std::string and std::invalid_argument are used below, but besides project headers this file directly includes only <map>
+#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
+ try
+ {
+#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
+ return tuner_modes.at(arm_compute::utility::tolower(name)); // std::map::at throws std::out_of_range when the key is absent
+
+#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
+ }
+ catch(const std::out_of_range &) // translated into std::invalid_argument carrying the caller's original name
+ {
+ throw std::invalid_argument(name);
+ }
+#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
+}
+
+/** Input stream operator for @ref CLTunerMode
+ *
+ * @param[in, out] stream Stream to parse; a single std::string token is extracted from it
+ * @param[out] tuner_mode Output tuner mode, assigned the result of tuner_mode_from_name on the extracted token
+ *
+ * @return Updated stream
+ */
+inline ::std::istream &operator>>(::std::istream &stream, CLTunerMode &tuner_mode)
+{
+ std::string value;
+ stream >> value; // NOTE(review): the stream state is not checked before value is converted
+ tuner_mode = tuner_mode_from_name(value);
+ return stream;
+}
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLTUNER_TYPES_H__ */
diff --git a/arm_compute/runtime/CL/tuners/CLLWSList.h b/arm_compute/runtime/CL/tuners/CLLWSList.h
new file mode 100644
index 0000000000..d623834208
--- /dev/null
+++ b/arm_compute/runtime/CL/tuners/CLLWSList.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CL_LWS_LIST_H__
+#define __ARM_COMPUTE_CL_LWS_LIST_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
+#include "support/ToolchainSupport.h"
+#include <memory>
+
+namespace arm_compute
+{
+namespace cl_tuner
+{
+constexpr unsigned int max_lws_supported_x{ 64u }; /**< Presumably the largest LWS value considered along X - TODO confirm against the list implementations */
+constexpr unsigned int max_lws_supported_y{ 32u }; /**< Presumably the largest LWS value considered along Y - TODO confirm against the list implementations */
+constexpr unsigned int max_lws_supported_z{ 32u }; /**< Presumably the largest LWS value considered along Z - TODO confirm against the list implementations */
+
+/** Interface for LWS lists: an indexable sequence of cl::NDRange values together with its size */
+class ICLLWSList
+{
+public:
+ /** Default constructor */
+ ICLLWSList() = default;
+ /** Copy constructor (defaulted) */
+ ICLLWSList(const ICLLWSList &) = default;
+ /** Move constructor (defaulted, noexcept) */
+ ICLLWSList(ICLLWSList &&) noexcept(true) = default;
+ /** Copy assignment (defaulted) */
+ ICLLWSList &operator=(const ICLLWSList &) = default;
+ /** Move assignment (defaulted, noexcept) */
+ ICLLWSList &operator=(ICLLWSList &&) noexcept(true) = default;
+ /** Virtual destructor, so that implementations can be destroyed through a pointer to this interface */
+ virtual ~ICLLWSList() = default;
+
+ /** Return the LWS value at the given index (the unnamed size_t argument).
+ *
+ * @return LWS value at the given index
+ */
+ virtual cl::NDRange operator[](size_t) = 0; // NOTE(review): not const-qualified
+
+ /** LWS list size.
+ *
+ * @return LWS list size
+ */
+ virtual size_t size() = 0; // NOTE(review): not const-qualified
+};
+
+/** Non-instantiable base class (every constructor is protected and operator[] is left pure virtual) for LWS combinations that use Index2Coord mapping */
+class CLLWSList : public ICLLWSList
+{
+protected:
+ /** Shape of the 3-D search space; initialised with { 0, 0, 0 } */
+ TensorShape search_space_shape{ 0, 0, 0 };
+
+ /** Default constructor */
+ CLLWSList() = default;
+ /** Copy constructor (defaulted) */
+ CLLWSList(const CLLWSList &) = default;
+ /** Move constructor (defaulted, noexcept) */
+ CLLWSList(CLLWSList &&) noexcept(true) = default;
+ /** Copy assignment (defaulted) */
+ CLLWSList &operator=(const CLLWSList &) = default;
+ /** Move assignment (defaulted, noexcept) */
+ CLLWSList &operator=(CLLWSList &&) noexcept(true) = default;
+ /** Destructor (virtual, protected) */
+ virtual ~CLLWSList() = default;
+
+ // Inherited methods overridden (NOTE(review): declared under protected: here, whereas ICLLWSList::size() is public):
+ virtual size_t size() override;
+};
+
+/** Exhaustive list of all possible LWS values */
+class CLLWSListExhaustive : public CLLWSList
+{
+public:
+ /** Default construction is deleted; a gws must be supplied */
+ CLLWSListExhaustive() = delete;
+ /** Constructor from a gws configuration (a cl::NDRange) */
+ CLLWSListExhaustive(const cl::NDRange &gws);
+ /** Copy constructor (defaulted) */
+ CLLWSListExhaustive(const CLLWSListExhaustive &) = default;
+ /** Move constructor (defaulted, noexcept) */
+ CLLWSListExhaustive(CLLWSListExhaustive &&) noexcept(true) = default;
+ /** Copy assignment (defaulted) */
+ CLLWSListExhaustive &operator=(const CLLWSListExhaustive &) = default;
+ /** Move assignment (defaulted, noexcept) */
+ CLLWSListExhaustive &operator=(CLLWSListExhaustive &&) noexcept(true) = default;
+ /** Destructor */
+ ~CLLWSListExhaustive() = default;
+
+ // Inherited methods overridden (the index is the unnamed size_t argument):
+ cl::NDRange operator[](size_t) override;
+};
+
+/** A subset of LWS values that are either factors of gws when gws[2] < 16 or powers of 2 */
+class CLLWSListNormal : public CLLWSList
+{
+public:
+ /** Constructor from a gws configuration (a cl::NDRange) */
+ CLLWSListNormal(const cl::NDRange &gws);
+ /** Copy constructor (defaulted) */
+ CLLWSListNormal(const CLLWSListNormal &) = default;
+ /** Move constructor (defaulted, noexcept) */
+ CLLWSListNormal(CLLWSListNormal &&) noexcept(true) = default;
+ /** Copy assignment (defaulted) */
+ CLLWSListNormal &operator=(const CLLWSListNormal &) = default;
+ /** Move assignment (defaulted, noexcept) */
+ CLLWSListNormal &operator=(CLLWSListNormal &&) noexcept(true) = default;
+ /** Destructor */
+ ~CLLWSListNormal() = default;
+
+ // Inherited methods overridden (the index is the unnamed size_t argument):
+ cl::NDRange operator[](size_t) override;
+
+protected:
+ std::vector<unsigned int> _lws_x{}; // NOTE(review): <vector> is not included directly by this header
+ std::vector<unsigned int> _lws_y{}; // presumably the candidate LWS values per dimension (x, y, z) - TODO confirm
+ std::vector<unsigned int> _lws_z{};
+
+ /** Default constructor; protected, so it is not callable from outside the class hierarchy */
+ CLLWSListNormal() = default;
+
+private:
+ /** Utility function used to initialize the LWS values to test.
+ * Only the LWS values which are powers of 2 or satisfy the modulo conditions with GWS are taken into account by the CLTuner
+ *
+ * @param[in, out] lws Vector of LWS to test
+ * @param[in] gws Size of the specific GWS
+ * @param[in] lws_max Max LWS value allowed to be tested
+ * @param[in] mod_let_one True if the results of the modulo operation between gws and the lws can be less than one.
+ */
+ void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int gws, unsigned int lws_max, bool mod_let_one);
+};
+
+/** A minimal subset of LWS values that only have 1,2 and 4/8 */
+class CLLWSListRapid : public CLLWSListNormal
+{
+public:
+ /** Default construction is deleted; a gws must be supplied */
+ CLLWSListRapid() = delete;
+ /** Constructor from a gws configuration (a cl::NDRange) */
+ CLLWSListRapid(const cl::NDRange &gws);
+ /** Copy constructor (defaulted) */
+ CLLWSListRapid(const CLLWSListRapid &) = default;
+ /** Move constructor (defaulted, noexcept) */
+ CLLWSListRapid(CLLWSListRapid &&) noexcept(true) = default;
+ /** Copy assignment (defaulted) */
+ CLLWSListRapid &operator=(const CLLWSListRapid &) = default;
+ /** Move assignment (defaulted, noexcept) */
+ CLLWSListRapid &operator=(CLLWSListRapid &&) noexcept(true) = default;
+ /** Destructor (virtual) */
+ virtual ~CLLWSListRapid() = default;
+
+private:
+ /** Utility function used to initialize the LWS values to test.
+ * Only the LWS values that have 1,2 and 4/8 for each dimension are taken into account by the CLTuner
+ *
+ * @param[in, out] lws Vector of LWS to test
+ * @param[in] lws_max Max LWS value allowed to be tested
+ */
+ void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int lws_max);
+};
+
+/** Factory to construct an ICLLWSList object based on the CL tuner mode */
+class CLLWSListFactory final
+{
+public:
+ /** Construct an ICLLWSList object for the given tuner mode (@p mode) and gws configuration (@p gws, forwarded to the list's constructor).
+ *
+ * @return unique_ptr to the requested ICLLWSList implementation, or nullptr when @p mode matches none of the handled CLTunerMode values.
+ */
+ static std::unique_ptr<ICLLWSList> get_lws_list(CLTunerMode mode, const cl::NDRange &gws)
+ {
+ switch(mode)
+ {
+ case CLTunerMode::EXHAUSTIVE: // EXHAUSTIVE -> CLLWSListExhaustive
+ return arm_compute::support::cpp14::make_unique<CLLWSListExhaustive>(gws);
+ case CLTunerMode::NORMAL: // NORMAL -> CLLWSListNormal
+ return arm_compute::support::cpp14::make_unique<CLLWSListNormal>(gws);
+ case CLTunerMode::RAPID: // RAPID -> CLLWSListRapid
+ return arm_compute::support::cpp14::make_unique<CLLWSListRapid>(gws);
+ default: // only reachable for a value outside the three enumerators above
+ return nullptr;
+ }
+ }
+};
+} // namespace cl_tuner
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CL_LWS_LIST_H__ */