diff options
author | steniu01 <steven.niu@arm.com> | 2017-07-11 09:22:58 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:15:39 +0100 |
commit | 34702479adcf3559e8ebe27915179af509b070d2 (patch) | |
tree | 683b6d81396a6515281f0575f9761efc50d538e8 | |
parent | bb4a79b1667eaf9bdb30853c595e6d22d2589f82 (diff) | |
download | ComputeLibrary-34702479adcf3559e8ebe27915179af509b070d2.tar.gz |
COMPMID-417 Checking CL non uniform support at runtime.
What has been done in this ticket:
1. Add support for checking at runtime whether the
cl_arm_non_uniform_work_group_size extension is supported
2. Add helper function to check the CL version at runtime
3. Add a boolean to check whether CLScheduler's init has been called.
Change-Id: I6e6df8eb5cebfac7229aa406242bb183477fd191
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80265
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r-- | arm_compute/core/CL/CLHelpers.h | 15 | ||||
-rw-r--r-- | arm_compute/core/CL/CLTypes.h | 10 | ||||
-rw-r--r-- | arm_compute/runtime/CL/CLScheduler.h | 10 | ||||
-rw-r--r-- | src/core/CL/CLHelpers.cpp | 53 | ||||
-rw-r--r-- | src/core/CL/CLKernelLibrary.cpp | 23 | ||||
-rw-r--r-- | src/runtime/CL/CLScheduler.cpp | 6 |
6 files changed, 110 insertions, 7 deletions
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index 5234ae192e..01980d9793 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -102,5 +102,20 @@ GPUTarget get_target_from_device(cl::Device &device); * @return the GPU target which shows the arch */ GPUTarget get_arch_from_target(GPUTarget target); + +/** Helper function to get the highest OpenCL version supported + * + * @param[in] device A CL device + * + * @return the highest OpenCL version supported + */ +CLVersion get_cl_version(const cl::Device &device); +/** Helper function to check whether the arm_non_uniform_work_group_size extension is supported + * + * @param[in] device A CL device + * + * @return True if the extension is supported + */ +bool non_uniform_workgroup_support(const cl::Device &device); } #endif /* __ARM_COMPUTE_CLHELPERS_H__ */ diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h index c5643d8939..cf11f6ecd7 100644 --- a/arm_compute/core/CL/CLTypes.h +++ b/arm_compute/core/CL/CLTypes.h @@ -37,5 +37,15 @@ enum class GPUTarget T800 = 0x130, G70 = 0x210 }; + +/* Available OpenCL Version */ +enum class CLVersion +{ + CL10, /* the OpenCL 1.0 */ + CL11, /* the OpenCL 1.1 */ + CL12, /* the OpenCL 1.2 */ + CL20, /* the OpenCL 2.0 and above */ + UNKNOWN /* unkown version */ +}; } #endif /* __ARM_COMPUTE_CL_TYPES_H__ */ diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h index 8e80259b59..3f3a8de753 100644 --- a/arm_compute/runtime/CL/CLScheduler.h +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -72,9 +72,10 @@ public: void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault(), cl::Device device = cl::Device::getDefault()) { - _context = std::move(context); - _queue = std::move(queue); - _target = get_target_from_device(device); + _context = std::move(context); + _queue = std::move(queue); + _target = 
get_target_from_device(device); + _is_initialised = true; } /** Accessor for the associated CL context. @@ -83,6 +84,7 @@ public: */ cl::Context &context() { + ARM_COMPUTE_ERROR_ON(!_is_initialised); return _context; } @@ -101,6 +103,7 @@ public: */ cl::CommandQueue &queue() { + ARM_COMPUTE_ERROR_ON(!_is_initialised); return _queue; } @@ -153,6 +156,7 @@ private: cl::Context _context; cl::CommandQueue _queue; GPUTarget _target; + bool _is_initialised; }; } #endif /* __ARM_COMPUTE_CLSCHEDULER_H__ */ diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index 835260d35a..dd87e778d7 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -166,4 +166,57 @@ GPUTarget get_arch_from_target(GPUTarget target) { return (target & GPUTarget::GPU_ARCH_MASK); } + +bool non_uniform_workgroup_support(const cl::Device &device) +{ + std::vector<char> extension; + size_t extension_size = 0; + cl_int err = clGetDeviceInfo(device.get(), CL_DEVICE_EXTENSIONS, 0, nullptr, &extension_size); + ARM_COMPUTE_ERROR_ON_MSG((err != 0) || (extension_size == 0), "clGetDeviceInfo failed to return valid information"); + // Resize vector + extension.resize(extension_size); + // Query extension + err = clGetDeviceInfo(device.get(), CL_DEVICE_EXTENSIONS, extension_size, extension.data(), nullptr); + ARM_COMPUTE_ERROR_ON_MSG(err != 0, "clGetDeviceInfo failed to return valid information"); + ARM_COMPUTE_UNUSED(err); + + std::string extension_str(extension.begin(), extension.end()); + auto pos = extension_str.find("cl_arm_non_uniform_work_group_size"); + return (pos != std::string::npos); +} + +CLVersion get_cl_version(const cl::Device &device) +{ + std::vector<char> version; + size_t version_size = 0; + cl_int err = clGetDeviceInfo(device.get(), CL_DEVICE_VERSION, 0, nullptr, &version_size); + ARM_COMPUTE_ERROR_ON_MSG((err != 0) || (version_size == 0), "clGetDeviceInfo failed to return valid information"); + // Resize vector + version.resize(version_size); + // Query 
version + err = clGetDeviceInfo(device.get(), CL_DEVICE_VERSION, version_size, version.data(), nullptr); + ARM_COMPUTE_ERROR_ON_MSG(err != 0, "clGetDeviceInfo failed to return valid information"); + ARM_COMPUTE_UNUSED(err); + + std::string version_str(version.begin(), version.end()); + if(version_str.find("OpenCL 2") != std::string::npos) + { + return CLVersion::CL20; + } + else if(version_str.find("OpenCL 1.2") != std::string::npos) + { + return CLVersion::CL12; + } + else if(version_str.find("OpenCL 1.1") != std::string::npos) + { + return CLVersion::CL11; + } + else if(version_str.find("OpenCL 1.0") != std::string::npos) + { + return CLVersion::CL10; + } + + return CLVersion::UNKNOWN; +} + } // namespace arm_compute diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 72230435d8..769d50992d 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Utils.h" @@ -514,9 +515,25 @@ Kernel CLKernelLibrary::create_kernel(const std::string &kernel_name, const Stri ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str()); } + std::string concat_str; + + if(non_uniform_workgroup_support(_device)) + { + concat_str += " -cl-arm-non-uniform-work-group-size "; + } + else if(get_cl_version(_device) == CLVersion::CL20) + { + concat_str += " -cl-std=CL2.0 "; + } + else + { + ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!"); + } + // Check if the program has been built before with same build options. 
- const std::string program_name = kernel_program_it->second; - const std::string build_options = stringify_set(build_options_set); + const std::string program_name = kernel_program_it->second; + const std::string build_options = stringify_set(build_options_set) + concat_str; + const std::string built_program_name = program_name + "_" + build_options; auto built_program_it = _built_programs_map.find(built_program_name); @@ -591,7 +608,7 @@ const Program &CLKernelLibrary::load_program(const std::string &program_name) co std::string CLKernelLibrary::stringify_set(const StringSet &s) const { - std::string concat_set = "-cl-arm-non-uniform-work-group-size "; + std::string concat_set; #ifndef EMBEDDED_KERNELS concat_set += "-I" + _kernel_path + " "; diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index fe25ce534c..f413f626eb 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -28,7 +28,7 @@ using namespace arm_compute; CLScheduler::CLScheduler() - : _context(), _queue(), _target(GPUTarget::MIDGARD) + : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false) { } @@ -40,6 +40,10 @@ CLScheduler &CLScheduler::get() void CLScheduler::enqueue(ICLKernel &kernel, bool flush) { + ARM_COMPUTE_ERROR_ON_MSG(!_is_initialised, + "The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \ + or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!"); + kernel.run(kernel.window(), _queue); if(flush) |