diff options
-rw-r--r-- | arm_compute/core/CL/CLCompileContext.h | 6 | ||||
-rw-r--r-- | src/core/CL/CLCompileContext.cpp | 25 | ||||
-rw-r--r-- | src/gpu/cl/kernels/ClDirectConv2dKernel.cpp | 5 |
3 files changed, 27 insertions, 9 deletions
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h index 0ce074df2b..e8f2ff35da 100644 --- a/arm_compute/core/CL/CLCompileContext.h +++ b/arm_compute/core/CL/CLCompileContext.h @@ -304,6 +304,12 @@ public: */ bool is_wbsm_supported() const; + /** Return the DDK version. If the DDK version cannot be detected, return -1. + * + * @return The DDK version. + */ + int32_t get_ddk_version() const; + private: /** Load program and its dependencies. * diff --git a/src/core/CL/CLCompileContext.cpp b/src/core/CL/CLCompileContext.cpp index 81eb748ab8..fce8798b48 100644 --- a/src/core/CL/CLCompileContext.cpp +++ b/src/core/CL/CLCompileContext.cpp @@ -270,16 +270,9 @@ std::string CLCompileContext::generate_build_options(const StringSet &build_opti ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!"); } - if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD) + if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD && get_ddk_version() >= 11) { - const std::string device_vers = _device.device_version(); - const std::regex ddk_regex("r([0-9]*)p[0-9]"); - std::smatch ddk_match; - - if(std::regex_search(device_vers, ddk_match, ddk_regex) && std::stoi(ddk_match[1]) >= 11) - { - concat_str += " -DUNROLL_WITH_PRAGMA "; - } + concat_str += " -DUNROLL_WITH_PRAGMA "; } std::string build_options = stringify_set(build_options_set, kernel_path) + concat_str; @@ -392,4 +385,18 @@ cl_uint CLCompileContext::get_num_compute_units() const { return _device.compute_units(); } + +int32_t CLCompileContext::get_ddk_version() const +{ + const std::string device_version = _device.device_version(); + const std::regex ddk_regex("r([0-9]*)p[0-9]"); + std::smatch ddk_match; + + if(std::regex_search(device_version, ddk_match, ddk_regex)) + { + return std::stoi(ddk_match[1]); + } + + return -1; +} } // namespace arm_compute diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp index c4b70ca82b..722c802138 100644 --- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp +++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp @@ -292,6 +292,11 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); build_options.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); } + + if(compile_context.get_ddk_version() >= 30) + { + build_options.add_option("-fregister-allocation=64"); + } } else { |