diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-09-21 11:31:46 +0100 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-10-03 16:46:42 +0000 |
commit | b5368fb3da65ca1d31e6acd6cd45b8b6b789f1eb (patch) | |
tree | 90786fcb5f55f90fec6124da6b241cb56ce0d4af /src | |
parent | 304dfdba67958f5987d88ad0ce538399c3e50bc8 (diff) | |
download | ComputeLibrary-b5368fb3da65ca1d31e6acd6cd45b8b6b789f1eb.tar.gz |
Force CL kernel compilation with 64 registers
* For DDK version 30 and higher, force the CL compiler to use
64 registers for NHWC direct convolution.
Resolves: COMPMID-5508
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I7d9ecc3b5a4eceaff44542cd26f6f05e30ab2c1f
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8351
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/CLCompileContext.cpp | 25 | ||||
-rw-r--r-- | src/gpu/cl/kernels/ClDirectConv2dKernel.cpp | 5 |
2 files changed, 21 insertions, 9 deletions
diff --git a/src/core/CL/CLCompileContext.cpp b/src/core/CL/CLCompileContext.cpp
index 81eb748ab8..fce8798b48 100644
--- a/src/core/CL/CLCompileContext.cpp
+++ b/src/core/CL/CLCompileContext.cpp
@@ -270,16 +270,9 @@ std::string CLCompileContext::generate_build_options(const StringSet &build_opti
         ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
     }
 
-    if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD)
+    if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD && get_ddk_version() >= 11)
     {
-        const std::string device_vers = _device.device_version();
-        const std::regex ddk_regex("r([0-9]*)p[0-9]");
-        std::smatch ddk_match;
-
-        if(std::regex_search(device_vers, ddk_match, ddk_regex) && std::stoi(ddk_match[1]) >= 11)
-        {
-            concat_str += " -DUNROLL_WITH_PRAGMA ";
-        }
+        concat_str += " -DUNROLL_WITH_PRAGMA ";
     }
 
     std::string build_options = stringify_set(build_options_set, kernel_path) + concat_str;
@@ -392,4 +385,18 @@ cl_uint CLCompileContext::get_num_compute_units() const
 {
     return _device.compute_units();
 }
+
+int32_t CLCompileContext::get_ddk_version() const
+{
+    const std::string device_version = _device.device_version();
+    const std::regex ddk_regex("r([0-9]*)p[0-9]");
+    std::smatch ddk_match;
+
+    if(std::regex_search(device_version, ddk_match, ddk_regex))
+    {
+        return std::stoi(ddk_match[1]);
+    }
+
+    return -1;
+}
 } // namespace arm_compute
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
index c4b70ca82b..722c802138 100644
--- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
+++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
@@ -292,6 +292,11 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
             build_options.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
             build_options.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
         }
+
+        if(compile_context.get_ddk_version() >= 30)
+        {
+            build_options.add_option("-fregister-allocation=64");
+        }
     }
     else
     {