about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Viet-Hoa Do <viet-hoa.do@arm.com> 2022-09-21 11:31:46 +0100
committer: Viet-Hoa Do <viet-hoa.do@arm.com> 2022-10-03 16:46:42 +0000
commit: b5368fb3da65ca1d31e6acd6cd45b8b6b789f1eb (patch)
tree: 90786fcb5f55f90fec6124da6b241cb56ce0d4af
parent: 304dfdba67958f5987d88ad0ce538399c3e50bc8 (diff)
download: ComputeLibrary-b5368fb3da65ca1d31e6acd6cd45b8b6b789f1eb.tar.gz
Force CL kernel compilation with 64 registers
* For DDK version 30 and higher, force the CL compiler to use 64 registers
  for NHWC direct convolution.

Resolves: COMPMID-5508
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I7d9ecc3b5a4eceaff44542cd26f6f05e30ab2c1f
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8351
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
-rw-r--r-- arm_compute/core/CL/CLCompileContext.h 6
-rw-r--r-- src/core/CL/CLCompileContext.cpp 25
-rw-r--r-- src/gpu/cl/kernels/ClDirectConv2dKernel.cpp 5
3 files changed, 27 insertions, 9 deletions
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h
index 0ce074df2b..e8f2ff35da 100644
--- a/arm_compute/core/CL/CLCompileContext.h
+++ b/arm_compute/core/CL/CLCompileContext.h
@@ -304,6 +304,12 @@ public:
*/
bool is_wbsm_supported() const;
+ /** Return the DDK version. If the DDK version cannot be detected, return -1.
+ *
+ * @return The DDK version.
+ */
+ int32_t get_ddk_version() const;
+
private:
/** Load program and its dependencies.
*
diff --git a/src/core/CL/CLCompileContext.cpp b/src/core/CL/CLCompileContext.cpp
index 81eb748ab8..fce8798b48 100644
--- a/src/core/CL/CLCompileContext.cpp
+++ b/src/core/CL/CLCompileContext.cpp
@@ -270,16 +270,9 @@ std::string CLCompileContext::generate_build_options(const StringSet &build_opti
ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
}
- if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD)
+ if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD && get_ddk_version() >= 11)
{
- const std::string device_vers = _device.device_version();
- const std::regex ddk_regex("r([0-9]*)p[0-9]");
- std::smatch ddk_match;
-
- if(std::regex_search(device_vers, ddk_match, ddk_regex) && std::stoi(ddk_match[1]) >= 11)
- {
- concat_str += " -DUNROLL_WITH_PRAGMA ";
- }
+ concat_str += " -DUNROLL_WITH_PRAGMA ";
}
std::string build_options = stringify_set(build_options_set, kernel_path) + concat_str;
@@ -392,4 +385,18 @@ cl_uint CLCompileContext::get_num_compute_units() const
{
return _device.compute_units();
}
+
+int32_t CLCompileContext::get_ddk_version() const
+{
+ const std::string device_version = _device.device_version();
+ const std::regex ddk_regex("r([0-9]*)p[0-9]");
+ std::smatch ddk_match;
+
+ if(std::regex_search(device_version, ddk_match, ddk_regex))
+ {
+ return std::stoi(ddk_match[1]);
+ }
+
+ return -1;
+}
} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
index c4b70ca82b..722c802138 100644
--- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
+++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
@@ -292,6 +292,11 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
build_options.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
build_options.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
}
+
+ if(compile_context.get_ddk_version() >= 30)
+ {
+ build_options.add_option("-fregister-allocation=64");
+ }
}
else
{