aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2021-05-20 11:36:56 +0100
committerGiorgio Arena <giorgio.arena@arm.com>2021-05-20 13:13:20 +0000
commitea8d266515812c4dec936b2153ffd5335873e583 (patch)
treebf02d78bf7a70b4535ad0628b8ec13e2d1c2004e /src
parentb7b6066b676aca315153806ef3cbcddb70b8c60a (diff)
downloadComputeLibrary-ea8d266515812c4dec936b2153ffd5335873e583.tar.gz
Enable unroll through pragma based on DDK version
Change-Id: Id98a107d512369d3799961011a84e9cc4d99e775 Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5679 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/CLCompileContext.cpp15
-rw-r--r--src/core/CL/cl_kernels/tile_helpers.h13
2 files changed, 27 insertions, 1 deletions
diff --git a/src/core/CL/CLCompileContext.cpp b/src/core/CL/CLCompileContext.cpp
index 3db0fe515a..bf3a866e4b 100644
--- a/src/core/CL/CLCompileContext.cpp
+++ b/src/core/CL/CLCompileContext.cpp
@@ -29,6 +29,8 @@
#include "arm_compute/core/Utils.h"
#include "support/StringSupport.h"
+#include <regex>
+
namespace arm_compute
{
CLBuildOptions::CLBuildOptions()
@@ -263,6 +265,19 @@ std::string CLCompileContext::generate_build_options(const StringSet &build_opti
ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
}
+    const GPUTarget arch = get_arch_from_target(_device.target());
+    if(arch != GPUTarget::UNKNOWN && arch != GPUTarget::MIDGARD)
+    {
+        const std::string device_vers = _device.device_version();
+        const std::regex  ddk_regex("r([0-9]+)p[0-9]"); // "r<N>p<M>" tag; '+' (not '*') so the capture is never empty
+        std::smatch       ddk_match;
+        // Pass -DUNROLL_WITH_PRAGMA only when the parsed r<N> number is >= 9; an empty capture would make std::stoi throw.
+        if(std::regex_search(device_vers, ddk_match, ddk_regex) && std::stoi(ddk_match[1].str()) >= 9)
+        {
+            concat_str += " -DUNROLL_WITH_PRAGMA ";
+        }
+    }
+
std::string build_options = stringify_set(build_options_set, kernel_path) + concat_str;
return build_options;
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 4959c04448..f2d2f26cf2 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -70,6 +70,7 @@
#define TENSOR4D_STR(name, type) TENSOR4D_##type(name)
#define TENSOR4D(name, type) TENSOR4D_STR(name, type)
+#if !defined(UNROLL_WITH_PRAGMA)
#define UNROLL_INCR(idx, step, macro) idx += (step); (macro)
#define LOOP_UNROLLING_1(idx, step, macro) (macro)
@@ -201,12 +202,22 @@
#define LOOP_UNROLLING_127(idx, step, macro) LOOP_UNROLLING_126(idx, step, macro); UNROLL_INCR(idx, step, macro)
#define LOOP_UNROLLING_128(idx, step, macro) LOOP_UNROLLING_127(idx, step, macro); UNROLL_INCR(idx, step, macro)
-#define LOOP_UNROLLING(type, idx, start, step, num, macro) LOOP_UNROLLING_STR(type, idx, start, step, num, macro)
#define LOOP_UNROLLING_STR(type, idx, start, step, num, macro) \
{ \
type idx = start; \
LOOP_UNROLLING_##num(idx, step, macro); \
}
+#else // !defined(UNROLL_WITH_PRAGMA)
+#define LOOP_UNROLLING_STR(type, idx, start, step, num, macro) \
+    { /* Runs macro num times with idx = start, start + step, ... (step must be positive), matching the LOOP_UNROLLING_##num expansion */ \
+        _Pragma("unroll") \
+        for(type idx = (start); idx < ((start) + (num) * (step)); idx += (step)) /* bound is relative to start so exactly num iterations run */ \
+        { \
+            (macro); \
+        } \
+    }
+#endif // !defined(UNROLL_WITH_PRAGMA)
+#define LOOP_UNROLLING(type, idx, start, step, num, macro) LOOP_UNROLLING_STR(type, idx, start, step, num, macro)
/** Get the get_global_id with partial N0. This function is useful when the dimension is not multiple of N0 and we need to use a partial N0
* to avoid out-of-bound read/write