aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/tile_helpers.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/cl_kernels/tile_helpers.h')
-rw-r--r--src/core/CL/cl_kernels/tile_helpers.h13
1 files changed, 12 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 4959c04448..f2d2f26cf2 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -70,6 +70,7 @@
#define TENSOR4D_STR(name, type) TENSOR4D_##type(name)
#define TENSOR4D(name, type) TENSOR4D_STR(name, type)
+#if !defined(UNROLL_WITH_PRAGMA)
/* Building blocks for the macro-expanded unrolling chain.
 * UNROLL_INCR advances idx by step and then evaluates macro once; it expands to
 * two statements that are not wrapped in a block.
 * LOOP_UNROLLING_1 evaluates macro once and leaves idx untouched. */
#define UNROLL_INCR(idx, step, macro) idx += (step); (macro)
#define LOOP_UNROLLING_1(idx, step, macro) (macro)
@@ -201,12 +202,22 @@
#define LOOP_UNROLLING_127(idx, step, macro) LOOP_UNROLLING_126(idx, step, macro); UNROLL_INCR(idx, step, macro)
#define LOOP_UNROLLING_128(idx, step, macro) LOOP_UNROLLING_127(idx, step, macro); UNROLL_INCR(idx, step, macro)
-#define LOOP_UNROLLING(type, idx, start, step, num, macro) LOOP_UNROLLING_STR(type, idx, start, step, num, macro)
/* Macro-expanded variant: opens a new scope, declares idx of the given type
 * initialised to start, and expands LOOP_UNROLLING_<num> inside that scope.
 * num is token-pasted onto LOOP_UNROLLING_, so by the time this macro is
 * expanded it must already be spelled as the suffix of an existing
 * LOOP_UNROLLING_<n> macro. */
#define LOOP_UNROLLING_STR(type, idx, start, step, num, macro) \
{ \
type idx = start; \
LOOP_UNROLLING_##num(idx, step, macro); \
}
+#else // !defined(UNROLL_WITH_PRAGMA)
+// Pragma-based variant: emits a real for-loop and asks the compiler to unroll it.
+// The upper bound is relative to start, so for a positive step the body runs
+// num times with idx = start, start + step, ..., matching the trip count of the
+// macro-expanded variant even when start is non-zero.
+// start, num and step are parenthesised so expression arguments keep their
+// intended precedence (UNROLL_INCR already does this for step).
+#define LOOP_UNROLLING_STR(type, idx, start, step, num, macro) \
+ { \
+ _Pragma("unroll") \
+ for(type idx = (start); idx < ((start) + (num) * (step)); idx += (step)) \
+ { \
+ (macro); \
+ } \
+ }
+#endif // !defined(UNROLL_WITH_PRAGMA)
+#define LOOP_UNROLLING(type, idx, start, step, num, macro) LOOP_UNROLLING_STR(type, idx, start, step, num, macro) // Forwarding layer: the arguments are macro-expanded here before LOOP_UNROLLING_STR receives them
/** Get the get_global_id with partial N0. This function is useful when the dimension is not a multiple of N0 and we need to use a partial N0
* to avoid out-of-bounds read/write