aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2020-10-12 11:53:51 +0100
committerGiorgio Arena <giorgio.arena@arm.com>2020-10-12 14:06:24 +0000
commitd056e574f60ca731b2d078e56c6baca5a6c642ac (patch)
tree30a47f1df1341b0462f344f4234bcf5dbf5d4360
parentff9612cad3288ab96f94e86485e591401753f56b (diff)
downloadComputeLibrary-d056e574f60ca731b2d078e56c6baca5a6c642ac.tar.gz
COMPMID-3826 ArmNN Nightly failing for CL
Change-Id: I09f557b5cecafc669e12764e8592457212168d62 Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4131 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/CL/cl_kernels/activation_float_helpers.h32
-rw-r--r--src/core/CL/cl_kernels/activation_layer.cl2
-rw-r--r--src/core/CL/cl_kernels/activation_layer_quant.cl2
-rw-r--r--src/core/CL/cl_kernels/batchnormalization_layer.cl4
-rw-r--r--src/core/CL/cl_kernels/depthwise_convolution.cl40
-rw-r--r--src/core/CL/cl_kernels/elementwise_operation.cl2
-rw-r--r--src/core/CL/cl_kernels/gemm.cl46
-rw-r--r--src/core/CL/cl_kernels/gemm_helpers.h98
-rw-r--r--src/core/CL/cl_kernels/helpers.h14
-rw-r--r--src/core/CL/cl_kernels/pad_layer.cl8
-rw-r--r--src/core/CL/cl_kernels/pixelwise_mul_float.cl4
-rw-r--r--src/core/CL/cl_kernels/select.cl20
-rw-r--r--src/core/CL/cl_kernels/winograd_output_transform.cl69
-rw-r--r--src/core/CL/cl_kernels/yolo_layer.cl14
-rw-r--r--src/core/CL/kernels/CLPadLayerKernel.cpp1
-rw-r--r--src/core/CL/kernels/CLSelectKernel.cpp1
-rw-r--r--src/core/CL/kernels/CLYOLOLayerKernel.cpp1
17 files changed, 184 insertions, 174 deletions
diff --git a/src/core/CL/cl_kernels/activation_float_helpers.h b/src/core/CL/cl_kernels/activation_float_helpers.h
index bedde8349e..8bd6aad42e 100644
--- a/src/core/CL/cl_kernels/activation_float_helpers.h
+++ b/src/core/CL/cl_kernels/activation_float_helpers.h
@@ -31,47 +31,47 @@
#endif // GPU_ARCH == GPU_ARCH_BIFROST
// Hard-Swish
-#define hard_swish_op(DATA_TYPE, x, A_VAL, B_VAL) (x * ((min(max((x + (DATA_TYPE)3.0), (DATA_TYPE)0.0), (DATA_TYPE)6.0)) * (DATA_TYPE)0.166666667))
+#define hard_swish_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (x * ((min(max((x + (DATA_TYPE)3.0), (DATA_TYPE)0.0), (DATA_TYPE)6.0)) * (DATA_TYPE)0.166666667))
// Logistic Activation
-#define logistic_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)1.0 / ((DATA_TYPE)1.0 + exp(-x)))
+#define logistic_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) ((DATA_TYPE)1.0 / ((DATA_TYPE)1.0 + exp(-x)))
// Hyperbolic Tangent Activation
-#define tanh_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)A_VAL * tanh((DATA_TYPE)B_VAL * x))
+#define tanh_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) ((DATA_TYPE)A_VAL * tanh((DATA_TYPE)B_VAL * x))
// RELU Tangent Activation
-#define relu_op(DATA_TYPE, x, A_VAL, B_VAL) (max((DATA_TYPE)0.0, x))
+#define relu_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (max((DATA_TYPE)0.0, x))
// Bounded RELU Activation
-#define brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min((DATA_TYPE)A_VAL, max((DATA_TYPE)0.0, x)))
+#define brelu_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (min((DATA_TYPE)A_VAL, max((DATA_TYPE)0.0, x)))
// Lower Upper Bounded RELU Activation
-#define lu_brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL))
+#define lu_brelu_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL))
// Leaky RELU Activation
-#define lrelu_op(DATA_TYPE, x, A_VAL, B_VAL) ((min(x, (DATA_TYPE)0.0) * (DATA_TYPE)A_VAL) + max(x, (DATA_TYPE)0.0))
+#define lrelu_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) ((min(x, (DATA_TYPE)0.0) * (DATA_TYPE)A_VAL) + max(x, (DATA_TYPE)0.0))
// Soft RELU Activation
-#define srelu_op(DATA_TYPE, x, A_VAL, B_VAL) (log((DATA_TYPE)1.0 + exp(x)))
+#define srelu_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (log((DATA_TYPE)1.0 + exp(x)))
// ELU Activation
-#define elu_op(DATA_TYPE, x, A_VAL, B_VAL) (select(((DATA_TYPE)A_VAL * (exp(x) - (DATA_TYPE)1.0)), x, isgreaterequal(x, (DATA_TYPE)0.0)))
+#define elu_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (select(((DATA_TYPE)A_VAL * (exp(x) - (DATA_TYPE)1.0)), x, (SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE))isgreaterequal(x, (DATA_TYPE)0.0)))
// Absolute Activation
-#define abs_op(DATA_TYPE, x, A_VAL, B_VAL) (fabs(x))
+#define abs_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (fabs(x))
// Square Activation
-#define square_op(DATA_TYPE, x, A_VAL, B_VAL) (x * x)
+#define square_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (x * x)
// Square-root Activation
-#define sqrt_op(DATA_TYPE, x, A_VAL, B_VAL) (sqrt(x))
+#define sqrt_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (sqrt(x))
// Linear Activation
-#define linear_op(DATA_TYPE, x, A_VAL, B_VAL) (MLA((DATA_TYPE)B_VAL, (DATA_TYPE)A_VAL, x))
+#define linear_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (MLA((DATA_TYPE)B_VAL, (DATA_TYPE)A_VAL, x))
// Identity Activation
-#define identity_op(DATA_TYPE, x, A_VAL, B_VAL) (x)
+#define identity_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) (x)
-#define ACT_OP(op, DATA_TYPE, x, A_VAL, B_VAL) op##_op(DATA_TYPE, x, A_VAL, B_VAL)
+#define ACT_OP(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) op##_op(DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)
-#define ACTIVATION(op, DATA_TYPE, x, A_VAL, B_VAL) ACT_OP(op, DATA_TYPE, x, A_VAL, B_VAL)
+#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL) ACT_OP(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)
diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl
index fca4e85255..174b75398d 100644
--- a/src/core/CL/cl_kernels/activation_layer.cl
+++ b/src/core/CL/cl_kernels/activation_layer.cl
@@ -76,7 +76,7 @@ __kernel void activation_layer(
TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
// Perform activation
- data0 = ACTIVATION(ACT, DATA_TYPE, data0, A_VAL, B_VAL);
+ data0 = ACTIVATION(ACT, DATA_TYPE, VEC_SIZE, data0, A_VAL, B_VAL);
// Store result
STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
diff --git a/src/core/CL/cl_kernels/activation_layer_quant.cl b/src/core/CL/cl_kernels/activation_layer_quant.cl
index dbaefacc13..c031c86a5e 100644
--- a/src/core/CL/cl_kernels/activation_layer_quant.cl
+++ b/src/core/CL/cl_kernels/activation_layer_quant.cl
@@ -86,7 +86,7 @@ __kernel void activation_layer_quant_f32(
#else // defined(O1_VAL)
data_flt = round(data_flt) * ((float)S1_VAL);
#endif // defined(O1_VAL)
- data_flt = ACTIVATION(ACT, float, data_flt, A_VAL, B_VAL);
+ data_flt = ACTIVATION(ACT, float, VEC_SIZE, data_flt, A_VAL, B_VAL);
#if defined(O2_VAL)
data0 = CONVERT_SAT(round(data_flt / ((float)S2_VAL)) + (float)O2_VAL, TYPE);
diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl
index ad27aa386c..16dbeaf2ad 100644
--- a/src/core/CL/cl_kernels/batchnormalization_layer.cl
+++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl
@@ -139,7 +139,7 @@ __kernel void batchnormalization_layer_nchw(TENSOR3D_DECLARATION(input),
res = ADD_OP(res, beta_vec);
#endif /* USE_DEFAULT_BETA */
- res = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, res, A_VAL, B_VAL);
+ res = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, res, A_VAL, B_VAL);
VSTORE(VEC_SIZE)
(res, 0, (__global DATA_TYPE *)out.ptr);
@@ -252,7 +252,7 @@ __kernel void batchnormalization_layer_nhwc(TENSOR3D_DECLARATION(input),
res = ADD_OP(res, beta_vec);
#endif /* USE_DEFAULT_BETA */
- res = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, res, A_VAL, B_VAL);
+ res = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, res, A_VAL, B_VAL);
VSTORE(VEC_SIZE)
(res, 0, (__global DATA_TYPE *)out.ptr);
diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl
index e1f6505df7..1c032b144f 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution.cl
@@ -370,7 +370,7 @@ __kernel void depthwise_convolution_3x3(
pixels += (float2)bias;
#endif //defined(HAS_BIAS)
- vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels, A_VAL, B_VAL), 0, (__global float *)dst.ptr);
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels, A_VAL, B_VAL), 0, (__global float *)dst.ptr);
}
#endif //defined(CONV_STRIDE_X)
@@ -568,10 +568,10 @@ __kernel void depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32(
pixels3 += (float2)bias;
#endif /* defined(HAS_BIAS) */
- vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels0, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 0 * dst_stride_y));
- vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels1, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 1 * dst_stride_y));
- vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels2, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 2 * dst_stride_y));
- vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels3, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 3 * dst_stride_y));
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels0, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 0 * dst_stride_y));
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels1, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 1 * dst_stride_y));
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels2, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 2 * dst_stride_y));
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels3, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 3 * dst_stride_y));
}
/** This OpenCL kernel is optimized for Bifrost architectures and computes the depthwise convolution 3x3 when both
@@ -678,8 +678,8 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32(
pixels1 += (float2)bias;
#endif /* defined(HAS_BIAS) */
- vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels0, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 0 * dst_stride_y));
- vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels1, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 1 * dst_stride_y));
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels0, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 0 * dst_stride_y));
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels1, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 1 * dst_stride_y));
}
#endif // defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) && defined(IS_F32)
@@ -1085,7 +1085,7 @@ __kernel void depthwise_convolution_3x3_f16(
pixels += (half4)(*((__global half *)(biases.ptr + channel * biases_stride_x)));
#endif //defined(HAS_BIAS)
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels, A_VAL, B_VAL), 0, (__global half *)dst.ptr);
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels, A_VAL, B_VAL), 0, (__global half *)dst.ptr);
}
#endif // defined(DEPTH_MULTIPLIER)
#endif // defined(CONV_STRIDE_X)
@@ -1207,10 +1207,10 @@ __kernel void depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16(
pixels3 += (half4)bias;
#endif /* defined(HAS_BIAS) */
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels0, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 0 * dst_stride_y));
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels1, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 1 * dst_stride_y));
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels2, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 2 * dst_stride_y));
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels3, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 3 * dst_stride_y));
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels0, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 0 * dst_stride_y));
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels1, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 1 * dst_stride_y));
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels2, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 2 * dst_stride_y));
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels3, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 3 * dst_stride_y));
}
/** This OpenCL kernel is optimized for Bifrost architectures and computes 16bit floating point the depthwise convolution 3x3
@@ -1319,8 +1319,8 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16(
pixels1 += (half4)bias;
#endif /* defined(HAS_BIAS) */
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels0, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 0 * dst_stride_y));
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels1, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 1 * dst_stride_y));
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels0, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 0 * dst_stride_y));
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, pixels1, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 1 * dst_stride_y));
}
#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) && defined(IS_F16)
@@ -1452,7 +1452,7 @@ __kernel void dwc_MxN_native_fp_nhwc(
res += VLOAD(N0)(0, (__global DATA_TYPE *)(b_addr));
#endif // defined(HAS_BIAS)
- res = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, res, A_VAL, B_VAL);
+ res = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, res, A_VAL, B_VAL);
VSTORE(N0)
(res, 0, (__global DATA_TYPE *)(d_addr));
@@ -1624,7 +1624,7 @@ __kernel void depthwise_convolution_3x3_nhwc(
#endif /* defined(DST_DEPTH) */
VSTORE(VEC_SIZE)
- (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr));
+ (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, acc, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr));
}
#endif // defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y)
@@ -1828,18 +1828,18 @@ __kernel void depthwise_convolution_3x3_nhwc_stride1(
#endif /* defined(DST_DEPTH) */
VSTORE(VEC_SIZE)
- (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc0, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y));
+ (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, acc0, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y));
VSTORE(VEC_SIZE)
- (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc1, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y));
+ (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, acc1, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y));
#if((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0)
if((z * NUM_PLANES_PROCESSED + 1) < DST_DIM_2)
#endif // ((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0)
{
VSTORE(VEC_SIZE)
- (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc2, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y + 1 * dst_stride_z));
+ (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, acc2, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y + 1 * dst_stride_z));
VSTORE(VEC_SIZE)
- (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc3, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y + 1 * dst_stride_z));
+ (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, acc3, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y + 1 * dst_stride_z));
}
}
diff --git a/src/core/CL/cl_kernels/elementwise_operation.cl b/src/core/CL/cl_kernels/elementwise_operation.cl
index 52a3309e96..26826e9b8a 100644
--- a/src/core/CL/cl_kernels/elementwise_operation.cl
+++ b/src/core/CL/cl_kernels/elementwise_operation.cl
@@ -101,7 +101,7 @@ __kernel void OP_FUN_NAME(OP)(
// Calculate and store result
#if defined(ACTIVATION_TYPE)
VSTORE(VEC_SIZE)
- (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE_OUT, CONVERT(OP(in_a, in_b), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), A_VAL, B_VAL), 0, (__global DATA_TYPE_OUT *)out.ptr);
+ (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE_OUT, VEC_SIZE, CONVERT(OP(in_a, in_b), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), A_VAL, B_VAL), 0, (__global DATA_TYPE_OUT *)out.ptr);
#else // defined(ACTIVATION_TYPE)
VSTORE(VEC_SIZE)
(OP(in_a, in_b), 0, (__global DATA_TYPE_OUT *)out.ptr);
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index 4ad22ec830..f29f2c1b48 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -1275,7 +1275,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
const bool cond_y = y == 0;
@@ -1621,7 +1621,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
const bool cond_y = y == 0;
@@ -2017,7 +2017,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
const bool cond_y = y == 0;
@@ -2326,7 +2326,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
const bool cond_y = y == 0;
@@ -2763,9 +2763,9 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs),
#if defined(ACTIVATION_TYPE)
#if defined(MIXED_PRECISION)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, VEC_SIZE, c, A_VAL, B_VAL);
#else // defined(MIXED_PRECISION)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(MIXED_PRECISION)
#endif // defined(ACTIVATION_TYPE)
@@ -3033,9 +3033,9 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t_texture(IMAGE_DECLARATION(lhs),
#if defined(ACTIVATION_TYPE)
#if defined(MIXED_PRECISION)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, VEC_SIZE, c, A_VAL, B_VAL);
#else // defined(MIXED_PRECISION)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(MIXED_PRECISION)
#endif // defined(ACTIVATION_TYPE)
@@ -3527,9 +3527,9 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt(IMAGE_DECLARATION(lhs),
#if defined(ACTIVATION_TYPE)
#if defined(MIXED_PRECISION)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, VEC_SIZE, c, A_VAL, B_VAL);
#else // defined(MIXED_PRECISION)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(MIXED_PRECISION)
#endif // defined(ACTIVATION_TYPE)
@@ -3894,9 +3894,9 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt_texture(IMAGE_DECLARATION(lhs),
#if defined(ACTIVATION_TYPE)
#if defined(MIXED_PRECISION)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, VEC_SIZE, c, A_VAL, B_VAL);
#else // defined(MIXED_PRECISION)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(MIXED_PRECISION)
#endif // defined(ACTIVATION_TYPE)
@@ -4280,7 +4280,7 @@ __kernel void gemm_mm_native(IMAGE_DECLARATION(lhs),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
const bool cond_y = y == 0;
@@ -4505,7 +4505,7 @@ __kernel void gemm_mm_interleaved_transposed_f32(IMAGE_DECLARATION(src0),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store 4x4 block
@@ -4833,7 +4833,7 @@ __kernel void gemm_mm_interleaved_transposed_f32_bifrost(IMAGE_DECLARATION(src0)
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store 4x4 block
@@ -5057,7 +5057,7 @@ __kernel void gemm_mm_interleaved_transposed_f16(IMAGE_DECLARATION(src0),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store 4x8 block
@@ -5287,7 +5287,7 @@ __kernel void gemm_mm_interleaved_transposed_f16_acc32(IMAGE_DECLARATION(src0),
half8 c_h3 = convert_half8(c3);
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c_h, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, VEC_SIZE, c_h, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store 4x8 block
@@ -5587,7 +5587,7 @@ __kernel void gemm_mm_interleaved_transposed_f16_bifrost(IMAGE_DECLARATION(src0)
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, VEC_SIZE, c, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store 4x8 block
@@ -5911,7 +5911,7 @@ __kernel void gemm_mm_floating_point(IMAGE_DECLARATION(src0),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, DATA_TYPE, acc, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, acc, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store output block
@@ -6355,7 +6355,7 @@ __kernel void gemm_mm_floating_point_f32_bifrost(IMAGE_DECLARATION(src0),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, acc, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, VEC_SIZE, acc, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store the output block
@@ -6765,7 +6765,7 @@ __kernel void gemm_mm_floating_point_f32_bifrost_1000(IMAGE_DECLARATION(src0),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, acc, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, VEC_SIZE, acc, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store the output block
@@ -7144,7 +7144,7 @@ __kernel void gemm_mm_floating_point_f16_bifrost_acc32(IMAGE_DECLARATION(src0),
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, acc_h, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, VEC_SIZE, acc_h, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store the output block
@@ -7490,7 +7490,7 @@ __kernel void gemm_mm_floating_point_f16_bifrost(IMAGE_DECLARATION(src0),
#endif // defined(BETA)
#if defined(ACTIVATION_TYPE)
- ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, acc, A_VAL, B_VAL);
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, VEC_SIZE, acc, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
// Store the output block
diff --git a/src/core/CL/cl_kernels/gemm_helpers.h b/src/core/CL/cl_kernels/gemm_helpers.h
index 3943a8de78..1e020e106f 100644
--- a/src/core/CL/cl_kernels/gemm_helpers.h
+++ b/src/core/CL/cl_kernels/gemm_helpers.h
@@ -1334,68 +1334,68 @@
* @param[in] B_VAL Additional value required by the activation
* @{
*/
-#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##0, A_VAL, B_VAL);
+#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##0, A_VAL, B_VAL);
-#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##1, A_VAL, B_VAL);
+#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##1, A_VAL, B_VAL);
-#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##2, A_VAL, B_VAL);
+#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##2, A_VAL, B_VAL);
-#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##3, A_VAL, B_VAL);
+#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##3, A_VAL, B_VAL);
-#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##4, A_VAL, B_VAL);
+#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##4, A_VAL, B_VAL);
-#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##5, A_VAL, B_VAL);
+#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##5, A_VAL, B_VAL);
-#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##6, A_VAL, B_VAL);
+#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##6, A_VAL, B_VAL);
-#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##7, A_VAL, B_VAL);
+#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##7, A_VAL, B_VAL);
-#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##8, A_VAL, B_VAL);
+#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##8, A_VAL, B_VAL);
-#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##9, A_VAL, B_VAL);
+#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##9, A_VAL, B_VAL);
-#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##A, A_VAL, B_VAL);
+#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##A, A_VAL, B_VAL);
-#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##B, A_VAL, B_VAL);
+#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##B, A_VAL, B_VAL);
-#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##C, A_VAL, B_VAL);
+#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##C, A_VAL, B_VAL);
-#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##D, A_VAL, B_VAL);
+#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##D, A_VAL, B_VAL);
-#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##E, A_VAL, B_VAL);
+#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##E, A_VAL, B_VAL);
-#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
- BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##F, A_VAL, B_VAL);
+#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##F, A_VAL, B_VAL);
/** @} */ // end of group ACTIVATION_ROW_n
/** Apply activation to a block (BASENAME)
@@ -1411,8 +1411,8 @@
* @param[in] B_VAL Additional value required by the activation
* @{
*/
-#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
-#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
+#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
+#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
/** @} */ // end of group ACTIVATION_BLOCK
/** Apply convert_<data_type> to the given variables
diff --git a/src/core/CL/cl_kernels/helpers.h b/src/core/CL/cl_kernels/helpers.h
index 10f04e9e31..0b36a55895 100644
--- a/src/core/CL/cl_kernels/helpers.h
+++ b/src/core/CL/cl_kernels/helpers.h
@@ -457,6 +457,20 @@
#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
+#define select_dt_uchar(size) uchar##size
+#define select_dt_char(size) char##size
+#define select_dt_ushort(size) ushort##size
+#define select_dt_short(size) short##size
+#define select_dt_half(size) short##size
+#define select_dt_uint(size) uint##size
+#define select_dt_int(size) int##size
+#define select_dt_float(size) int##size
+#define select_dt_ulong(size) ulong##size
+#define select_dt_long(size) long##size
+
+#define SELECT_DATA_TYPE_STR(type, size) select_dt_##type(size)
+#define SELECT_DATA_TYPE(type, size) SELECT_DATA_TYPE_STR(type, size)
+
#define VECTOR_DECLARATION(name) \
__global uchar *name##_ptr, \
uint name##_stride_x, \
diff --git a/src/core/CL/cl_kernels/pad_layer.cl b/src/core/CL/cl_kernels/pad_layer.cl
index 4e4d2ad9e7..d2b43aac2b 100644
--- a/src/core/CL/cl_kernels/pad_layer.cl
+++ b/src/core/CL/cl_kernels/pad_layer.cl
@@ -23,11 +23,11 @@
*/
#include "helpers.h"
-#if defined(DATA_TYPE) && defined(SELECT_DT) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) && defined(SRC_WIDTH)
+#if defined(DATA_TYPE) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) && defined(SRC_WIDTH)
#define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_SELECT VEC_DATA_TYPE(SELECT_DT, VEC_SIZE)
+#define VEC_SELECT SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE)
#define OFFSETS VEC_OFFS(VEC_SELECT, VEC_SIZE)
#if defined(CONST_VAL)
@@ -38,7 +38,6 @@
* @note Constant value used to fill the pads must be passed using the -DCONST_VAL compile flag, e.g. -DCONST_VAL=1.27
* @note Pad to add to the left must be passed using the -DPAD_X_BEFORE compile flag, e.g. -DPAD_X_BEFORE=5
* @note Input tensor's width must be passed using the -DSRC_WIDTH compile flag, e.g. -DSRC_WIDTH=224
- * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile flag, e.g. -DSELECT_DT=float
* @note In case pad left is more than the vector size, the number of threads to skip along the X axis must be passed using the
* -DNUM_THREADS_TO_SKIP_X compile flag, e.g. -DNUM_THREADS_TO_SKIP_X=1. This is defined as (PAD_X_BEFORE / VEC_SIZE)
* @note If pad also needs to be added to the top of the tensor, the following compile flags must be passed at compile time:
@@ -149,7 +148,6 @@ __kernel void pad_layer_constant(TENSOR3D_DECLARATION(src),
* @note Constant value must be passed using the -DCONST_VAL compile flag, e.g. -DCONST_VAL=1.27
* @note Pad to add to the left must be passed using the -DPAD_X_BEFORE compile flag, e.g. -DPAD_X_BEFORE=5
* @note Input tensor's width must be passed using the -DSRC_WIDTH compile flag, e.g. -DSRC_WIDTH=224
- * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile flag, e.g. -DSELECT_DT=float
* @note Number of values to the left when operating across left padding must be passed using the -DPAD_X_BEFORE_REMAINDER compile flag, e.g. -DPAD_X_BEFORE_REMAINDER=5
* @note Number of values to the left when operating across right padding must be passed using the -DPAD_X_AFTER_REMAINDER compile flag, e.g. -DPAD_X_AFTER_REMAINDER=6
* @note To rearrange the vectors properly, (PAD_X_BEFORE_REMAINDER + 1) must be passed when mode is REFLECT using the -DPAD_X_BEFORE_REMAINDER_REFL compile flag, e.g. -DPAD_X_BEFORE_REMAINDER=6
@@ -250,4 +248,4 @@ __kernel void pad_layer_symmetric_reflect(TENSOR3D_DECLARATION(src),
#endif // SRC_WIDTH == 1
}
#endif // defined(PAD_X_BEFORE_REMAINDER) && defined(PAD_X_AFTER_REMAINDER) && defined(PAD_X_BEFORE_REMAINDER_REFL) && defined(PAD_X_AFTER_REMAINDER_REFL) && defined(AFTER_PAD_FACT_X)
-#endif // defined(DATA_TYPE) && defined(SELECT_DT) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) && defined(SRC_WIDTH)
+#endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) && defined(SRC_WIDTH)
diff --git a/src/core/CL/cl_kernels/pixelwise_mul_float.cl b/src/core/CL/cl_kernels/pixelwise_mul_float.cl
index d623226300..4fa1551b54 100644
--- a/src/core/CL/cl_kernels/pixelwise_mul_float.cl
+++ b/src/core/CL/cl_kernels/pixelwise_mul_float.cl
@@ -97,7 +97,7 @@ __kernel void pixelwise_mul_float(
#endif /* DATA_TYPE_FLOAT */
#if defined(ACTIVATION_TYPE)
- vstore16(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE_OUT, res, A_VAL, B_VAL), 0, (__global DATA_TYPE_OUT *)out.ptr);
+ vstore16(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE_OUT, VEC_SIZE, res, A_VAL, B_VAL), 0, (__global DATA_TYPE_OUT *)out.ptr);
#else // defined(ACTIVATION_TYPE)
// Store result
vstore16(res, 0, (__global DATA_TYPE_OUT *)out.ptr);
@@ -150,7 +150,7 @@ __kernel void pixelwise_mul_complex(
float2 res = { vin1.x *vin2.x - vin1.y * vin2.y, vin1.x *vin2.y + vin2.x * vin1.y };
#if defined(ACTIVATION_TYPE)
- vstore2(ACTIVATION(ACTIVATION_TYPE, float, res, A_VAL, B_VAL), 0, (__global float *)out.ptr);
+ vstore2(ACTIVATION(ACTIVATION_TYPE, float, VEC_SIZE, res, A_VAL, B_VAL), 0, (__global float *)out.ptr);
#else // defined(ACTIVATION_TYPE)
// Store result
vstore2(res, 0, (__global float *)out.ptr);
diff --git a/src/core/CL/cl_kernels/select.cl b/src/core/CL/cl_kernels/select.cl
index 52ef81560a..4752cc132f 100644
--- a/src/core/CL/cl_kernels/select.cl
+++ b/src/core/CL/cl_kernels/select.cl
@@ -23,11 +23,10 @@
*/
#include "helpers.h"
-#if defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(VEC_SIZE)
+#if defined(DATA_TYPE) && defined(VEC_SIZE)
/** This function perform a select operation between two tensors when condition tensor has the same rank.
*
* @attention The data_type need to be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=uchar
- * @attention The select operation data_type need to be passed at compile time using -DSELECT_DATA_TYPE: e.g. -DSELECT_DATA_TYPE=uchar
* @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
*
* @param[in] c_ptr Pointer to the source tensor. Supported data types: U8
@@ -76,8 +75,8 @@ __kernel void select_same_rank(
Tensor3D out_t = CONVERT_TO_TENSOR3D_STRUCT(out);
// Load values
- VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE)
- in_c = CONVERT((VLOAD(VEC_SIZE)(0, (__global uchar *)c_t.ptr)), VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE));
+ SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ in_c = CONVERT((VLOAD(VEC_SIZE)(0, (__global uchar *)c_t.ptr)), SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE));
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
in_x = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)x_t.ptr);
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
@@ -85,13 +84,12 @@ __kernel void select_same_rank(
// Calculate and store result
VSTORE(VEC_SIZE)
- (select(in_y, in_x, in_c > (SELECT_DATA_TYPE)0), 0, (__global DATA_TYPE *)out_t.ptr);
+ (select(in_y, in_x, in_c > (SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE))0), 0, (__global DATA_TYPE *)out_t.ptr);
}
/** This function perform a select operation between two tensors when condition tensor has a different rank.
*
* @attention The data_type need to be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=uchar
- * @attention The select operation data_type need to be passed at compile time using -DSELECT_DATA_TYPE: e.g. -DSELECT_DATA_TYPE=uchar
* @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
*
* @param[in] c_ptr Pointer to the source tensor. Supported data types: U8
@@ -138,7 +136,7 @@ __kernel void select_different_rank_2(
Tensor3D out_t = CONVERT_TO_TENSOR3D_STRUCT(out);
// Load values
- VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE)
+ SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE)
in_c = *((__global uchar *)(c_t.ptr + c_idx * c_t.stride_x));
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
in_x = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)x_t.ptr);
@@ -147,7 +145,7 @@ __kernel void select_different_rank_2(
// Calculate and store result
VSTORE(VEC_SIZE)
- (select(in_y, in_x, in_c > (SELECT_DATA_TYPE)0), 0, (__global DATA_TYPE *)out_t.ptr);
+ (select(in_y, in_x, in_c > (SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE))0), 0, (__global DATA_TYPE *)out_t.ptr);
}
#endif /* defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(VEC_SIZE) */
@@ -202,7 +200,7 @@ __kernel void select_different_rank_n(
Tensor3D out_t = CONVERT_TO_TENSOR3D_STRUCT(out);
// Load values
- VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE)
+ SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE)
in_c = *((__global uchar *)(c_t.ptr + c_idx * c_t.stride_x));
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
in_x = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)x_t.ptr);
@@ -211,6 +209,6 @@ __kernel void select_different_rank_n(
// Calculate and store result
VSTORE(VEC_SIZE)
- (select(in_y, in_x, in_c > (SELECT_DATA_TYPE)0), 0, (__global DATA_TYPE *)out_t.ptr);
+ (select(in_y, in_x, in_c > (SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE))0), 0, (__global DATA_TYPE *)out_t.ptr);
}
-#endif /* defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(VEC_SIZE) && defined(DEPTH_SIZE) */ \ No newline at end of file
+#endif /* defined(DATA_TYPE) && defined(VEC_SIZE) && defined(DEPTH_SIZE) */ \ No newline at end of file
diff --git a/src/core/CL/cl_kernels/winograd_output_transform.cl b/src/core/CL/cl_kernels/winograd_output_transform.cl
index efd8502657..e735bbafb6 100644
--- a/src/core/CL/cl_kernels/winograd_output_transform.cl
+++ b/src/core/CL/cl_kernels/winograd_output_transform.cl
@@ -158,11 +158,11 @@ __kernel void winograd_output_transform_2x2_3x3_nchw(
// Store the output tile
#if defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
const VEC_DATA_TYPE(DATA_TYPE, 2)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL);
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL);
*((__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)) = out0_dt.s0;
*((__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)) = out0_dt.s1;
#else // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
- vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL), 0,
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y));
#endif // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
@@ -172,7 +172,7 @@ __kernel void winograd_output_transform_2x2_3x3_nchw(
out10 += (DATA_TYPE)b;
out11 += (DATA_TYPE)b;
#endif // defined(HAS_BIAS)
- vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out10, out11), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL), 0,
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 2))(out10, out11), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y));
#endif // !defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
}
@@ -281,14 +281,15 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc(
offset = min(offset + (int2)(0, 1) * (int2)dst_stride_z, (int2)dst_size); // If address is beyond the last plane, clamp it to dst_size (which points to the last padding).
VEC_DATA_TYPE(DATA_TYPE, 2)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL);
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL);
*(__global DATA_TYPE *)(dst_ptr + offset.s0) = out0_dt.s0;
*(__global DATA_TYPE *)(dst_ptr + offset.s1) = out0_dt.s1;
#else // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
// Get output address
int offset = dst_offset_first_element_in_bytes + x_out * sizeof(DATA_TYPE) + y_out * dst_stride_y + z_out * dst_stride_z;
VEC_DATA_TYPE(DATA_TYPE, 2)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL);
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL,
+ B_VAL);
*(__global DATA_TYPE *)(dst_ptr + 0 * dst_stride_y + offset) = out0_dt.s0;
*(__global DATA_TYPE *)(dst_ptr + 1 * dst_stride_y + offset) = out0_dt.s1;
#endif // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
@@ -398,9 +399,9 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc(
// Store the output tile
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- out_col0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
+ out_col0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT(out_col0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- out_col1_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col1, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
+ out_col1_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT(out_col1, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
*(__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 0 * (int)dst_stride_y + offset.s0) = out_col0_dt.s0;
*(__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 1 * (int)dst_stride_y + offset.s0) = out_col1_dt.s0;
@@ -599,14 +600,14 @@ __kernel void winograd_output_transform_4x4_3x3_nchw(
// Store the output tile
#if defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
VEC_DATA_TYPE(DATA_TYPE, 4)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL,
- B_VAL);
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL,
+ B_VAL);
*((__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)) = out0_dt.s0;
*((__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)) = out0_dt.s1;
*((__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y)) = out0_dt.s2;
*((__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y)) = out0_dt.s3;
#else // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y));
#endif // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
@@ -628,11 +629,11 @@ __kernel void winograd_output_transform_4x4_3x3_nchw(
out32 += (float)b;
out33 += (float)b;
#endif // defined(HAS_BIAS)
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out10, out11, out12, out13), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out10, out11, out12, out13), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y));
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out20, out21, out22, out23), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out20, out21, out22, out23), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y));
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out30, out31, out32, out33), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out30, out31, out32, out33), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y));
#endif // !defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
}
@@ -840,7 +841,8 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc(
// Store the 1x4 output tile
VEC_DATA_TYPE(DATA_TYPE, 4)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL);
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL,
+ B_VAL);
*((__global DATA_TYPE *)(dst_ptr + offset.s0)) = out0_dt.s0;
*((__global DATA_TYPE *)(dst_ptr + offset.s1)) = out0_dt.s1;
*((__global DATA_TYPE *)(dst_ptr + offset.s2)) = out0_dt.s2;
@@ -851,7 +853,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc(
int mult_y = min(dst_size - offset, 1);
VEC_DATA_TYPE(DATA_TYPE, 4)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)),
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)),
A_VAL, B_VAL);
*((__global DATA_TYPE *)(dst_ptr + mult_y * 0 * dst_stride_y + offset)) = out0_dt.s0;
*((__global DATA_TYPE *)(dst_ptr + mult_y * 1 * dst_stride_y + offset)) = out0_dt.s1;
@@ -869,14 +871,14 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc(
// Store the 4x4 output tile
VEC_DATA_TYPE(DATA_TYPE, 4)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL);
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL);
VEC_DATA_TYPE(DATA_TYPE, 4)
- out1_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out10, out11, out12, out13), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL);
+ out1_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out10, out11, out12, out13), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL);
VEC_DATA_TYPE(DATA_TYPE, 4)
- out2_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out20, out21, out22, out23), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL);
+ out2_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out20, out21, out22, out23), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL);
VEC_DATA_TYPE(DATA_TYPE, 4)
- out3_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out30, out31, out32, out33),
- VEC_DATA_TYPE(DATA_TYPE, 4)),
+ out3_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out30, out31, out32, out33),
+ VEC_DATA_TYPE(DATA_TYPE, 4)),
A_VAL, B_VAL);
*((__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 0 * dst_stride_y + offset.s0)) = out0_dt.s0;
*((__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 1 * dst_stride_y + offset.s0)) = out0_dt.s1;
@@ -1012,14 +1014,14 @@ __kernel void winograd_output_transform_4x4_5x5_nchw(
// Store the output tile
#if defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
VEC_DATA_TYPE(DATA_TYPE, 4)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL,
- B_VAL);
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL,
+ B_VAL);
*((__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)) = out0_dt.s0;
*((__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)) = out0_dt.s1;
*((__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y)) = out0_dt.s2;
*((__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y)) = out0_dt.s3;
#else // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr));
#endif // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
@@ -1135,13 +1137,13 @@ __kernel void winograd_output_transform_4x4_5x5_nchw(
#endif // defined(HAS_BIAS)
// Store the output tile
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s0, out_col1.s0, out_col2.s0, out_col3.s0), A_VAL, B_VAL), 0,
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s0, out_col1.s0, out_col2.s0, out_col3.s0), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y));
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s1, out_col1.s1, out_col2.s1, out_col3.s1), A_VAL, B_VAL), 0,
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s1, out_col1.s1, out_col2.s1, out_col3.s1), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y));
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s2, out_col1.s2, out_col2.s2, out_col3.s2), A_VAL, B_VAL), 0,
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s2, out_col1.s2, out_col2.s2, out_col3.s2), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y));
- vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s3, out_col1.s3, out_col2.s3, out_col3.s3), A_VAL, B_VAL), 0,
+ vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s3, out_col1.s3, out_col2.s3, out_col3.s3), A_VAL, B_VAL), 0,
(__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y));
#endif // !defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
}
@@ -1241,7 +1243,8 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc(
offset = min(offset + (int4)(0, 1, 2, 3) * (int4)dst_stride_z, (int4)dst_size); // If address is beyond the last plane, clamp it to dst_size (which points to the last padding).
VEC_DATA_TYPE(DATA_TYPE, 4)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL);
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL,
+ B_VAL);
*(__global DATA_TYPE *)(dst_ptr + offset.s0) = out0_dt.s0;
*(__global DATA_TYPE *)(dst_ptr + offset.s1) = out0_dt.s1;
*(__global DATA_TYPE *)(dst_ptr + offset.s2) = out0_dt.s2;
@@ -1250,7 +1253,7 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc(
// Get output address
int offset = dst_offset_first_element_in_bytes + x_out * sizeof(DATA_TYPE) + y_out * dst_stride_y + z_out * dst_stride_z;
VEC_DATA_TYPE(DATA_TYPE, 4)
- out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL,
+ out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL,
B_VAL);
*(__global DATA_TYPE *)(dst_ptr + 0 * dst_stride_y + offset) = out0_dt.s0;
*(__global DATA_TYPE *)(dst_ptr + 1 * dst_stride_y + offset) = out0_dt.s1;
@@ -1379,13 +1382,13 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc(
// Store the output tile
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- out_col0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
+ out_col0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT(out_col0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- out_col1_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col1, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
+ out_col1_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT(out_col1, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- out_col2_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col2, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
+ out_col2_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT(out_col2, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- out_col3_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col3, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
+ out_col3_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, CONVERT(out_col3, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL);
*(__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 0 * (int)dst_stride_y + offset.s0) = out_col0_dt.s0;
*(__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 1 * (int)dst_stride_y + offset.s0) = out_col1_dt.s0;
diff --git a/src/core/CL/cl_kernels/yolo_layer.cl b/src/core/CL/cl_kernels/yolo_layer.cl
index 2a15a32e2a..fe7b5cbb55 100644
--- a/src/core/CL/cl_kernels/yolo_layer.cl
+++ b/src/core/CL/cl_kernels/yolo_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,13 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE)
+#if defined(DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE)
#include "activation_float_helpers.h"
+#define SELECT_TYPE SELECT_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+
#if VEC_SIZE != 1
#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
-#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE)
/** This performs a YOLO partial activation function for NCHW data layout
*
@@ -79,7 +80,7 @@ __kernel void yolo_layer_nchw(
{
// Load data
TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
- data = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, data, A_VAL, B_VAL); // select(1.0f, ACTIVATION_OP(ACTIVATION_TYPE, data), (SELECT_TYPE)activate);
+ data = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, data, A_VAL, B_VAL); // select(1.0f, ACTIVATION_OP(ACTIVATION_TYPE, data), (SELECT_TYPE)activate);
// Store result
VSTORE(VEC_SIZE)
@@ -100,7 +101,6 @@ __kernel void yolo_layer_nchw(
#else // VEC_SIZE != 1
-#define SELECT_TYPE SELECT_DATA_TYPE
/** This performs a YOLO partial activation function for NCHW data layout
*
* @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
@@ -151,7 +151,7 @@ __kernel void yolo_layer_nhwc(
{
// Load data
DATA_TYPE data = *((__global DATA_TYPE *)input.ptr);
- data = select(data, ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, data, A_VAL, B_VAL), (SELECT_TYPE)activate);
+ data = select(data, ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, data, A_VAL, B_VAL), (SELECT_TYPE)activate);
// Store result
*((__global DATA_TYPE *)output.ptr) = data;
@@ -169,4 +169,4 @@ __kernel void yolo_layer_nhwc(
}
#endif // VEC_SIZE != 1
-#endif // defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE)
+#endif // defined(DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE)
diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp
index c05df61edf..b2432a058d 100644
--- a/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPadLayerKernel.cpp
@@ -135,7 +135,6 @@ void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
- build_opts.add_option("-DSELECT_DT=" + get_cl_select_type_from_data_type(data_type));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size));
build_opts.add_option("-DPAD_X_BEFORE=" + support::cpp11::to_string(pad_x_before));
build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input_width));
diff --git a/src/core/CL/kernels/CLSelectKernel.cpp b/src/core/CL/kernels/CLSelectKernel.cpp
index 1244068816..dcac78cca3 100644
--- a/src/core/CL/kernels/CLSelectKernel.cpp
+++ b/src/core/CL/kernels/CLSelectKernel.cpp
@@ -123,7 +123,6 @@ void CLSelectKernel::configure(const CLCompileContext &compile_context, const IC
// Set build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(x->info()->data_type()));
- build_opts.add_option("-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(x->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
// Create kernel
diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.cpp b/src/core/CL/kernels/CLYOLOLayerKernel.cpp
index 3dd9aa23ce..132d5d1cd0 100644
--- a/src/core/CL/kernels/CLYOLOLayerKernel.cpp
+++ b/src/core/CL/kernels/CLYOLOLayerKernel.cpp
@@ -123,7 +123,6 @@ void CLYOLOLayerKernel::configure(const CLCompileContext &compile_context, ICLTe
CLBuildOptions build_opts;
build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt));
- build_opts.add_option("-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(dt));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DA_VAL=" + float_to_string_with_full_precision(a_const));
build_opts.add_option("-DB_VAL=" + float_to_string_with_full_precision(b_const));