diff options
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r--	src/core/CL/cl_kernels/activation_layer.cl	| 6
-rw-r--r--	src/core/CL/cl_kernels/activation_layer_qa8.cl	| 34
-rw-r--r--	src/core/CL/cl_kernels/batchnormalization_layer.cl	| 13
-rw-r--r--	src/core/CL/cl_kernels/depthwise_convolution_quantized.cl	| 13
4 files changed, 44 insertions, 22 deletions
diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl index 4424a66b61..a8ea7387d6 100644 --- a/src/core/CL/cl_kernels/activation_layer.cl +++ b/src/core/CL/cl_kernels/activation_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -115,6 +115,8 @@ inline TYPE linear_op(TYPE x) #define ACTIVATION_OP2(op, x) op##_op(x) #define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x) +#if defined(ACT) + /** This performs an activation function floating point inputs. * * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time @@ -168,3 +170,5 @@ __kernel void activation_layer( VSTORE(VEC_SIZE) (data, 0, (__global DATA_TYPE *)output.ptr); } + +#endif /* defined(ACT) */
\ No newline at end of file diff --git a/src/core/CL/cl_kernels/activation_layer_qa8.cl b/src/core/CL/cl_kernels/activation_layer_qa8.cl index cb31e99efb..66e54ed6ad 100644 --- a/src/core/CL/cl_kernels/activation_layer_qa8.cl +++ b/src/core/CL/cl_kernels/activation_layer_qa8.cl @@ -44,6 +44,26 @@ inline TYPE lu_brelu_op(TYPE x) #define ACTIVATION_OP2(op, x) op##_op(x) #define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x) +#if defined(O1_VAL) && defined(O2_VAL) && defined(S1_VAL) && defined(S2_VAL) +#define PERFORM_ACTIVATION_QA8(act, data) \ + ({ \ + data = ACTIVATION_OP(act, data); \ + \ + VEC_DATA_TYPE(float, VEC_SIZE) \ + fdata = CONVERT(data, VEC_DATA_TYPE(float, VEC_SIZE)); \ + \ + fdata = round((fdata - (float)O1_VAL) * ((float)S1_VAL / (float)S2_VAL) + (float)O2_VAL); \ + data = CONVERT_SAT(fdata, VEC_DATA_TYPE(uchar, VEC_SIZE)); \ + }) +#else /* defined(O1_VAL) && defined(O2_VAL) && defined(S1_VAL) && defined(S2_VAL) */ +#define PERFORM_ACTIVATION_QA8(act, data) \ + ({ \ + data = ACTIVATION_OP(act, data); \ + }) +#endif /* defined(O1_VAL) && defined(O2_VAL) && defined(S1_VAL) && defined(S2_VAL) */ + +#if defined(ACT) + /** This performs an activation function on QASYMM8 inputs. 
* * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time @@ -92,19 +112,11 @@ __kernel void activation_layer_qa8( // Load data TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr); - // Perform activation - data = ACTIVATION_OP(ACT, data); - -#if defined(O1_VAL) && defined(O2_VAL) && defined(S1_VAL) && defined(S2_VAL) - // requantize to output space - VEC_DATA_TYPE(float, VEC_SIZE) - fdata = CONVERT(data, VEC_DATA_TYPE(float, VEC_SIZE)); - - fdata = round((fdata - (float)O1_VAL) * ((float)S1_VAL / (float)S2_VAL) + (float)O2_VAL); - data = CONVERT_SAT(fdata, VEC_DATA_TYPE(uchar, VEC_SIZE)); -#endif // defined(O1_VAL) && defined(O2_VAL) && defined(S1_VAL) && defined(S2_VAL) + data = PERFORM_ACTIVATION_QA8(ACT, data); // Store result VSTORE(VEC_SIZE) (data, 0, (__global DATA_TYPE *)output.ptr); } + +#endif /* defined(ACT) */
\ No newline at end of file diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl index 5ddeb1a6a1..0b61b5638c 100644 --- a/src/core/CL/cl_kernels/batchnormalization_layer.cl +++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl @@ -44,15 +44,12 @@ #endif /* FIXED_POINT_POSITION */ -#if defined(LU_BRELU) -#define ACTIVATION_FUNC(x) CLAMP(x, (DATA_TYPE)B_VAL, (DATA_TYPE)A_VAL) -#elif defined(BRELU) -#define ACTIVATION_FUNC(x) CLAMP(x, (DATA_TYPE)0, (DATA_TYPE)A_VAL) -#elif defined(RELU) -#define ACTIVATION_FUNC(x) max(x, (DATA_TYPE)0) -#else /* FUSED_ACT */ +#if defined(FUSED_ACTIVATION) +#include "activation_layer.cl" +#define ACTIVATION_FUNC(x) ACTIVATION_OP(FUSED_ACTIVATION, x) +#else /* defined(FUSED_ACTIVATION) */ #define ACTIVATION_FUNC(x) (x) -#endif /* FUSED_ACT */ +#endif /* defined(FUSED_ACTIVATION) */ /** Apply batch normalization. * diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl index 40538a156d..21daee8230 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl @@ -26,6 +26,15 @@ #if defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y) && defined(WEIGHTS_OFFSET) && defined(INPUT_OFFSET) && defined(K_OFFSET) && defined(OUTPUT_OFFSET) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT) +#if defined(FUSED_ACTIVATION) +#define DATA_TYPE uchar +#define VEC_SIZE 8 +#include "activation_layer_qa8.cl" +#define ACTIVATION_FUNC(x) PERFORM_ACTIVATION_QA8(FUSED_ACTIVATION, x) +#else /* defined(FUSED_ACTIVATION) */ +#define ACTIVATION_FUNC(x) (x) +#endif /* defined(FUSED_ACTIVATION) */ + #if CONV_STRIDE_X > 3 #error "Stride X not supported" #endif /* CONV_STRIDE_X > 3 */ @@ -222,7 +231,7 @@ __kernel void depthwise_convolution_3x3_quantized( res0 = max(res0, (uchar8)0); res0 = min(res0, (uchar8)255); - vstore8(res0, 0, dst.ptr); + 
vstore8(ACTIVATION_FUNC(res0), 0, dst.ptr); #if CONV_STRIDE_Y == 1 values1 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values1, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8); @@ -231,7 +240,7 @@ __kernel void depthwise_convolution_3x3_quantized( res1 = max(res1, (uchar8)0); res1 = min(res1, (uchar8)255); - vstore8(res1, 0, dst.ptr + dst_stride_y); + vstore8(ACTIVATION_FUNC(res1), 0, dst.ptr + dst_stride_y); #endif /* CONV_STRIDE_Y == 1 */ } |