From 6a98a6e322bfb03f98ac9c4dfdc932ec4bea1fd7 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 10 May 2019 17:07:27 +0100 Subject: COMPMID-2194: Refactor activation function macro in OpenCL. Change all activation calls to macro from activation_float_helpers.h The different kernels now call the macro from activation_float_helpers.h. activation_helpers.h is now removed. Change-Id: I2e1314c6bc891809e88590d99e048072541cca14 Signed-off-by: Usama Arif Reviewed-on: https://review.mlplatform.org/c/1123 Comments-Addressed: Arm Jenkins Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- arm_compute/core/Types.h | 5 +- src/core/CL/cl_kernels/activation_float_helpers.h | 6 +- src/core/CL/cl_kernels/activation_helpers.h | 99 ---------------------- src/core/CL/cl_kernels/batchnormalization_layer.cl | 24 +++--- src/core/CL/cl_kernels/depthwise_convolution.cl | 71 +++++++--------- .../cl_kernels/depthwise_convolution_quantized.cl | 8 +- .../CL/cl_kernels/winograd_output_transform.cl | 87 ++++++++++--------- src/core/CL/cl_kernels/yolo_layer.cl | 22 ++--- .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 5 +- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 3 +- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 3 +- .../CL/kernels/CLWinogradOutputTransformKernel.cpp | 4 +- src/core/CL/kernels/CLYOLOLayerKernel.cpp | 4 +- src/core/Utils.cpp | 1 + tests/validation/CL/Winograd.cpp | 13 +-- utils/TypePrinter.h | 3 + 16 files changed, 125 insertions(+), 233 deletions(-) delete mode 100644 src/core/CL/cl_kernels/activation_helpers.h diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 99ec44d5e0..c6c2728407 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1479,7 +1479,8 @@ public: ABS, /**< Absolute ( \f$ f(x)= |x| \f$ ) */ SQUARE, /**< Square ( \f$ f(x)= x^2 \f$ )*/ SQRT, /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/ - LINEAR /**< Linear ( \f$ f(x)= ax + b \f$ ) */ + LINEAR, /**< Linear ( \f$ f(x)= ax + b \f$ ) */ + IDENTITY /**< Identity ( \f$ f(x)= x \f$ ) */ }; ActivationLayerInfo() = default; @@ -1516,7 +1517,7 @@ public: } private: - ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::LOGISTIC }; + ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::IDENTITY }; float _a = {}; float _b = {}; bool _enabled = { false }; diff --git a/src/core/CL/cl_kernels/activation_float_helpers.h b/src/core/CL/cl_kernels/activation_float_helpers.h index fefbcab5df..2efd2699d7 100644 --- a/src/core/CL/cl_kernels/activation_float_helpers.h +++ b/src/core/CL/cl_kernels/activation_float_helpers.h @@ -63,8 +63,8 @@ #define linear_op(DATA_TYPE, x, A_VAL, B_VAL) (MLA((DATA_TYPE)B_VAL, (DATA_TYPE)A_VAL, x)) // Identity Activation -#define _op(DATA_TYPE, x, A_VAL, B_VAL) (x) +#define identity_op(DATA_TYPE, x, A_VAL, B_VAL) (x) -#define OP(op, DATA_TYPE, x, A_VAL, B_VAL) op##_op(x, DATA_TYPE, A_VAL, B_VAL) +#define OP(op, DATA_TYPE, x, A_VAL, B_VAL) op##_op(DATA_TYPE, x, A_VAL, B_VAL) -#define ACTIVATION(op, DATA_TYPE, x, A_VAL, B_VAL) OP(op, x, DATA_TYPE, A_VAL, B_VAL) +#define ACTIVATION(op, DATA_TYPE, x, A_VAL, B_VAL) OP(op, DATA_TYPE, x, A_VAL, B_VAL) diff --git a/src/core/CL/cl_kernels/activation_helpers.h b/src/core/CL/cl_kernels/activation_helpers.h deleted file mode 100644 index 9d4af8497a..0000000000 --- a/src/core/CL/cl_kernels/activation_helpers.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "helpers.h" - -#if defined(TYPE) && defined(SELECT_TYPE) - -#define CONST_ONE 1.f -#define ABS_OP(a) fabs((a)) -#define ADD_OP(a, b) ((a) + (b)) -#define SUB_OP(a, b) ((a) - (b)) -#define MUL_OP(a, b) ((a) * (b)) -#define MLA_OP(a, b, c) ((b) * (c) + (a)) -#define DIV_OP(a, b) ((a) / (b)) -#define EXP_OP(a) exp((a)) -#define LOG_OP(a) log((a)) -#define SQRT_OP(a) sqrt((a)) -#define TANH_OP(a) tanh((a)) - -// Logistic Activation -inline TYPE logistic_op(TYPE x) -{ - return DIV_OP((TYPE)CONST_ONE, ADD_OP((TYPE)CONST_ONE, EXP_OP(-x))); -} -// Hyperbolic Tangent Activation -inline TYPE tanh_op(TYPE x) -{ - return MUL_OP((TYPE)A_VAL, TANH_OP(MUL_OP((TYPE)B_VAL, x))); -} -// RELU Tangent Activation -inline TYPE relu_op(TYPE x) -{ - return max((TYPE)0, x); -} -// Bounded RELU Activation -inline TYPE brelu_op(TYPE x) -{ - return min((TYPE)A_VAL, max((TYPE)0, x)); -} -// Lower Upper Bounded RELU Activation -inline TYPE lu_brelu_op(TYPE x) -{ - return min(max(x, (TYPE)B_VAL), (TYPE)A_VAL); -} -// Leaky RELU Activation -inline TYPE lrelu_op(TYPE x) -{ - return select(MUL_OP((TYPE)A_VAL, x), x, CONVERT(x > (TYPE)0, SELECT_TYPE)); -} -// Soft RELU Activation -inline TYPE srelu_op(TYPE x) -{ - return CONVERT(LOG_OP(ADD_OP((VEC_DATA_TYPE(float, VEC_SIZE))CONST_ONE, EXP_OP(CONVERT(x, VEC_DATA_TYPE(float, VEC_SIZE))))), TYPE); -} -// Absolute Activation -inline TYPE abs_op(TYPE x) -{ - return ABS_OP(x); -} -// Square Activation -inline TYPE square_op(TYPE x) -{ - return MUL_OP(x, x); -} -// Square-root Activation -inline TYPE sqrt_op(TYPE x) -{ - return SQRT_OP(x); -} -// Linear Activation -inline TYPE linear_op(TYPE x) -{ - return MLA_OP((TYPE)B_VAL, (TYPE)A_VAL, x); -} - -#define ACTIVATION_OP2(op, x) op##_op(x) -#define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x) - -#endif // defined(TYPE) && defined(SELECT_TYPE) \ No newline at end of file diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl index 0bb6cd7c5f..66d371c02f 100644 --- a/src/core/CL/cl_kernels/batchnormalization_layer.cl +++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl @@ -29,18 +29,13 @@ #define INVSQRT_OP(a) rsqrt((a)) #define SQCVT_SAT(a) (a) -#if defined(VEC_SIZE) && defined(DATA_TYPE) - -#if defined(FUSED_ACTIVATION) -#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE) -#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) -#include 
"activation_helpers.h" -#define ACTIVATION_FUNC(x) ACTIVATION_OP(FUSED_ACTIVATION, x) -#else /* defined(FUSED_ACTIVATION) */ -#define ACTIVATION_FUNC(x) (x) -#endif /* defined(FUSED_ACTIVATION) */ +#if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(ACTIVATION_TYPE) +#include "activation_float_helpers.h" /** Apply batch normalization. + * + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32 * @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes) @@ -144,13 +139,16 @@ __kernel void batchnormalization_layer_nchw(TENSOR3D_DECLARATION(input), res = ADD_OP(res, beta_vec); #endif /* USE_DEFAULT_BETA */ - res = ACTIVATION_FUNC(res); + res = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, res, A_VAL, B_VAL); VSTORE(VEC_SIZE) (res, 0, (__global DATA_TYPE *)out.ptr); } /** Apply batch normalization on tensors with NHWC format. + * + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32 * @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes) @@ -254,12 +252,12 @@ __kernel void batchnormalization_layer_nhwc(TENSOR3D_DECLARATION(input), res = ADD_OP(res, beta_vec); #endif /* USE_DEFAULT_BETA */ - res = ACTIVATION_FUNC(res); + res = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, res, A_VAL, B_VAL); VSTORE(VEC_SIZE) (res, 0, (__global DATA_TYPE *)out.ptr); } -#endif /* defined(VEC_SIZE) && defined(DATA_TYPE) */ +#endif /* defined(VEC_SIZE) && defined(DATA_TYPE) && defined(DATA_TYPE)*/ #if defined(NUM_CHANNELS) && defined(DATA_TYPE) && defined(EPSILON) /** Fuse batchnorm parameters to convolution layer parameters diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl index a8611af98e..c55a3d91c2 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution.cl @@ -24,14 +24,7 @@ #include "helpers.h" -#if defined(FUSED_ACTIVATION) -#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) -#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE) -#include "activation_helpers.h" -#define ACTIVATION_FUNC(x) ACTIVATION_OP(FUSED_ACTIVATION, x) -#else /* defined(FUSED_ACTIVATION) */ -#define ACTIVATION_FUNC(x) (x) -#endif /* defined(FUSED_ACTIVATION) */ +#include "activation_float_helpers.h" /** Get the pointer position at a certain offset in x and y direction. * @@ -302,6 +295,9 @@ inline float2 convolution3x3( } /** This OpenCL kernel computes the depthwise convolution 3x3 + * + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -368,7 +364,7 @@ __kernel void depthwise_convolution_3x3( pixels += (float2)(*((__global float *)(biases.ptr + channel * biases_stride_x))); #endif //defined(HAS_BIAS) - vstore2(ACTIVATION_FUNC(pixels), 0, (__global float *)dst.ptr); + vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels, A_VAL, B_VAL), 0, (__global float *)dst.ptr); } #endif //defined(CONV_STRIDE_X) @@ -455,11 +451,10 @@ inline float2 convolution_3x3_dilation_stridex2_stridey2_bifrost_f32(__global uc /** This OpenCL kernel is optimized for Bifrost architectures and computes the depthwise convolution 3x3 when both * stride_x and stride_y are equal to 1 * - * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types: float. * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size - * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=float * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -567,20 +562,19 @@ __kernel void depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32( pixels3 += (float2)bias; #endif /* defined(HAS_BIAS) */ - vstore2(ACTIVATION_FUNC(pixels0), 0, (__global float *)(dst.ptr + 0 * dst_stride_y)); - vstore2(ACTIVATION_FUNC(pixels1), 0, (__global float *)(dst.ptr + 1 * dst_stride_y)); - vstore2(ACTIVATION_FUNC(pixels2), 0, (__global float *)(dst.ptr + 2 * dst_stride_y)); - vstore2(ACTIVATION_FUNC(pixels3), 0, (__global float *)(dst.ptr + 3 * dst_stride_y)); + vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels0, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 0 * dst_stride_y)); + vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels1, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 1 * dst_stride_y)); + vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels2, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 2 * dst_stride_y)); + vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels3, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 3 * dst_stride_y)); } /** This OpenCL kernel is optimized for Bifrost architectures and computes the depthwise convolution 3x3 when both * stride_x and stride_y are equal to 2 * - * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types: float. * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size - * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=float * * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -678,8 +672,8 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32( pixels1 += (float2)bias; #endif /* defined(HAS_BIAS) */ - vstore2(ACTIVATION_FUNC(pixels0), 0, (__global float *)(dst.ptr + 0 * dst_stride_y)); - vstore2(ACTIVATION_FUNC(pixels1), 0, (__global float *)(dst.ptr + 1 * dst_stride_y)); + vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels0, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 0 * dst_stride_y)); + vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels1, A_VAL, B_VAL), 0, (__global float *)(dst.ptr + 1 * dst_stride_y)); } #endif // defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) && defined(IS_F32) @@ -1182,11 +1176,10 @@ inline half4 convolution3x3_f16( /** This OpenCL kernel computes the depthwise convolution 3x3 * - * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=half. Supported data types: half. * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size - * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -1253,7 +1246,7 @@ __kernel void depthwise_convolution_3x3_f16( pixels += (half4)(*((__global half *)(biases.ptr + channel * biases_stride_x))); #endif //defined(HAS_BIAS) - vstore4(ACTIVATION_FUNC(pixels), 0, (__global half *)dst.ptr); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels, A_VAL, B_VAL), 0, (__global half *)dst.ptr); } #endif // defined(DEPTH_MULTIPLIER) #endif // defined(CONV_STRIDE_X) @@ -1261,11 +1254,10 @@ __kernel void depthwise_convolution_3x3_f16( /** This OpenCL kernel is optimized for Bifrost architectures and computes the 16bit floating point depthwise convolution 3x3 * when both stride_x and stride_y are equal to 1 * - * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=half. Supported data types: half. * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size - * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half * * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -1376,20 +1368,19 @@ __kernel void depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16( pixels3 += (half4)bias; #endif /* defined(HAS_BIAS) */ - vstore4(ACTIVATION_FUNC(pixels0), 0, (__global half *)(dst.ptr + 0 * dst_stride_y)); - vstore4(ACTIVATION_FUNC(pixels1), 0, (__global half *)(dst.ptr + 1 * dst_stride_y)); - vstore4(ACTIVATION_FUNC(pixels2), 0, (__global half *)(dst.ptr + 2 * dst_stride_y)); - vstore4(ACTIVATION_FUNC(pixels3), 0, (__global half *)(dst.ptr + 3 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels0, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 0 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels1, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 1 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels2, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 2 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels3, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 3 * dst_stride_y)); } /** This OpenCL kernel is optimized for Bifrost architectures and computes 16bit floating point the depthwise convolution 3x3 * when both stride_x and stride_y are equal to 2 * - * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=half. Supported data types: half. * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size - * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -1489,8 +1480,8 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16( pixels1 += (half4)bias; #endif /* defined(HAS_BIAS) */ - vstore4(ACTIVATION_FUNC(pixels0), 0, (__global half *)(dst.ptr + 0 * dst_stride_y)); - vstore4(ACTIVATION_FUNC(pixels1), 0, (__global half *)(dst.ptr + 1 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels0, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 0 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels1, A_VAL, B_VAL), 0, (__global half *)(dst.ptr + 1 * dst_stride_y)); } #endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) && defined(IS_F16) @@ -1512,10 +1503,9 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16( * @note The convolution pad top must be passed at compile time using -DCONV_PAD_LEFT (e.g. -DCONV_PAD_LEFT=1) * @note The convolution stride along the width must be passed at compile time using -DCONV_STRIDE_X (e.g. -DCONV_STRIDE_Y=X) * @note The convolution stride along the height must be passed at compile time using -DCONV_STRIDE_Y (e.g. -DCONV_STRIDE_Y=1) - * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. 
-DACTIVATION_TYPE=relu * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size - * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -1652,7 +1642,7 @@ __kernel void depthwise_convolution_3x3_nhwc( #endif /* defined(DST_DEPTH) */ VSTORE(VEC_SIZE) - (ACTIVATION_FUNC(acc), 0, (__global DATA_TYPE *)(dst_addr)); + (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr)); } #endif // defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y) @@ -1666,10 +1656,9 @@ __kernel void depthwise_convolution_3x3_nhwc( * @note The number of planes processed per thread must be passed at compile time using -DNUM_PLANES_PROCESSED (i.e. -DNUM_PLANES_PROCESSED=2) * @note The convolution pad top must be passed at compile time using -DCONV_PAD_TOP (e.g. -DCONV_PAD_TOP=1) * @note The convolution pad top must be passed at compile time using -DCONV_PAD_LEFT (e.g. -DCONV_PAD_LEFT=1) - * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size - * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half * * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -1857,18 +1846,18 @@ __kernel void depthwise_convolution_3x3_nhwc_stride1( #endif /* defined(DST_DEPTH) */ VSTORE(VEC_SIZE) - (ACTIVATION_FUNC(acc0), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); + (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc0, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); VSTORE(VEC_SIZE) - (ACTIVATION_FUNC(acc1), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); + (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc1, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); #if((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0) if((z * NUM_PLANES_PROCESSED + 1) < DST_DIM_2) #endif // ((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0) { VSTORE(VEC_SIZE) - (ACTIVATION_FUNC(acc2), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y + 1 * dst_stride_z)); + (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc2, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y + 1 * dst_stride_z)); VSTORE(VEC_SIZE) - (ACTIVATION_FUNC(acc3), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y + 1 * dst_stride_z)); + (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, acc3, A_VAL, B_VAL), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y + 1 * dst_stride_z)); } } diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl index 8d145a038e..13568b035d 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl @@ -26,16 +26,16 @@ #if defined(WEIGHTS_OFFSET) && defined(INPUT_OFFSET) && defined(K_OFFSET) && ((defined(OUTPUT_OFFSET) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT)) || defined(REAL_MULTIPLIER)) -#if defined(FUSED_ACTIVATION) +#if defined(ACTIVATION_TYPE) && defined(CONST_0) #define DATA_TYPE uchar #ifndef VEC_SIZE #define VEC_SIZE 8 #endif /* VEC_SIZE */ #include "activation_layer_qa8.cl" -#define ACTIVATION_FUNC(x) PERFORM_ACTIVATION_QA8(FUSED_ACTIVATION, x) -#else /* defined(FUSED_ACTIVATION) */ +#define ACTIVATION_FUNC(x) PERFORM_ACTIVATION_QA8(ACTIVATION_TYPE, x) +#else /* defined(ACTIVATION_TYPE) && defined(CONST_0) */ #define ACTIVATION_FUNC(x) (x) -#endif /* defined(FUSED_ACTIVATION) */ +#endif /* defined(ACTIVATION_TYPE) && defined(CONST_0) */ #if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8) #if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8) diff --git a/src/core/CL/cl_kernels/winograd_output_transform.cl b/src/core/CL/cl_kernels/winograd_output_transform.cl index f24bf06c50..8140cad0fb 100644 --- a/src/core/CL/cl_kernels/winograd_output_transform.cl +++ b/src/core/CL/cl_kernels/winograd_output_transform.cl @@ -23,14 +23,7 @@ */ #include "helpers.h" -#if defined(FUSED_ACTIVATION) -#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE) -#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) -#include "activation_helpers.h" -#define ACTIVATION_FUNC(x) ACTIVATION_OP(FUSED_ACTIVATION, x) -#else /* defined(FUSED_ACTIVATION) */ -#define ACTIVATION_FUNC(x) (x) -#endif /* defined(FUSED_ACTIVATION) */ +#include "activation_float_helpers.h" #if defined(NUM_TILES_X) && defined(OUTPUT_TILE_W) && defined(OUTPUT_TILE_H) #if defined(VEC_SIZE) && VEC_SIZE == 2 @@ -42,10 +35,9 @@ * @note If this kernel is used to perform Winograd output transform 3x1, -DWINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL has to be 
passed at compile time * @note If this kernel is used to perform Winograd output transform 1x3, -DWINOGRAD_OUTPUT_TRANSFORM_VERTICAL has to be passed at compile time * @note The data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types: float/half. - * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively. * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. Accepted values are -DVEC_SIZE=2 (for output_tile_size 2x2, 2x1, 1x2) and -DVEC_SIZE=4 (for output_tile_size 4x4, 4x1, 1x4) - * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=int * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32/F16 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -166,11 +158,12 @@ __kernel void winograd_output_transform_2x2_3x3_nchw( // Store the output tile #if defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) const VEC_DATA_TYPE(DATA_TYPE, 2) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL); *((__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)) = out0_dt.s0; *((__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)) = out0_dt.s1; #else // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) - vstore2(ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2))), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); + vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); #endif // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) #if !defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) @@ -179,7 +172,8 @@ __kernel void winograd_output_transform_2x2_3x3_nchw( out10 += (DATA_TYPE)b; out11 += (DATA_TYPE)b; #endif // defined(HAS_BIAS) - vstore2(ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 2))(out10, out11), VEC_DATA_TYPE(DATA_TYPE, 2))), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); + vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out10, out11), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); #endif // !defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) } @@ -287,14 +281,14 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc( offset = min(offset + (int2)(0, 1) * (int2)dst_stride_z, (int2)dst_size); // If address is beyond the last plane, clamp it to dst_size (which points to the last padding).
VEC_DATA_TYPE(DATA_TYPE, 2) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL); *(__global DATA_TYPE *)(dst_ptr + offset.s0) = out0_dt.s0; *(__global DATA_TYPE *)(dst_ptr + offset.s1) = out0_dt.s1; #else // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) // Get output address int offset = dst_offset_first_element_in_bytes + x_out * sizeof(DATA_TYPE) + y_out * dst_stride_y + z_out * dst_stride_z; VEC_DATA_TYPE(DATA_TYPE, 2) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 2))(out00, out01), VEC_DATA_TYPE(DATA_TYPE, 2)), A_VAL, B_VAL); *(__global DATA_TYPE *)(dst_ptr + 0 * dst_stride_y + offset) = out0_dt.s0; *(__global DATA_TYPE *)(dst_ptr + 1 * dst_stride_y + offset) = out0_dt.s1; #endif // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) @@ -404,9 +398,9 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc( // Store the output tile VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - out_col0_dt = ACTIVATION_FUNC(CONVERT(out_col0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))); + out_col0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL); VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - out_col1_dt = ACTIVATION_FUNC(CONVERT(out_col1, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))); + out_col1_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col1, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL); *(__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 0 * (int)dst_stride_y + offset.s0) = out_col0_dt.s0; *(__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 1 * (int)dst_stride_y + offset.s0) = out_col1_dt.s0; @@ -605,13 +599,14 @@ __kernel void winograd_output_transform_4x4_3x3_nchw( // Store the output tile #if defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) VEC_DATA_TYPE(DATA_TYPE, 4) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL); *((__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)) = out0_dt.s0; *((__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)) = out0_dt.s1; *((__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y)) = out0_dt.s2; *((__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y)) = out0_dt.s3; #else // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) - vstore4(ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4))), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); #endif // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) #if !defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) @@ -632,9 +627,12 @@ __kernel void winograd_output_transform_4x4_3x3_nchw( out32 += (float)b; out33 += (float)b; #endif // defined(HAS_BIAS) - vstore4(ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out10, out11, out12, out13), VEC_DATA_TYPE(DATA_TYPE, 4))), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); - 
vstore4(ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out20, out21, out22, out23), VEC_DATA_TYPE(DATA_TYPE, 4))), 0, (__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y)); - vstore4(ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out30, out31, out32, out33), VEC_DATA_TYPE(DATA_TYPE, 4))), 0, (__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out10, out11, out12, out13), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out20, out21, out22, out23), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out30, out31, out32, out33), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y)); #endif // !defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) } @@ -841,7 +839,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( // Store the 1x4 output tile VEC_DATA_TYPE(DATA_TYPE, 4) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL); *((__global DATA_TYPE *)(dst_ptr + offset.s0)) = out0_dt.s0; *((__global DATA_TYPE *)(dst_ptr + offset.s1)) = out0_dt.s1; *((__global DATA_TYPE *)(dst_ptr + offset.s2)) = out0_dt.s2; @@ -852,7 +850,8 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( int mult_y = min(dst_size - offset, 1); VEC_DATA_TYPE(DATA_TYPE, 4) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), + A_VAL, B_VAL); *((__global DATA_TYPE *)(dst_ptr + mult_y * 0 * dst_stride_y + offset)) = out0_dt.s0; *((__global DATA_TYPE *)(dst_ptr + mult_y * 1 * dst_stride_y + offset)) = out0_dt.s1; *((__global DATA_TYPE *)(dst_ptr + mult_y * 2 * dst_stride_y + offset)) = out0_dt.s2; @@ -869,13 +868,15 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( // Store the 4x4 output tile VEC_DATA_TYPE(DATA_TYPE, 4) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL); VEC_DATA_TYPE(DATA_TYPE, 4) - out1_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out10, out11, out12, out13), VEC_DATA_TYPE(DATA_TYPE, 4))); + out1_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out10, out11, out12, out13), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL); VEC_DATA_TYPE(DATA_TYPE, 4) - out2_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out20, out21, out22, out23), VEC_DATA_TYPE(DATA_TYPE, 4))); + out2_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out20, out21, out22, out23), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL); VEC_DATA_TYPE(DATA_TYPE, 4) - out3_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out30, out31, out32, out33), 
VEC_DATA_TYPE(DATA_TYPE, 4))); + out3_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out30, out31, out32, out33), + VEC_DATA_TYPE(DATA_TYPE, 4)), + A_VAL, B_VAL); *((__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 0 * dst_stride_y + offset.s0)) = out0_dt.s0; *((__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 1 * dst_stride_y + offset.s0)) = out0_dt.s1; *((__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 2 * dst_stride_y + offset.s0)) = out0_dt.s2; @@ -1010,13 +1011,14 @@ __kernel void winograd_output_transform_4x4_5x5_nchw( // Store the output tile #if defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) VEC_DATA_TYPE(DATA_TYPE, 4) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL); *((__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)) = out0_dt.s0; *((__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)) = out0_dt.s1; *((__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y)) = out0_dt.s2; *((__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y)) = out0_dt.s3; #else // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) - vstore4(ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4))), 0, (__global DATA_TYPE *)(dst_addr)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr)); #endif // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) #else // defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) || defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) @@ -1131,10 +1133,14 @@ __kernel void winograd_output_transform_4x4_5x5_nchw( #endif // defined(HAS_BIAS) // Store the output tile - vstore4(ACTIVATION_FUNC((VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s0, out_col1.s0, out_col2.s0, out_col3.s0)), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); - vstore4(ACTIVATION_FUNC((VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s1, out_col1.s1, out_col2.s1, out_col3.s1)), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); - vstore4(ACTIVATION_FUNC((VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s2, out_col1.s2, out_col2.s2, out_col3.s2)), 0, (__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y)); - vstore4(ACTIVATION_FUNC((VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s3, out_col1.s3, out_col2.s3, out_col3.s3)), 0, (__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s0, out_col1.s0, out_col2.s0, out_col3.s0), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s1, out_col1.s1, out_col2.s1, out_col3.s1), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s2, out_col1.s2, out_col2.s2, out_col3.s2), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 2 * dst_stride_y)); + vstore4(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, (VEC_DATA_TYPE(DATA_TYPE, 4))(out_col0.s3, out_col1.s3, out_col2.s3, out_col3.s3), A_VAL, B_VAL), 0, + (__global DATA_TYPE *)(dst_addr + 3 * dst_stride_y)); #endif // !defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) } @@ -1233,7 +1239,7 @@ __kernel void 
winograd_output_transform_4x4_5x5_nhwc( offset = min(offset + (int4)(0, 1, 2, 3) * (int4)dst_stride_z, (int4)dst_size); // If address is beyond the last plane, clamp it to dst_size (which points to the last padding). VEC_DATA_TYPE(DATA_TYPE, 4) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, B_VAL); *(__global DATA_TYPE *)(dst_ptr + offset.s0) = out0_dt.s0; *(__global DATA_TYPE *)(dst_ptr + offset.s1) = out0_dt.s1; *(__global DATA_TYPE *)(dst_ptr + offset.s2) = out0_dt.s2; @@ -1242,7 +1248,8 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( // Get output address int offset = dst_offset_first_element_in_bytes + x_out * sizeof(DATA_TYPE) + y_out * dst_stride_y + z_out * dst_stride_z; VEC_DATA_TYPE(DATA_TYPE, 4) - out0_dt = ACTIVATION_FUNC(CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4))); + out0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT((VEC_DATA_TYPE(float, 4))(out00, out01, out02, out03), VEC_DATA_TYPE(DATA_TYPE, 4)), A_VAL, + B_VAL); *(__global DATA_TYPE *)(dst_ptr + 0 * dst_stride_y + offset) = out0_dt.s0; *(__global DATA_TYPE *)(dst_ptr + 1 * dst_stride_y + offset) = out0_dt.s1; *(__global DATA_TYPE *)(dst_ptr + 2 * dst_stride_y + offset) = out0_dt.s2; @@ -1370,13 +1377,13 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( // Store the output tile VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - out_col0_dt = ACTIVATION_FUNC(CONVERT(out_col0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))); + out_col0_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL); VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - out_col1_dt = ACTIVATION_FUNC(CONVERT(out_col1, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))); + out_col1_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col1, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL); VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - out_col2_dt = ACTIVATION_FUNC(CONVERT(out_col2, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))); + out_col2_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col2, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL); VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - out_col3_dt = ACTIVATION_FUNC(CONVERT(out_col3, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))); + out_col3_dt = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, CONVERT(out_col3, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), A_VAL, B_VAL); *(__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 0 * (int)dst_stride_y + offset.s0) = out_col0_dt.s0; *(__global DATA_TYPE *)(dst_ptr + mult_y.s0 * 1 * (int)dst_stride_y + offset.s0) = out_col1_dt.s0; diff --git a/src/core/CL/cl_kernels/yolo_layer.cl b/src/core/CL/cl_kernels/yolo_layer.cl index 2240d7c637..e59396d54a 100644 --- a/src/core/CL/cl_kernels/yolo_layer.cl +++ b/src/core/CL/cl_kernels/yolo_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -21,21 +21,21 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#if defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACT) && defined(NUM_CLASSES) && defined(VEC_SIZE) +#if defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE) + +#include "activation_float_helpers.h" #if VEC_SIZE != 1 #define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) #define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE) -#include "activation_helpers.h" - /** This performs a YOLO partial activation function for NCHW data layout * * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH + * @note Activation function should be given as a preprocessor argument using -DACTIVATION_TYPE=name. e.g. -DACTIVATION_TYPE=TANH * @note The number of classes should be given as a preprocessor argument using -DNUM_CLASSES=num. e.g. -DNUM_CLASSES=80 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively. * @@ -79,7 +79,7 @@ __kernel void yolo_layer_nchw( { // Load data TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr); - data = ACTIVATION_OP(ACT, data); // select(1.0f, ACTIVATION_OP(ACT, data), (SELECT_TYPE)activate); + data = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, data, A_VAL, B_VAL); // select(1.0f, ACTIVATION_OP(ACTIVATION_TYPE, data), (SELECT_TYPE)activate); // Store result VSTORE(VEC_SIZE) @@ -100,18 +100,14 @@ __kernel void yolo_layer_nchw( #else // VEC_SIZE != 1 -#define TYPE DATA_TYPE #define SELECT_TYPE SELECT_DATA_TYPE - -#include "activation_helpers.h" - /** This performs a YOLO partial activation function for NCHW data layout * * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=1 - * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH + * @note Activation function should be given as a preprocessor argument using -DACTIVATION_TYPE=name. e.g. -DACTIVATION_TYPE=TANH * @note The number of classes should be given as a preprocessor argument using -DNUM_CLASSES=num. e.g. -DNUM_CLASSES=80 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively. 
* @@ -155,7 +151,7 @@ __kernel void yolo_layer_nhwc( { // Load data DATA_TYPE data = *((__global DATA_TYPE *)input.ptr); - data = select(data, ACTIVATION_OP(ACT, data), (SELECT_TYPE)activate); + data = select(data, ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, data, A_VAL, B_VAL), (SELECT_TYPE)activate); // Store result *((__global DATA_TYPE *)output.ptr) = data; @@ -173,4 +169,4 @@ __kernel void yolo_layer_nhwc( } #endif // VEC_SIZE != 1 -#endif // defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACT) && defined(NUM_CLASSES) && defined(VEC_SIZE) \ No newline at end of file +#endif // defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE) diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 07bcb75a6a..f9b975392c 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -159,9 +159,8 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out // Set build options CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.add_option("-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); + build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); build_opts.add_option_if(_run_in_place, "-DIN_PLACE"); diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index 02d8c6d9c2..cd25bb1e7f 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -241,7 +241,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, // Set build options CLBuildOptions build_opts; - build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); + build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(_output->info()->tensor_shape().z())); build_opts.add_option("-DDEPTH_MULTIPLIER=" + support::cpp11::to_string(depth_multiplier)); build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x)); @@ -283,7 +283,6 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, { build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); - build_opts.add_option_if(act_info.enabled(), "-DSELECT_DATA_TYPE=" + 
get_cl_select_type_from_data_type(input->info()->data_type())); build_opts.add_option_if(act_info.enabled(), "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(win_config.second.x().step())); } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index c31825cc2c..758e99b77e 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -202,7 +202,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, const unsigned int num_elems_accessed_per_iteration = is_qasymm ? 4 : (8 / input->info()->element_size()); CLBuildOptions build_opts; - build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); + build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS"); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_accessed_per_iteration)); build_opts.add_option("-DSRC_DIM_2=" + support::cpp11::to_string(_input->info()->dimension(2))); @@ -245,7 +245,6 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, { build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); - build_opts.add_option_if(act_info.enabled(), "-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_input->info()->data_type())); } diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp index bf3a00d199..dcfcd880b1 100644 --- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp @@ -172,7 +172,7 @@ void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const IC // Set build options CLBuildOptions build_opts; - build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); + build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); @@ -185,8 +185,6 @@ void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const IC build_opts.add_option("-DVEC_SIZE=4"); } - build_opts.add_option_if(act_info.enabled(), "-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(input->info()->data_type())); - build_opts.add_option_if(_bias != nullptr, std::string("-DHAS_BIAS")); build_opts.add_option("-DNUM_TILES_X=" + support::cpp11::to_string(num_tiles.width)); build_opts.add_option("-DOUTPUT_TILE_W=" + support::cpp11::to_string(output_tile_size.width)); diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.cpp b/src/core/CL/kernels/CLYOLOLayerKernel.cpp index ee9bdecc22..4152337ac0 100644 --- a/src/core/CL/kernels/CLYOLOLayerKernel.cpp +++ 
b/src/core/CL/kernels/CLYOLOLayerKernel.cpp @@ -116,7 +116,7 @@ void CLYOLOLayerKernel::configure(ICLTensor *input, ICLTensor *output, const Act // Set build options CLBuildOptions build_opts; - build_opts.add_option("-DACT=" + lower_string(string_from_activation_func(act_info.activation()))); + build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)); build_opts.add_option("-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(dt)); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); @@ -178,4 +178,4 @@ void CLYOLOLayerKernel::run(const Window &window, cl::CommandQueue &queue) } while(collapsed.slide_window_slice_3D(slice)); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index 589b7375ae..aa795bd117 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -179,6 +179,7 @@ const std::string &arm_compute::string_from_activation_func(ActivationLayerInfo: { ActivationLayerInfo::ActivationFunction::SQRT, "SQRT" }, { ActivationLayerInfo::ActivationFunction::SQUARE, "SQUARE" }, { ActivationLayerInfo::ActivationFunction::TANH, "TANH" }, + { ActivationLayerInfo::ActivationFunction::IDENTITY, "IDENTITY" }, }; return act_map[act]; diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp index 62f0335253..5894d7fabd 100644 --- a/tests/validation/CL/Winograd.cpp +++ b/tests/validation/CL/Winograd.cpp @@ -61,6 +61,7 @@ const AbsoluteTolerance tolerance_convolution_layer_f16(half(0.4f)); RelativeTolerance rel_tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for FP16 data types */ constexpr float tolerance_num = 0.05f; /**< Tolerance number */ constexpr float abs_tolerance_convolution_layer_f16 = 2.5f; /**< Tolerance number */ +constexpr float tolerance_num_convolution_f16 = 0.15f; /**< Tolerance number */ // Input transform const auto SmallWinogradInputTransformDatasetNCHW = @@ -753,7 +754,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -775,7 +776,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -797,7 +798,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); } 
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -820,7 +821,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -844,7 +845,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -868,7 +869,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index a71e03696a..b22e068c69 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -385,6 +385,9 @@ inline ::std::ostream &operator<<(::std::ostream &os, const ActivationLayerInfo: case ActivationLayerInfo::ActivationFunction::TANH: os << "TANH"; break; + case ActivationLayerInfo::ActivationFunction::IDENTITY: + os << "IDENTITY"; + break; default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); } -- cgit v1.2.1
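For reference, the macro cleanup in activation_float_helpers.h is behaviour-preserving: the old OP forwarded (x, DATA_TYPE) to op##_op while the old ACTIVATION forwarded (x, DATA_TYPE) to OP, so the two swaps cancelled out; the patch simply makes both layers forward (DATA_TYPE, x, A_VAL, B_VAL) in declared order, and renames the identity macro from _op to identity_op so that the op##_op token paste resolves. The sketch below is a standalone C99 mirror of the macro chain, not part of the patch: relu_op and linear_op are simplified scalar stand-ins for the vector-typed OpenCL definitions, with fmaxf standing in for OpenCL's max().

    #include <math.h>
    #include <stdio.h>

    /* Simplified per-op macros; the real header defines these over OpenCL vector types. */
    #define relu_op(DATA_TYPE, x, A_VAL, B_VAL) (fmaxf((DATA_TYPE)0, (x)))
    #define linear_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)(A_VAL) * (x) + (DATA_TYPE)(B_VAL))
    #define identity_op(DATA_TYPE, x, A_VAL, B_VAL) (x)

    /* Patched dispatch: both layers forward (DATA_TYPE, x, A_VAL, B_VAL) in order,
     * and op##_op pastes the build option into the op name, e.g. relu -> relu_op. */
    #define OP(op, DATA_TYPE, x, A_VAL, B_VAL) op##_op(DATA_TYPE, x, A_VAL, B_VAL)
    #define ACTIVATION(op, DATA_TYPE, x, A_VAL, B_VAL) OP(op, DATA_TYPE, x, A_VAL, B_VAL)

    int main(void)
    {
        /* What a kernel built with -DACTIVATION_TYPE=relu executes per element: */
        printf("%f\n", ACTIVATION(relu, float, -3.0f, 0.0f, 0.0f));    /* 0.000000 */
        /* With -DACTIVATION_TYPE=linear -DA_VAL=3.0f -DB_VAL=1.0f: */
        printf("%f\n", ACTIVATION(linear, float, 2.0f, 3.0f, 1.0f));   /* 7.000000 */
        /* No fused activation: the kernels are now always built with an
         * ACTIVATION_TYPE, and the new IDENTITY default in ActivationLayerInfo
         * maps to the identity_op no-op. */
        printf("%f\n", ACTIVATION(identity, float, 5.0f, 0.0f, 0.0f)); /* 5.000000 */
        return 0;
    }

This mirrors the host-side change in the kernels above: configure() now adds -DACTIVATION_TYPE unconditionally (lower-cased via string_from_activation_func, so the IDENTITY default becomes -DACTIVATION_TYPE=identity), and only adds -DA_VAL/-DB_VAL when an activation is actually fused.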