From 3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6 Mon Sep 17 00:00:00 2001 From: Isabella Gottardi Date: Mon, 12 Feb 2018 14:59:19 +0000 Subject: COMPMID-908 - Merge Activation layer with Convolution Layer (NEON. CL, GLES) Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- .../GLES_COMPUTE/cs_shaders/activation_layer.cs | 92 +--------------- .../cs_shaders/activation_layer_helpers_cs.h | 119 +++++++++++++++++++++ .../cs_shaders/direct_convolution1x1.cs | 49 ++++++++- .../cs_shaders/direct_convolution3x3.cs | 55 +++++++++- .../cs_shaders/direct_convolution5x5.cs | 14 ++- 5 files changed, 236 insertions(+), 93 deletions(-) create mode 100644 src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h (limited to 'src/core/GLES_COMPUTE/cs_shaders') diff --git a/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs index 7d3f4ee67e..9a1e233624 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -23,97 +23,9 @@ */ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "activation_layer_helpers_cs.h" #include "helpers_cs.h" -#ifdef DATA_TYPE_FP32 -precision highp float; -#elif defined(DATA_TYPE_FP16) -#if defined(LOGISTIC) || defined(TANH) || defined(SRELU) || defined(SQRT) -precision highp float; -#else /*LOGISTIC_TANH_SRELU_SQRT*/ -precision mediump float; -#endif /*LOGISTIC_TANH_SRELU_SQRT*/ -#endif /*DATA_TYPE_FP32*/ - -#define ABS_OP(a) abs((a)) -#define ADD_OP(a, b) ((a) + (b)) -#define SUB_OP(a, b) ((a) - (b)) -#define MUL_OP(a, b) ((a) * (b)) -#define MLA_OP(a, b, c) ((b) * (c) + (a)) -#define DIV_OP(a, b) ((a) / (b)) -#define EXP_OP(a) exp((a)) -#define LOG_OP(a) log((a)) -#define SQRT_OP(a) sqrt((a)) -#define CONST_ONE (1.f) - -// Logistic Activation -float logistic_op(float x) -{ - return DIV_OP(CONST_ONE, ADD_OP(CONST_ONE, EXP_OP(-x))); -} -// Hyperbolic Tangent Activation -float tanh_op(float x) -{ - float tmp = float(B_VAL) * x; - if(tmp > 10.f) - { - return MUL_OP(float(A_VAL), 1.f); - } - else if(tmp < -10.f) - { - return MUL_OP(float(A_VAL), -1.f); - } - else - { - return MUL_OP(float(A_VAL), tanh(tmp + 0.000001f)); - } -} -// RELU Tangent Activation -float relu_op(float x) -{ - return max(0.f, x); -} -// Bounded RELU Activation -float brelu_op(float x) -{ - return min(float(A_VAL), max(float(0.0), x)); -} -// Lower Upper Bounded RELU Activation -float lu_brelu_op(float x) -{ - return min(max(x, float(B_VAL)), float(A_VAL)); -} -// Leaky RELU Activation -float lrelu_op(float x) -{ - return (x > float(0.0)) ? x : MUL_OP(float(A_VAL), x); -} -// Soft RELU Activation -float srelu_op(float x) -{ - return LOG_OP(ADD_OP(CONST_ONE, EXP_OP(x))); -} -// Absolute Activation -float abs_op(float x) -{ - return ABS_OP(x); -} -// Square Activation -float square_op(float x) -{ - return MUL_OP(x, x); -} -// Square-root Activation -float sqrt_op(float x) -{ - return SQRT_OP(x); -} -// Linear Activation -float linear_op(float x) -{ - return MLA_OP(float(B_VAL), float(A_VAL), x); -} - /** This performs an activation function floating point inputs. * * @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32" diff --git a/src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h b/src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h new file mode 100644 index 0000000000..f43a33fe87 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef DATA_TYPE_FP32 +precision highp float; +#elif defined(DATA_TYPE_FP16) +#if defined(LOGISTIC) || defined(TANH) || defined(SRELU) || defined(SQRT) +precision highp float; +#else /*LOGISTIC_TANH_SRELU_SQRT*/ +precision mediump float; +#endif /*LOGISTIC_TANH_SRELU_SQRT*/ +#endif /*DATA_TYPE_FP32*/ + +#define ABS_OP(a) abs((a)) +#define ADD_OP(a, b) ((a) + (b)) +#define SUB_OP(a, b) ((a) - (b)) +#define MUL_OP(a, b) ((a) * (b)) +#define MLA_OP(a, b, c) ((b) * (c) + (a)) +#define DIV_OP(a, b) ((a) / (b)) +#define EXP_OP(a) exp((a)) +#define LOG_OP(a) log((a)) +#define SQRT_OP(a) sqrt((a)) +#define CONST_ONE (1.f) + +// Logistic Activation +float logistic_op(float x) +{ + return DIV_OP(CONST_ONE, ADD_OP(CONST_ONE, EXP_OP(-x))); +} +vec4 logistic_op(vec4 x) +{ + return DIV_OP(vec4(CONST_ONE), ADD_OP(CONST_ONE, EXP_OP(-x))); +} +// Hyperbolic Tangent Activation +float tanh_op(float x) +{ + float tmp = float(B_VAL) * x; + if(tmp > 10.f) + { + return MUL_OP(float(A_VAL), 1.f); + } + else if(tmp < -10.f) + { + return MUL_OP(float(A_VAL), -1.f); + } + else + { + return MUL_OP(float(A_VAL), tanh(tmp + 0.000001f)); + } +} +// RELU Tangent Activation +float relu_op(float x) +{ + return max(0.f, x); +} +vec4 relu_op(vec4 x) +{ + return max(vec4(0.f), x); +} +// Bounded RELU Activation +float brelu_op(float x) +{ + return min(float(A_VAL), max(float(0.0), x)); +} +// Lower Upper Bounded RELU Activation +float lu_brelu_op(float x) +{ + return min(max(x, float(B_VAL)), float(A_VAL)); +} +// Leaky RELU Activation +float lrelu_op(float x) +{ + return (x > float(0.0)) ? x : MUL_OP(float(A_VAL), x); +} +// Soft RELU Activation +float srelu_op(float x) +{ + return LOG_OP(ADD_OP(CONST_ONE, EXP_OP(x))); +} +// Absolute Activation +float abs_op(float x) +{ + return ABS_OP(x); +} +// Square Activation +float square_op(float x) +{ + return MUL_OP(x, x); +} +// Square-root Activation +float sqrt_op(float x) +{ + return SQRT_OP(x); +} +// Linear Activation +float linear_op(float x) +{ + return MLA_OP(float(B_VAL), float(A_VAL), x); +} diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs index ea4e9c18e2..b42c09bbc7 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = #include "helpers_cs.h" +#ifdef FUSED_ACTIVATION +#include "activation_layer_helpers_cs.h" +#endif /* FUSED_ACTIVATION */ + #if defined(DATA_TYPE_FP16) precision mediump float; #endif // DATA_TYPE_FP16 @@ -99,6 +103,10 @@ void main() pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index)); #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels = ACT_OP(pixels); +#endif /* FUSED_ACTIVATION */ + STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels); } @@ -210,6 +218,10 @@ void main() pixels += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels = ACT_OP(pixels); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels); } #elif defined(PROCESS_4X_2Y_1Z) @@ -333,6 +345,11 @@ void main() pixels[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); } @@ -470,6 +487,12 @@ void main() pixels[2] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels[2] = ACT_OP(pixels[2]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]); @@ -609,6 +632,13 @@ void main() pixels1[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels1[0] = ACT_OP(pixels1[0]); + pixels1[1] = ACT_OP(pixels1[1]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels1[0]); @@ -745,6 +775,11 @@ void main() pixels[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); @@ -868,6 +903,11 @@ void main() pixels[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels); } #elif defined(PROCESS_8X_2Y_1Z) @@ -1001,6 +1041,13 @@ void main() pixels1[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels1[0] = ACT_OP(pixels1[0]); + pixels1[1] = ACT_OP(pixels1[1]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels); STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels1); } diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs index 855d450335..e51cc3785a 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = #include "helpers_cs.h" +#ifdef FUSED_ACTIVATION +#include "activation_layer_helpers_cs.h" +#endif /* FUSED_ACTIVATION */ + #if defined(DATA_TYPE_FP16) precision mediump float; #endif // DATA_TYPE_FP16 @@ -114,6 +118,10 @@ void main() pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index)); #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels = ACT_OP(pixels); +#endif /* FUSED_ACTIVATION */ + STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels); } @@ -238,6 +246,11 @@ void main() pixels[1] += vec4(b); #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); +#endif /* FUSED_ACTIVATION */ + VSTORE2_CURRENT_ITEM(dst_ptr, dst_iter, pixels); } @@ -335,6 +348,10 @@ void main() pixels += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels = ACT_OP(pixels); +#endif /* FUSED_ACTIVATION */ + STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels); } @@ -434,6 +451,12 @@ void main() pixels[2] += vec4(b); #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels[2] = ACT_OP(pixels[2]); +#endif /* FUSED_ACTIVATION */ + STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels[0]); STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]); @@ -601,6 +624,12 @@ void main() } #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels[2] = ACT_OP(pixels[2]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]); @@ -728,6 +757,10 @@ void main() pixels += vec4(b); #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels = ACT_OP(pixels); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels); } @@ -841,6 +874,12 @@ void main() } #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels[2] = ACT_OP(pixels[2]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]); @@ -962,6 +1001,13 @@ void main() } #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels[2] = ACT_OP(pixels[2]); + pixels[3] = ACT_OP(pixels[3]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]); @@ -1087,6 +1133,13 @@ void main() } #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels[2] = ACT_OP(pixels[2]); + pixels[3] = ACT_OP(pixels[3]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]); diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs index c919e4ed80..728e9644b2 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2018 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = #include "helpers_cs.h" +#ifdef FUSED_ACTIVATION +#include "activation_layer_helpers_cs.h" +#endif /* FUSED_ACTIVATION */ + #if defined(DATA_TYPE_FP16) precision mediump float; #endif // DATA_TYPE_FP16 @@ -116,6 +120,10 @@ void main() pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index)); #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels = ACT_OP(pixels); +#endif /* FUSED_ACTIVATION */ + STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels); } #elif defined(DATA_TYPE_FP16) @@ -204,6 +212,10 @@ void main() res += vec4(b); #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + res = ACT_OP(res); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, res); } -- cgit v1.2.1