From 3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6 Mon Sep 17 00:00:00 2001 From: Isabella Gottardi Date: Mon, 12 Feb 2018 14:59:19 +0000 Subject: COMPMID-908 - Merge Activation layer with Convolution Layer (NEON, CL, GLES) Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- .../cs_shaders/direct_convolution1x1.cs | 49 +++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs') diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs index ea4e9c18e2..b42c09bbc7 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = #include "helpers_cs.h" +#ifdef FUSED_ACTIVATION +#include "activation_layer_helpers_cs.h" +#endif /* FUSED_ACTIVATION */ + #if defined(DATA_TYPE_FP16) precision mediump float; #endif // DATA_TYPE_FP16 @@ -99,6 +103,10 @@ void main() pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index)); #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels = ACT_OP(pixels); +#endif /* FUSED_ACTIVATION */ + STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels); } @@ -210,6 +218,10 @@ void main() pixels += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels = ACT_OP(pixels); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels); } #elif defined(PROCESS_4X_2Y_1Z) @@ -333,6 +345,11 @@ void main() pixels[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); +#endif /* FUSED_ACTIVATION */ + 
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); } @@ -470,6 +487,12 @@ void main() pixels[2] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels[2] = ACT_OP(pixels[2]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]); @@ -609,6 +632,13 @@ void main() pixels1[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels1[0] = ACT_OP(pixels1[0]); + pixels1[1] = ACT_OP(pixels1[1]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels1[0]); @@ -745,6 +775,11 @@ void main() pixels[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]); STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]); @@ -868,6 +903,11 @@ void main() pixels[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels); } #elif defined(PROCESS_8X_2Y_1Z) @@ -1001,6 +1041,13 @@ void main() pixels1[1] += b; #endif /* BIAS */ +#ifdef FUSED_ACTIVATION + pixels[0] = ACT_OP(pixels[0]); + pixels[1] = ACT_OP(pixels[1]); + pixels1[0] = ACT_OP(pixels1[0]); + pixels1[1] = ACT_OP(pixels1[1]); +#endif /* FUSED_ACTIVATION */ + STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels); STORE_PACK8_HALF(dst_ptr, 
TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels1); } -- cgit v1.2.1