diff options
author | Isabella Gottardi <isabella.gottardi@arm.com> | 2018-02-12 14:59:19 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:49:16 +0000 |
commit | 3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6 (patch) | |
tree | 81db8baab925af5b416b66d0328be2eb49543824 /src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs | |
parent | d9eb27597eabe5b7c17520f4f9b3f8a282d72573 (diff) | |
download | ComputeLibrary-3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6.tar.gz |
COMPMID-908 - Merge Activation layer with Convolution Layer (NEON, CL, GLES)
Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs')
-rw-r--r-- | src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs | 49 |
1 file changed, 48 insertions, 1 deletion
```diff
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
index ea4e9c18e2..b42c09bbc7 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
 
 #include "helpers_cs.h"
 
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
 #if defined(DATA_TYPE_FP16)
 precision mediump float;
 #endif // DATA_TYPE_FP16
@@ -99,6 +103,10 @@ void main()
     pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
     STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
 }
 
@@ -210,6 +218,10 @@ void main()
     pixels += b;
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
 }
 #elif defined(PROCESS_4X_2Y_1Z)
@@ -333,6 +345,11 @@ void main()
     pixels[1] += b;
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
 }
@@ -470,6 +487,12 @@ void main()
     pixels[2] += b;
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+    pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -609,6 +632,13 @@ void main()
     pixels1[1] += b;
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+    pixels1[0] = ACT_OP(pixels1[0]);
+    pixels1[1] = ACT_OP(pixels1[1]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels1[0]);
@@ -745,6 +775,11 @@ void main()
     pixels[1] += b;
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
 
@@ -868,6 +903,11 @@ void main()
     pixels[1] += b;
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
 }
 #elif defined(PROCESS_8X_2Y_1Z)
@@ -1001,6 +1041,13 @@ void main()
     pixels1[1] += b;
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+    pixels1[0] = ACT_OP(pixels1[0]);
+    pixels1[1] = ACT_OP(pixels1[1]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
     STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels1);
 }
```