aboutsummaryrefslogtreecommitdiff
path: root/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
diff options
context:
space:
mode:
authorIsabella Gottardi <isabella.gottardi@arm.com>2018-02-12 14:59:19 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:49:16 +0000
commit3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6 (patch)
tree81db8baab925af5b416b66d0328be2eb49543824 /src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
parentd9eb27597eabe5b7c17520f4f9b3f8a282d72573 (diff)
downloadComputeLibrary-3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6.tar.gz
COMPMID-908 - Merge Activation layer with Convolution Layer (NEON. CL, GLES)
Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs')
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs49
1 files changed, 48 insertions, 1 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
index ea4e9c18e2..b42c09bbc7 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
#include "helpers_cs.h"
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
#if defined(DATA_TYPE_FP16)
precision mediump float;
#endif // DATA_TYPE_FP16
@@ -99,6 +103,10 @@ void main()
pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -210,6 +218,10 @@ void main()
pixels += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
#elif defined(PROCESS_4X_2Y_1Z)
@@ -333,6 +345,11 @@ void main()
pixels[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
}
@@ -470,6 +487,12 @@ void main()
pixels[2] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -609,6 +632,13 @@ void main()
pixels1[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels1[0] = ACT_OP(pixels1[0]);
+ pixels1[1] = ACT_OP(pixels1[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels1[0]);
@@ -745,6 +775,11 @@ void main()
pixels[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
@@ -868,6 +903,11 @@ void main()
pixels[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
#elif defined(PROCESS_8X_2Y_1Z)
@@ -1001,6 +1041,13 @@ void main()
pixels1[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels1[0] = ACT_OP(pixels1[0]);
+ pixels1[1] = ACT_OP(pixels1[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels1);
}