about | summary | refs | log | tree | commit | diff
path: root/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
diff options
context:
space:
mode:
author Isabella Gottardi <isabella.gottardi@arm.com> 2018-02-12 14:59:19 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:49:16 +0000
commit 3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6 (patch)
tree 81db8baab925af5b416b66d0328be2eb49543824 /src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
parent d9eb27597eabe5b7c17520f4f9b3f8a282d72573 (diff)
download ComputeLibrary-3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6.tar.gz
COMPMID-908 - Merge Activation layer with Convolution Layer (NEON, CL, GLES)
Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs')
-rw-r--r-- src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs 55
1 files changed, 54 insertions, 1 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
index 855d450335..e51cc3785a 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
#include "helpers_cs.h"
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
#if defined(DATA_TYPE_FP16)
precision mediump float;
#endif // DATA_TYPE_FP16
@@ -114,6 +118,10 @@ void main()
pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -238,6 +246,11 @@ void main()
pixels[1] += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
VSTORE2_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -335,6 +348,10 @@ void main()
pixels += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -434,6 +451,12 @@ void main()
pixels[2] += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels[0]);
STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -601,6 +624,12 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -728,6 +757,10 @@ void main()
pixels += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
@@ -841,6 +874,12 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -962,6 +1001,13 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+ pixels[3] = ACT_OP(pixels[3]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -1087,6 +1133,13 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+ pixels[3] = ACT_OP(pixels[3]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);