aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
author Isabella Gottardi <isabella.gottardi@arm.com> 2018-02-12 14:59:19 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:49:16 +0000
commit 3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6 (patch)
tree 81db8baab925af5b416b66d0328be2eb49543824 /src/core
parent d9eb27597eabe5b7c17520f4f9b3f8a282d72573 (diff)
download ComputeLibrary-3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6.tar.gz
COMPMID-908 - Merge Activation layer with Convolution Layer (NEON, CL, GLES)
Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/GLES_COMPUTE/GCKernelLibrary.cpp | 8
-rw-r--r-- src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs | 92
-rw-r--r-- src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h | 119
-rw-r--r-- src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs | 49
-rw-r--r-- src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs | 55
-rw-r--r-- src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs | 14
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp | 14
7 files changed, 257 insertions, 94 deletions
diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
index 5d1464ace4..25ac02e8f4 100644
--- a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
+++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
@@ -232,6 +232,14 @@ const std::map<std::string, std::string> GCKernelLibrary::_program_source_map =
{
#ifdef EMBEDDED_KERNELS
{
+ "helpers_cs.h",
+#include "./cs_shaders/helpers_cs.hembed"
+ },
+ {
+ "activation_layer_helpers_cs.h",
+#include "./cs_shaders/activation_layer_helpers_cs.hembed"
+ },
+ {
"absdiff.cs",
#include "./cs_shaders/absdiff.csembed"
},
diff --git a/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs
index 7d3f4ee67e..9a1e233624 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,97 +23,9 @@
*/
layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;
+#include "activation_layer_helpers_cs.h"
#include "helpers_cs.h"
-#ifdef DATA_TYPE_FP32
-precision highp float;
-#elif defined(DATA_TYPE_FP16)
-#if defined(LOGISTIC) || defined(TANH) || defined(SRELU) || defined(SQRT)
-precision highp float;
-#else /*LOGISTIC_TANH_SRELU_SQRT*/
-precision mediump float;
-#endif /*LOGISTIC_TANH_SRELU_SQRT*/
-#endif /*DATA_TYPE_FP32*/
-
-#define ABS_OP(a) abs((a))
-#define ADD_OP(a, b) ((a) + (b))
-#define SUB_OP(a, b) ((a) - (b))
-#define MUL_OP(a, b) ((a) * (b))
-#define MLA_OP(a, b, c) ((b) * (c) + (a))
-#define DIV_OP(a, b) ((a) / (b))
-#define EXP_OP(a) exp((a))
-#define LOG_OP(a) log((a))
-#define SQRT_OP(a) sqrt((a))
-#define CONST_ONE (1.f)
-
-// Logistic Activation
-float logistic_op(float x)
-{
- return DIV_OP(CONST_ONE, ADD_OP(CONST_ONE, EXP_OP(-x)));
-}
-// Hyperbolic Tangent Activation
-float tanh_op(float x)
-{
- float tmp = float(B_VAL) * x;
- if(tmp > 10.f)
- {
- return MUL_OP(float(A_VAL), 1.f);
- }
- else if(tmp < -10.f)
- {
- return MUL_OP(float(A_VAL), -1.f);
- }
- else
- {
- return MUL_OP(float(A_VAL), tanh(tmp + 0.000001f));
- }
-}
-// RELU Tangent Activation
-float relu_op(float x)
-{
- return max(0.f, x);
-}
-// Bounded RELU Activation
-float brelu_op(float x)
-{
- return min(float(A_VAL), max(float(0.0), x));
-}
-// Lower Upper Bounded RELU Activation
-float lu_brelu_op(float x)
-{
- return min(max(x, float(B_VAL)), float(A_VAL));
-}
-// Leaky RELU Activation
-float lrelu_op(float x)
-{
- return (x > float(0.0)) ? x : MUL_OP(float(A_VAL), x);
-}
-// Soft RELU Activation
-float srelu_op(float x)
-{
- return LOG_OP(ADD_OP(CONST_ONE, EXP_OP(x)));
-}
-// Absolute Activation
-float abs_op(float x)
-{
- return ABS_OP(x);
-}
-// Square Activation
-float square_op(float x)
-{
- return MUL_OP(x, x);
-}
-// Square-root Activation
-float sqrt_op(float x)
-{
- return SQRT_OP(x);
-}
-// Linear Activation
-float linear_op(float x)
-{
- return MLA_OP(float(B_VAL), float(A_VAL), x);
-}
-
/** This performs an activation function floating point inputs.
*
* @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32"
diff --git a/src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h b/src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h
new file mode 100644
index 0000000000..f43a33fe87
--- /dev/null
+++ b/src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef DATA_TYPE_FP32
+precision highp float;
+#elif defined(DATA_TYPE_FP16)
+#if defined(LOGISTIC) || defined(TANH) || defined(SRELU) || defined(SQRT)
+precision highp float;
+#else /*LOGISTIC_TANH_SRELU_SQRT*/
+precision mediump float;
+#endif /*LOGISTIC_TANH_SRELU_SQRT*/
+#endif /*DATA_TYPE_FP32*/
+
+#define ABS_OP(a) abs((a))
+#define ADD_OP(a, b) ((a) + (b))
+#define SUB_OP(a, b) ((a) - (b))
+#define MUL_OP(a, b) ((a) * (b))
+#define MLA_OP(a, b, c) ((b) * (c) + (a))
+#define DIV_OP(a, b) ((a) / (b))
+#define EXP_OP(a) exp((a))
+#define LOG_OP(a) log((a))
+#define SQRT_OP(a) sqrt((a))
+#define CONST_ONE (1.f)
+
+// Logistic Activation: 1 / (1 + exp(-x)); scalar overload followed by a component-wise vec4 overload
+float logistic_op(float x)
+{
+ return DIV_OP(CONST_ONE, ADD_OP(CONST_ONE, EXP_OP(-x)));
+}
+vec4 logistic_op(vec4 x)
+{
+ return DIV_OP(vec4(CONST_ONE), ADD_OP(CONST_ONE, EXP_OP(-x)));
+}
+// Hyperbolic Tangent Activation: A_VAL * tanh(B_VAL * x + 0.000001); returns +/-A_VAL directly when B_VAL * x is above 10 or below -10
+float tanh_op(float x)
+{
+ float tmp = float(B_VAL) * x;
+ if(tmp > 10.f)
+ {
+ return MUL_OP(float(A_VAL), 1.f);
+ }
+ else if(tmp < -10.f)
+ {
+ return MUL_OP(float(A_VAL), -1.f);
+ }
+ else
+ {
+ return MUL_OP(float(A_VAL), tanh(tmp + 0.000001f));
+ }
+}
+// RELU Activation: max(0, x); scalar overload followed by a component-wise vec4 overload
+float relu_op(float x)
+{
+ return max(0.f, x);
+}
+vec4 relu_op(vec4 x)
+{
+ return max(vec4(0.f), x);
+}
+// Bounded RELU Activation: min(A_VAL, max(0, x))
+float brelu_op(float x)
+{
+ return min(float(A_VAL), max(float(0.0), x));
+}
+// Lower Upper Bounded RELU Activation: min(max(x, B_VAL), A_VAL), i.e. B_VAL is the lower bound and A_VAL the upper bound
+float lu_brelu_op(float x)
+{
+ return min(max(x, float(B_VAL)), float(A_VAL));
+}
+// Leaky RELU Activation: x when x > 0, otherwise A_VAL * x
+float lrelu_op(float x)
+{
+ return (x > float(0.0)) ? x : MUL_OP(float(A_VAL), x);
+}
+// Soft RELU Activation: log(1 + exp(x))
+float srelu_op(float x)
+{
+ return LOG_OP(ADD_OP(CONST_ONE, EXP_OP(x)));
+}
+// Absolute Activation: abs(x)
+float abs_op(float x)
+{
+ return ABS_OP(x);
+}
+// Square Activation: x * x
+float square_op(float x)
+{
+ return MUL_OP(x, x);
+}
+// Square-root Activation: sqrt(x)
+float sqrt_op(float x)
+{
+ return SQRT_OP(x);
+}
+// Linear Activation: A_VAL * x + B_VAL (MLA_OP(a, b, c) expands to b * c + a)
+float linear_op(float x)
+{
+ return MLA_OP(float(B_VAL), float(A_VAL), x);
+}
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
index ea4e9c18e2..b42c09bbc7 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
#include "helpers_cs.h"
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
#if defined(DATA_TYPE_FP16)
precision mediump float;
#endif // DATA_TYPE_FP16
@@ -99,6 +103,10 @@ void main()
pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -210,6 +218,10 @@ void main()
pixels += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
#elif defined(PROCESS_4X_2Y_1Z)
@@ -333,6 +345,11 @@ void main()
pixels[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
}
@@ -470,6 +487,12 @@ void main()
pixels[2] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -609,6 +632,13 @@ void main()
pixels1[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels1[0] = ACT_OP(pixels1[0]);
+ pixels1[1] = ACT_OP(pixels1[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels1[0]);
@@ -745,6 +775,11 @@ void main()
pixels[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
@@ -868,6 +903,11 @@ void main()
pixels[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
#elif defined(PROCESS_8X_2Y_1Z)
@@ -1001,6 +1041,13 @@ void main()
pixels1[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels1[0] = ACT_OP(pixels1[0]);
+ pixels1[1] = ACT_OP(pixels1[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels1);
}
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
index 855d450335..e51cc3785a 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
#include "helpers_cs.h"
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
#if defined(DATA_TYPE_FP16)
precision mediump float;
#endif // DATA_TYPE_FP16
@@ -114,6 +118,10 @@ void main()
pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -238,6 +246,11 @@ void main()
pixels[1] += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
VSTORE2_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -335,6 +348,10 @@ void main()
pixels += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -434,6 +451,12 @@ void main()
pixels[2] += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels[0]);
STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -601,6 +624,12 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -728,6 +757,10 @@ void main()
pixels += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
@@ -841,6 +874,12 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -962,6 +1001,13 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+ pixels[3] = ACT_OP(pixels[3]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -1087,6 +1133,13 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+ pixels[3] = ACT_OP(pixels[3]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs
index c919e4ed80..728e9644b2 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
#include "helpers_cs.h"
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
#if defined(DATA_TYPE_FP16)
precision mediump float;
#endif // DATA_TYPE_FP16
@@ -116,6 +120,10 @@ void main()
pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
#elif defined(DATA_TYPE_FP16)
@@ -204,6 +212,10 @@ void main()
res += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ res = ACT_OP(res);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, res);
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index bef30d5042..67a1530431 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -50,7 +50,8 @@ BorderSize GCDirectConvolutionLayerKernel<kernel_size>::border_size(
}
template <unsigned int kernel_size>
-void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, const PadStrideInfo &conv_info)
+void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output,
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
@@ -58,6 +59,7 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
ARM_COMPUTE_ERROR_ON_MSG((kernel_size == 3 && std::get<0>(conv_info.stride()) > 2), "Strides larger than 2 not supported in 3x3 direct convolution!");
ARM_COMPUTE_ERROR_ON(kernel_size != weights->info()->dimension(0));
+ ARM_COMPUTE_ERROR_ON(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU && act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC);
if(bias != nullptr)
{
@@ -108,6 +110,16 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
options.emplace(("#define " + dt_name));
+ // Activation information in case of a fused activation
+ if(act_info.enabled())
+ {
+ options.emplace("#define FUSED_ACTIVATION");
+ options.emplace(("#define " + string_from_activation_func(act_info.activation())));
+ options.emplace(("#define ACT_OP " + lower_string(string_from_activation_func(act_info.activation())) + "_op"));
+ options.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a())));
+ options.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b())));
+ }
+
unsigned int num_elems_read_per_iteration_x = kernel_size * _conv_stride_x;
unsigned int num_elems_read_per_iteration_y = 1;
unsigned int num_elems_written_per_iteration_x = 1;