aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorIsabella Gottardi <isabella.gottardi@arm.com>2018-02-12 14:59:19 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:49:16 +0000
commit3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6 (patch)
tree81db8baab925af5b416b66d0328be2eb49543824 /src
parentd9eb27597eabe5b7c17520f4f9b3f8a282d72573 (diff)
downloadComputeLibrary-3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6.tar.gz
COMPMID-908 - Merge Activation layer with Convolution Layer (NEON, CL, GLES)
Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/GLES_COMPUTE/GCKernelLibrary.cpp8
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs92
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h119
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs49
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs55
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs14
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp14
-rw-r--r--src/runtime/CL/functions/CLConvolutionLayer.cpp17
-rw-r--r--src/runtime/CL/functions/CLDirectConvolutionLayer.cpp28
-rw-r--r--src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp36
-rw-r--r--src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp30
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp22
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp11
-rw-r--r--src/runtime/NEON/functions/NEConvolutionLayer.cpp24
-rw-r--r--src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp27
-rw-r--r--src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp44
-rw-r--r--src/runtime/NEON/functions/NEWinogradLayer.cpp21
17 files changed, 459 insertions, 152 deletions
diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
index 5d1464ace4..25ac02e8f4 100644
--- a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
+++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
@@ -232,6 +232,14 @@ const std::map<std::string, std::string> GCKernelLibrary::_program_source_map =
{
#ifdef EMBEDDED_KERNELS
{
+ "helpers_cs.h",
+#include "./cs_shaders/helpers_cs.hembed"
+ },
+ {
+ "activation_layer_helpers_cs.h",
+#include "./cs_shaders/activation_layer_helpers_cs.hembed"
+ },
+ {
"absdiff.cs",
#include "./cs_shaders/absdiff.csembed"
},
diff --git a/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs
index 7d3f4ee67e..9a1e233624 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,97 +23,9 @@
*/
layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;
+#include "activation_layer_helpers_cs.h"
#include "helpers_cs.h"
-#ifdef DATA_TYPE_FP32
-precision highp float;
-#elif defined(DATA_TYPE_FP16)
-#if defined(LOGISTIC) || defined(TANH) || defined(SRELU) || defined(SQRT)
-precision highp float;
-#else /*LOGISTIC_TANH_SRELU_SQRT*/
-precision mediump float;
-#endif /*LOGISTIC_TANH_SRELU_SQRT*/
-#endif /*DATA_TYPE_FP32*/
-
-#define ABS_OP(a) abs((a))
-#define ADD_OP(a, b) ((a) + (b))
-#define SUB_OP(a, b) ((a) - (b))
-#define MUL_OP(a, b) ((a) * (b))
-#define MLA_OP(a, b, c) ((b) * (c) + (a))
-#define DIV_OP(a, b) ((a) / (b))
-#define EXP_OP(a) exp((a))
-#define LOG_OP(a) log((a))
-#define SQRT_OP(a) sqrt((a))
-#define CONST_ONE (1.f)
-
-// Logistic Activation
-float logistic_op(float x)
-{
- return DIV_OP(CONST_ONE, ADD_OP(CONST_ONE, EXP_OP(-x)));
-}
-// Hyperbolic Tangent Activation
-float tanh_op(float x)
-{
- float tmp = float(B_VAL) * x;
- if(tmp > 10.f)
- {
- return MUL_OP(float(A_VAL), 1.f);
- }
- else if(tmp < -10.f)
- {
- return MUL_OP(float(A_VAL), -1.f);
- }
- else
- {
- return MUL_OP(float(A_VAL), tanh(tmp + 0.000001f));
- }
-}
-// RELU Tangent Activation
-float relu_op(float x)
-{
- return max(0.f, x);
-}
-// Bounded RELU Activation
-float brelu_op(float x)
-{
- return min(float(A_VAL), max(float(0.0), x));
-}
-// Lower Upper Bounded RELU Activation
-float lu_brelu_op(float x)
-{
- return min(max(x, float(B_VAL)), float(A_VAL));
-}
-// Leaky RELU Activation
-float lrelu_op(float x)
-{
- return (x > float(0.0)) ? x : MUL_OP(float(A_VAL), x);
-}
-// Soft RELU Activation
-float srelu_op(float x)
-{
- return LOG_OP(ADD_OP(CONST_ONE, EXP_OP(x)));
-}
-// Absolute Activation
-float abs_op(float x)
-{
- return ABS_OP(x);
-}
-// Square Activation
-float square_op(float x)
-{
- return MUL_OP(x, x);
-}
-// Square-root Activation
-float sqrt_op(float x)
-{
- return SQRT_OP(x);
-}
-// Linear Activation
-float linear_op(float x)
-{
- return MLA_OP(float(B_VAL), float(A_VAL), x);
-}
-
/** This performs an activation function floating point inputs.
*
* @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32"
diff --git a/src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h b/src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h
new file mode 100644
index 0000000000..f43a33fe87
--- /dev/null
+++ b/src/core/GLES_COMPUTE/cs_shaders/activation_layer_helpers_cs.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef DATA_TYPE_FP32
+precision highp float;
+#elif defined(DATA_TYPE_FP16)
+#if defined(LOGISTIC) || defined(TANH) || defined(SRELU) || defined(SQRT)
+precision highp float;
+#else /*LOGISTIC_TANH_SRELU_SQRT*/
+precision mediump float;
+#endif /*LOGISTIC_TANH_SRELU_SQRT*/
+#endif /*DATA_TYPE_FP32*/
+
+#define ABS_OP(a) abs((a))
+#define ADD_OP(a, b) ((a) + (b))
+#define SUB_OP(a, b) ((a) - (b))
+#define MUL_OP(a, b) ((a) * (b))
+#define MLA_OP(a, b, c) ((b) * (c) + (a))
+#define DIV_OP(a, b) ((a) / (b))
+#define EXP_OP(a) exp((a))
+#define LOG_OP(a) log((a))
+#define SQRT_OP(a) sqrt((a))
+#define CONST_ONE (1.f)
+
+// Logistic Activation: 1 / (1 + exp(-x)), provided as scalar and vec4 overloads
+float logistic_op(float x)
+{
+ return DIV_OP(CONST_ONE, ADD_OP(CONST_ONE, EXP_OP(-x)));
+}
+vec4 logistic_op(vec4 x)
+{
+ return DIV_OP(vec4(CONST_ONE), ADD_OP(CONST_ONE, EXP_OP(-x)));
+}
+// Hyperbolic Tangent Activation: A_VAL * tanh(B_VAL * x + 1e-6), clamped to +A_VAL / -A_VAL when B_VAL * x is above 10 / below -10
+float tanh_op(float x)
+{
+ float tmp = float(B_VAL) * x;
+ if(tmp > 10.f)
+ {
+ return MUL_OP(float(A_VAL), 1.f);
+ }
+ else if(tmp < -10.f)
+ {
+ return MUL_OP(float(A_VAL), -1.f);
+ }
+ else
+ {
+ return MUL_OP(float(A_VAL), tanh(tmp + 0.000001f));
+ }
+}
+// RELU Activation: max(0, x), provided as scalar and vec4 overloads
+float relu_op(float x)
+{
+ return max(0.f, x);
+}
+vec4 relu_op(vec4 x)
+{
+ return max(vec4(0.f), x);
+}
+// Bounded RELU Activation: min(A_VAL, max(0, x))
+float brelu_op(float x)
+{
+ return min(float(A_VAL), max(float(0.0), x));
+}
+// Lower Upper Bounded RELU Activation: min(max(x, B_VAL), A_VAL), i.e. B_VAL is the lower bound and A_VAL the upper bound
+float lu_brelu_op(float x)
+{
+ return min(max(x, float(B_VAL)), float(A_VAL));
+}
+// Leaky RELU Activation: x when x > 0, otherwise A_VAL * x
+float lrelu_op(float x)
+{
+ return (x > float(0.0)) ? x : MUL_OP(float(A_VAL), x);
+}
+// Soft RELU Activation: log(1 + exp(x))
+float srelu_op(float x)
+{
+ return LOG_OP(ADD_OP(CONST_ONE, EXP_OP(x)));
+}
+// Absolute Activation: abs(x)
+float abs_op(float x)
+{
+ return ABS_OP(x);
+}
+// Square Activation: x * x
+float square_op(float x)
+{
+ return MUL_OP(x, x);
+}
+// Square-root Activation: sqrt(x)
+float sqrt_op(float x)
+{
+ return SQRT_OP(x);
+}
+// Linear Activation: A_VAL * x + B_VAL
+float linear_op(float x)
+{
+ return MLA_OP(float(B_VAL), float(A_VAL), x);
+}
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
index ea4e9c18e2..b42c09bbc7 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
#include "helpers_cs.h"
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
#if defined(DATA_TYPE_FP16)
precision mediump float;
#endif // DATA_TYPE_FP16
@@ -99,6 +103,10 @@ void main()
pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -210,6 +218,10 @@ void main()
pixels += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
#elif defined(PROCESS_4X_2Y_1Z)
@@ -333,6 +345,11 @@ void main()
pixels[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
}
@@ -470,6 +487,12 @@ void main()
pixels[2] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -609,6 +632,13 @@ void main()
pixels1[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels1[0] = ACT_OP(pixels1[0]);
+ pixels1[1] = ACT_OP(pixels1[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels1[0]);
@@ -745,6 +775,11 @@ void main()
pixels[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
@@ -868,6 +903,11 @@ void main()
pixels[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
#elif defined(PROCESS_8X_2Y_1Z)
@@ -1001,6 +1041,13 @@ void main()
pixels1[1] += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels1[0] = ACT_OP(pixels1[0]);
+ pixels1[1] = ACT_OP(pixels1[1]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels1);
}
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
index 855d450335..e51cc3785a 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
#include "helpers_cs.h"
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
#if defined(DATA_TYPE_FP16)
precision mediump float;
#endif // DATA_TYPE_FP16
@@ -114,6 +118,10 @@ void main()
pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -238,6 +246,11 @@ void main()
pixels[1] += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
VSTORE2_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -335,6 +348,10 @@ void main()
pixels += b;
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
@@ -434,6 +451,12 @@ void main()
pixels[2] += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels[0]);
STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -601,6 +624,12 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -728,6 +757,10 @@ void main()
pixels += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
@@ -841,6 +874,12 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -962,6 +1001,13 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+ pixels[3] = ACT_OP(pixels[3]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -1087,6 +1133,13 @@ void main()
}
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels[0] = ACT_OP(pixels[0]);
+ pixels[1] = ACT_OP(pixels[1]);
+ pixels[2] = ACT_OP(pixels[2]);
+ pixels[3] = ACT_OP(pixels[3]);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs
index c919e4ed80..728e9644b2 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
#include "helpers_cs.h"
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
#if defined(DATA_TYPE_FP16)
precision mediump float;
#endif // DATA_TYPE_FP16
@@ -116,6 +120,10 @@ void main()
pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
#elif defined(DATA_TYPE_FP16)
@@ -204,6 +212,10 @@ void main()
res += vec4(b);
#endif /* BIAS */
+#ifdef FUSED_ACTIVATION
+ res = ACT_OP(res);
+#endif /* FUSED_ACTIVATION */
+
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, res);
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index bef30d5042..67a1530431 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -50,7 +50,8 @@ BorderSize GCDirectConvolutionLayerKernel<kernel_size>::border_size(
}
template <unsigned int kernel_size>
-void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, const PadStrideInfo &conv_info)
+void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output,
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
@@ -58,6 +59,7 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
ARM_COMPUTE_ERROR_ON_MSG((kernel_size == 3 && std::get<0>(conv_info.stride()) > 2), "Strides larger than 2 not supported in 3x3 direct convolution!");
ARM_COMPUTE_ERROR_ON(kernel_size != weights->info()->dimension(0));
+ ARM_COMPUTE_ERROR_ON(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU && act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC);
if(bias != nullptr)
{
@@ -108,6 +110,16 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
options.emplace(("#define " + dt_name));
+ // Activation information in case of a fused activation
+ if(act_info.enabled())
+ {
+ options.emplace("#define FUSED_ACTIVATION");
+ options.emplace(("#define " + string_from_activation_func(act_info.activation())));
+ options.emplace(("#define ACT_OP " + lower_string(string_from_activation_func(act_info.activation())) + "_op"));
+ options.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a())));
+ options.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b())));
+ }
+
unsigned int num_elems_read_per_iteration_x = kernel_size * _conv_stride_x;
unsigned int num_elems_read_per_iteration_y = 1;
unsigned int num_elems_written_per_iteration_x = 1;
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 64bda93ff0..bcb5424aab 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -43,13 +43,13 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma
}
void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation));
+ ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info));
switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info,
- weights_info, CLScheduler::get().target(), dilation))
+ weights_info, act_info, CLScheduler::get().target(), dilation))
{
case ConvolutionMethod::DIRECT:
{
@@ -72,25 +72,25 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
}
Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
//Configure if the parameters match the direct convolution or the gemm-based
const GPUTarget gpu_target = CLScheduler::get().target();
- switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target, dilation))
+ switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, act_info, gpu_target, dilation))
{
case ConvolutionMethod::DIRECT:
{
// Validate direct convolution layer
- CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info);
+ // Propagate a failing Status to the caller instead of discarding it
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info));
break;
}
case ConvolutionMethod::GEMM:
{
// Validate gemm-based convolution layer
- CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation);
+ // Propagate a failing Status to the caller instead of discarding it
+ ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info));
break;
}
default:
@@ -102,7 +102,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
}
ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation)
+ const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation)
{
ARM_COMPUTE_UNUSED(input);
ARM_COMPUTE_UNUSED(weights);
@@ -112,6 +112,7 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *
ARM_COMPUTE_UNUSED(weights_info);
ARM_COMPUTE_UNUSED(gpu_target);
ARM_COMPUTE_UNUSED(dilation);
+ ARM_COMPUTE_UNUSED(act_info);
return ConvolutionMethod::GEMM;
}
diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
index c48865a0cc..c451bd4b4c 100644
--- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
@@ -33,11 +33,11 @@
using namespace arm_compute;
CLDirectConvolutionLayer::CLDirectConvolutionLayer()
- : _direct_conv_kernel(), _input_border_handler()
+ : _direct_conv_kernel(), _input_border_handler(), _activationlayer_function(), _is_activationlayer_enabled(false)
{
}
-void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
// Set GPU target
_direct_conv_kernel.set_target(CLScheduler::get().target());
@@ -55,11 +55,25 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig
// Tune kernels
CLScheduler::get().tune_kernel_static(_direct_conv_kernel);
+
+ _is_activationlayer_enabled = act_info.enabled();
+
+ //Configure Activation Layer
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
-Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
+Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info)
{
- return CLDirectConvolutionLayerKernel::validate(input, weights, biases, output, conv_info, CLScheduler::get().target());
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayerKernel::validate(input, weights, biases, output, conv_info, CLScheduler::get().target()));
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
+ }
+ return Status{};
}
void CLDirectConvolutionLayer::run()
@@ -69,4 +83,10 @@ void CLDirectConvolutionLayer::run()
// Run direct convolution
CLScheduler::get().enqueue(_direct_conv_kernel);
+
+ //Run Activation Layer
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
}
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index f43e100565..084c4df718 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -90,8 +90,8 @@ void CLConvolutionLayerReshapeWeights::run()
}
CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _original_weights(nullptr),
- _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true)
+ : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _activationlayer_function(),
+ _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true), _is_activationlayer_enabled(false)
{
}
@@ -152,7 +152,7 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
}
void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
@@ -162,7 +162,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
output->info(),
conv_info,
weights_info,
- dilation));
+ dilation,
+ act_info));
_is_first_run = true;
_original_weights = weights;
@@ -260,11 +261,19 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
// Allocate intermediate tensor
_weights_reshaped.allocator()->allocate();
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
+
ARM_COMPUTE_UNUSED(weights_info);
}
Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
@@ -274,6 +283,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != input->dimension(2));
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
+ if(act_info.enabled())
+ {
+ // validate() reports problems through its returned Status, so use the RETURN variant of the check.
+ // NOTE(review): this bound is applied to every activation function - confirm that is intended.
+ ARM_COMPUTE_RETURN_ERROR_ON(act_info.b() > act_info.a());
+ }
+
const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
const bool append_bias = (biases != nullptr) && (!is_quantized);
const unsigned bias_element = (append_bias) ? 1 : 0;
@@ -343,6 +357,12 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
}
+ //Validate Activation Layer and propagate any failure through the returned Status
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
+ }
+
return Status{};
}
@@ -383,5 +403,11 @@ void CLGEMMConvolutionLayer::run()
// Reshape output matrix
CLScheduler::get().enqueue(_col2im_kernel, false);
+ //Run Activation Layer if enabled
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
+
_memory_group.release();
}
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index a861e0072e..7af36bf06b 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -32,11 +32,12 @@
using namespace arm_compute;
CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _input0(), _input1(), _batched_mm_output(), _is_first_run(true)
+ : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _activationlayer_function(), _input0(), _input1(), _batched_mm_output(),
+ _is_first_run(true), _is_activationlayer_enabled(false)
{
}
-void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
// TODO(COMPMID-1013): This part will be removed
// Get indeces for the width and height
@@ -73,13 +74,21 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
_output_transform.configure(&_batched_mm_output, biases, output, Size2D(kernel_w, kernel_h), Size2D(output_convolved_shape[idx_width], output_convolved_shape[idx_height]), Size2D(num_tiles_x,
num_tiles_y));
+ // Configure activation layer
+ _is_activationlayer_enabled = act_info.enabled();
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
+
// Allocate temporary tensors
_input0.allocator()->allocate();
_input1.allocator()->allocate();
_batched_mm_output.allocator()->allocate();
}
-Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
+Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info)
{
// TODO(COMPMID-1013): This part will be removed
// Get indices for the width and height
@@ -107,17 +116,23 @@ Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen
const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape);
ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradFilterTransformKernel::validate(weights, &input1, Size2D(2U, 2U)));
- // Configure batched matrix multiply
+ // Validate batched matrix multiply
TensorShape batched_mm_output_shape = input0.tensor_shape();
batched_mm_output_shape[0] = input1.tensor_shape()[0];
const TensorInfo batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape);
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(&input0, &input1, nullptr, &batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/)));
- // Configure output transform
+ // Validate output transform
ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradOutputTransformKernel::validate(&batched_mm_output, biases, output, Size2D(kernel_w, kernel_h), Size2D(output_convolved_shape[idx_width],
output_convolved_shape[idx_height]),
Size2D(num_tiles_x, num_tiles_y)));
+ // Validate Activation Layer
+ if(act_info.enabled())
+ {
+ CLActivationLayer::validate(output, nullptr, act_info);
+ }
+
return Status{};
}
@@ -142,5 +157,10 @@ void CLWinogradConvolutionLayer::run()
// Run output transform
CLScheduler::get().enqueue(_output_transform);
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
+
_memory_group.release();
}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
index c2b7e02284..b1c8665216 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
@@ -92,8 +92,9 @@ void GCConvolutionLayerReshapeWeights::run()
}
GCConvolutionLayer::GCConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _input_im2col_reshaped(),
- _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false)
+ : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _activationlayer_function(),
+ _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false),
+ _are_weights_reshaped(false), _is_activationlayer_enabled(false)
{
}
@@ -103,7 +104,7 @@ void GCConvolutionLayer::configure_mm(const IGCTensor *input, const IGCTensor *w
}
void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
@@ -256,6 +257,14 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
{
_weights_reshaped.allocator()->allocate();
}
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
void GCConvolutionLayer::run()
@@ -290,4 +299,11 @@ void GCConvolutionLayer::run()
GCScheduler::get().dispatch(_output_col2im_kernel, false);
_memory_group.release();
+
+ GCScheduler::get().memory_barrier();
+ // Run Activation Layer
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
index a2607d4c2d..c0cf09836f 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
@@ -39,26 +39,27 @@ GCDirectConvolutionLayer::GCDirectConvolutionLayer()
{
}
-void GCDirectConvolutionLayer::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
+void GCDirectConvolutionLayer::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info)
{
int kernel_size = weights->info()->dimension(0);
if(kernel_size == 1)
{
auto k = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer1x1Kernel>();
- k->configure(input, weights, biases, output, conv_info);
+ k->configure(input, weights, biases, output, conv_info, act_info);
_kernel = std::move(k);
}
else if(kernel_size == 3)
{
auto k = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer3x3Kernel>();
- k->configure(input, weights, biases, output, conv_info);
+ k->configure(input, weights, biases, output, conv_info, act_info);
_kernel = std::move(k);
}
else if(kernel_size == 5)
{
auto k = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer5x5Kernel>();
- k->configure(input, weights, biases, output, conv_info);
+ k->configure(input, weights, biases, output, conv_info, act_info);
_kernel = std::move(k);
}
else
@@ -79,4 +80,6 @@ void GCDirectConvolutionLayer::run()
GCScheduler::get().dispatch(_border_handler, false);
GCScheduler::get().memory_barrier();
GCScheduler::get().dispatch(*_kernel);
+ GCScheduler::get().memory_barrier();
+ GCScheduler::get().dispatch(_shift_handler);
}
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index e659495b7c..badeb07405 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -41,33 +41,33 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma
}
void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation));
+ ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info));
switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info,
- weights_info, dilation))
+ weights_info, dilation, act_info))
{
case ConvolutionMethod::WINOGRAD:
{
auto f = arm_compute::support::cpp14::make_unique<NEWinogradLayer>(_memory_manager);
- f->configure(input, weights, biases, output, conv_info);
+ f->configure(input, weights, biases, output, conv_info, act_info);
_function = std::move(f);
break;
}
case ConvolutionMethod::GEMM:
{
auto f = arm_compute::support::cpp14::make_unique<NEGEMMConvolutionLayer>(_memory_manager);
- f->configure(input, weights, biases, output, conv_info, weights_info, dilation);
+ f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info);
_function = std::move(f);
break;
}
case ConvolutionMethod::DIRECT:
{
auto f = arm_compute::support::cpp14::make_unique<NEDirectConvolutionLayer>(_memory_manager);
- f->configure(input, weights, biases, output, conv_info);
+ f->configure(input, weights, biases, output, conv_info, act_info);
_function = std::move(f);
break;
}
@@ -78,9 +78,9 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const
}
Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
- switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, dilation))
+ switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, dilation, act_info))
{
case ConvolutionMethod::WINOGRAD:
//Validate Winograd
@@ -88,11 +88,11 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
break;
case ConvolutionMethod::GEMM:
//Validate Gemm-based Convolution
- NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation);
+ NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info);
break;
case ConvolutionMethod::DIRECT:
//Validate Direct Convolution
- NEDirectConvolutionLayer::validate(input, weights, biases, output, conv_info);
+ NEDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info);
default:
ARM_COMPUTE_ERROR("Not supported.");
break;
@@ -102,10 +102,12 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
}
ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_UNUSED(weights_info);
+ ARM_COMPUTE_UNUSED(act_info);
+
if((input->data_type() == DataType::F32) && (weights->dimension(0) == 3) && (weights->dimension(1) == 3) && (weights->num_dimensions() <= 4) && (conv_info.stride().first == 1)
&& (conv_info.stride().second == 1) && (biases != nullptr) && (dilation == Size2D(1U, 1U)))
{
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index c26c99a0f8..00776d7cf6 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,11 +34,12 @@
using namespace arm_compute;
NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _accumulator(), _has_bias(false), _is_fixed_point(false)
+ : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false), _is_fixed_point(false),
+ _is_activationlayer_enabled(false)
{
}
-void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info)
+void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
// Free accumulator
if(_accumulator.buffer() != nullptr)
@@ -73,9 +74,17 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights,
// Add zero padding XY
_input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
-Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info)
+Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
@@ -101,6 +110,11 @@ Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso
// Validate bias kernel
ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, bias, output));
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
+ }
+
return Status{};
}
@@ -115,5 +129,10 @@ void NEDirectConvolutionLayer::run()
{
NEScheduler::get().schedule(&_output_stage_kernel, Window::DimY);
}
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
_memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index cdbd32373a..c339947633 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -165,10 +165,11 @@ TensorShape get_reshaped_weights_shape_conv(const ITensorInfo *weights, bool app
}
}
-Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, DataType &dt,
+Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ const ActivationLayerInfo &act_info, DataType &dt,
bool &append_bias,
bool &are_weights_reshaped, unsigned int &kernel_width, unsigned int &kernel_height,
- bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized,
+ bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized, bool &is_activationlayer_enabled,
unsigned int &mat_weights_cols, unsigned int &mat_weights_rows,
unsigned int &conv_w, unsigned int &conv_h, const Size2D &dilation)
{
@@ -210,6 +211,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
// Check if it's a "fully connected" convolution
is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1));
is_interleaved = (!is_fully_connected_convolution && !is_quantized);
+ is_activationlayer_enabled = act_info.enabled();
return Status{};
}
@@ -217,8 +219,8 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager)
: _asm_glue(), _memory_group(memory_manager), _input_im2col_kernel(), _input_interleave_kernel(), _reshape_weights(), _mm_kernel(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(),
- _output_col2im_kernel(), _original_weights(nullptr), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), _tmp_output(), _workspace(), _append_bias(false),
- _is_fully_connected_convolution(false), _are_weights_reshaped(false), _is_quantized(false), _is_interleaved(false)
+ _output_col2im_kernel(), _activationlayer_function(), _original_weights(nullptr), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), _tmp_output(),
+ _workspace(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false), _is_quantized(false), _is_interleaved(false), _is_activationlayer_enabled(false)
{
}
@@ -247,7 +249,7 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
}
void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
@@ -260,9 +262,10 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
unsigned int conv_w = 0;
unsigned int conv_h = 0;
- Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), conv_info, weights_info, dt, _append_bias, _are_weights_reshaped,
+ Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), conv_info, weights_info, act_info, dt, _append_bias,
+ _are_weights_reshaped,
kernel_width, kernel_height,
- _is_fully_connected_convolution, _is_interleaved, _is_quantized,
+ _is_fully_connected_convolution, _is_interleaved, _is_quantized, _is_activationlayer_enabled,
mat_weights_cols, mat_weights_rows, conv_w, conv_h, dilation);
ARM_COMPUTE_ERROR_THROW_ON(status);
@@ -420,10 +423,16 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
{
_weights_reshaped.allocator()->allocate();
}
+
+ //Configure Activation Layer
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(output);
@@ -433,6 +442,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
bool is_fully_connected_convolution{};
bool is_interleaved{};
bool is_quantized{};
+ bool is_activationlayer_enabled{};
unsigned int kernel_width = 0;
unsigned int kernel_height = 0;
unsigned int mat_weights_cols = 0;
@@ -440,8 +450,8 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
unsigned int conv_w = 0;
unsigned int conv_h = 0;
- Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height,
- is_fully_connected_convolution, is_interleaved, is_quantized, mat_weights_cols, mat_weights_rows,
+ Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, act_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height,
+ is_fully_connected_convolution, is_interleaved, is_quantized, is_activationlayer_enabled, mat_weights_cols, mat_weights_rows,
conv_w, conv_h, dilation);
const Size2D kernel_weights = Size2D(kernel_width, kernel_height);
@@ -536,6 +546,15 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(&im2_col_info, weights, &gemm_output_info, 1.f, is_interleaved, GEMMReshapeInfo()));
}
+ ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h)));
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != conv_w) || (output->dimension(1) != conv_h), "Output shape does not match the expected one");
+
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
+ }
+
return Status{};
}
@@ -591,6 +610,11 @@ void NEGEMMConvolutionLayer::run()
// Reshape output matrix
NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY);
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
+
_memory_group.release();
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEWinogradLayer.cpp b/src/runtime/NEON/functions/NEWinogradLayer.cpp
index 0a344f0cae..f82845c7ad 100644
--- a/src/runtime/NEON/functions/NEWinogradLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradLayer.cpp
@@ -75,13 +75,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
} //namespace
NEWinogradLayer::NEWinogradLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _batched_gemm_kernel(nullptr), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr), _permute_input(),
- _permute_weights(), _permute_output(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(), _weights_hwio(), _input(), _weights(), _output(),
- _reshaped_kernel(false)
+ : _memory_group(std::move(memory_manager)), _batched_gemm_kernel(nullptr), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr),
+ _activationlayer_function(), _permute_input(), _permute_weights(), _permute_output(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(), _weights_hwio(),
+ _input(), _weights(), _output(), _reshaped_kernel(false), _is_activationlayer_enabled(false)
{
} /* arm_compute */
-void NEWinogradLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+void NEWinogradLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, biases, output);
ARM_COMPUTE_UNUSED(conv_info);
@@ -217,6 +217,13 @@ void NEWinogradLayer::configure(const ITensor *input, const ITensor *weights, co
_transform_weights_kernel = std::move(transform_weights_kernel);
_transform_output_kernel = std::move(transform_output_kernel);
_batched_gemm_kernel = std::move(batched_gemm_kernel);
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
void NEWinogradLayer::run()
@@ -242,6 +249,12 @@ void NEWinogradLayer::run()
// Reorder the convolved output to ACL's ordering NCHW
_permute_output.run();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
+
_memory_group.release();
}