aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2017-06-20 09:07:21 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 14:14:20 +0100
commitb30dcc5ab8eb2bd37f0ab742af1ec45113d54296 (patch)
treedfaac2b07af3ffb838d3ed559bacf8f37da3b592
parent9ea47f6967ce1c9e4a8bf4174613efdec78a5f44 (diff)
downloadComputeLibrary-b30dcc5ab8eb2bd37f0ab742af1ec45113d54296.tar.gz
COMPMID-345 - In-place computation for Activation Layer
Change-Id: I25ebfccc3d3e758cc8164e0b33805c0bb303891a Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78226 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r--arm_compute/core/CL/kernels/CLActivationLayerKernel.h34
-rw-r--r--arm_compute/core/NEON/kernels/NEActivationLayerKernel.h17
-rw-r--r--arm_compute/runtime/CL/functions/CLActivationLayer.h11
-rw-r--r--arm_compute/runtime/NEON/functions/NEActivationLayer.h11
-rw-r--r--src/core/CL/cl_kernels/activation_layer.cl18
-rw-r--r--src/core/CL/kernels/CLActivationLayerKernel.cpp67
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp51
-rw-r--r--src/runtime/CL/functions/CLActivationLayer.cpp2
-rw-r--r--src/runtime/NEON/functions/NEActivationLayer.cpp2
9 files changed, 166 insertions, 47 deletions
diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
index 490e70544b..df22574de8 100644
--- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
@@ -24,23 +24,45 @@
#ifndef __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__
#define __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
+#include "arm_compute/core/CL/ICLKernel.h"
namespace arm_compute
{
class ICLTensor;
/** Interface for the activation layer kernel. */
-class CLActivationLayerKernel : public ICLSimple3DKernel
+class CLActivationLayerKernel : public ICLKernel
{
public:
+ /** Default constructor */
+ CLActivationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLActivationLayerKernel(const CLActivationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLActivationLayerKernel &operator=(const CLActivationLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLActivationLayerKernel(CLActivationLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLActivationLayerKernel() = default;
/** Set the input and output tensor.
*
- * @param[in] input Source tensor. Data types supported: F16, F32, U16, S16.
- * @param[out] output Destination tensor. Data type should match the input data type.
- * @param[in] act_info Activation layer information.
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: F16/F32.
+ * @param[out] output Destination tensor. Data type should match the input data type.
+ * @param[in] act_info Activation layer information.
*/
- void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+ void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input;
+ ICLTensor *_output;
};
}
#endif /*__ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
index 97f92d6a1e..539bca587a 100644
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
@@ -25,14 +25,14 @@
#define __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__
#include "arm_compute/core/FixedPoint.h"
-#include "arm_compute/core/NEON/INESimpleKernel.h"
+#include "arm_compute/core/NEON/INEKernel.h"
namespace arm_compute
{
class ITensor;
/** Interface for the activation layer kernel. */
-class NEActivationLayerKernel : public INESimpleKernel
+class NEActivationLayerKernel : public INEKernel
{
public:
/** Constructor */
@@ -47,11 +47,14 @@ public:
NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default;
/** Set the input and output tensor.
*
- * @param[in] input Source tensor. Data types supported: QS8/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] activation_info Activation layer information.
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: QS8/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] activation_info Activation layer information.
*/
- void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
+ void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
// Inherited methods overridden:
void run(const Window &window) override;
@@ -77,6 +80,8 @@ private:
typename std::enable_if<std::is_same<T, qint8_t>::value, void>::type activation(const Window &window);
private:
+ ITensor *_input;
+ ITensor *_output;
ActivationFunctionExecutorPtr _func;
ActivationLayerInfo _act_info;
};
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index 6468c996a2..3028afb25b 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -41,11 +41,14 @@ class CLActivationLayer : public ICLSimpleFunction
public:
/** Set the input and output tensor.
*
- * @param[in] input Source tensor. Data types supported: F16, F32, U16, S16.
- * @param[out] output Destination tensor. Data type should match the input data type.
- * @param[in] act_info Activation layer parameters.
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: F16/F32.
+ * @param[out] output Destination tensor. Data type should match the input data type.
+ * @param[in] act_info Activation layer parameters.
*/
- void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+ void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
};
}
#endif /* __ARM_COMPUTE_CLACTIVATIONLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 35366e16fb..b1a211553d 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -41,11 +41,14 @@ class NEActivationLayer : public INESimpleFunction
public:
/** Set the input and output tensor.
*
- * @param[in] input Source tensor. Data type supported: QS8/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] activation_info Activation layer parameters.
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: QS8/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] activation_info Activation layer parameters.
*/
- void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
+ void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
};
}
#endif /* __ARM_COMPUTE_NEACTIVATIONLAYER_H__ */
diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl
index e3cbb6c801..136191aa22 100644
--- a/src/core/CL/cl_kernels/activation_layer.cl
+++ b/src/core/CL/cl_kernels/activation_layer.cl
@@ -25,6 +25,8 @@
/** This performs an activation function floating point inputs.
*
+ * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
+ *
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
* @note Activation function should be given as a preprocessor argument using -DNAME. e.g. -DTANH
* @note Distinction between floating point and integer is done using -DTYPE_FP and -DTYPE_INT preprocessor argument
@@ -48,12 +50,20 @@
* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
*/
__kernel void activation_layer(
- TENSOR3D_DECLARATION(input),
- TENSOR3D_DECLARATION(output))
+ TENSOR3D_DECLARATION(input)
+#if !defined IN_PLACE
+ ,
+ TENSOR3D_DECLARATION(output)
+#endif
+)
{
// Get pixels pointer
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+ Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+#if defined IN_PLACE
+ Tensor3D output = input;
+#else
Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+#endif
// Load data
VEC_DATA_TYPE(DATA_TYPE, 16)
@@ -63,7 +73,7 @@ __kernel void activation_layer(
#if defined LOGISTIC
data = 1 / (1 + exp(-data));
#elif defined TANH
- data = (VEC_DATA_TYPE(DATA_TYPE, 16))A * tanh((VEC_DATA_TYPE(DATA_TYPE, 16))B * data);
+ data = (VEC_DATA_TYPE(DATA_TYPE, 16))A * tanh((VEC_DATA_TYPE(DATA_TYPE, 16))B * data);
#elif defined RELU
data = max(0, data);
#elif defined BRELU
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 83bbe6a3be..6439426e83 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -35,17 +35,24 @@
using namespace arm_compute;
-void CLActivationLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
+CLActivationLayerKernel::CLActivationLayerKernel()
+ : _input(nullptr), _output(nullptr)
+{
+}
+
+void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+ if(output != nullptr)
+ {
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+ }
// Set build options
std::set<std::string> build_opts;
@@ -54,11 +61,55 @@ void CLActivationLayerKernel::configure(const ICLTensor *input, ICLTensor *outpu
build_opts.insert(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
build_opts.insert(("-DA=" + val_to_string(act_info.a())));
build_opts.insert(("-DB=" + val_to_string(act_info.b())));
+ build_opts.insert(output == nullptr ? "-DIN_PLACE" : "");
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer", build_opts));
// Make sure _kernel is initialized before calling the parent's configure
constexpr unsigned int num_elems_processed_per_iteration = 16;
- ICLSimple3DKernel::configure(input, output, num_elems_processed_per_iteration);
+
+ _input = input;
+ _output = output;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+
+ if(output != nullptr)
+ {
+ AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+
+ update_window_and_padding(win,
+ AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
+ output_access);
+
+ output_access.set_valid_region(win, input->info()->valid_region());
+ }
+ else
+ {
+ update_window_and_padding(win,
+ AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
+ }
+
+ ICLKernel::configure(win);
+}
+
+void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ Window slice = window.first_slice_window_3D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, slice);
+ if(_output != nullptr)
+ {
+ add_3D_tensor_argument(idx, _output, slice);
+ }
+ enqueue(queue, *this, slice);
+ }
+ while(window.slide_window_slice_3D(slice));
}
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index a878078007..1bd0353b93 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -41,21 +41,29 @@
using namespace arm_compute;
NEActivationLayerKernel::NEActivationLayerKernel()
- : _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
+ : _input(nullptr), _output(nullptr), _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
{
}
-void NEActivationLayerKernel::configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
+void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QS8);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+ _input = input;
+ _act_info = activation_info;
+ _output = input;
+
+ if(output != nullptr)
+ {
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+
+ _output = output;
+ }
// Activation functions : FP32
static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
@@ -85,9 +93,6 @@ void NEActivationLayerKernel::configure(const ITensor *input, ITensor *output, A
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> },
};
- _input = input;
- _output = output;
- _act_info = activation_info;
switch(input->info()->data_type())
{
case DataType::F32:
@@ -102,7 +107,27 @@ void NEActivationLayerKernel::configure(const ITensor *input, ITensor *output, A
constexpr unsigned int num_elems_processed_per_iteration = 16;
- INESimpleKernel::configure(_input, _output, num_elems_processed_per_iteration);
+ // Configure kernel window
+ Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+
+ if(output != nullptr)
+ {
+ AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+
+ update_window_and_padding(win,
+ AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
+ output_access);
+
+ output_access.set_valid_region(win, input->info()->valid_region());
+ }
+ else
+ {
+ // In-place computation
+ update_window_and_padding(win,
+ AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
+ }
+
+ ICPPKernel::configure(win);
}
template <ActivationLayerInfo::ActivationFunction F, typename T>
@@ -295,7 +320,7 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation
void NEActivationLayerKernel::run(const Window &window)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_func == nullptr);
(this->*_func)(window);
diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp
index 9b5bd8b663..0d0da0ca9a 100644
--- a/src/runtime/CL/functions/CLActivationLayer.cpp
+++ b/src/runtime/CL/functions/CLActivationLayer.cpp
@@ -28,7 +28,7 @@
using namespace arm_compute;
-void CLActivationLayer::configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
+void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
{
auto k = arm_compute::cpp14::make_unique<CLActivationLayerKernel>();
k->configure(input, output, act_info);
diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp
index f5d81d7cd8..447ae6411b 100644
--- a/src/runtime/NEON/functions/NEActivationLayer.cpp
+++ b/src/runtime/NEON/functions/NEActivationLayer.cpp
@@ -28,7 +28,7 @@
using namespace arm_compute;
-void NEActivationLayer::configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
+void NEActivationLayer::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
auto k = arm_compute::cpp14::make_unique<NEActivationLayerKernel>();
k->configure(input, output, activation_info);