aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michele Di Giorgio <michele.digiorgio@arm.com> 2018-02-13 15:24:04 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:47:18 +0000
commit dde9ec96f471127e5b6d8dfaeffce024b6326f1a (patch)
tree 3aa88c0dec625feeb9d17da825b87398cac6cc68
parent e3fba0afa892c66379da1e3d3843f2155a1fb29a (diff)
download ComputeLibrary-dde9ec96f471127e5b6d8dfaeffce024b6326f1a.tar.gz
COMPMID-909: Enabling in-place computation for batchnormalization and activation at graph level
Change-Id: I84d4a212629b21794451ab5fb5c5b187b5e28f98 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120127 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r-- arm_compute/core/CL/kernels/CLActivationLayerKernel.h 1
-rw-r--r-- arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h 1
-rw-r--r-- arm_compute/graph/INode.h 13
-rw-r--r-- arm_compute/graph/NodeContext.h 4
-rw-r--r-- arm_compute/graph/nodes/BatchNormalizationLayer.h 1
-rw-r--r-- arm_compute/runtime/CL/functions/CLActivationLayer.h 4
-rw-r--r-- arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h 2
-rw-r--r-- arm_compute/runtime/NEON/functions/NEActivationLayer.h 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h 2
-rw-r--r-- src/core/CL/kernels/CLActivationLayerKernel.cpp 21
-rw-r--r-- src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp 17
-rw-r--r-- src/graph/Graph.cpp 13
-rw-r--r-- src/graph/INode.cpp 10
-rw-r--r-- src/graph/SubGraph.cpp 6
-rw-r--r-- src/graph/nodes/ActivationLayer.cpp 3
-rw-r--r-- src/runtime/CL/functions/CLActivationLayer.cpp 2
-rw-r--r-- src/runtime/NEON/functions/NEActivationLayer.cpp 2
17 files changed, 74 insertions, 32 deletions
diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
index fbe450c4f2..c6d8f96a87 100644
--- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
@@ -73,6 +73,7 @@ public:
private:
ICLTensor *_input;
ICLTensor *_output;
+ bool _run_in_place;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index fee5dd3bae..e9fd564fbd 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -96,6 +96,7 @@ private:
const ICLTensor *_beta;
const ICLTensor *_gamma;
float _epsilon;
+ bool _run_in_place;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/graph/INode.h b/arm_compute/graph/INode.h
index 56b50b9424..1969423074 100644
--- a/arm_compute/graph/INode.h
+++ b/arm_compute/graph/INode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,6 +58,16 @@ public:
* @return The updated target hint
*/
TargetHint override_target_hint(TargetHint target_hint) const;
+ /** Method to check if the node supports in-place operations.
+ *
+ * @return True if the node supports in-place operations, false otherwise.
+ */
+ virtual bool supports_in_place() const;
+ /** Set the value of the _supports_in_place attribute.
+ *
+ * @param[in] value Boolean value to assign to _supports_in_place.
+ */
+ void set_supports_in_place(bool value);
protected:
/** Interface to be implemented that overrides the hints
@@ -70,6 +80,7 @@ protected:
protected:
TargetHint _target_hint{ TargetHint::DONT_CARE };
+ bool _supports_in_place{ false };
};
} // namespace graph
} // namespace arm_compute
diff --git a/arm_compute/graph/NodeContext.h b/arm_compute/graph/NodeContext.h
index bc90f217a7..17ae49740b 100644
--- a/arm_compute/graph/NodeContext.h
+++ b/arm_compute/graph/NodeContext.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,7 +60,7 @@ public:
* @param[in] input Input to add
*/
void add_input(arm_compute::ITensor *input);
- /** Adds and output to the context
+ /** Adds an output to the context
*
* @param[in] output Output to add
*/
diff --git a/arm_compute/graph/nodes/BatchNormalizationLayer.h b/arm_compute/graph/nodes/BatchNormalizationLayer.h
index 266c3905d8..abbf09a54e 100644
--- a/arm_compute/graph/nodes/BatchNormalizationLayer.h
+++ b/arm_compute/graph/nodes/BatchNormalizationLayer.h
@@ -51,6 +51,7 @@ public:
BatchNormalizationLayer(AccessorType &&mean, AccessorType &&var, AccessorType &&gamma, AccessorType &&beta, float epsilon, ActivationLayerInfo act_info = ActivationLayerInfo())
: _mean(std::move(mean)), _var(std::move(var)), _gamma(std::move(gamma)), _beta(std::move(beta)), _epsilon(epsilon), _act_info(act_info)
{
+ set_supports_in_place(true);
}
// Inherited methods overridden:
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index 5b99abc5fb..8cb41d225a 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ class CLActivationLayer : public ICLSimpleFunction
public:
/** Set the input and output tensor.
*
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
* of the activation function. Data types supported: QS8/QS16/F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index 3d5145a697..39f567d6a3 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -46,7 +46,7 @@ public:
CLBatchNormalizationLayer();
/** Set the input and output tensors.
*
- * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+ * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 007c53a0a8..31714216fb 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ class NEActivationLayer : public INESimpleFunction
public:
/** Set the input and output tensor.
*
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
* of the activation function. Data types supported: QS8/QS16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
index 242144c987..85c62663ab 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -46,7 +46,7 @@ public:
NEBatchNormalizationLayer();
/** Set the input and output tensors.
*
- * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+ * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 8d4c0b82d2..a78b3e1b93 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -96,7 +96,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
CLActivationLayerKernel::CLActivationLayerKernel()
- : _input(nullptr), _output(nullptr)
+ : _input(nullptr), _output(nullptr), _run_in_place(false)
{
}
@@ -104,6 +104,8 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+ _run_in_place = (output == nullptr) || (output == input);
+
if(output != nullptr)
{
// Output auto initialization if not yet initialized
@@ -147,12 +149,15 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_int)));
build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
+ const int o1 = input->info()->quantization_info().offset;
+ // Quantized value of 0 corresponds to the offset o1
+ build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1)));
+
// Set scale and offset of the input and output if they have different quantization info
if(is_data_type_quantized_asymmetric(dt) && output != nullptr)
{
const float s1 = input->info()->quantization_info().scale;
const float s2 = output->info()->quantization_info().scale;
- const int o1 = input->info()->quantization_info().offset;
const int o2 = output->info()->quantization_info().offset;
if(o1 != o2 || s1 != s2)
@@ -162,9 +167,6 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1)));
build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2)));
}
-
- // Quantized value of 0 corresponds to the offset o1
- build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1)));
}
}
else
@@ -173,7 +175,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.emplace(("-DB_VAL=" + float_to_string_with_full_precision(b_const)));
}
- build_opts.emplace(output == nullptr ? "-DIN_PLACE" : "");
+ build_opts.emplace((_run_in_place) ? "-DIN_PLACE" : "");
if(is_data_type_fixed_point(dt))
{
build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(fixed_point_position)));
@@ -188,7 +190,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
_output = output;
// Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), (output == nullptr) ? nullptr : output->info());
+ auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
ICLKernel::configure(win_config.second);
@@ -203,8 +205,9 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
+ const bool run_in_place = (output == nullptr) || (output == input);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output == nullptr) ? nullptr : output->clone().get()).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (run_in_place) ? nullptr : output->clone().get()).first);
return Status{};
}
@@ -221,7 +224,7 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
{
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
- if(_output != nullptr)
+ if(!_run_in_place)
{
add_3D_tensor_argument(idx, _output, slice);
}
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 95487a23db..87fc1d097c 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -101,7 +101,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel()
- : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0)
+ : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0), _run_in_place(false)
{
}
@@ -118,6 +118,8 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out
_gamma = gamma;
_epsilon = epsilon;
+ _run_in_place = (output == nullptr) || (output == input);
+
if(output != nullptr)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input->info(), output->info());
@@ -137,19 +139,19 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out
build_opts.add_option_if(act_info.enabled(), "-D" + string_from_activation_func(act_info.activation()));
build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
- build_opts.add_option_if(output == nullptr, "-DIN_PLACE");
+ build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts.options()));
// Set kernel static arguments
- unsigned int include_output = (output != nullptr) ? 1 : 0;
+ unsigned int include_output = (!_run_in_place) ? 1 : 0;
unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor() + 4 * num_arguments_per_1D_tensor(); // Skip the input and output parameters
_kernel.setArg<cl_float>(idx++, _epsilon);
// Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), (output == nullptr) ? nullptr : output->info());
+ auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
ICLKernel::configure(win_config.second);
@@ -168,8 +170,9 @@ Status CLBatchNormalizationLayerKernel::validate(const ITensorInfo *input, const
const ITensorInfo *beta, const ITensorInfo *gamma,
float epsilon, ActivationLayerInfo act_info)
{
+ const bool run_in_place = (output == nullptr) || (output == input);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, var, beta, gamma, epsilon, act_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output == nullptr) ? nullptr : output->clone().get()).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (run_in_place) ? nullptr : output->clone().get()).first);
return Status{};
}
@@ -184,7 +187,7 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue
Window vector_slice = window.first_slice_window_1D();
vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0));
- unsigned int include_output = (_output != nullptr) ? 1 : 0;
+ unsigned int include_output = (!_run_in_place) ? 1 : 0;
unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor();
add_1D_tensor_argument(idx, _mean, vector_slice);
add_1D_tensor_argument(idx, _var, vector_slice);
@@ -195,7 +198,7 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue
{
idx = 0;
add_3D_tensor_argument(idx, _input, slice);
- if(_output != nullptr)
+ if(!_run_in_place)
{
add_3D_tensor_argument(idx, _output, slice);
}
diff --git a/src/graph/Graph.cpp b/src/graph/Graph.cpp
index 7af313acbb..98d95904dc 100644
--- a/src/graph/Graph.cpp
+++ b/src/graph/Graph.cpp
@@ -131,6 +131,11 @@ void Graph::Private::configure(GraphHints _next_hints)
_previous_hints = _current_hints; // For the first node just assume the previous node was of the same type as this one
}
+ if(_current_node->supports_in_place())
+ {
+ _current_output = _current_input;
+ }
+
//Automatic output configuration ?
if(_current_output == nullptr)
{
@@ -152,8 +157,12 @@ void Graph::Private::configure(GraphHints _next_hints)
_ctx.hints() = _current_hints;
std::unique_ptr<arm_compute::IFunction> func = _current_node->instantiate_node(_ctx, _current_input, _current_output);
- // Allocate current input
- _current_input->allocate();
+ // If the operation is done in-place, do not allocate or it will prevent following layers from performing the configuration
+ if(!_current_node->supports_in_place())
+ {
+ // Allocate current input
+ _current_input->allocate();
+ }
// Map input if needed
if(_current_input->target() == TargetHint::OPENCL)
diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp
index 582f936351..c753f66b43 100644
--- a/src/graph/INode.cpp
+++ b/src/graph/INode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@ TargetHint INode::override_target_hint(TargetHint target_hint) const
ARM_COMPUTE_ERROR_ON(target_hint == TargetHint::OPENCL && !opencl_is_available());
return target_hint;
}
+bool INode::supports_in_place() const
+{
+ return _supports_in_place;
+}
+void INode::set_supports_in_place(bool value)
+{
+ _supports_in_place = value;
+}
GraphHints INode::node_override_hints(GraphHints hints) const
{
TargetHint target_hint = hints.target_hint();
diff --git a/src/graph/SubGraph.cpp b/src/graph/SubGraph.cpp
index f62b2617c5..b1cbb9cc95 100644
--- a/src/graph/SubGraph.cpp
+++ b/src/graph/SubGraph.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -67,6 +67,10 @@ std::unique_ptr<Graph> SubGraph::construct(const GraphContext &ctx, std::unique_
}
graph->add_tensor_object(std::move(_input));
+ // Make sure first and last nodes of the subgraph always do operations out-of-place
+ _nodes.front()->set_supports_in_place(false);
+ _nodes.back()->set_supports_in_place(false);
+
// Construct nodes
for(auto &node : _nodes)
{
diff --git a/src/graph/nodes/ActivationLayer.cpp b/src/graph/nodes/ActivationLayer.cpp
index 54f30ef777..546c42a1e5 100644
--- a/src/graph/nodes/ActivationLayer.cpp
+++ b/src/graph/nodes/ActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,6 +33,7 @@ using namespace arm_compute::graph;
ActivationLayer::ActivationLayer(const ActivationLayerInfo activation_info)
: _activation_info(activation_info)
{
+ set_supports_in_place(true);
}
std::unique_ptr<arm_compute::IFunction> ActivationLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp
index eaf2ca586c..4aeb3a15e1 100644
--- a/src/runtime/CL/functions/CLActivationLayer.cpp
+++ b/src/runtime/CL/functions/CLActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp
index cdf1b54659..6af71a3580 100644
--- a/src/runtime/NEON/functions/NEActivationLayer.cpp
+++ b/src/runtime/NEON/functions/NEActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*