diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/kernels/CLActivationLayerKernel.cpp | 21 | ||||
-rw-r--r-- | src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp | 17 | ||||
-rw-r--r-- | src/graph/Graph.cpp | 13 | ||||
-rw-r--r-- | src/graph/INode.cpp | 10 | ||||
-rw-r--r-- | src/graph/SubGraph.cpp | 6 | ||||
-rw-r--r-- | src/graph/nodes/ActivationLayer.cpp | 3 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLActivationLayer.cpp | 2 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEActivationLayer.cpp | 2 |
8 files changed, 51 insertions, 23 deletions
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 8d4c0b82d2..a78b3e1b93 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -96,7 +96,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen } // namespace CLActivationLayerKernel::CLActivationLayerKernel() - : _input(nullptr), _output(nullptr) + : _input(nullptr), _output(nullptr), _run_in_place(false) { } @@ -104,6 +104,8 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act { ARM_COMPUTE_ERROR_ON_NULLPTR(input); + _run_in_place = (output == nullptr) || (output == input); + if(output != nullptr) { // Output auto inizialitation if not yet initialized @@ -147,12 +149,15 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_int))); build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); + const int o1 = input->info()->quantization_info().offset; + // Quantized value of 0 corresponds to the offset o1 + build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1))); + // Set scale and offset of the input and output if they have different quantization info if(is_data_type_quantized_asymmetric(dt) && output != nullptr) { const float s1 = input->info()->quantization_info().scale; const float s2 = output->info()->quantization_info().scale; - const int o1 = input->info()->quantization_info().offset; const int o2 = output->info()->quantization_info().offset; if(o1 != o2 || s1 != s2) @@ -162,9 +167,6 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1))); build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2))); } - - // Quantized value of 0 corresponds to the offset o1 - build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1))); } } else @@ -173,7 +175,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.emplace(("-DB_VAL=" + float_to_string_with_full_precision(b_const))); } - build_opts.emplace(output == nullptr ? "-DIN_PLACE" : ""); + build_opts.emplace((_run_in_place) ? "-DIN_PLACE" : ""); if(is_data_type_fixed_point(dt)) { build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(fixed_point_position))); @@ -188,7 +190,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act _output = output; // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (output == nullptr) ? nullptr : output->info()); + auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICLKernel::configure(win_config.second); @@ -203,8 +205,9 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) { + const bool run_in_place = (output == nullptr) || (output == input); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output == nullptr) ? nullptr : output->clone().get()).first); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (run_in_place) ? nullptr : output->clone().get()).first); return Status{}; } @@ -221,7 +224,7 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue) { unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); - if(_output != nullptr) + if(!_run_in_place) { add_3D_tensor_argument(idx, _output, slice); } diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 95487a23db..87fc1d097c 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -101,7 +101,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen } // namespace CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel() - : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0) + : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0), _run_in_place(false) { } @@ -118,6 +118,8 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out _gamma = gamma; _epsilon = epsilon; + _run_in_place = (output == nullptr) || (output == input); + if(output != nullptr) { ARM_COMPUTE_ERROR_ON_NULLPTR(input->info(), output->info()); @@ -137,19 +139,19 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out build_opts.add_option_if(act_info.enabled(), "-D" + string_from_activation_func(act_info.activation())); build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); - build_opts.add_option_if(output == nullptr, "-DIN_PLACE"); + build_opts.add_option_if(_run_in_place, "-DIN_PLACE"); build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); // Create kernel _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts.options())); // Set kernel static arguments - unsigned int include_output = (output != nullptr) ? 1 : 0; + unsigned int include_output = (!_run_in_place) ? 1 : 0; unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor() + 4 * num_arguments_per_1D_tensor(); // Skip the input and output parameters _kernel.setArg<cl_float>(idx++, _epsilon); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (output == nullptr) ? nullptr : output->info()); + auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICLKernel::configure(win_config.second); @@ -168,8 +170,9 @@ Status CLBatchNormalizationLayerKernel::validate(const ITensorInfo *input, const const ITensorInfo *beta, const ITensorInfo *gamma, float epsilon, ActivationLayerInfo act_info) { + const bool run_in_place = (output == nullptr) || (output == input); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, var, beta, gamma, epsilon, act_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output == nullptr) ? nullptr : output->clone().get()).first); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (run_in_place) ? nullptr : output->clone().get()).first); return Status{}; } @@ -184,7 +187,7 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue Window vector_slice = window.first_slice_window_1D(); vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0)); - unsigned int include_output = (_output != nullptr) ? 1 : 0; + unsigned int include_output = (!_run_in_place) ? 1 : 0; unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor(); add_1D_tensor_argument(idx, _mean, vector_slice); add_1D_tensor_argument(idx, _var, vector_slice); @@ -195,7 +198,7 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue { idx = 0; add_3D_tensor_argument(idx, _input, slice); - if(_output != nullptr) + if(!_run_in_place) { add_3D_tensor_argument(idx, _output, slice); } diff --git a/src/graph/Graph.cpp b/src/graph/Graph.cpp index 7af313acbb..98d95904dc 100644 --- a/src/graph/Graph.cpp +++ b/src/graph/Graph.cpp @@ -131,6 +131,11 @@ void Graph::Private::configure(GraphHints _next_hints) _previous_hints = _current_hints; // For the first node just assume the previous node was of the same type as this one } + if(_current_node->supports_in_place()) + { + _current_output = _current_input; + } + //Automatic output configuration ? if(_current_output == nullptr) { @@ -152,8 +157,12 @@ void Graph::Private::configure(GraphHints _next_hints) _ctx.hints() = _current_hints; std::unique_ptr<arm_compute::IFunction> func = _current_node->instantiate_node(_ctx, _current_input, _current_output); - // Allocate current input - _current_input->allocate(); + // If the operation is done in-place, do not allocate or it will prevent following layers from performing the configuration + if(!_current_node->supports_in_place()) + { + // Allocate current input + _current_input->allocate(); + } // Map input if needed if(_current_input->target() == TargetHint::OPENCL) diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp index 582f936351..c753f66b43 100644 --- a/src/graph/INode.cpp +++ b/src/graph/INode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -39,6 +39,14 @@ TargetHint INode::override_target_hint(TargetHint target_hint) const ARM_COMPUTE_ERROR_ON(target_hint == TargetHint::OPENCL && !opencl_is_available()); return target_hint; } +bool INode::supports_in_place() const +{ + return _supports_in_place; +} +void INode::set_supports_in_place(bool value) +{ + _supports_in_place = value; +} GraphHints INode::node_override_hints(GraphHints hints) const { TargetHint target_hint = hints.target_hint(); diff --git a/src/graph/SubGraph.cpp b/src/graph/SubGraph.cpp index f62b2617c5..b1cbb9cc95 100644 --- a/src/graph/SubGraph.cpp +++ b/src/graph/SubGraph.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -67,6 +67,10 @@ std::unique_ptr<Graph> SubGraph::construct(const GraphContext &ctx, std::unique_ } graph->add_tensor_object(std::move(_input)); + // Make sure first and last nodes of the subgraph always do operations out-of-place + _nodes.front()->set_supports_in_place(false); + _nodes.back()->set_supports_in_place(false); + // Construct nodes for(auto &node : _nodes) { diff --git a/src/graph/nodes/ActivationLayer.cpp b/src/graph/nodes/ActivationLayer.cpp index 54f30ef777..546c42a1e5 100644 --- a/src/graph/nodes/ActivationLayer.cpp +++ b/src/graph/nodes/ActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -33,6 +33,7 @@ using namespace arm_compute::graph; ActivationLayer::ActivationLayer(const ActivationLayerInfo activation_info) : _activation_info(activation_info) { + set_supports_in_place(true); } std::unique_ptr<arm_compute::IFunction> ActivationLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp index eaf2ca586c..4aeb3a15e1 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp index cdf1b54659..6af71a3580 100644 --- a/src/runtime/NEON/functions/NEActivationLayer.cpp +++ b/src/runtime/NEON/functions/NEActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * |