aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2020-07-13 21:21:33 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2020-07-14 14:28:46 +0000
commit 4667dddc0ed403c636348294cd7f70261e5540cf (patch)
tree 177b74f377dcbb32cf8a83d407c633df255665a0 /src
parent 2232a201a9f72de483c12a7857c5f08b81cf7396 (diff)
download ComputeLibrary-4667dddc0ed403c636348294cd7f70261e5540cf.tar.gz
COMPMID-3374: Remove memory state from NEConcatenateLayer kernels
* Allow the following kernels to accept backing memory at run-time:
    * NEBatchConcatenateLayerKernel
    * NEDepthConcatenateLayerKernel
    * NEHeightConcatenateLayerKernel
    * NEWidthConcatenateLayerKernel
* Allow the following functions to accept backing memory at run-time:
    * NEConcatenateLayer

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ib0b6714cff7f06a52dc74d294bc3e0d72a1c2419
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3569
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 21
-rw-r--r-- src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 21
-rw-r--r-- src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp | 39
-rw-r--r-- src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 34
-rw-r--r-- src/graph/backends/CL/CLFunctionsFactory.cpp | 1
-rw-r--r-- src/graph/backends/GLES/GCFunctionsFactory.cpp | 3
-rw-r--r-- src/graph/backends/NEON/NEFunctionFactory.cpp | 1
-rw-r--r-- src/runtime/NEON/functions/NEConcatenateLayer.cpp | 131
-rw-r--r-- src/runtime/NEON/functions/NELSTMLayer.cpp | 4
-rw-r--r-- src/runtime/NEON/functions/NEPadLayer.cpp | 2
10 files changed, 156 insertions, 101 deletions
diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp
index 65789160f6..c597afd804 100644
--- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp
@@ -141,21 +141,19 @@ Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, c
} // namespace
NEBatchConcatenateLayerKernel::NEBatchConcatenateLayerKernel()
- : _func(nullptr), _input(nullptr), _output(nullptr), _batch_offset(0)
+ : _func(nullptr), _batch_offset(0)
{
}
-void NEBatchConcatenateLayerKernel::configure(const ITensor *input, unsigned int batch_offset, ITensor *output)
+void NEBatchConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), batch_offset, output->info()));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, batch_offset, output));
_func = nullptr;
- _input = input;
- _output = output;
_batch_offset = batch_offset;
- switch(input->info()->data_type())
+ switch(input->data_type())
{
case DataType::S8:
case DataType::U8:
@@ -178,10 +176,10 @@ void NEBatchConcatenateLayerKernel::configure(const ITensor *input, unsigned int
}
// Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
+ Window win = calculate_max_window(*output, Steps());
Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
INEKernel::configure(win);
}
@@ -193,13 +191,14 @@ Status NEBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i
return Status{};
}
-void NEBatchConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info)
+void NEBatchConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_func == nullptr);
- (*_func)(_input, _output, _batch_offset, window);
+ (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _batch_offset, window);
}
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
index a95d711f43..49e10de94e 100644
--- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
@@ -142,21 +142,19 @@ Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, c
} // namespace
NEDepthConcatenateLayerKernel::NEDepthConcatenateLayerKernel()
- : _func(nullptr), _input(nullptr), _output(nullptr), _depth_offset(0)
+ : _func(nullptr), _depth_offset(0)
{
}
-void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int depth_offset, ITensor *output)
+void NEDepthConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), depth_offset, output->info()));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, depth_offset, output));
_func = nullptr;
- _input = input;
- _output = output;
_depth_offset = depth_offset;
- switch(input->info()->data_type())
+ switch(input->data_type())
{
case DataType::QASYMM8:
_func = &depth_concat<uint8_t>;
@@ -175,11 +173,11 @@ void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int
}
// Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
+ Window win = calculate_max_window(*output, Steps());
Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
+ coord.set_num_dimensions(output->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
INEKernel::configure(win);
}
@@ -191,13 +189,14 @@ Status NEDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i
return Status{};
}
-void NEDepthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info)
+void NEDepthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_func == nullptr);
- (*_func)(_input, _output, _depth_offset, window);
+ (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _depth_offset, window);
}
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
index 0adf996cca..d4043e02b7 100644
--- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
@@ -58,24 +58,23 @@ Status validate_arguments(const ITensorInfo *input, unsigned int height_offset,
} // namespace
NEHeightConcatenateLayerKernel::NEHeightConcatenateLayerKernel()
- : _input(nullptr), _output(nullptr), _height_offset(0)
+ : _height_offset(0)
{
}
-void NEHeightConcatenateLayerKernel::configure(const ITensor *input, unsigned int height_offset, ITensor *output)
+void NEHeightConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output)
{
+ ARM_COMPUTE_UNUSED(input);
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), height_offset, output->info()));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output));
- _input = input;
- _output = output;
_height_offset = height_offset;
// Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
+ Window win = calculate_max_window(*output, Steps());
Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
INEKernel::configure(win);
}
@@ -85,30 +84,34 @@ Status NEHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsign
return Status{};
}
-void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info)
+void NEHeightConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ const auto src = inputs.at(TensorType::ACL_SRC);
+ auto dst = outputs.at(TensorType::ACL_DST);
+
// Offset output pointer to the correct position
- uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY];
+ uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _height_offset * dst->info()->strides_in_bytes()[Window::DimY];
const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end()) * static_cast<int>(_output->info()->element_size());
- const int window_step_x = 16;
+ const auto window_end_x = static_cast<int>(window.x().end()) * static_cast<int>(dst->info()->element_size());
+ const int window_step_x = 16;
Window win{ window };
win.set(Window::DimX, Window::Dimension(0, 1, 1));
- win.set(Window::DimY, Window::Dimension(0, _input->info()->tensor_shape().y(), 1));
+ win.set(Window::DimY, Window::Dimension(0, src->info()->tensor_shape().y(), 1));
// Create iterators
- Iterator input(_input, win);
- Iterator output(_output, win);
+ Iterator input(src, win);
+ Iterator output(dst, win);
- const DataType dt = _input->info()->data_type();
- const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
+ const DataType dt = src->info()->data_type();
+ const UniformQuantizationInfo &input_qinfo = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo &output_qinfo = dst->info()->quantization_info().uniform();
if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
index f5bdfae5d6..1b32e3614e 100644
--- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
@@ -58,24 +58,22 @@ Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, c
} // namespace
NEWidthConcatenateLayerKernel::NEWidthConcatenateLayerKernel()
- : _input(nullptr), _output(nullptr), _width_offset(0)
+ : _width_offset(0)
{
}
-void NEWidthConcatenateLayerKernel::configure(const ITensor *input, unsigned int width_offset, ITensor *output)
+void NEWidthConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), width_offset, output->info()));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, width_offset, output));
- _input = input;
- _output = output;
_width_offset = width_offset;
// Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
+ Window win = calculate_max_window(*input, Steps());
Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
INEKernel::configure(win);
}
@@ -86,28 +84,32 @@ Status NEWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigne
return Status{};
}
-void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info)
+void NEWidthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ const auto src = inputs.at(TensorType::ACL_SRC);
+ auto dst = outputs.at(TensorType::ACL_DST);
+
// Offset output pointer to the correct position
- uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0];
+ uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _width_offset * dst->info()->strides_in_bytes()[0];
const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end()) * static_cast<int>(_output->info()->element_size());
+ const auto window_end_x = static_cast<int>(window.x().end()) * static_cast<int>(dst->info()->element_size());
constexpr int window_step_x = 16;
Window win{ window };
win.set(Window::DimX, Window::Dimension(0, 1, 1));
// Create iterators
- Iterator input(_input, win);
- Iterator output(_output, win);
- const DataType dt = _input->info()->data_type();
- const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
+ Iterator input(src, win);
+ Iterator output(dst, win);
+ const DataType dt = src->info()->data_type();
+ const UniformQuantizationInfo &input_qinfo = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo &output_qinfo = dst->info()->quantization_info().uniform();
if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 917741a2b7..ceff6e5cef 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -42,6 +42,7 @@ namespace backends
struct CLTargetInfo
{
using TensorType = arm_compute::ICLTensor;
+ using SrcTensorType = TensorType;
using TensorConcreteType = CLTensor;
static Target TargetType;
};
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index a78f51cdbd..8ecb593e11 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -40,7 +40,8 @@ namespace backends
/** Target specific information structure used to pass information to the layer templates */
struct GCTargetInfo
{
- using TensorType = arm_compute::IGCTensor;
+ using TensorType = arm_compute::IGCTensor;
+ using SrcTensorType = TensorType;
static Target TargetType;
};
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 2f313081e0..4fee630192 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -47,6 +47,7 @@ namespace backends
struct NETargetInfo
{
using TensorType = arm_compute::ITensor;
+ using SrcTensorType = const arm_compute::ITensor;
using TensorConcreteType = arm_compute::Tensor;
static Target TargetType;
};
diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
index 9c480a0d50..37cdd15529 100644
--- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp
+++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
@@ -39,58 +39,31 @@
namespace arm_compute
{
-NEConcatenateLayer::NEConcatenateLayer()
- : _concat_kernels(),
- _num_inputs(0),
- _axis(Window::DimX)
-{
-}
-
-void NEConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, ITensor *output, size_t axis)
-{
- configure_internal(std::move(inputs_vector), output, axis);
-}
-
-void NEConcatenateLayer::configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis)
+namespace experimental
{
- configure_internal(std::move(inputs_vector), output, axis);
-}
-
-Status NEConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
-{
- return validate_internal(inputs_vector, output, axis);
-}
-
-Status NEConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
+NEConcatenateLayer::NEConcatenateLayer()
+ : _concat_kernels(), _num_inputs(0), _axis(0)
{
- return validate_internal(inputs_vector, output, axis);
}
-template <typename TensorType, typename>
-void NEConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output, size_t axis)
+void NEConcatenateLayer::configure(const std::vector<const ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis)
{
ARM_COMPUTE_ERROR_ON(output == nullptr);
+
_axis = axis;
_num_inputs = inputs_vector.size();
- std::vector<ITensorInfo *> inputs_vector_info;
- inputs_vector_info.reserve(_num_inputs);
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR(inputs_vector.at(i));
- inputs_vector_info.emplace_back(inputs_vector.at(i)->info());
- }
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis);
+ TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis);
// Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type());
- ARM_COMPUTE_ERROR_THROW_ON(NEConcatenateLayer::validate(inputs_vector_info, output->info(), axis));
+ auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type());
+ ARM_COMPUTE_ERROR_THROW_ON(NEConcatenateLayer::validate(inputs_vector, output, axis));
unsigned int offset = 0;
for(unsigned int i = 0; i < _num_inputs; ++i)
{
- switch(_axis)
+ switch(axis)
{
case Window::DimX:
{
@@ -123,12 +96,11 @@ void NEConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_v
default:
ARM_COMPUTE_ERROR("Axis not supported");
}
- offset += inputs_vector.at(i)->info()->dimension(_axis);
+ offset += inputs_vector.at(i)->dimension(axis);
}
}
-template <typename TensorInfoType, typename>
-Status NEConcatenateLayer::validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis)
+Status NEConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2);
@@ -174,11 +146,88 @@ Status NEConcatenateLayer::validate_internal(const std::vector<TensorInfoType *>
return Status{};
}
+MemoryRequirements NEConcatenateLayer::workspace() const
+{
+ return MemoryRequirements{};
+}
+
+void NEConcatenateLayer::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace)
+{
+ ARM_COMPUTE_UNUSED(workspace);
+
+ if(inputs.empty() || outputs.empty())
+ {
+ ARM_COMPUTE_ERROR("No inputs provided");
+ }
+
+ if(inputs.size() != _num_inputs)
+ {
+ ARM_COMPUTE_ERROR("Configured with different number of inputs");
+ }
+
+ int i = 0;
+ for(auto &k : _concat_kernels)
+ {
+ const InputTensorMap input = { { TensorType::ACL_SRC, inputs.at(ACL_SRC_VEC + i) } };
+ NEScheduler::get().schedule_op(k.get(), Window::DimY, input, outputs);
+ ++i;
+ }
+}
+} // namespace experimental
+
+struct NEConcatenateLayer::Impl
+{
+ std::vector<const ITensor *> srcs{};
+ ITensor *dst{ nullptr };
+ unsigned int num_inputs{ 0 };
+ unsigned int axis{ 0 };
+ std::unique_ptr<experimental::NEConcatenateLayer> op{ nullptr };
+};
+
+NEConcatenateLayer::NEConcatenateLayer()
+ : _impl(support::cpp14::make_unique<Impl>())
+{
+}
+
+NEConcatenateLayer::NEConcatenateLayer(NEConcatenateLayer &&) = default;
+
+NEConcatenateLayer &NEConcatenateLayer::operator=(NEConcatenateLayer &&) = default;
+
+NEConcatenateLayer::~NEConcatenateLayer() = default;
+
+void NEConcatenateLayer::configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis)
+{
+ ARM_COMPUTE_ERROR_ON(output == nullptr);
+
+ _impl->srcs = inputs_vector;
+ _impl->dst = output;
+ _impl->axis = axis;
+ _impl->num_inputs = inputs_vector.size();
+ _impl->op = arm_compute::support::cpp14::make_unique<experimental::NEConcatenateLayer>();
+
+ std::vector<const ITensorInfo *> inputs_vector_info;
+ for(unsigned int i = 0; i < inputs_vector.size(); ++i)
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR(inputs_vector.at(i));
+ inputs_vector_info.emplace_back(inputs_vector.at(i)->info());
+ }
+ _impl->op->configure(inputs_vector_info, _impl->dst->info(), axis);
+}
+
+Status NEConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
+{
+ return experimental::NEConcatenateLayer::validate(inputs_vector, output, axis);
+}
+
void NEConcatenateLayer::run()
{
- for(auto &kernel : _concat_kernels)
+ InputTensorMap srcs;
+ for(unsigned i = 0; i < _impl->num_inputs; ++i)
{
- NEScheduler::get().schedule(kernel.get(), Window::DimY);
+ srcs.insert(std::make_pair(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i)));
}
+ const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } };
+
+ _impl->op->run(srcs, dst, {});
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp
index f89b3e999c..dca274acd2 100644
--- a/src/runtime/NEON/functions/NELSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayer.cpp
@@ -347,7 +347,7 @@ void NELSTMLayer::configure(const ITensor *input,
_copy_output.configure(output_state_out, output);
// Vector for holding the tensors to store in scratch buffer
- std::vector<ITensor *> scratch_inputs;
+ std::vector<const ITensor *> scratch_inputs;
if(!lstm_params.has_cifg_opt())
{
scratch_inputs.emplace_back(input_gate_out);
@@ -579,7 +579,7 @@ Status NELSTMLayer::validate(const ITensorInfo *input,
ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output));
// Validate scratch concatenation
- std::vector<ITensorInfo *> inputs_vector_info_raw;
+ std::vector<const ITensorInfo *> inputs_vector_info_raw;
if(!lstm_params.has_cifg_opt())
{
inputs_vector_info_raw.push_back(&input_gate);
diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp
index da9a425d9b..21c349ba95 100644
--- a/src/runtime/NEON/functions/NEPadLayer.cpp
+++ b/src/runtime/NEON/functions/NEPadLayer.cpp
@@ -117,7 +117,7 @@ void NEPadLayer::configure_reflect_symmetric_mode(ITensor *input, ITensor *outpu
const int32_t end_mask_after = ends_after[i] < 0 ? ~0 : ~(1u << i);
// Reflect the input values for the padding before and after the input.
- std::vector<ITensor *> concat_vector;
+ std::vector<const ITensor *> concat_vector;
if(_padding[i].first > 0)
{
if(i < prev->info()->num_dimensions())