diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-07-13 21:21:33 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-07-14 14:28:46 +0000 |
commit | 4667dddc0ed403c636348294cd7f70261e5540cf (patch) | |
tree | 177b74f377dcbb32cf8a83d407c633df255665a0 /src/core/NEON | |
parent | 2232a201a9f72de483c12a7857c5f08b81cf7396 (diff) | |
download | ComputeLibrary-4667dddc0ed403c636348294cd7f70261e5540cf.tar.gz |
COMPMID-3374: Remove memory state from NEConcatenateLayer kernels
* Allow the following kernels to accept backing memory at run-time:
    * NEBatchConcatenateLayerKernel
    * NEDepthConcatenateLayerKernel
    * NEHeightConcatenateLayerKernel
    * NEWidthConcatenateLayerKernel
* Allow the following functions to accept backing memory at run-time:
    * NEConcatenateLayer
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ib0b6714cff7f06a52dc74d294bc3e0d72a1c2419
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3569
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON')
4 files changed, 59 insertions, 56 deletions
diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index 65789160f6..c597afd804 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -141,21 +141,19 @@ Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, c } // namespace NEBatchConcatenateLayerKernel::NEBatchConcatenateLayerKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _batch_offset(0) + : _func(nullptr), _batch_offset(0) { } -void NEBatchConcatenateLayerKernel::configure(const ITensor *input, unsigned int batch_offset, ITensor *output) +void NEBatchConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), batch_offset, output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, batch_offset, output)); _func = nullptr; - _input = input; - _output = output; _batch_offset = batch_offset; - switch(input->info()->data_type()) + switch(input->data_type()) { case DataType::S8: case DataType::U8: @@ -178,10 +176,10 @@ void NEBatchConcatenateLayerKernel::configure(const ITensor *input, unsigned int } // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); + Window win = calculate_max_window(*output, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + coord.set_num_dimensions(output->num_dimensions()); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); INEKernel::configure(win); } @@ -193,13 +191,14 @@ Status NEBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void NEBatchConcatenateLayerKernel::run(const Window &window, const 
ThreadInfo &info) +void NEBatchConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(_input, _output, _batch_offset, window); + (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _batch_offset, window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index a95d711f43..49e10de94e 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -142,21 +142,19 @@ Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, c } // namespace NEDepthConcatenateLayerKernel::NEDepthConcatenateLayerKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _depth_offset(0) + : _func(nullptr), _depth_offset(0) { } -void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int depth_offset, ITensor *output) +void NEDepthConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), depth_offset, output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, depth_offset, output)); _func = nullptr; - _input = input; - _output = output; _depth_offset = depth_offset; - switch(input->info()->data_type()) + switch(input->data_type()) { case DataType::QASYMM8: _func = &depth_concat<uint8_t>; @@ -175,11 +173,11 @@ void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int } // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); + Window win = 
calculate_max_window(*output, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); + coord.set_num_dimensions(output->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); INEKernel::configure(win); } @@ -191,13 +189,14 @@ Status NEDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void NEDepthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info) +void NEDepthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(_input, _output, _depth_offset, window); + (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _depth_offset, window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index 0adf996cca..d4043e02b7 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -58,24 +58,23 @@ Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, } // namespace NEHeightConcatenateLayerKernel::NEHeightConcatenateLayerKernel() - : _input(nullptr), _output(nullptr), _height_offset(0) + : _height_offset(0) { } -void NEHeightConcatenateLayerKernel::configure(const ITensor *input, unsigned int height_offset, ITensor *output) +void NEHeightConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output) { + ARM_COMPUTE_UNUSED(input); ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - 
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), height_offset, output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output)); - _input = input; - _output = output; _height_offset = height_offset; // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); + Window win = calculate_max_window(*output, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + coord.set_num_dimensions(output->num_dimensions()); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); INEKernel::configure(win); } @@ -85,30 +84,34 @@ Status NEHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsign return Status{}; } -void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info) +void NEHeightConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + const auto src = inputs.at(TensorType::ACL_SRC); + auto dst = outputs.at(TensorType::ACL_DST); + // Offset output pointer to the correct position - uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY]; + uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _height_offset * dst->info()->strides_in_bytes()[Window::DimY]; const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()) * static_cast<int>(_output->info()->element_size()); - const int window_step_x = 16; + const auto window_end_x = static_cast<int>(window.x().end()) * 
static_cast<int>(dst->info()->element_size()); + const int window_step_x = 16; Window win{ window }; win.set(Window::DimX, Window::Dimension(0, 1, 1)); - win.set(Window::DimY, Window::Dimension(0, _input->info()->tensor_shape().y(), 1)); + win.set(Window::DimY, Window::Dimension(0, src->info()->tensor_shape().y(), 1)); // Create iterators - Iterator input(_input, win); - Iterator output(_output, win); + Iterator input(src, win); + Iterator output(dst, win); - const DataType dt = _input->info()->data_type(); - const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); + const DataType dt = src->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = src->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = dst->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(win, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index f5bdfae5d6..1b32e3614e 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -58,24 +58,22 @@ Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, c } // namespace NEWidthConcatenateLayerKernel::NEWidthConcatenateLayerKernel() - : _input(nullptr), _output(nullptr), _width_offset(0) + : _width_offset(0) { } -void NEWidthConcatenateLayerKernel::configure(const ITensor *input, unsigned int width_offset, ITensor *output) +void NEWidthConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), width_offset, output->info())); + 
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, width_offset, output)); - _input = input; - _output = output; _width_offset = width_offset; // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); + Window win = calculate_max_window(*input, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + coord.set_num_dimensions(output->num_dimensions()); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); INEKernel::configure(win); } @@ -86,28 +84,32 @@ Status NEWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigne return Status{}; } -void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info) +void NEWidthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + const auto src = inputs.at(TensorType::ACL_SRC); + auto dst = outputs.at(TensorType::ACL_DST); + // Offset output pointer to the correct position - uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0]; + uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _width_offset * dst->info()->strides_in_bytes()[0]; const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()) * static_cast<int>(_output->info()->element_size()); + const auto window_end_x = static_cast<int>(window.x().end()) * static_cast<int>(dst->info()->element_size()); constexpr int window_step_x = 16; Window win{ window }; win.set(Window::DimX, Window::Dimension(0, 1, 1)); // Create iterators - Iterator 
input(_input, win); - Iterator output(_output, win); - const DataType dt = _input->info()->data_type(); - const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); + Iterator input(src, win); + Iterator output(dst, win); + const DataType dt = src->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = src->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = dst->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(win, [&](const Coordinates &) |