aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp161
1 files changed, 100 insertions, 61 deletions
diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp
index 673eace3c1..da023aeb96 100644
--- a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp
@@ -26,11 +26,12 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/NEON/wrapper/wrapper.h"
+#include "arm_compute/core/Validate.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/NEON/wrapper/wrapper.h"
#include <arm_neon.h>
#include <cstdint>
@@ -41,19 +42,22 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_info, const ITensorInfo *paddings, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *block_info,
+ const ITensorInfo *paddings,
+ const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, paddings, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
ARM_COMPUTE_RETURN_ERROR_ON(block_info->num_dimensions() > 1);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(block_info->tensor_shape(), TensorShape{ 2 });
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(block_info->tensor_shape(), TensorShape{2});
ARM_COMPUTE_RETURN_ERROR_ON(paddings->num_dimensions() > 2);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(paddings->tensor_shape(), TensorShape{ 2, 2 });
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(paddings->tensor_shape(), TensorShape{2, 2});
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
const DataLayout data_layout = input->data_layout();
const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
@@ -64,7 +68,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf
return Status{};
}
-Status validate_arguments_static(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
+Status validate_arguments_static(const ITensorInfo *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
@@ -73,9 +81,10 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape
ARM_COMPUTE_RETURN_ERROR_ON(block_shape_x < 1 || block_shape_y < 1);
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- TensorShape expected_output_shape = misc::shape_calculator::compute_space_to_batch_shape(input, block_shape_x, block_shape_y, padding_left, padding_right);
+ TensorShape expected_output_shape = misc::shape_calculator::compute_space_to_batch_shape(
+ input, block_shape_x, block_shape_y, padding_left, padding_right);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), expected_output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
@@ -86,14 +95,25 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape
} // namespace
NESpaceToBatchLayerKernel::NESpaceToBatchLayerKernel()
- : _input(nullptr), _block_shape(nullptr), _paddings(nullptr), _output(nullptr), _data_layout(DataLayout::UNKNOWN), _padding_left(), _block_shape_x(), _block_shape_y()
+ : _input(nullptr),
+ _block_shape(nullptr),
+ _paddings(nullptr),
+ _output(nullptr),
+ _data_layout(DataLayout::UNKNOWN),
+ _padding_left(),
+ _block_shape_x(),
+ _block_shape_y()
{
}
-void NESpaceToBatchLayerKernel::configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output)
+void NESpaceToBatchLayerKernel::configure(const ITensor *input,
+ const ITensor *block_shape,
+ const ITensor *paddings,
+ ITensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), paddings->info(), output->info()));
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), block_shape->info(), paddings->info(), output->info()));
_input = input;
_block_shape = block_shape;
@@ -106,15 +126,22 @@ void NESpaceToBatchLayerKernel::configure(const ITensor *input, const ITensor *b
ICPPKernel::configure(win);
}
-void NESpaceToBatchLayerKernel::configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
- ITensor *output)
+void NESpaceToBatchLayerKernel::configure(const ITensor *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ ITensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- TensorShape output_shape = misc::shape_calculator::compute_space_to_batch_shape(input->info(), block_shape_x, block_shape_y, padding_left, padding_right);
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info());
+ TensorShape output_shape = misc::shape_calculator::compute_space_to_batch_shape(
+ input->info(), block_shape_x, block_shape_y, padding_left, padding_right);
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(),
+ input->info()->quantization_info());
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, padding_left, padding_right, output->info()));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, padding_left,
+ padding_right, output->info()));
_input = input;
_output = output;
@@ -128,15 +155,23 @@ void NESpaceToBatchLayerKernel::configure(const ITensor *input, const int block_
INEKernel::configure(win);
}
-Status NESpaceToBatchLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output)
+Status NESpaceToBatchLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *block_shape,
+ const ITensorInfo *paddings,
+ const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, block_shape, paddings, output));
return Status{};
}
-Status NESpaceToBatchLayerKernel::validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
+Status NESpaceToBatchLayerKernel::validate(const ITensorInfo *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
const ITensorInfo *output)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_static(input, block_shape_x, block_shape_y, padding_left, padding_right, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_arguments_static(input, block_shape_x, block_shape_y, padding_left, padding_right, output));
return Status{};
}
@@ -146,17 +181,17 @@ void NESpaceToBatchLayerKernel::run(const Window &window, const ThreadInfo &info
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
- if(_block_shape != nullptr)
+ if (_block_shape != nullptr)
{
// Retrieve the block shapes dynamically
_block_shape_x = *(reinterpret_cast<const int *>(_block_shape->ptr_to_element(0)));
_block_shape_y = *(reinterpret_cast<const int *>(_block_shape->ptr_to_element(1)));
}
- if(_paddings != nullptr)
+ if (_paddings != nullptr)
{
- const size_t pad_left_x = *reinterpret_cast<const size_t *>(_paddings->ptr_to_element({ 0, 0 }));
- const size_t pad_left_y = *reinterpret_cast<const size_t *>(_paddings->ptr_to_element({ 1, 0 }));
+ const size_t pad_left_x = *reinterpret_cast<const size_t *>(_paddings->ptr_to_element({0, 0}));
+ const size_t pad_left_y = *reinterpret_cast<const size_t *>(_paddings->ptr_to_element({1, 0}));
_padding_left = Size2D(pad_left_x, pad_left_y);
}
const int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
@@ -173,57 +208,61 @@ void NESpaceToBatchLayerKernel::run(const Window &window, const ThreadInfo &info
int batch_id = 0;
// Main loop for NCHW and NHWC
- if(_data_layout == DataLayout::NCHW)
+ if (_data_layout == DataLayout::NCHW)
{
do
{
Iterator out(_output, slice_out);
- execute_window_loop(slice_out, [&](const Coordinates & id)
- {
- const size_t out_x = id.x();
- const size_t out_y = id.y();
- const size_t z = id.z();
- const size_t pos_x = out_x * _block_shape_x + (batch_id / batch_size) % _block_shape_x;
- const size_t pos_y = out_y * _block_shape_y + (batch_id / batch_size) / _block_shape_x;
- if(pos_y >= _padding_left.y() && pos_y < _padding_left.y() + height && pos_x >= _padding_left.x() && pos_x < _padding_left.x() + width)
+ execute_window_loop(
+ slice_out,
+ [&](const Coordinates &id)
{
- const int w = batch_id % batch_size;
- const int in_x = pos_x - _padding_left.x();
- const int in_y = pos_y - _padding_left.y();
- Coordinates input_coords{ in_x, in_y, z, w };
- memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size);
- }
- },
- out);
+ const size_t out_x = id.x();
+ const size_t out_y = id.y();
+ const size_t z = id.z();
+ const size_t pos_x = out_x * _block_shape_x + (batch_id / batch_size) % _block_shape_x;
+ const size_t pos_y = out_y * _block_shape_y + (batch_id / batch_size) / _block_shape_x;
+ if (pos_y >= _padding_left.y() && pos_y < _padding_left.y() + height &&
+ pos_x >= _padding_left.x() && pos_x < _padding_left.x() + width)
+ {
+ const int w = batch_id % batch_size;
+ const int in_x = pos_x - _padding_left.x();
+ const int in_y = pos_y - _padding_left.y();
+ Coordinates input_coords{in_x, in_y, z, w};
+ memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size);
+ }
+ },
+ out);
++batch_id;
- }
- while(window.slide_window_slice_3D(slice_out));
+ } while (window.slide_window_slice_3D(slice_out));
}
else
{
do
{
Iterator out(_output, slice_out);
- execute_window_loop(slice_out, [&](const Coordinates & id)
- {
- const size_t out_x = id.y();
- const size_t out_y = id.z();
- const size_t z = id.x();
- const size_t pos_x = out_x * _block_shape_x + (batch_id / batch_size) % _block_shape_x;
- const size_t pos_y = out_y * _block_shape_y + (batch_id / batch_size) / _block_shape_x;
- if(pos_y >= _padding_left.y() && pos_y < _padding_left.y() + height && pos_x >= _padding_left.x() && pos_x < _padding_left.x() + width)
+ execute_window_loop(
+ slice_out,
+ [&](const Coordinates &id)
{
- const int w = batch_id % batch_size;
- const int in_x = pos_x - _padding_left.x();
- const int in_y = pos_y - _padding_left.y();
- Coordinates input_coords{ z, in_x, in_y, w };
- memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size);
- }
- },
- out);
+ const size_t out_x = id.y();
+ const size_t out_y = id.z();
+ const size_t z = id.x();
+ const size_t pos_x = out_x * _block_shape_x + (batch_id / batch_size) % _block_shape_x;
+ const size_t pos_y = out_y * _block_shape_y + (batch_id / batch_size) / _block_shape_x;
+ if (pos_y >= _padding_left.y() && pos_y < _padding_left.y() + height &&
+ pos_x >= _padding_left.x() && pos_x < _padding_left.x() + width)
+ {
+ const int w = batch_id % batch_size;
+ const int in_x = pos_x - _padding_left.x();
+ const int in_y = pos_y - _padding_left.y();
+ Coordinates input_coords{z, in_x, in_y, w};
+ memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size);
+ }
+ },
+ out);
++batch_id;
- }
- while(window.slide_window_slice_3D(slice_out));
+ } while (window.slide_window_slice_3D(slice_out));
}
}
} // namespace arm_compute