about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/NEPadLayerKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/NEPadLayerKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEPadLayerKernel.cpp | 115
1 files changed, 72 insertions, 43 deletions
diff --git a/src/core/NEON/kernels/NEPadLayerKernel.cpp b/src/core/NEON/kernels/NEPadLayerKernel.cpp
index 3e2c57a18c..c9bcbc9127 100644
--- a/src/core/NEON/kernels/NEPadLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPadLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,26 +28,31 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/NEON/wrapper/wrapper.h"
+#include "arm_compute/core/Validate.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/NEON/wrapper/wrapper.h"
namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &paddings, const PaddingMode mode)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PaddingList &paddings,
+ const PaddingMode mode)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(mode != PaddingMode::CONSTANT, "Only constant padding mode is supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(paddings.size() > 4, "Padding list bigger than 4 dimensions");
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- const TensorShape expected_output_shape = arm_compute::misc::shape_calculator::compute_padded_shape(input->tensor_shape(), paddings);
- const TensorInfo expected_output_info = input->clone()->set_tensor_shape(expected_output_shape);
+ const TensorShape expected_output_shape =
+ arm_compute::misc::shape_calculator::compute_padded_shape(input->tensor_shape(), paddings);
+ const TensorInfo expected_output_info = input->clone()->set_tensor_shape(expected_output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &expected_output_info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -58,30 +63,34 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
// Constant-mode padding for element type T.
//
// Reading this hunk: the '-' lines and the '+' lines carry the same statements;
// they differ only in whitespace and line wrapping.
//
// The execution window is copied and its X dimension replaced with
// Dimension(0, 1, 1); the lambda then writes along dimension 0 itself, starting
// at output_it.ptr(), on every invocation.
//
// Per invocation:
//  - for each dimension from _padding.size() - 1 down to 1, the leading pad
//    (_padding[dim].first) is subtracted from the output coordinate; if the
//    result is negative or greater than the input's dimension(dim) - 1,
//    _output->info()->dimension(0) elements are filled with _constant_value
//    and the lambda returns;
//  - otherwise _padding[0].first constants are written, followed by
//    _input->info()->dimension(0) * element_size bytes copied from the input
//    element addressed by the adjusted coordinates, followed by
//    _padding[0].second constants.
//
// NOTE(review): `dim` is a size_t initialised to _padding.size() - 1 and
// _padding[0] is read unconditionally, so this looks like it assumes a
// non-empty padding list -- confirm that callers guarantee it.
template <typename T>
void NEPadLayerKernel::run_pad_constant(const Window &window)
{
- Window output_window{ window };
+ Window output_window{window};
output_window.set(Window::DimX, Window::Dimension(0, 1, 1)); // X is overridden; the lambda covers dimension 0 by hand
const size_t element_size = _input->info()->element_size();
Iterator output_it(_output, output_window);
- execute_window_loop(output_window, [&](const Coordinates & id)
- {
- Coordinates idin{ id };
- for(size_t dim = _padding.size() - 1; dim > 0; --dim)
+ execute_window_loop(
+ output_window,
+ [&](const Coordinates &id)
{
- idin[dim] -= _padding[dim].first;
- if(idin[dim] < 0 || static_cast<int>(_input->info()->dimension(dim)) - 1 < idin[dim])
+ Coordinates idin{id};
+ for (size_t dim = _padding.size() - 1; dim > 0; --dim)
{
- std::fill_n(reinterpret_cast<T *>(output_it.ptr()), _output->info()->dimension(0), _constant_value.get<T>());
- return;
+ idin[dim] -= _padding[dim].first;
+ if (idin[dim] < 0 || static_cast<int>(_input->info()->dimension(dim)) - 1 < idin[dim])
+ {
+ std::fill_n(reinterpret_cast<T *>(output_it.ptr()), _output->info()->dimension(0),
+ _constant_value.get<T>());
+ return;
+ }
}
- }
- T *input_it_ptr = reinterpret_cast<T *>(_input->ptr_to_element(idin));
- T *output_it_ptr = reinterpret_cast<T *>(output_it.ptr());
- std::fill_n(output_it_ptr, _padding[0].first, _constant_value.get<T>());
- memcpy(output_it_ptr + _padding[0].first, input_it_ptr, _input->info()->dimension(0) * element_size);
- std::fill_n(output_it_ptr + _padding[0].first + _input->info()->dimension(0), _padding[0].second, _constant_value.get<T>());
- },
- output_it);
+ T *input_it_ptr = reinterpret_cast<T *>(_input->ptr_to_element(idin));
+ T *output_it_ptr = reinterpret_cast<T *>(output_it.ptr());
+ std::fill_n(output_it_ptr, _padding[0].first, _constant_value.get<T>());
+ memcpy(output_it_ptr + _padding[0].first, input_it_ptr, _input->info()->dimension(0) * element_size);
+ std::fill_n(output_it_ptr + _padding[0].first + _input->info()->dimension(0), _padding[0].second,
+ _constant_value.get<T>());
+ },
+ output_it);
}
void NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad(const Window &window)
@@ -92,7 +101,7 @@ void NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad(const Window &window
const size_t end_plane = window.z().end();
size_t start_plane_input = start_plane;
- if(_padding.size() > 2)
+ if (_padding.size() > 2)
{
start_plane_input = (start_plane < _padding[2].first) ? 0 : start_plane - _padding[2].first;
}
@@ -105,18 +114,20 @@ void NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad(const Window &window
const size_t jump_to_next_row_input = _input->info()->dimension(0);
const size_t jump_to_next_row_output = _padding[0].first + _padding[0].second;
- uint8_t *output_row_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + start_plane * output_plane_size;
- const uint8_t *input_it_ptr = _input->buffer() + _input->info()->offset_first_element_in_bytes() + start_plane_input * input_plane_size;
- const auto pad_value = _constant_value.get<uint8_t>();
+ uint8_t *output_row_ptr =
+ _output->buffer() + _output->info()->offset_first_element_in_bytes() + start_plane * output_plane_size;
+ const uint8_t *input_it_ptr =
+ _input->buffer() + _input->info()->offset_first_element_in_bytes() + start_plane_input * input_plane_size;
+ const auto pad_value = _constant_value.get<uint8_t>();
- for(size_t z_i = start_plane; z_i < end_plane; ++z_i)
+ for (size_t z_i = start_plane; z_i < end_plane; ++z_i)
{
- if(_padding.size() > 2 && z_i < _padding[2].first)
+ if (_padding.size() > 2 && z_i < _padding[2].first)
{
memset(output_row_ptr, pad_value, output_plane_size);
output_row_ptr += output_plane_size;
}
- else if(_padding.size() > 2 && z_i > (_input->info()->dimension(2) + _padding[2].first - 1))
+ else if (_padding.size() > 2 && z_i > (_input->info()->dimension(2) + _padding[2].first - 1))
{
memset(output_row_ptr, pad_value, output_plane_size);
output_row_ptr += output_plane_size;
@@ -127,7 +138,7 @@ void NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad(const Window &window
output_row_ptr += pad_y_elems_top;
size_t y_i = _input->info()->dimension(1);
// Basic loop unrolling
- for(; y_i > 3; y_i -= 4)
+ for (; y_i > 3; y_i -= 4)
{
memset(output_row_ptr, pad_value, _padding[0].first);
output_row_ptr += _padding[0].first;
@@ -160,7 +171,7 @@ void NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad(const Window &window
memset(output_row_ptr, pad_value, _padding[0].second);
output_row_ptr += _padding[0].second;
}
- for(; y_i > 0; --y_i)
+ for (; y_i > 0; --y_i)
{
memset(output_row_ptr, pad_value, _padding[0].first);
output_row_ptr += _padding[0].first;
@@ -183,12 +194,17 @@ NEPadLayerKernel::NEPadLayerKernel()
{
}
-void NEPadLayerKernel::configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value, const PaddingMode mode)
+void NEPadLayerKernel::configure(ITensor *input,
+ ITensor *output,
+ const PaddingList &padding,
+ const PixelValue constant_value,
+ const PaddingMode mode)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Auto-init
- const TensorShape expected_output_shape = arm_compute::misc::shape_calculator::compute_padded_shape(input->info()->tensor_shape(), padding);
- const TensorInfo expected_output_info = input->info()->clone()->set_tensor_shape(expected_output_shape);
+ const TensorShape expected_output_shape =
+ arm_compute::misc::shape_calculator::compute_padded_shape(input->info()->tensor_shape(), padding);
+ const TensorInfo expected_output_info = input->info()->clone()->set_tensor_shape(expected_output_shape);
auto_init_if_empty(*output->info(), expected_output_info);
// Perform validation step
@@ -200,14 +216,14 @@ void NEPadLayerKernel::configure(ITensor *input, ITensor *output, const PaddingL
_constant_value = constant_value;
_mode = mode;
- if(_mode == PaddingMode::CONSTANT)
+ if (_mode == PaddingMode::CONSTANT)
{
- switch(_input->info()->element_size())
+ switch (_input->info()->element_size())
{
case 1:
- if(_input->info()->num_dimensions() == 3 && // Is 3D
- padding.size() <= 3 && // Has 3D padding
- !_input->info()->has_padding() && !_output->info()->has_padding()) // Input & Output have no padding
+ if (_input->info()->num_dimensions() == 3 && // Is 3D
+ padding.size() <= 3 && // Has 3D padding
+ !_input->info()->has_padding() && !_output->info()->has_padding()) // Input & Output have no padding
{
_func = &NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad;
}
@@ -240,7 +256,11 @@ void NEPadLayerKernel::configure(ITensor *input, ITensor *output, const PaddingL
ICPPKernel::configure(win);
}
-Status NEPadLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value, const PaddingMode mode)
+Status NEPadLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PaddingList &padding,
+ const PixelValue constant_value,
+ const PaddingMode mode)
{
ARM_COMPUTE_UNUSED(constant_value);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, padding, mode));
@@ -253,9 +273,18 @@ void NEPadLayerKernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- if(_func != nullptr)
+ if (_func != nullptr)
{
(this->*_func)(window);
}
}
+
// Added in full by this diff (every line of the definition is a '+' line).
// Reports this kernel's "mws" value; reading that as "minimum workload size"
// is an assumption based on the name -- confirm against ICPPKernel.
// Both arguments are explicitly marked unused and the function always returns
// ICPPKernel::default_mws, so the result does not depend on the platform or
// the thread count.
+size_t NEPadLayerKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
+{
+ ARM_COMPUTE_UNUSED(thread_count);
+ ARM_COMPUTE_UNUSED(platform);
+
+ return ICPPKernel::default_mws; // no kernel-specific value is computed here
+}
+
} // namespace arm_compute