about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
diff options
context:
space:
mode:
author: Abe Mbise <abe.mbise@arm.com>, 2018-05-31 16:48:41 +0100
committer: Anthony Barbier <anthony.barbier@arm.com>, 2018-11-02 16:54:54 +0000
commit: 7784c837afd5844fb6dc4d166ff253d983abfd2d (patch)
tree: 3bc770240de148d565aa828e8f3471c354ac3837 /src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
parent: b03f7c5c780fe2df23eb8c5c1b4b1d65bd7f0339 (diff)
download: ComputeLibrary-7784c837afd5844fb6dc4d166ff253d983abfd2d.tar.gz
COMPMID-1167: Validation for NEDepthwiseConvolutionLayer
Change-Id: I9689e1a0627dc015dd2ce98417e4c97bb55581bb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/131327
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp | 204
1 file changed, 128 insertions, 76 deletions
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index 09728e2a8d..62dabc8d32 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -144,6 +144,112 @@ inline void convolve_3x3(const Window &window, unsigned int num_elems_written_pe
ARM_COMPUTE_ERROR("Not implemented");
}
}
+
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, bool is_optimized)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+
+ if(is_optimized)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) != 3 || weights->dimension(2) != 3);
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != 3 || weights->dimension(1) != 3);
+ ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().first < 1 || conv_info.stride().first > 3);
+ }
+
+ if(output->total_size() != 0)
+ {
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input->data_type()) && (output->data_type() != DataType::S32));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_float(input->data_type()) && (output->data_type() != DataType::F32));
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, bool is_optimized,
+ IDepthwiseConvolution *convolver = nullptr)
+{
+ Window win;
+ bool window_changed = false;
+
+ if(is_optimized)
+ {
+ if(convolver != nullptr)
+ {
+ auto win_last = convolver->get_window();
+ win.set(Window::DimX, Window::Dimension(0, win_last, 1));
+
+ // Auto-configure output
+ bool same_padding = conv_info.has_padding();
+ TensorShape output_shape{ input->tensor_shape() };
+
+ output_shape.set(1, convolver->output_size(output_shape.y(), same_padding)); // Set width
+ output_shape.set(2, convolver->output_size(output_shape.z(), same_padding)); // Set height
+
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
+
+ // Configure window (optimised)
+ // Set padding in channels
+ const int num_channels = weights->dimension(0);
+ if((num_channels >= 128) && (num_channels % 16 == 0))
+ {
+ input->extend_padding(PaddingSize(0, 4, 0, 0));
+ weights->extend_padding(PaddingSize(0, 4, 0, 0));
+ output->extend_padding(PaddingSize(0, 4, 0, 0));
+ }
+ }
+ }
+ else
+ {
+ // Get convolved dimensions
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier);
+ const DataType output_dt = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
+
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_type(output_dt));
+
+ // Configure kernel window (generic)
+ const unsigned int conv_stride_x = conv_info.stride().first;
+ const unsigned int conv_stride_y = conv_info.stride().second;
+ const unsigned int conv_pad_top = conv_info.pad_top();
+ const unsigned int conv_pad_left = conv_info.pad_left();
+
+ unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x;
+ unsigned int num_elems_read_per_iteration = 0;
+
+ switch(input->data_type())
+ {
+ case DataType::QASYMM8:
+ num_elems_read_per_iteration = 16;
+ break;
+ case DataType::F32:
+ num_elems_read_per_iteration = 12;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Data type not supported.");
+ }
+
+ // Configure kernel window
+ win = calculate_max_window(*output, Steps(num_elems_written_per_iteration));
+
+ AccessWindowRectangle input_access(input, -conv_pad_left, -conv_pad_top, num_elems_read_per_iteration, 3, conv_stride_x, conv_stride_y);
+ AccessWindowStatic weights_access(weights, 0, 0, 3, 3);
+ AccessWindowHorizontal output_access(output, 0, num_elems_written_per_iteration);
+
+ window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+ }
+
+ Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ return std::make_pair(err, win);
+}
} // namespace
NEDepthwiseConvolutionLayer3x3Kernel::NEDepthwiseConvolutionLayer3x3Kernel()
@@ -159,8 +265,7 @@ BorderSize NEDepthwiseConvolutionLayer3x3Kernel::border_size() const
void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
DataLayout data_layout)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
_input = input;
_output = output;
@@ -177,6 +282,17 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const
(_run_optimized) ? configure_optimized() : configure_generic();
}
+Status NEDepthwiseConvolutionLayer3x3Kernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+
+ bool is_optimized = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input->tensor_shape(), conv_info, input->data_type(), depth_multiplier, input->data_layout());
+
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, output, conv_info, depth_multiplier, is_optimized));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info, depth_multiplier, is_optimized).first);
+ return Status{};
+}
+
void NEDepthwiseConvolutionLayer3x3Kernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
@@ -227,90 +343,26 @@ void NEDepthwiseConvolutionLayer3x3Kernel::generate_convolver()
void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic()
{
- ARM_COMPUTE_ERROR_ON(_weights->info()->dimension(0) != 3 || _weights->info()->dimension(1) != 3);
-
- // Get convolved dimensions
- const TensorShape output_shape = compute_depthwise_convolution_shape(*_input->info(), *_weights->info(), _conv_info, _depth_multiplier);
- const DataType output_dt = (_input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : _input->info()->data_type();
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*_output->info(),
- _input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_type(output_dt));
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(_output->info()->tensor_shape(), output_shape);
-
- const unsigned int conv_stride_x = _conv_info.stride().first;
- const unsigned int conv_stride_y = _conv_info.stride().second;
- const unsigned int conv_pad_top = _conv_info.pad_top();
- const unsigned int conv_pad_right = _conv_info.pad_right();
- const unsigned int conv_pad_bottom = _conv_info.pad_bottom();
- const unsigned int conv_pad_left = _conv_info.pad_left();
-
- ARM_COMPUTE_ERROR_ON(conv_stride_x < 1 || conv_stride_x > 3);
-
- unsigned int num_elems_read_per_iteration = 0;
- switch(_input->info()->data_type())
- {
- case DataType::QASYMM8:
- num_elems_read_per_iteration = 16;
- _num_elems_written_per_iteration = 16 >> conv_stride_x;
- break;
- case DataType::F32:
- num_elems_read_per_iteration = 12;
- _num_elems_written_per_iteration = 16 >> conv_stride_x;
- break;
- default:
- ARM_COMPUTE_ERROR("Data type not supported.");
- }
- _border_size = BorderSize(conv_pad_top, conv_pad_right, conv_pad_bottom, conv_pad_left);
-
- // Configure kernel window
- Window win = calculate_max_window(*_output->info(), Steps(_num_elems_written_per_iteration));
-
- AccessWindowRectangle input_access(_input->info(), -conv_pad_left, -conv_pad_top,
- num_elems_read_per_iteration, 3,
- conv_stride_x, conv_stride_y);
- AccessWindowStatic weights_access(_weights->info(), 0, 0, 3, 3);
- AccessWindowHorizontal output_access(_output->info(), 0, _num_elems_written_per_iteration);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(_input->info(), _weights->info(), _output->info(), _conv_info, _depth_multiplier, _run_optimized));
- update_window_and_padding(win, input_access, weights_access, output_access);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), _output->info()->tensor_shape()));
+ _num_elems_written_per_iteration = 16 >> _conv_info.stride().first;
+ _border_size = BorderSize(_conv_info.pad_top(), _conv_info.pad_right(), _conv_info.pad_bottom(), _conv_info.pad_left());
- INEKernel::configure(win);
+ auto win_config = validate_and_configure_window(_input->info(), _weights->info(), _output->info(), _conv_info, _depth_multiplier, false);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
}
void NEDepthwiseConvolutionLayer3x3Kernel::configure_optimized()
{
- ARM_COMPUTE_ERROR_ON(_weights->info()->dimension(1) != 3 || _weights->info()->dimension(2) != 3);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(_input->info(), _weights->info(), _output->info(), _conv_info, _depth_multiplier, _run_optimized));
_border_size = BorderSize(0, 0);
_convolver = create_convolver_object(_conv_info, _weights, _input, _output);
- // Auto-configure output
- bool same_padding = _conv_info.has_padding();
- TensorShape output_shape{ _input->info()->tensor_shape() };
-
- output_shape.set(1, _convolver->output_size(output_shape.y(), same_padding)); // Set width
- output_shape.set(2, _convolver->output_size(output_shape.z(), same_padding)); // Set height
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*_output->info(),
- _input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
-
- // Set padding in channels
- const int num_channels = _weights->info()->dimension(0);
- if((num_channels >= 128) && (num_channels % 16 == 0))
- {
- _input->info()->extend_padding(PaddingSize(0, 4, 0, 0));
- _weights->info()->extend_padding(PaddingSize(0, 4, 0, 0));
- _output->info()->extend_padding(PaddingSize(0, 4, 0, 0));
- }
-
- // Configure window
- Window win;
- auto win_last = _convolver->get_window();
- win.set(Window::DimX, Window::Dimension(0, win_last, 1));
- INEKernel::configure(win);
+ auto win_config = validate_and_configure_window(_input->info(), _weights->info(), _output->info(), _conv_info, _depth_multiplier, true, _convolver.get());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
}
void NEDepthwiseConvolutionLayer3x3Kernel::run_generic(const Window &window, const ThreadInfo &info)