From 9032ee32da54804806a3f26cbbf5a62b3c764f72 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 7 Aug 2019 17:04:11 +0100 Subject: MLCE-129: NEPad 30x slower than TensorFlow's implementation Change-Id: I44770e6a3134c70c4bd58f890d06cb43c9bd8bff Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/1853 Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/runtime/NEON/functions/NEPadLayer.cpp | 50 +++---------------------------- 1 file changed, 4 insertions(+), 46 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp index c608edfdee..cf86240cab 100644 --- a/src/runtime/NEON/functions/NEPadLayer.cpp +++ b/src/runtime/NEON/functions/NEPadLayer.cpp @@ -34,33 +34,6 @@ namespace arm_compute { namespace { -TensorInfo get_expected_output_tensorinfo(const ITensorInfo &input, const PaddingList &paddings) -{ - const TensorShape expected_output_shape = arm_compute::misc::shape_calculator::compute_padded_shape(input.tensor_shape(), paddings); - const TensorInfo expected_output_info = input.clone()->set_tensor_shape(expected_output_shape); - return expected_output_info; -} - -Status validate_arguments(const ITensorInfo &input, ITensorInfo &output, const PaddingList &paddings) -{ - const TensorInfo expected_output_info = get_expected_output_tensorinfo(input, paddings); - auto_init_if_empty(output, expected_output_info); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&output, &expected_output_info); - - return Status{}; -} - -Coordinates get_subtensor_coords(const PaddingList &paddings) -{ - Coordinates coords; - for(unsigned int i = 0; i < paddings.size(); ++i) - { - coords.set(i, paddings[i].first); - } - - return coords; -} - uint32_t last_padding_dimension(const PaddingList &padding) { int last_padding_dim = padding.size() - 1; @@ -76,23 +49,13 @@ uint32_t last_padding_dimension(const PaddingList &padding) } // namespace NEPadLayer::NEPadLayer() - : _copy_kernel(), _mode(), _padding(), _memset_kernel(), _num_dimensions(0), _slice_functions(), _concat_functions(), _slice_results(), _concat_results(), _output_subtensor() + : _copy_kernel(), _pad_kernel(), _mode(), _padding(), _num_dimensions(0), _slice_functions(), _concat_functions(), _slice_results(), _concat_results() { } void NEPadLayer::configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value) { - // Auto-init - auto_init_if_empty(*output->info(), get_expected_output_tensorinfo(*input->info(), padding)); - - // Create SubTensor (Can use sub-tensor as the kernels to be executed do not require padding) - _output_subtensor = SubTensor(output, input->info()->tensor_shape(), get_subtensor_coords(padding), true); - - // Set the pages of the output to the specified value - _memset_kernel.configure(output, constant_value); - - // Copy the input to the output - _copy_kernel.configure(input, &_output_subtensor); + _pad_kernel.configure(input, output, padding, constant_value, PaddingMode::CONSTANT); } void NEPadLayer::configure_reflect_symmetric_mode(ITensor *input, ITensor *output) @@ -253,11 +216,7 @@ Status NEPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, { case PaddingMode::CONSTANT: { - auto output_clone = output->clone(); - SubTensorInfo output_subtensor_info(output_clone.get(), input->tensor_shape(), get_subtensor_coords(padding), true); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input, *output_clone, padding)); - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(input, &output_subtensor_info)); - break; + return NEPadLayerKernel::validate(input, output, padding, constant_value, mode); } case PaddingMode::REFLECT: case PaddingMode::SYMMETRIC: @@ -293,8 +252,7 @@ void NEPadLayer::run() { case PaddingMode::CONSTANT: { - NEScheduler::get().schedule(&_memset_kernel, Window::DimY); - NEScheduler::get().schedule(&_copy_kernel, Window::DimY); + NEScheduler::get().schedule(&_pad_kernel, Window::DimZ); break; } case PaddingMode::REFLECT: -- cgit v1.2.1