From 4c268b97dff93eae3f71f2a6971f0d3f748b7b38 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Fri, 20 Sep 2019 14:01:48 +0100 Subject: COMPMID-2681: Use NE/CL/Pad in NE/CL/GenerateProposalsLayer Change-Id: Idf1c64224b0ddd4bdac1120cac4437eb2578bb2b Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/1963 Comments-Addressed: Arm Jenkins Reviewed-by: Pablo Marquez Tested-by: Arm Jenkins --- src/core/CL/kernels/CLPadLayerKernel.cpp | 15 +++++++++------ src/runtime/CL/functions/CLGenerateProposalsLayer.cpp | 13 ++++--------- src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp | 15 +++++---------- 3 files changed, 18 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp index 52b65c39b1..9dfd380f7c 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.cpp +++ b/src/core/CL/kernels/CLPadLayerKernel.cpp @@ -24,6 +24,7 @@ #include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" namespace arm_compute { @@ -41,6 +42,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) { ARM_COMPUTE_UNUSED(constant_value, mode); + // Output auto initialization if not yet initialized + const TensorShape expected_output_shape = arm_compute::misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding); + auto_init_if_empty(*output, input->clone()->set_tensor_shape(expected_output_shape)); + const unsigned int num_elems_processed_per_iteration = std::min(16U, 32U / static_cast<unsigned int>(element_size_from_data_type(input->data_type()))); // Configure kernel window @@ -67,10 +72,8 @@ CLPadLayerKernel::CLPadLayerKernel() void CLPadLayerKernel::configure(const ICLTensor *input, 
ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) { + // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Output tensor auto initialisation if not yet initialized - auto_init_if_empty(*output->info(), *input->info()->clone()); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), padding, constant_value, mode)); _input = input; @@ -134,9 +137,9 @@ void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue) win_in.adjust(Window::DimX, _input_start_x, true); win_in.adjust(Window::DimY, _input_start_y, true); - Window slice_out = window.first_slice_window_3D(); - Window slice_in = win_in.first_slice_window_3D(); - unsigned int batch = 0; + Window slice_out = window.first_slice_window_3D(); + Window slice_in = win_in.first_slice_window_3D(); + unsigned int batch = 0; do { unsigned int idx = 0; diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp index d712a23325..94aa5e7198 100644 --- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp +++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp @@ -37,8 +37,7 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptrallocate(); // Add the first column that represents the batch id. 
This will be all zeros, as we don't support multiple images - _padded_copy_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); + _pad_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); _proposals_4_roi_values.allocator()->allocate(); - - _memset_kernel.configure(proposals, PixelValue()); } Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out, @@ -197,8 +194,7 @@ Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens ARM_COMPUTE_RETURN_ON_ERROR(CLBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); - ARM_COMPUTE_RETURN_ON_ERROR(CLCopyKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 0, 1 } })); - ARM_COMPUTE_RETURN_ON_ERROR(CLMemsetKernel::validate(proposals, PixelValue())); + ARM_COMPUTE_RETURN_ON_ERROR(CLPadLayerKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } })); if(num_valid_proposals->total_size() > 0) { @@ -275,7 +271,6 @@ void CLGenerateProposalsLayer::run() // Non maxima suppression run_cpp_nms_kernel(); // Add dummy batch indexes - CLScheduler::get().enqueue(_memset_kernel, true); - CLScheduler::get().enqueue(_padded_copy_kernel, true); + CLScheduler::get().enqueue(_pad_kernel, true); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp index 6e5da43a94..b2a6ca8c35 100644 --- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp +++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp @@ -37,8 +37,7 @@ NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptrinfo(), TensorShape(scores_nms_size), 1, DataType::U32); // Initialize temporaries (unused) outputs - 
_classes_nms_unused.allocator()->init(TensorInfo(TensorShape(8, 1), 1, data_type)); + _classes_nms_unused.allocator()->init(TensorInfo(TensorShape(scores_nms_size), 1, data_type)); _keeps_nms_unused.allocator()->init(*scores_out->info()); // Save the output (to map and unmap them at run) @@ -157,10 +156,8 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d _scores_flattened.allocator()->allocate(); // Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images - _padded_copy_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); + _pad_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); _proposals_4_roi_values.allocator()->allocate(); - - _memset_kernel.configure(proposals, PixelValue()); } Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out, @@ -205,7 +202,7 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 0, 1 } })); + ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayerKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } })); if(num_valid_proposals->total_size() > 0) { @@ -257,8 +254,6 @@ void NEGenerateProposalsLayer::run() CPPScheduler::get().schedule(&_cpp_nms_kernel, Window::DimX); // Add dummy batch indexes - - NEScheduler::get().schedule(&_memset_kernel, Window::DimY); - NEScheduler::get().schedule(&_padded_copy_kernel, Window::DimY); + NEScheduler::get().schedule(&_pad_kernel, Window::DimY); } } // namespace arm_compute -- cgit v1.2.1