COMPMID-2681: Use NE/CL/Pad in NE/CL/GenerateProposalsLayer

Change-Id: Idf1c64224b0ddd4bdac1120cac4437eb2578bb2b
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1963
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2019-09-20 14:01:48 +0100
committer: Michele Di Giorgio <michele.digiorgio@arm.com> 2019-09-20 17:04:24 +0000
commit: 4c268b97dff93eae3f71f2a6971f0d3f748b7b38 (patch)
tree: 4f3023105369f21a4df540db28f9b2c191a51534 /src
parent: c9564cb3850b6675cef663d7cc0722567b55cc25 (diff)
download: ComputeLibrary-4c268b97dff93eae3f71f2a6971f0d3f748b7b38.tar.gz
3 files changed, 18 insertions, 25 deletions
diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp
index 52b65c39b1..9dfd380f7c 100644
--- a/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPadLayerKernel.cpp
@@ -24,6 +24,7 @@
 #include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 
 namespace arm_compute
 {
@@ -41,6 +42,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
 {
     ARM_COMPUTE_UNUSED(constant_value, mode);
+    // Output auto initialization if not yet initialized
+    const TensorShape expected_output_shape = arm_compute::misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
+    auto_init_if_empty(*output, input->clone()->set_tensor_shape(expected_output_shape));
+
     const unsigned int num_elems_processed_per_iteration = std::min(16U, 32U / static_cast<unsigned int>(element_size_from_data_type(input->data_type())));
 
     // Configure kernel window
@@ -67,10 +72,8 @@ CLPadLayerKernel::CLPadLayerKernel()
 
 void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
 {
+    // Perform validation step
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
-    // Output tensor auto initialisation if not yet initialized
-    auto_init_if_empty(*output->info(), *input->info()->clone());
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), padding, constant_value, mode));
 
     _input  = input;
@@ -134,9 +137,9 @@ void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue)
     win_in.adjust(Window::DimX, _input_start_x, true);
     win_in.adjust(Window::DimY, _input_start_y, true);
 
-    Window slice_out = window.first_slice_window_3D();
-    Window slice_in  = win_in.first_slice_window_3D();
-    unsigned int batch = 0;
+    Window       slice_out = window.first_slice_window_3D();
+    Window       slice_in  = win_in.first_slice_window_3D();
+    unsigned int batch     = 0;
     do
     {
         unsigned int idx = 0;
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index d712a23325..94aa5e7198 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -37,8 +37,7 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManage
       _flatten_scores_kernel(),
       _compute_anchors_kernel(),
       _bounding_box_kernel(),
-      _memset_kernel(),
-      _padded_copy_kernel(),
+      _pad_kernel(),
       _cpp_nms_kernel(),
       _is_nhwc(false),
       _deltas_permuted(),
@@ -149,10 +148,8 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso
     _scores_flattened.allocator()->allocate();
 
     // Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images
-    _padded_copy_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
+    _pad_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
     _proposals_4_roi_values.allocator()->allocate();
-
-    _memset_kernel.configure(proposals, PixelValue());
 }
 
 Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out,
@@ -197,8 +194,7 @@ Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
     ARM_COMPUTE_RETURN_ON_ERROR(CLBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, BoundingBoxTransformInfo(info.im_width(), info.im_height(),
                                                                        1.f)));
 
-    ARM_COMPUTE_RETURN_ON_ERROR(CLCopyKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 0, 1 } }));
-    ARM_COMPUTE_RETURN_ON_ERROR(CLMemsetKernel::validate(proposals, PixelValue()));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLPadLayerKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }));
 
     if(num_valid_proposals->total_size() > 0)
     {
@@ -275,7 +271,6 @@ void CLGenerateProposalsLayer::run()
     // Non maxima suppression
     run_cpp_nms_kernel();
     // Add dummy batch indexes
-    CLScheduler::get().enqueue(_memset_kernel, true);
-    CLScheduler::get().enqueue(_padded_copy_kernel, true);
+    CLScheduler::get().enqueue(_pad_kernel, true);
 }
 } // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
index 6e5da43a94..b2a6ca8c35 100644
--- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
+++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
@@ -37,8 +37,7 @@ NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr<IMemoryManage
       _flatten_scores_kernel(),
       _compute_anchors_kernel(),
       _bounding_box_kernel(),
-      _memset_kernel(),
-      _padded_copy_kernel(),
+      _pad_kernel(),
       _cpp_nms_kernel(),
       _is_nhwc(false),
       _deltas_permuted(),
@@ -130,7 +129,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d
     auto_init_if_empty(*num_valid_proposals->info(), TensorShape(scores_nms_size), 1, DataType::U32);
 
     // Initialize temporaries (unused) outputs
-    _classes_nms_unused.allocator()->init(TensorInfo(TensorShape(8, 1), 1, data_type));
+    _classes_nms_unused.allocator()->init(TensorInfo(TensorShape(scores_nms_size), 1, data_type));
     _keeps_nms_unused.allocator()->init(*scores_out->info());
 
     // Save the output (to map and unmap them at run)
@@ -157,10 +156,8 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d
     _scores_flattened.allocator()->allocate();
 
     // Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images
-    _padded_copy_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
+    _pad_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
     _proposals_4_roi_values.allocator()->allocate();
-
-    _memset_kernel.configure(proposals, PixelValue());
 }
 
 Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out,
@@ -205,7 +202,7 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
     ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, BoundingBoxTransformInfo(info.im_width(), info.im_height(),
                                                                        1.f)));
 
-    ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 0, 1 } }));
+    ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayerKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }));
 
     if(num_valid_proposals->total_size() > 0)
     {
@@ -257,8 +254,6 @@ void NEGenerateProposalsLayer::run()
     CPPScheduler::get().schedule(&_cpp_nms_kernel, Window::DimX);
 
     // Add dummy batch indexes
-
-    NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
-    NEScheduler::get().schedule(&_padded_copy_kernel, Window::DimY);
+    NEScheduler::get().schedule(&_pad_kernel, Window::DimY);
 }
 } // namespace arm_compute
author	Michele Di Giorgio <michele.digiorgio@arm.com>	2019-09-20 14:01:48 +0100
committer	Michele Di Giorgio <michele.digiorgio@arm.com>	2019-09-20 17:04:24 +0000
commit	4c268b97dff93eae3f71f2a6971f0d3f748b7b38 (patch)
tree	4f3023105369f21a4df540db28f9b2c191a51534 /src
parent	c9564cb3850b6675cef663d7cc0722567b55cc25 (diff)
download	ComputeLibrary-4c268b97dff93eae3f71f2a6971f0d3f748b7b38.tar.gz