COMPMID-1529 Optimize PoolingLayer NHWC on NEON

Change-Id: Ib85e5cc203d6c71f83c6021c776ccdc0eef82acf
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145165
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Giorgio Arena <giorgio.arena@arm.com> 2018-08-22 12:15:25 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: 9fb6c7e5c9bd60727e119fc370fb4d5b5d605fd1 (patch)
tree: 9bf817f8c579a4632a609cbae4e88233c442fd5d /src/core/NEON/kernels/NEPoolingLayerKernel.cpp
parent: fb62908bd8148bd347bd204e881156f8ebf7835d (diff)
download: ComputeLibrary-9fb6c7e5c9bd60727e119fc370fb4d5b5d605fd1.tar.gz
1 file changed, 6 insertions, 15 deletions
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index ad4b8f76d5..a8e3be28fe 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -35,6 +35,7 @@
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 
 #include "support/ToolchainSupport.h"
 
@@ -47,18 +48,10 @@
 #include <tuple>
 
 using namespace arm_compute;
+using namespace misc::shape_calculator;
 
 namespace
 {
-void auto_init(const ITensorInfo *input, ITensorInfo *output, unsigned int pooled_w, unsigned int pooled_h)
-{
-    TensorShape output_shape{ input->tensor_shape() };
-    output_shape.set(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH), pooled_w);
-    output_shape.set(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT), pooled_h);
-
-    auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
-}
-
 template <bool exclude_padding, DataLayout data_layout>
 inline float calculate_avg_scale(const Coordinates &id, const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
                                  const int pad_x, const int pad_y, const int stride_x, const int stride_y)
@@ -166,7 +159,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
                                                         BorderSize &border_size,
                                                         unsigned int pooled_w, unsigned int pooled_h, int pool_size_x, int pool_size_y)
 {
-    // Get data layout
+    // Output auto initialization if not yet initialized
+    auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_pool_shape(*input, pool_info)));
+
     DataLayout          data_layout                  = input->data_layout();
     unsigned int        num_elems_read_per_iteration = 0;
     unsigned int        num_elems_horizontal_window  = 0;
@@ -190,7 +185,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
                                                      pool_size_x,
                                                      pool_size_y,
                                                      pad_stride_info);
-    auto_init(input, output, pooled_w, pooled_h);
 
     //If it's not squared and optimized will be executed the MxN
     num_elems_read_per_iteration      = 1;
@@ -248,7 +242,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
             case DataType::F32:
                 if(is_nhwc)
                 {
-                    num_elems_processed_per_iteration = 4;
+                    num_elems_processed_per_iteration = std::max(4, ceil_to_multiple<int>(input->dimension(0), 2));
                     break;
                 }
                 switch(pool_size_x)
@@ -371,9 +365,6 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
                                                      pool_size_y,
                                                      pad_stride_info);
 
-    // Output auto initialization if not yet initialized
-    auto_init(input->info(), output->info(), pooled_w, pooled_h);
-
     // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h));
author	Giorgio Arena <giorgio.arena@arm.com>	2018-08-22 12:15:25 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:54:54 +0000
commit	9fb6c7e5c9bd60727e119fc370fb4d5b5d605fd1 (patch)
tree	9bf817f8c579a4632a609cbae4e88233c442fd5d /src/core/NEON/kernels/NEPoolingLayerKernel.cpp
parent	fb62908bd8148bd347bd204e881156f8ebf7835d (diff)
download	ComputeLibrary-9fb6c7e5c9bd60727e119fc370fb4d5b5d605fd1.tar.gz