From 702dc0c71f2b2830b63e3b4079ede0ef76377f0a Mon Sep 17 00:00:00 2001
From: Michalis Spyrou <michalis.spyrou@arm.com>
Date: Fri, 19 Mar 2021 15:06:07 +0000
Subject: Remove usage of valid window region CL - NHWC

Resolves: COMPMID-4153

Change-Id: Ib0d60c9acaac8aaf3946c62fc2d740b5ec6cee5c
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5301
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp     |  3 ---
 src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp     |  3 ---
 src/core/gpu/cl/kernels/ClDequantizationKernel.cpp       |  3 ---
 src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp    | 11 ++++-------
 src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp    |  3 ---
 src/core/gpu/cl/kernels/ClPermuteKernel.cpp              |  5 -----
 .../gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp   | 16 ++++------------
 src/core/gpu/cl/kernels/ClPoolingKernel.cpp              |  7 -------
 src/core/gpu/cl/kernels/ClQuantizationKernel.cpp         |  2 --
 src/core/gpu/cl/kernels/ClReshapeKernel.cpp              |  3 ---
 .../gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp  |  2 --
 .../gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp  |  2 --
 src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp     |  3 ---
 13 files changed, 8 insertions(+), 55 deletions(-)

(limited to 'src/core/gpu/cl')

diff --git a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp
index c16ff1f028..26f5113822 100644
--- a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp
@@ -99,9 +99,6 @@ void ClBatchConcatenateKernel::configure(const CLCompileContext &compile_context
     win.set(3, Window::Dimension(0, src->tensor_shape()[3], 1));
     ICLKernel::configure_internal(win);
 
-    // Set dst valid region
-    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
-
     // Set config_id for enabling LWS tuning
     _config_id = "concatenate_";
     _config_id += support::cpp11::to_string(3);
diff --git a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp
index e8893d76d2..4039570da4 100644
--- a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp
@@ -98,9 +98,6 @@ void ClDepthConcatenateKernel::configure(const CLCompileContext &compile_context
     win.set(Window::DimZ, Window::Dimension(0, src->tensor_shape().z(), 1));
     ICLKernel::configure_internal(win);
 
-    // Set dst valid region
-    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
-
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 }
 
diff --git a/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp b/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp
index 267ac9b2b4..612a03437b 100644
--- a/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp
@@ -113,9 +113,6 @@ void ClDequantizationKernel::configure(const CLCompileContext &compile_context,
     }
     ICLKernel::configure_internal(win);
 
-    // Set output valid region
-    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
-
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 }
 
diff --git a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp b/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp
index 72801fa6c8..c6ca084386 100644
--- a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp
@@ -279,11 +279,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src, ITenso
         const unsigned int num_rows = dst->tensor_shape()[0] > 16 ? 2u : 1U;
 
         // Create window and update padding
-        Window win = calculate_max_window(*dst, Steps(vec_size, num_rows));
-        dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
-
-        Status err = Status{};
-        return std::make_pair(err, win);
+        Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
+        return std::make_pair(Status{}, win);
     }
     else if(data_layout == DataLayout::NCHW)
     {
@@ -368,8 +365,8 @@ void ClDirectConvolutionKernel::configure(const CLCompileContext &compile_contex
 
         kernel_name << "direct_convolution_nhwc";
 
-        const unsigned int n0               = win_config.second.x().step();
-        const unsigned int m0               = win_config.second.y().step();
+        const unsigned int n0 = win_config.second.x().step();
+        const unsigned int m0 = win_config.second.y().step();
 
         const unsigned int k0               = adjust_vec_size(8u, src->dimension(channel_idx));
         const unsigned int partial_store_n0 = dst->dimension(channel_idx) % n0;
diff --git a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp
index 83e976e10f..4436e98fe3 100644
--- a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp
@@ -110,9 +110,6 @@ void ClHeightConcatenateKernel::configure(const CLCompileContext &compile_contex
     Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration));
     ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
 
-    // Set dst valid region
-    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
-
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 }
 
diff --git a/src/core/gpu/cl/kernels/ClPermuteKernel.cpp b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
index 992c2a89d3..04e649b911 100644
--- a/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
@@ -109,11 +109,6 @@ void ClPermuteKernel::configure(const CLCompileContext &compile_context, const I
     // Configure  kernel window
     Window win = calculate_max_window(*src, Steps());
 
-    // The CLPermute doesn't need padding so update_window_and_padding() can be skipped
-    Coordinates coord;
-    coord.set_num_dimensions(dst->num_dimensions());
-    dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
-
     ICLKernel::configure_internal(win);
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 }
diff --git a/src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp b/src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp
index f5303379be..56997dc8ad 100644
--- a/src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp
@@ -95,9 +95,7 @@ Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, cons
 
 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst)
 {
-    const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*src1, *src2);
-    const TensorShape &out_shape    = broadcast_pair.first;
-    const ValidRegion &valid_region = broadcast_pair.second;
+    const TensorShape &out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape());
 
     // Auto initialize dst if not initialized
     {
@@ -125,7 +123,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src1, ITens
         }
     }
 
-    Window win        = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
+    Window win        = calculate_max_window(out_shape, Steps(num_elems_processed_per_iteration));
     Window win_input1 = win.broadcast_if_dimension_le_one(*src1);
     Window win_input2 = win.broadcast_if_dimension_le_one(*src2);
 
@@ -137,8 +135,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src1, ITens
                           || update_window_and_padding(win_input2, input2_access)
                           || update_window_and_padding(win, output_access);
 
-    output_access.set_valid_region(win, valid_region);
-
     Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
     return std::make_pair(err, win);
 }
@@ -349,15 +345,13 @@ Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *sr
 
 std::pair<Status, Window> validate_and_configure_window_complex(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst)
 {
-    const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*src1, *src2);
-    const TensorShape &out_shape    = broadcast_pair.first;
-    const ValidRegion &valid_region = broadcast_pair.second;
+    const TensorShape &out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape());
 
     // Auto initialize dst if not initialized
     const TensorInfo out_info(out_shape, src1->num_channels(), src1->data_type());
     auto_init_if_empty(*dst, out_info);
 
-    Window win        = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration_complex));
+    Window win        = calculate_max_window(out_shape, Steps(num_elems_processed_per_iteration_complex));
     Window win_input1 = win.broadcast_if_dimension_le_one(*src1);
     Window win_input2 = win.broadcast_if_dimension_le_one(*src2);
 
@@ -369,8 +363,6 @@ std::pair<Status, Window> validate_and_configure_window_complex(ITensorInfo *src
                           || update_window_and_padding(win_input2, input2_access)
                           || update_window_and_padding(win, output_access);
 
-    output_access.set_valid_region(win, valid_region);
-
     Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
     return std::make_pair(err, win);
 }
diff --git a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp b/src/core/gpu/cl/kernels/ClPoolingKernel.cpp
index 567fec2a37..78243402bf 100644
--- a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClPoolingKernel.cpp
@@ -177,13 +177,6 @@ std::tuple<Status, Window, ClPoolingConfig> validate_and_configure_window(ITenso
             border_size                       = BorderSize();
             num_elems_processed_per_iteration = adjust_vec_size(4, dst->dimension(0));
             win                               = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
-
-            if(indices != nullptr)
-            {
-                indices->set_valid_region(ValidRegion(Coordinates(), indices->tensor_shape()));
-            }
-
-            dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
             break;
         }
         default:
diff --git a/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp b/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp
index ea56289157..ced0d14391 100644
--- a/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp
@@ -144,8 +144,6 @@ void ClQuantizationKernel::configure(const CLCompileContext &compile_context, IT
     }
     ICLKernel::configure_internal(win);
 
-    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
-
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 }
 
diff --git a/src/core/gpu/cl/kernels/ClReshapeKernel.cpp b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
index 4da3fa0e03..cbf6d0d51a 100644
--- a/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
@@ -93,9 +93,6 @@ void ClReshapeKernel::configure(const CLCompileContext &compile_context, const I
 
     // Configure kernel window
     Window win = calculate_max_window(*src);
-
-    // Set the dst valid region
-    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
     ICLKernel::configure_internal(win);
 
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp
index 6a2ab3b50f..9f970719ed 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp
@@ -113,8 +113,6 @@ void ClWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile
     Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
     ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
 
-    // Set dst valid region
-    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 
     // Set config_id for enabling LWS tuning
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp
index 4b49652a73..281d190381 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp
@@ -131,8 +131,6 @@ void ClWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile
     Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
     ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
 
-    // Set dst valid region
-    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 
     // Set config_id for enabling LWS tuning
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp
index 8cbbc27444..d188a5226b 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp
@@ -105,9 +105,6 @@ void ClWidthConcatenateKernel::configure(const CLCompileContext &compile_context
     Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration));
     ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
 
-    // Set dst valid region
-    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
-
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 }
 
-- 
cgit v1.2.1