From 702dc0c71f2b2830b63e3b4079ede0ef76377f0a Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Fri, 19 Mar 2021 15:06:07 +0000 Subject: Remove usage of valid window region CL - NHWC Resolves: COMPMID-4153 Change-Id: Ib0d60c9acaac8aaf3946c62fc2d740b5ec6cee5c Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5301 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp | 3 --- src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp | 3 --- src/core/gpu/cl/kernels/ClDequantizationKernel.cpp | 3 --- src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp | 11 ++++------- src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp | 3 --- src/core/gpu/cl/kernels/ClPermuteKernel.cpp | 5 ----- .../gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp | 16 ++++------------ src/core/gpu/cl/kernels/ClPoolingKernel.cpp | 7 ------- src/core/gpu/cl/kernels/ClQuantizationKernel.cpp | 2 -- src/core/gpu/cl/kernels/ClReshapeKernel.cpp | 3 --- .../gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp | 2 -- .../gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp | 2 -- src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp | 3 --- 13 files changed, 8 insertions(+), 55 deletions(-) (limited to 'src/core/gpu/cl') diff --git a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp index c16ff1f028..26f5113822 100644 --- a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp +++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp @@ -99,9 +99,6 @@ void ClBatchConcatenateKernel::configure(const CLCompileContext &compile_context win.set(3, Window::Dimension(0, src->tensor_shape()[3], 1)); ICLKernel::configure_internal(win); - // Set dst valid region - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); - // Set config_id for enabling LWS tuning _config_id = "concatenate_"; _config_id += support::cpp11::to_string(3); diff --git a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp index e8893d76d2..4039570da4 100644 --- a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp +++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp @@ -98,9 +98,6 @@ void ClDepthConcatenateKernel::configure(const CLCompileContext &compile_context win.set(Window::DimZ, Window::Dimension(0, src->tensor_shape().z(), 1)); ICLKernel::configure_internal(win); - // Set dst valid region - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } diff --git a/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp b/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp index 267ac9b2b4..612a03437b 100644 --- a/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp +++ b/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp @@ -113,9 +113,6 @@ void ClDequantizationKernel::configure(const CLCompileContext &compile_context, } ICLKernel::configure_internal(win); - // Set output valid region - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } diff --git a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp b/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp index 72801fa6c8..c6ca084386 100644 --- a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp +++ b/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp @@ -279,11 +279,8 @@ std::pair validate_and_configure_window(ITensorInfo *src, ITenso const unsigned int num_rows = dst->tensor_shape()[0] > 16 ? 2u : 1U; // Create window and update padding - Window win = calculate_max_window(*dst, Steps(vec_size, num_rows)); - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); - - Status err = Status{}; - return std::make_pair(err, win); + Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows)); + return std::make_pair(Status{}, win); } else if(data_layout == DataLayout::NCHW) { @@ -368,8 +365,8 @@ void ClDirectConvolutionKernel::configure(const CLCompileContext &compile_contex kernel_name << "direct_convolution_nhwc"; - const unsigned int n0 = win_config.second.x().step(); - const unsigned int m0 = win_config.second.y().step(); + const unsigned int n0 = win_config.second.x().step(); + const unsigned int m0 = win_config.second.y().step(); const unsigned int k0 = adjust_vec_size(8u, src->dimension(channel_idx)); const unsigned int partial_store_n0 = dst->dimension(channel_idx) % n0; diff --git a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp index 83e976e10f..4436e98fe3 100644 --- a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp +++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp @@ -110,9 +110,6 @@ void ClHeightConcatenateKernel::configure(const CLCompileContext &compile_contex Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - // Set dst valid region - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } diff --git a/src/core/gpu/cl/kernels/ClPermuteKernel.cpp b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp index 992c2a89d3..04e649b911 100644 --- a/src/core/gpu/cl/kernels/ClPermuteKernel.cpp +++ b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp @@ -109,11 +109,6 @@ void ClPermuteKernel::configure(const CLCompileContext &compile_context, const I // Configure kernel window Window win = calculate_max_window(*src, Steps()); - // The CLPermute doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(dst->num_dimensions()); - dst->set_valid_region(ValidRegion(coord, dst->tensor_shape())); - ICLKernel::configure_internal(win); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } diff --git a/src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp b/src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp index f5303379be..56997dc8ad 100644 --- a/src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp +++ b/src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.cpp @@ -95,9 +95,7 @@ Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, cons std::pair validate_and_configure_window(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst) { - const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*src1, *src2); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; + const TensorShape &out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape()); // Auto initialize dst if not initialized { @@ -125,7 +123,7 @@ std::pair validate_and_configure_window(ITensorInfo *src1, ITens } } - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); + Window win = calculate_max_window(out_shape, Steps(num_elems_processed_per_iteration)); Window win_input1 = win.broadcast_if_dimension_le_one(*src1); Window win_input2 = win.broadcast_if_dimension_le_one(*src2); @@ -137,8 +135,6 @@ std::pair validate_and_configure_window(ITensorInfo *src1, ITens || update_window_and_padding(win_input2, input2_access) || update_window_and_padding(win, output_access); - output_access.set_valid_region(win, valid_region); - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } @@ -349,15 +345,13 @@ Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *sr std::pair validate_and_configure_window_complex(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst) { - const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*src1, *src2); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; + const TensorShape &out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape()); // Auto initialize dst if not initialized const TensorInfo out_info(out_shape, src1->num_channels(), src1->data_type()); auto_init_if_empty(*dst, out_info); - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration_complex)); + Window win = calculate_max_window(out_shape, Steps(num_elems_processed_per_iteration_complex)); Window win_input1 = win.broadcast_if_dimension_le_one(*src1); Window win_input2 = win.broadcast_if_dimension_le_one(*src2); @@ -369,8 +363,6 @@ std::pair validate_and_configure_window_complex(ITensorInfo *src || update_window_and_padding(win_input2, input2_access) || update_window_and_padding(win, output_access); - output_access.set_valid_region(win, valid_region); - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } diff --git a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp b/src/core/gpu/cl/kernels/ClPoolingKernel.cpp index 567fec2a37..78243402bf 100644 --- a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp +++ b/src/core/gpu/cl/kernels/ClPoolingKernel.cpp @@ -177,13 +177,6 @@ std::tuple validate_and_configure_window(ITenso border_size = BorderSize(); num_elems_processed_per_iteration = adjust_vec_size(4, dst->dimension(0)); win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); - - if(indices != nullptr) - { - indices->set_valid_region(ValidRegion(Coordinates(), indices->tensor_shape())); - } - - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); break; } default: diff --git a/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp b/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp index ea56289157..ced0d14391 100644 --- a/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp +++ b/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp @@ -144,8 +144,6 @@ void ClQuantizationKernel::configure(const CLCompileContext &compile_context, IT } ICLKernel::configure_internal(win); - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } diff --git a/src/core/gpu/cl/kernels/ClReshapeKernel.cpp b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp index 4da3fa0e03..cbf6d0d51a 100644 --- a/src/core/gpu/cl/kernels/ClReshapeKernel.cpp +++ b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp @@ -93,9 +93,6 @@ void ClReshapeKernel::configure(const CLCompileContext &compile_context, const I // Configure kernel window Window win = calculate_max_window(*src); - - // Set the dst valid region - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); ICLKernel::configure_internal(win); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp index 6a2ab3b50f..9f970719ed 100644 --- a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp @@ -113,8 +113,6 @@ void ClWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - // Set dst valid region - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); // Set config_id for enabling LWS tuning diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp index 4b49652a73..281d190381 100644 --- a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp @@ -131,8 +131,6 @@ void ClWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - // Set dst valid region - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); // Set config_id for enabling LWS tuning diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp index 8cbbc27444..d188a5226b 100644 --- a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp @@ -105,9 +105,6 @@ void ClWidthConcatenateKernel::configure(const CLCompileContext &compile_context Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - // Set dst valid region - dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -- cgit v1.2.1