From e3d24cee3688b2ddffd5858aba4904bf51398f08 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Fri, 24 Aug 2018 14:44:08 +0100 Subject: COMPMID-708 Fix AccessWindowTranspose Change-Id: I68f65b6dea7889d71b4a10021f59e6f0ab82903b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145590 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- src/core/AccessWindowTranspose.cpp | 16 +++++++++---- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 1 - src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 1 - src/core/CL/kernels/CLTransposeKernel.cpp | 5 ++-- .../NEDepthwiseConvolutionLayer3x3Kernel.cpp | 1 - src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp | 1 - .../kernels/NEDepthwiseVectorToTensorKernel.cpp | 1 - .../kernels/NEDepthwiseWeightsReshapeKernel.cpp | 1 - .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 1 - .../NELocallyConnectedMatrixMultiplyKernel.cpp | 1 - src/core/NEON/kernels/NETransposeKernel.cpp | 9 ++++--- tests/datasets/ShapeDatasets.h | 5 +++- tests/validation/NEON/Transpose.cpp | 28 ++++++++++++++-------- 13 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/core/AccessWindowTranspose.cpp b/src/core/AccessWindowTranspose.cpp index 3c45ab3571..70235a2d0a 100644 --- a/src/core/AccessWindowTranspose.cpp +++ b/src/core/AccessWindowTranspose.cpp @@ -53,7 +53,10 @@ ValidRegion AccessWindowTranspose::compute_valid_region(const Window &window, Va // the kernel to write back output values. // As the relation between input and output is transposed window.y() is // used for x anchor and window.x() for y anchor. 
- anchor.set(0, std::max(window.y().start() * _scale_x, anchor[1] + border_size.top) + _x); + if(_info->dimension(0) > 1) + { + anchor.set(0, std::max(window.y().start() * _scale_x, anchor[1] + border_size.top) + _x); + } anchor.set(1, std::max(window.x().start() * _scale_y, anchor[0] + border_size.left) + _y); // End of the valid region is equal to the start of the last write of the @@ -66,8 +69,11 @@ ValidRegion AccessWindowTranspose::compute_valid_region(const Window &window, Va // a size of the region. // As the relation between input and output is transposed window.y() is // used for x shape and window.x() for y shape. - shape.set(0, std::min((old_anchor[1] + old_shape[1]) * _scale_x - border_size.right, (window.y().end() - window.y().step()) * _scale_x + _width) - anchor[0]); - shape.set(1, std::min((old_anchor[0] + old_shape[0]) * _scale_y - border_size.bottom, (window.x().end() - window.x().step()) * _scale_y + _height) - anchor[1]); + if(_info->dimension(0) > 1) + { + shape.set(0, std::min((old_anchor[1] + old_shape[0]) * _scale_x - border_size.right, (window.y().end() - window.y().step()) * _scale_x + _width) - anchor[0]); + } + shape.set(1, std::min((old_anchor[0] + old_shape[1]) * _scale_y - border_size.bottom, (window.x().end() - window.x().step()) * _scale_y + _height) - anchor[1]); // For higher dimensions use the intersection of the window size and the // valid region of the input @@ -192,9 +198,9 @@ bool AccessWindowTranspose::update_padding_if_needed(const Window &window) ARM_COMPUTE_ERROR_ON(window.x().step() == 0); const int min_x = window.y().start() * _scale_x + _x; - const int max_x = window.y().end() * _scale_x + _x; + const int max_x = (window.y().end() - window.y().step()) * _scale_x + _x + _width; const int min_y = window.x().start() * _scale_y + _y; - const int max_y = window.x().end() * _scale_y + _y; + const int max_y = (window.x().end() - window.x().step()) * _scale_y + _y + _height; const TensorShape &shape = _info->tensor_shape(); 
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index 8530ed2fd3..a3cf18a648 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -24,7 +24,6 @@ #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/CLValidate.h" diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp index 5b299052d4..aa1b92a685 100644 --- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp +++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp @@ -24,7 +24,6 @@ #include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" #include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/CLValidate.h" diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp index 695bdf7f40..ccf22eacc3 100644 --- a/src/core/CL/kernels/CLTransposeKernel.cpp +++ b/src/core/CL/kernels/CLTransposeKernel.cpp @@ -24,6 +24,7 @@ #include "arm_compute/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/CLValidate.h" @@ -86,9 +87,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen if(output->total_size() != 0) { - // TODO (COMPMID-708): Replace AccessWindowStatic with AccessWindowTranspose - AccessWindowStatic output_access(output, 0, 0, ceil_to_multiple(output->dimension(0), 
num_elems_processed_per_iteration), ceil_to_multiple(output->dimension(1), - num_elems_processed_per_iteration)); + AccessWindowTranspose output_access(output, 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration); window_changed = window_changed || update_window_and_padding(win, output_access); diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index 09e4acda5f..94b438cb83 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -25,7 +25,6 @@ #include "arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h" #include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp index 92ee8d5809..e8fb8cd6b1 100644 --- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp index 2d17c237a3..921582a41d 100644 --- a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Coordinates.h" 
#include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp index 22a2cf8f2d..77ab5adb14 100644 --- a/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp index 0ca24748af..f182fb24c3 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp @@ -24,7 +24,6 @@ #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp index 4d3ec46e34..46b7913223 100644 --- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp index 32a5acd2f4..870d2c9660 100644 --- a/src/core/NEON/kernels/NETransposeKernel.cpp +++ 
b/src/core/NEON/kernels/NETransposeKernel.cpp @@ -24,6 +24,7 @@ #include "arm_compute/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/AccessWindowTranspose.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -101,14 +102,12 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen // Configure kernel window Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - AccessWindowStatic input_access(input, 0, 0, input->dimension(0), input->dimension(1)); - - bool window_changed = update_window_and_padding(win, input_access); + AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); + bool window_changed = update_window_and_padding(win, input_access); if(output->total_size() != 0) { - // TODO (COMPMID-708): Replace AccessWindowStatic with AccessWindowTranspose - AccessWindowStatic output_access(output, 0, 0, output->dimension(0), output->dimension(1)); + AccessWindowTranspose output_access(output, 0, 0, num_elems_processed_per_iteration_y, num_elems_processed_per_iteration_x); window_changed = window_changed || update_window_and_padding(win, output_access); diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index 57bb9d01b0..4d75a16e47 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -45,7 +45,10 @@ public: Small1DShapes() : ShapeDataset("Shape", { - TensorShape{ 256U } + TensorShape{ 128U }, + TensorShape{ 256U }, + TensorShape{ 512U }, + TensorShape{ 1024U } }) { } diff --git a/tests/validation/NEON/Transpose.cpp b/tests/validation/NEON/Transpose.cpp index f2ef7162a2..f7c52809fc 100644 --- a/tests/validation/NEON/Transpose.cpp +++ b/tests/validation/NEON/Transpose.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. 
+ * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,19 +48,19 @@ TEST_SUITE(Transpose) DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::U8), // Input not a multiple of 8 TensorInfo(TensorShape(21U, 13U), 1, DataType::U16), // Invalid shape - TensorInfo(TensorShape(20U, 13U), 1, DataType::U32), + TensorInfo(TensorShape(20U, 13U), 1, DataType::U32), // Window shrink TensorInfo(TensorShape(20U, 13U), 1, DataType::U8), // Wrong data type - TensorInfo(TensorShape(20U, 13U), 1, DataType::U16), - TensorInfo(TensorShape(20U, 13U), 1, DataType::U32), + TensorInfo(TensorShape(20U, 16U), 1, DataType::U16), + TensorInfo(TensorShape(20U, 16U), 1, DataType::U32), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(13U, 21U), 1, DataType::U8), TensorInfo(TensorShape(21U, 13U), 1, DataType::U16), TensorInfo(TensorShape(13U, 20U), 1, DataType::U32), TensorInfo(TensorShape(31U, 20U), 1, DataType::U16), - TensorInfo(TensorShape(13U, 20U), 1, DataType::U16), - TensorInfo(TensorShape(13U, 20U), 1, DataType::U32), + TensorInfo(TensorShape(16U, 20U), 1, DataType::U16), + TensorInfo(TensorShape(16U, 20U), 1, DataType::U32), })), - framework::dataset::make("Expected", { true, false, true, false, true, true })), + framework::dataset::make("Expected", { false, false, false, false, true, true })), a_info, output_info, expected) { // Lock tensors @@ -90,9 +90,17 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datase validate(dst.info()->valid_region(), valid_region); // Validate padding - const PaddingSize padding(0, 0); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); + const unsigned int num_elems_processed_per_iteration_x = 1; + const unsigned int num_elems_processed_per_iteration_y = std::max(4, static_cast<int>(8 / src.info()->element_size())); + const unsigned int max_in_x = 
ceil_to_multiple(shape[0], num_elems_processed_per_iteration_x); + const unsigned int max_in_y = ceil_to_multiple(shape[1], num_elems_processed_per_iteration_y); + const unsigned int max_out_x = ceil_to_multiple(output_shape[0], num_elems_processed_per_iteration_y); + const unsigned int max_out_y = ceil_to_multiple(output_shape[1], num_elems_processed_per_iteration_x); + + const PaddingSize in_padding(0, max_in_x - shape[0], max_in_y - shape[1], 0); + const PaddingSize out_padding(0, max_out_x - output_shape[0], max_out_y - output_shape[1], 0); + validate(src.info()->padding(), in_padding); + validate(dst.info()->padding(), out_padding); } template <typename T> -- cgit v1.2.1