From 7c435f2e32e3441ac6c288e786f25c86b65e1453 Mon Sep 17 00:00:00 2001 From: Gian Marco Date: Tue, 5 Dec 2017 16:17:23 +0000 Subject: COMPMID-728 - Added validation for transpose - Added validation in NETranspose - Added validation in CLTranspose Change-Id: I51aa1810c957fda75bdf899f33116d96a76d89a1 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111999 Reviewed-by: Anthony Barbier Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com --- arm_compute/core/CL/kernels/CLTransposeKernel.h | 8 ++ arm_compute/core/NEON/kernels/NETransposeKernel.h | 10 +- arm_compute/runtime/CL/functions/CLTranspose.h | 10 +- arm_compute/runtime/NEON/functions/NETranspose.h | 10 +- src/core/CL/kernels/CLTransposeKernel.cpp | 98 ++++++++++++----- src/core/NEON/kernels/NETransposeKernel.cpp | 125 ++++++++++++++++------ src/runtime/CL/functions/CLTranspose.cpp | 5 + src/runtime/NEON/functions/NETranspose.cpp | 5 + 8 files changed, 207 insertions(+), 64 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h index c4bfe706c5..e7367caf72 100644 --- a/arm_compute/core/CL/kernels/CLTransposeKernel.h +++ b/arm_compute/core/CL/kernels/CLTransposeKernel.h @@ -44,6 +44,14 @@ public: * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel + * + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 + * @param[in] output Output tensor. Data type supported: Same as @p input + * + * @return an error status + */ + static Error validate(const ITensorInfo *input, const ITensorInfo *output); }; } // namespace arm_compute #endif /* __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h index 71bd274372..66dc6b1989 100644 --- a/arm_compute/core/NEON/kernels/NETransposeKernel.h +++ b/arm_compute/core/NEON/kernels/NETransposeKernel.h @@ -53,10 +53,18 @@ public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel + * + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 + * @param[in] output Output tensor. Data type supported: Same as @p input + * + * @return an error status + */ + static Error validate(const ITensorInfo *input, const ITensorInfo *output); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h index 9ac5458a93..6bfc31634f 100644 --- a/arm_compute/runtime/CL/functions/CLTranspose.h +++ b/arm_compute/runtime/CL/functions/CLTranspose.h @@ -40,10 +40,18 @@ class CLTranspose : public ICLSimpleFunction public: /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. 
Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLTranspose + * + * @param[in] input The input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] output The output tensor. Data types supported: Same as @p input + * + * @return an error status + */ + static Error validate(const ITensorInfo *input, const ITensorInfo *output); }; } diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 4b606e7282..3fe8a63163 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -41,10 +41,18 @@ class NETranspose : public INESimpleFunction public: /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NETranspose + * + * @param[in] input The input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] output The output tensor. Data types supported: Same as @p input + * + * @return an error status + */ + static Error validate(const ITensorInfo *input, const ITensorInfo *output); }; } diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp index 81ab217923..abc9f0219c 100644 --- a/src/core/CL/kernels/CLTransposeKernel.cpp +++ b/src/core/CL/kernels/CLTransposeKernel.cpp @@ -39,25 +39,80 @@ using namespace arm_compute; -void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) +namespace +{ +TensorShape transposed_tensor_shape(const TensorShape &in) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32, - DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - TensorShape output_shape{ input->info()->tensor_shape() }; - const size_t w_out = input->info()->dimension(1); - const size_t h_out = input->info()->dimension(0); + TensorShape output_shape{ in }; + const size_t w_out = in[1]; + const size_t h_out = in[0]; output_shape.set(0, w_out); output_shape.set(1, h_out); + return output_shape; +} + +Error validate_arguments(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32, + DataType::F16, + DataType::F32); + + if(output->total_size() != 0) + { + const TensorInfo tensor_info = input->clone()->set_tensor_shape(transposed_tensor_shape(input->tensor_shape())); + + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + } + + 
return Error{}; +} + +std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) +{ + // Configure kernel window + const unsigned int num_elems_processed_per_iteration = max_cl_vector_width / input->element_size(); + + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration, num_elems_processed_per_iteration)); + + AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration); + + bool window_changed = update_window_and_padding(win, input_access); + + if(output->total_size() != 0) + { + // TODO (COMPMID-708): Replace AccessWindowStatic with AccessWindowTranspose + AccessWindowStatic output_access(output, 0, 0, ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration), ceil_to_multiple(output->dimension(1), + num_elems_processed_per_iteration)); + + window_changed = window_changed || update_window_and_padding(win, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); + } + + Error err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Error{}; + return std::make_pair(err, win); +} +} // namespace + +Error CLTransposeKernel::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first); + return Error{}; +} + +void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position(), input->info()->quantization_info()); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(transposed_tensor_shape(input->info()->tensor_shape()))); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); _input = input; _output = output; @@ -71,18 +126,7 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) _kernel = static_cast(CLKernelLibrary::get().create_kernel("transpose", build_opts)); // Configure kernel window - const unsigned int num_elems_processed_per_iteration = max_cl_vector_width / input->info()->element_size(); - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_elems_processed_per_iteration)); - - AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration); - // TODO (COMPMID-708): Replace AccessWindowStatic with AccessWindowTranspose - AccessWindowStatic output_access(output->info(), 0, 0, ceil_to_multiple(output->info()->dimension(0), num_elems_processed_per_iteration), ceil_to_multiple(output->info()->dimension(1), - num_elems_processed_per_iteration)); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region()); - - ICLKernel::configure(win); + auto win_config = validate_and_configure_window(input->info(), output->info()); + 
ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + ICLKernel::configure(win_config.second); } diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp index 44becf5a4b..ce46c54546 100644 --- a/src/core/NEON/kernels/NETransposeKernel.cpp +++ b/src/core/NEON/kernels/NETransposeKernel.cpp @@ -41,6 +41,80 @@ class Coordinates; namespace { +TensorShape transposed_tensor_shape(const TensorShape &in) +{ + TensorShape output_shape{ in }; + const size_t w_out = in[1]; + const size_t h_out = in[0]; + output_shape.set(0, w_out); + output_shape.set(1, h_out); + + return output_shape; +} + +unsigned int num_elems_processed(size_t element_size) +{ + switch(element_size) + { + case 1: + return 8; + break; + case 2: + return 4; + break; + case 4: + return 4; + break; + default: + ARM_COMPUTE_ERROR("Element size not supported"); + break; + } +} + +Error validate_arguments(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32, + DataType::F16, + DataType::F32); + + if(output->total_size() != 0) + { + const TensorInfo tensor_info = input->clone()->set_tensor_shape(transposed_tensor_shape(input->tensor_shape())); + + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + } + + return Error{}; +} + +std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) +{ + const unsigned int num_elems_processed_per_iteration = num_elems_processed(input->element_size()); + + // Configure kernel window + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration, num_elems_processed_per_iteration)); + + AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration); + + bool window_changed = update_window_and_padding(win, input_access); + + if(output->total_size() != 0) + { + // TODO (COMPMID-708): Replace AccessWindowStatic with AccessWindowTranspose + AccessWindowStatic output_access(output, 0, 0, ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration), ceil_to_multiple(output->dimension(1), + num_elems_processed_per_iteration)); + + window_changed = window_changed || update_window_and_padding(win, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); + } + + Error err = (window_changed) ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Error{}; + return std::make_pair(err, win); +} + void transpose_8bit_elements(const ITensor *in, ITensor *out, const Window &window) { Window window_out(window); @@ -173,6 +247,14 @@ void transpose_32bit_elements(const ITensor *in, ITensor *out, const Window &win } } // namespace +Error NETransposeKernel::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first); + return Error{}; +} + NETransposeKernel::NETransposeKernel() : _func(nullptr), _input(nullptr), _output(nullptr) { @@ -180,41 +262,26 @@ NETransposeKernel::NETransposeKernel() void NETransposeKernel::configure(const ITensor *input, ITensor *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - TensorShape output_shape{ input->info()->tensor_shape() }; - const size_t w_out = input->info()->dimension(1); - const size_t h_out = input->info()->dimension(0); - output_shape.set(0, w_out); - output_shape.set(1, h_out); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(transposed_tensor_shape(input->info()->tensor_shape()))); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); _input = input; _output = output; - unsigned int num_elems_processed_per_iteration = 0; - switch(input->info()->element_size()) { case 1: - _func = &transpose_8bit_elements; - num_elems_processed_per_iteration = 8; + _func = &transpose_8bit_elements; break; case 2: - _func = &transpose_16bit_elements; - num_elems_processed_per_iteration = 4; + _func = &transpose_16bit_elements; break; case 4: - _func = &transpose_32bit_elements; - num_elems_processed_per_iteration = 4; + _func = &transpose_32bit_elements; break; default: ARM_COMPUTE_ERROR("Element size not supported"); @@ -222,19 +289,9 @@ void NETransposeKernel::configure(const ITensor *input, ITensor *output) } // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_elems_processed_per_iteration)); - - // TODO (COMPMID-708): Replace AccessWindowStatic with AccessWindowTranspose - AccessWindowStatic output_access(output->info(), 0, 0, ceil_to_multiple(output->info()->dimension(0), num_elems_processed_per_iteration), ceil_to_multiple(output->info()->dimension(1), - num_elems_processed_per_iteration)); - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration), - output_access); - - output_access.set_valid_region(win, input->info()->valid_region()); - - INEKernel::configure(win); + auto win_config = validate_and_configure_window(input->info(), 
output->info());
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
 }
 
 void NETransposeKernel::run(const Window &window, const ThreadInfo &info)
diff --git a/src/runtime/CL/functions/CLTranspose.cpp b/src/runtime/CL/functions/CLTranspose.cpp
index cd19e255bd..ad5c04124d 100644
--- a/src/runtime/CL/functions/CLTranspose.cpp
+++ b/src/runtime/CL/functions/CLTranspose.cpp
@@ -35,4 +35,9 @@ void CLTranspose::configure(const ICLTensor *input, ICLTensor *output)
     auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
     k->configure(input, output);
     _kernel = std::move(k);
+}
+
+Error CLTranspose::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+    return CLTransposeKernel::validate(input, output);
 }
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NETranspose.cpp b/src/runtime/NEON/functions/NETranspose.cpp
index eb81e02205..14bca69f33 100644
--- a/src/runtime/NEON/functions/NETranspose.cpp
+++ b/src/runtime/NEON/functions/NETranspose.cpp
@@ -36,3 +36,8 @@ void NETranspose::configure(const ITensor *input, ITensor *output)
     k->configure(input, output);
     _kernel = std::move(k);
 }
+
+Error NETranspose::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+    return NETransposeKernel::validate(input, output);
+}
\ No newline at end of file
--
cgit v1.2.1
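
Reviewer note: a minimal usage sketch of the API this patch adds. Only the NETranspose::validate()/configure() signatures come from the change itself; the tensor set-up (Tensor, TensorInfo, the shapes and data type chosen) is illustrative boilerplate following the library's usual runtime idioms, not part of this commit.

// Sketch, assuming the usual arm_compute runtime tensor workflow.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src;
    Tensor dst;

    // A 16x8 input transposes to an 8x16 output; both sides use the same data type.
    src.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::F32));

    // New in this patch: static validation that configures nothing and leaves
    // the caller's tensor infos untouched.
    Error err = NETranspose::validate(src.info(), dst.info());
    ARM_COMPUTE_ERROR_THROW_ON(err); // aborts/throws if the configuration is not supported

    // If validation passed, configure and run as usual.
    NETranspose transpose;
    transpose.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // (Buffers would normally be filled with real data before running.)

    transpose.run();
    return 0;
}

The same pattern applies to CLTranspose::validate(): both functions simply forward to the corresponding kernel's static validate(), which runs the argument checks plus a window-configuration dry run on cloned ITensorInfo objects, so calling validate() has no side effects on the tensors passed in.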