From 358ca205c9e41f523517ffa55a9057308b736040 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 7 Dec 2017 16:47:52 +0000 Subject: COMPMID-617: Adds CLFullyConnectionLayer validation support Change-Id: I4d2eb9872a3165fdcaa7784596e441cbe563dbc2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112577 Tested-by: Jenkins Reviewed-by: Ioan-Cristian Szabo Reviewed-by: Anthony Barbier --- src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 97 +++++++++++++++++------- 1 file changed, 69 insertions(+), 28 deletions(-) (limited to 'src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp') diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp index 35074f94cf..69a545b76b 100644 --- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp +++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp @@ -33,36 +33,82 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include using namespace arm_compute; +using namespace arm_compute::misc::shape_calculator; -void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *output) +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8, - DataType::U16, DataType::S16, DataType::QS16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::U8, DataType::S8, + DataType::QS16, DataType::U16, DataType::S16, DataType::U32, DataType::S32, + DataType::F16, DataType::F32); + + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), + compute_transpose1xW_with_element_size_shape(*input)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + } - TensorShape output_shape{ input->info()->tensor_shape() }; - const size_t transpose_w = 16 / input->info()->element_size(); - output_shape.set(0, input->info()->dimension(1) * transpose_w); - output_shape.set(1, static_cast(std::ceil((input->info()->dimension(0) / static_cast(transpose_w))))); + return Status{}; +} - // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); +std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int &num_elems_processed_per_iteration) +{ + num_elems_processed_per_iteration = 16 / input->element_size(); + + const int scale_x = num_elems_processed_per_iteration; + bool window_changed = false; + + // Configure kernel window + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + + if((win.x().end() / scale_x) == 0) + { + return std::make_pair(ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Transposed shape would be 0 in the second dimension"), win); + } + + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + window_changed = window_changed || update_window_and_padding(win, input_access); + + // Configure window in case of configured output + if(output->total_size() != 0) + { + AccessWindowTranspose output_access(output, 0, 0, num_elems_processed_per_iteration, 1, scale_x, 1.f / scale_x); + window_changed = window_changed || update_window_and_padding(win, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), input->tensor_shape())); + } + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} +} // namespace - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); +void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + // Output tensor auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_transpose1xW_with_element_size_shape(*input->info()))); - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - const int scale_x = num_elems_processed_per_iteration; + // Perform validate step + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); _input = input; _output = output; + // Configure kernel window + unsigned int num_elems_processed_per_iteration = 1; + auto win_config = validate_and_configure_window(input->info(), output->info(), num_elems_processed_per_iteration); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + ICLKernel::configure(win_config.second); + /* * Following an example of how the transposition1xW works when the input data type is F32 * @@ -76,20 +122,15 @@ void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *outp // Create kernel std::string kernel_name = "gemm_transpose1x" + support::cpp11::to_string(num_elems_processed_per_iteration); _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); +} - // Configure window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - ARM_COMPUTE_ERROR_ON_MSG((win.x().end() / scale_x) == 0, "Transposed shape would be 0 in the second dimension"); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowTranspose output_access(output->info(), 0, 0, num_elems_processed_per_iteration, 1, scale_x, 1.f / scale_x); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), input->info()->tensor_shape())); +Status CLGEMMTranspose1xWKernel::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + unsigned int num_elems_processed_per_iteration = 1; + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration).first); - ICLKernel::configure(win); + return Status{}; } void CLGEMMTranspose1xWKernel::run(const Window &window, cl::CommandQueue &queue) -- cgit v1.2.1