From ea9e0dc18c408fecb6dc482b774bd900dd321610 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 28 Aug 2018 16:24:56 +0100 Subject: COMPMID-1469: Add validate in NEGEMMMatrixAdditionKernel Change-Id: I228e2503eb40c12869fbd7e834ac1309aa613480 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145878 Reviewed-by: Giorgio Arena Tested-by: Jenkins --- arm_compute/core/CPP/ICPPSimpleKernel.h | 10 +++++ .../core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 11 ++++++ src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp | 2 +- src/core/CPP/ICPPSimpleKernel.cpp | 43 ++++++++++++++++------ .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 43 ++++++++++++++++------ src/runtime/NEON/functions/NEGEMM.cpp | 6 +++ 6 files changed, 91 insertions(+), 24 deletions(-) diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h index d8cdc794ef..086c71f776 100644 --- a/arm_compute/core/CPP/ICPPSimpleKernel.h +++ b/arm_compute/core/CPP/ICPPSimpleKernel.h @@ -57,6 +57,16 @@ protected: * @param[in] border_size (Optional) Size of the border. */ void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel. + * + * @param[in] input Source tensor info. + * @param[in] output Destination tensor info. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. 
+ */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration, + bool border_undefined = false, const BorderSize &border_size = BorderSize()); protected: const ITensor *_input; diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h index 1a235933dc..9d74cfce97 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h @@ -64,6 +64,17 @@ public: * @param[in] beta Weight of matrix C */ void configure(const ITensor *input, ITensor *output, float beta); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32 + * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. 
+ * @param[in] beta Weight of matrix C + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp index 0c65bb40c0..825d7fb216 100644 --- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp @@ -60,7 +60,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, float beta) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_UNUSED(input, output, beta); + ARM_COMPUTE_UNUSED(beta); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); diff --git a/src/core/CPP/ICPPSimpleKernel.cpp b/src/core/CPP/ICPPSimpleKernel.cpp index 9d18a9c165..01fb016ffe 100644 --- a/src/core/CPP/ICPPSimpleKernel.cpp +++ b/src/core/CPP/ICPPSimpleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,7 +27,26 @@ #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -using namespace arm_compute; +namespace arm_compute +{ +namespace +{ +std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int num_elems_processed_per_iteration, + bool border_undefined, const arm_compute::BorderSize &border_size) +{ + // Configure kernel window + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration), border_undefined, border_size); + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); + + bool window_changed = update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, input->valid_region(), border_undefined, border_size); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} +} // namespace ICPPSimpleKernel::ICPPSimpleKernel() : _input{ nullptr }, _output{ nullptr } @@ -40,14 +59,16 @@ void ICPPSimpleKernel::configure(const ITensor *input, ITensor *output, unsigned _output = output; // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), - output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size); + auto win_config = validate_and_configure_window(input->info(), output->info(), num_elems_processed_per_iteration, border_undefined, border_size); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); +
ICPPKernel::configure(win_config.second); +} - ICPPKernel::configure(win); +Status ICPPSimpleKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration, + bool border_undefined, const arm_compute::BorderSize &border_size) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration, border_undefined, border_size).first); + return Status{}; } + +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index cd6aa553db..757dbbc399 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -32,15 +32,27 @@ #include <arm_neon.h> -using namespace arm_compute; - namespace arm_compute { -class Coordinates; -} // namespace arm_compute - namespace { +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, float beta) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_UNUSED(beta); + + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + + if(output->total_size() > 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + } + + return Status{}; +} + void matrix_addition_f32(const ITensor *input, ITensor *output, const Window &window, float beta) { const float32x4_t beta_f32 = vdupq_n_f32(beta); @@ -101,12 +113,10 @@ NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel() void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output, float beta) { - ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); -
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), beta)); switch(input->info()->data_type()) { @@ -123,13 +133,21 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output break; } + // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; - INESimpleKernel::configure(input, output, num_elems_processed_per_iteration); _beta = beta; } +Status NEGEMMMatrixAdditionKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float beta) +{ + constexpr unsigned int num_elems_processed_per_iteration = 16; + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, beta)); + ARM_COMPUTE_RETURN_ON_ERROR(INESimpleKernel::validate(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration)); + return Status{}; +} + void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); @@ -141,3 +159,4 @@ void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &inf (*_func)(_input, _output, window, _beta); } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index de51266267..321ecf85d8 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -218,6 +218,12 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &tmp_output_info, alpha, run_interleave_transpose, reshape_info)); 
} + // Validate matrix addition kernel + if(beta != 0 && c != nullptr) + { + ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAdditionKernel::validate(c, output, beta)); + } + return Status{}; } -- cgit v1.2.1