diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-08-28 16:24:56 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:54 +0000 |
commit | ea9e0dc18c408fecb6dc482b774bd900dd321610 (patch) | |
tree | b6e67a6559b53b5d4d97f77251d83ac73a6e55a5 | |
parent | 84797636b0ad44c16838df4177cf5a05aa929781 (diff) | |
download | ComputeLibrary-ea9e0dc18c408fecb6dc482b774bd900dd321610.tar.gz |
COMPMID-1469: Add validate in NEGEMMMatrixAdditionKernel
Change-Id: I228e2503eb40c12869fbd7e834ac1309aa613480
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145878
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r-- | arm_compute/core/CPP/ICPPSimpleKernel.h | 10 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 11 | ||||
-rw-r--r-- | src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp | 2 | ||||
-rw-r--r-- | src/core/CPP/ICPPSimpleKernel.cpp | 43 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 43 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMM.cpp | 6 |
6 files changed, 91 insertions, 24 deletions
diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h index d8cdc794ef..086c71f776 100644 --- a/arm_compute/core/CPP/ICPPSimpleKernel.h +++ b/arm_compute/core/CPP/ICPPSimpleKernel.h @@ -57,6 +57,16 @@ protected: * @param[in] border_size (Optional) Size of the border. */ void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel. + * + * @param[in] input Source tensor info. + * @param[in] output Destination tensor info. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration, + bool border_undefined = false, const BorderSize &border_size = BorderSize()); protected: const ITensor *_input; diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h index 1a235933dc..9d74cfce97 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h @@ -64,6 +64,17 @@ public: * @param[in] beta Weight of matrix C */ void configure(const ITensor *input, ITensor *output, float beta); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32 + * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. + * @param[in] beta Weight of matrix C + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp index 0c65bb40c0..825d7fb216 100644 --- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp @@ -60,7 +60,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, float beta) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_UNUSED(input, output, beta); + ARM_COMPUTE_UNUSED(beta); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); diff --git a/src/core/CPP/ICPPSimpleKernel.cpp b/src/core/CPP/ICPPSimpleKernel.cpp index 9d18a9c165..01fb016ffe 100644 --- a/src/core/CPP/ICPPSimpleKernel.cpp +++ b/src/core/CPP/ICPPSimpleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,7 +27,26 @@ #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -using namespace arm_compute; +namespace arm_compute +{ +namespace +{ +std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int num_elems_processed_per_iteration, + bool border_undefined, const arm_compute::BorderSize &border_size) +{ + // Configure kernel window + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration), border_undefined, border_size); + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); + + bool window_changed = update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, input->valid_region(), border_undefined, border_size); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} +} // namespace ICPPSimpleKernel::ICPPSimpleKernel() : _input{ nullptr }, _output{ nullptr } @@ -40,14 +59,16 @@ void ICPPSimpleKernel::configure(const ITensor *input, ITensor *output, unsigned _output = output; // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), - output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size); + auto win_config = validate_and_configure_window(input->info(), output->info(), num_elems_processed_per_iteration, border_undefined, border_size); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + ICPPKernel::configure(win_config.second); +} - ICPPKernel::configure(win); +Status ICPPSimpleKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration, + bool border_undefined, const arm_compute::BorderSize &border_size) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration, border_undefined, border_size).first); + return Status{}; } + +} // namespace arm_compute
\ No newline at end of file diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index cd6aa553db..757dbbc399 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -32,15 +32,27 @@ #include <arm_neon.h> -using namespace arm_compute; - namespace arm_compute { -class Coordinates; -} // namespace arm_compute - namespace { +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, float beta) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_UNUSED(beta); + + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + + if(output->total_size() > 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + } + + return Status{}; +} + void matrix_addition_f32(const ITensor *input, ITensor *output, const Window &window, float beta) { const float32x4_t beta_f32 = vdupq_n_f32(beta); @@ -101,12 +113,10 @@ NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel() void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output, float beta) { - ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), beta)); switch(input->info()->data_type()) { @@ -123,13 +133,21 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output break; } + // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; - INESimpleKernel::configure(input, output, num_elems_processed_per_iteration); _beta = beta; } +Status NEGEMMMatrixAdditionKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float beta) +{ + constexpr unsigned int num_elems_processed_per_iteration = 16; + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, beta)); + ARM_COMPUTE_RETURN_ON_ERROR(INESimpleKernel::validate(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration)); + return Status{}; +} + void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); @@ -141,3 +159,4 @@ void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &inf (*_func)(_input, _output, window, _beta); } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index de51266267..321ecf85d8 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -218,6 +218,12 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &tmp_output_info, alpha, run_interleave_transpose, reshape_info)); } + // Validate matrix addition kernel + if(beta != 0 && c != nullptr) + { + ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAdditionKernel::validate(c, output, beta)); + } + return Status{}; } |