about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-08-28 16:24:56 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit ea9e0dc18c408fecb6dc482b774bd900dd321610 (patch)
tree b6e67a6559b53b5d4d97f77251d83ac73a6e55a5
parent 84797636b0ad44c16838df4177cf5a05aa929781 (diff)
download ComputeLibrary-ea9e0dc18c408fecb6dc482b774bd900dd321610.tar.gz
COMPMID-1469: Add validate in NEGEMMMatrixAdditionKernel
Change-Id: I228e2503eb40c12869fbd7e834ac1309aa613480
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145878
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r-- arm_compute/core/CPP/ICPPSimpleKernel.h 10
-rw-r--r-- arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h 11
-rw-r--r-- src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp 2
-rw-r--r-- src/core/CPP/ICPPSimpleKernel.cpp 43
-rw-r--r-- src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp 43
-rw-r--r-- src/runtime/NEON/functions/NEGEMM.cpp 6
6 files changed, 91 insertions, 24 deletions
diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h
index d8cdc794ef..086c71f776 100644
--- a/arm_compute/core/CPP/ICPPSimpleKernel.h
+++ b/arm_compute/core/CPP/ICPPSimpleKernel.h
@@ -57,6 +57,16 @@ protected:
* @param[in] border_size (Optional) Size of the border.
*/
void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
+ /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel.
+ *
+ * @param[in] input Source tensor info.
+ * @param[in] output Destination tensor info.
+ * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
+ * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
+ * @param[in] border_size (Optional) Size of the border.
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration,
+ bool border_undefined = false, const BorderSize &border_size = BorderSize());
protected:
const ITensor *_input;
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
index 1a235933dc..9d74cfce97 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
@@ -64,6 +64,17 @@ public:
* @param[in] beta Weight of matrix C
*/
void configure(const ITensor *input, ITensor *output, float beta);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel.
+ *
+ * @note The input and output tensor must have the same dimensions
+ *
+ * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32
+ * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
+ * @param[in] beta Weight of matrix C
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
index 0c65bb40c0..825d7fb216 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
@@ -60,7 +60,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, float beta)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_UNUSED(input, output, beta);
+ ARM_COMPUTE_UNUSED(beta);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
diff --git a/src/core/CPP/ICPPSimpleKernel.cpp b/src/core/CPP/ICPPSimpleKernel.cpp
index 9d18a9c165..01fb016ffe 100644
--- a/src/core/CPP/ICPPSimpleKernel.cpp
+++ b/src/core/CPP/ICPPSimpleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,26 @@
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+namespace
+{
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int num_elems_processed_per_iteration,
+ bool border_undefined, const arm_compute::BorderSize &border_size)
+{
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration), border_undefined, border_size);
+ AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+
+ bool window_changed = update_window_and_padding(win, input_access, output_access);
+
+ output_access.set_valid_region(win, input->valid_region(), border_undefined, border_size);
+
+ Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
ICPPSimpleKernel::ICPPSimpleKernel()
: _input{ nullptr }, _output{ nullptr }
@@ -40,14 +59,16 @@ void ICPPSimpleKernel::configure(const ITensor *input, ITensor *output, unsigned
_output = output;
// Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
- output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size);
+ auto win_config = validate_and_configure_window(input->info(), output->info(), num_elems_processed_per_iteration, border_undefined, border_size);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICPPKernel::configure(win_config.second);
+}
- ICPPKernel::configure(win);
+Status ICPPSimpleKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration,
+ bool border_undefined, const arm_compute::BorderSize &border_size)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration, border_undefined, border_size).first);
+ return Status{};
}
+
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index cd6aa553db..757dbbc399 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -32,15 +32,27 @@
#include <arm_neon.h>
-using namespace arm_compute;
-
namespace arm_compute
{
-class Coordinates;
-} // namespace arm_compute
-
namespace
{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, float beta)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_UNUSED(beta);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+
+ if(output->total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ }
+
+ return Status{};
+}
+
void matrix_addition_f32(const ITensor *input, ITensor *output, const Window &window, float beta)
{
const float32x4_t beta_f32 = vdupq_n_f32(beta);
@@ -101,12 +113,10 @@ NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel()
void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output, float beta)
{
- ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), beta));
switch(input->info()->data_type())
{
@@ -123,13 +133,21 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output
break;
}
+ // Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 16;
-
INESimpleKernel::configure(input, output, num_elems_processed_per_iteration);
_beta = beta;
}
+Status NEGEMMMatrixAdditionKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float beta)
+{
+ constexpr unsigned int num_elems_processed_per_iteration = 16;
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, beta));
+ ARM_COMPUTE_RETURN_ON_ERROR(INESimpleKernel::validate(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration));
+ return Status{};
+}
+
void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
@@ -141,3 +159,4 @@ void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &inf
(*_func)(_input, _output, window, _beta);
}
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index de51266267..321ecf85d8 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -218,6 +218,12 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &tmp_output_info, alpha, run_interleave_transpose, reshape_info));
}
+ // Validate matrix addition kernel
+ if(beta != 0 && c != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAdditionKernel::validate(c, output, beta));
+ }
+
return Status{};
}