From ea9e0dc18c408fecb6dc482b774bd900dd321610 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Tue, 28 Aug 2018 16:24:56 +0100
Subject: COMPMID-1469: Add validate in NEGEMMMatrixAdditionKernel

Change-Id: I228e2503eb40c12869fbd7e834ac1309aa613480
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145878
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/CPP/ICPPSimpleKernel.h            | 10 +++++
 .../core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 11 ++++++
 src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp |  2 +-
 src/core/CPP/ICPPSimpleKernel.cpp                  | 43 ++++++++++++++++------
 .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp    | 43 ++++++++++++++++------
 src/runtime/NEON/functions/NEGEMM.cpp              |  6 +++
 6 files changed, 91 insertions(+), 24 deletions(-)

diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h
index d8cdc794ef..086c71f776 100644
--- a/arm_compute/core/CPP/ICPPSimpleKernel.h
+++ b/arm_compute/core/CPP/ICPPSimpleKernel.h
@@ -57,6 +57,16 @@ protected:
      * @param[in]  border_size                       (Optional) Size of the border.
      */
     void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
+    /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel.
+     *
+     * @param[in] input                             Source tensor info.
+     * @param[in] output                            Destination tensor info.
+     * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
+     * @param[in] border_undefined                  (Optional) True if the border mode is undefined. False if it's replicate or constant.
+     * @param[in] border_size                       (Optional) Size of the border.
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration,
+                           bool border_undefined = false, const BorderSize &border_size = BorderSize());
 
 protected:
     const ITensor *_input;
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
index 1a235933dc..9d74cfce97 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
@@ -64,6 +64,17 @@ public:
      * @param[in]      beta   Weight of matrix C
      */
     void configure(const ITensor *input, ITensor *output, float beta);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel.
+     *
+     * @note The input and output tensor must have the same dimensions
+     *
+     * @param[in] input  Input tensor info (Matrix C). Data types supported: F16/F32
+     * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
+     * @param[in] beta   Weight of matrix C
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
index 0c65bb40c0..825d7fb216 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
@@ -60,7 +60,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, float beta)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_UNUSED(input, output, beta);
+    ARM_COMPUTE_UNUSED(beta);
 
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
diff --git a/src/core/CPP/ICPPSimpleKernel.cpp b/src/core/CPP/ICPPSimpleKernel.cpp
index 9d18a9c165..01fb016ffe 100644
--- a/src/core/CPP/ICPPSimpleKernel.cpp
+++ b/src/core/CPP/ICPPSimpleKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,7 +27,26 @@
 #include "arm_compute/core/IAccessWindow.h"
 #include "arm_compute/core/ITensor.h"
 
-using namespace arm_compute;
+namespace arm_compute
+{
+namespace
+{
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int num_elems_processed_per_iteration,
+                                                        bool border_undefined, const arm_compute::BorderSize &border_size)
+{
+    // Configure kernel window
+    Window                 win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration), border_undefined, border_size);
+    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+
+    bool window_changed = update_window_and_padding(win, input_access, output_access);
+
+    output_access.set_valid_region(win, input->valid_region(), border_undefined, border_size);
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+} // namespace
 
 ICPPSimpleKernel::ICPPSimpleKernel()
     : _input{ nullptr }, _output{ nullptr }
@@ -40,14 +59,16 @@ void ICPPSimpleKernel::configure(const ITensor *input, ITensor *output, unsigned
     _output = output;
 
     // Configure kernel window
-    Window                 win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win,
-                              AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
-                              output_access);
-
-    output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size);
+    auto win_config = validate_and_configure_window(input->info(), output->info(), num_elems_processed_per_iteration, border_undefined, border_size);
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    ICPPKernel::configure(win_config.second);
+}
 
-    ICPPKernel::configure(win);
+Status ICPPSimpleKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration,
+                                  bool border_undefined, const arm_compute::BorderSize &border_size)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration, border_undefined, border_size).first);
+    return Status{};
 }
+
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index cd6aa553db..757dbbc399 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -32,15 +32,27 @@
 
 #include <arm_neon.h>
 
-using namespace arm_compute;
-
 namespace arm_compute
 {
-class Coordinates;
-} // namespace arm_compute
-
 namespace
 {
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, float beta)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_UNUSED(beta);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+
+    if(output->total_size() > 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+    }
+
+    return Status{};
+}
+
 void matrix_addition_f32(const ITensor *input, ITensor *output, const Window &window, float beta)
 {
     const float32x4_t beta_f32 = vdupq_n_f32(beta);
@@ -101,12 +113,10 @@ NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel()
 
 void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output, float beta)
 {
-    ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1));
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), beta));
 
     switch(input->info()->data_type())
     {
@@ -123,13 +133,21 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output
             break;
     }
 
+    // Configure kernel window
     constexpr unsigned int num_elems_processed_per_iteration = 16;
-
     INESimpleKernel::configure(input, output, num_elems_processed_per_iteration);
 
     _beta = beta;
 }
 
+Status NEGEMMMatrixAdditionKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float beta)
+{
+    constexpr unsigned int num_elems_processed_per_iteration = 16;
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, beta));
+    ARM_COMPUTE_RETURN_ON_ERROR(INESimpleKernel::validate(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration));
+    return Status{};
+}
+
 void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
@@ -141,3 +159,4 @@ void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &inf
         (*_func)(_input, _output, window, _beta);
     }
 }
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index de51266267..321ecf85d8 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -218,6 +218,12 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
         ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &tmp_output_info, alpha, run_interleave_transpose, reshape_info));
     }
 
+    // Validate matrix addition kernel
+    if(beta != 0 && c != nullptr)
+    {
+        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAdditionKernel::validate(c, output, beta));
+    }
+
     return Status{};
 }
 
-- 
cgit v1.2.1