COMPMID-617 Add validation methods to Kernels

- NEActivationLayer - NESoftmax - NEDirectConvolutionLayer - NENormalizationLayer - NEPoolingLayer Change-Id: Ib279f1c1b7f9247679b0d6593aed7393da8fe87b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111335 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
author: Michalis Spyrou <michalis.spyrou@arm.com> 2017-11-30 14:25:57 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:17 +0000
commit: afa5d817b1d083837cd7ea30d32f845d82620c12 (patch)
tree: 1ca2a27ab7108b7137b96fc1547a8b5ac5d9c8f7
parent: 631c41a4e3645a948b0f597caa77e8fa91ca0efc (diff)
download: ComputeLibrary-afa5d817b1d083837cd7ea30d32f845d82620c12.tar.gz
29 files changed, 1127 insertions, 307 deletions
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
index ef51cbe841..e8c032aaeb 100644
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
@@ -59,6 +59,16 @@ public:
      * @param[in]      activation_info Activation layer information.
      */
     void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
+     *
+     * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
+     *                     of the activation function. Data types supported: QS8/QS16/F16/F32.
+     * @param[in] output   Destination tensor info. Data type supported: same as @p input
+     * @param[in] act_info Activation layer information.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h
index 14c8e9c7e0..05ade1c5dd 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h
@@ -57,6 +57,16 @@ public:
      *                         Data type supported: Same as @p input
      */
     void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerBiasAccumulateKernel
+     *
+     * @param[in] input  Input to add the bias to. If @p output is not specified then accumulation is done in-place.
+     *                   Data type supported: QS8/QS16/F16/QS32/F32
+     * @param[in] bias   The shared bias tensor to add. It must be a 1D tensor. Data type supported: Same as @p input, except QS8 for a QS16 input and QS16 for a QS32 input
+     * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
+     *                   Data type supported: QS8/QS16/F32, same as @p bias
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output = nullptr);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index 370ddca480..4529120f02 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -62,6 +62,20 @@ public:
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
      */
     void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel
+     *
+     * @param[in] input     The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
+     *                      while every optional dimension from 4 and above represents a batch of inputs. Data types supported: QS8/QS16/F16/F32.
+     * @param[in] weights   Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                      The 3rd dimension must be the same as the input's volume 3rd dimension.
+     *                      Data type supported: Same as @p input.
+     * @param[in] output    Output tensor.
+     *                      The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input.
+     * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
index 40fae3520b..405daf1068 100644
--- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
@@ -57,6 +57,18 @@ public:
      * @param[in]  norm_info     Normalization layer information like the normalization type, normalization size and other parameters.
      */
     void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel
+     *
+     * @param[in] input         Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                          and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32.
+     * @param[in] input_squared Source tensor with each element squared. 3 lower dims represent a single input with dimensions [width, height, IFM].
+     *                          Data type supported: same as @p input
+     * @param[in] output        Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+     * @param[in] norm_info     Normalization layer information like the normalization type, normalization size and other parameters.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
index 05eb8d6ddc..10f990e7ef 100644
--- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
@@ -64,9 +64,13 @@ public:
     void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
     /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel
      *
-     * @param[in] input1          An input tensor info. Data types supported: U8/QS8/QS16/S16/F16/F32
-     * @param[in] input2          An input tensor info. Data types supported: U8, QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
-     * @param[in] output          The output tensor info. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+     * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
+     *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
+     *       For QS8/QS16 scale = 1 is the only supported value.
+     *
+     * @param[in] input1          An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
+     * @param[in] input2          An input tensor. Data types supported: U8, QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+     * @param[in] output          The output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
      * @param[in] scale           Scale to apply after multiplication.
      *                            Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
      * @param[in] overflow_policy Overflow policy.
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index 0a57a26f17..87d14e5f91 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -55,6 +55,17 @@ public:
      * @param[in]  pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
      */
     void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel
+     *
+     * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
+     *
+     * @param[in] input     Source tensor. Data types supported: QS8/QS16/F16/F32.
+     * @param[in] output    Destination tensor. Data types supported: Same as @p input.
+     * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
@@ -144,7 +155,7 @@ private:
     const ITensor   *_input;
     ITensor         *_output;
     PoolingLayerInfo _pool_info;
-    int              _num_elems_processed_per_iteration;
+    unsigned int     _num_elems_processed_per_iteration;
     BorderSize       _border_size;
 };
 } // namespace arm_compute
diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
index c3e25181b6..0fecfac15a 100644
--- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
@@ -43,6 +43,14 @@ public:
      * @param[out] output Destination tensor. Data types supported: same as @p input
      */
     void configure(const ITensor *input, ITensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel
+     *
+     * @param[in] input  Source tensor. Data types supported: QS8/QS16/F16/F32
+     * @param[in] output Destination tensor. Data types supported: same as @p input
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
@@ -81,6 +89,17 @@ public:
      * @param[in]  beta   (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
      */
     void configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum, float beta = 1.0f);
+    /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DShiftExpSumKernel
+     *
+     * @param[in] input  Source tensor. Data types supported: QS8/QS16/F16/F32
+     * @param[in] max    Max values tensor. Data types supported: same as @p input
+     * @param[in] output Destination tensor. Data types supported: same as @p input.
+     * @param[in] sum    Sum of 1D logits tensor. Data types supported: same as @p input.
+     * @param[in] beta   (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum, float beta = 1.0f);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
@@ -120,6 +139,15 @@ public:
      * @param[out] output Destination tensor. Data types supported: same as @p input.
      */
     void configure(const ITensor *input, const ITensor *sum, ITensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DNormKernel
+     *
+     * @param[in] input  Source tensor. Data types supported: QS8/QS16/S32/F16/F32
+     * @param[in] sum    Sum tensor. The number of dimensions should be dim(input)-1. Data types supported: same as @p input.
+     * @param[in] output Destination tensor. Data types supported: same as @p input.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index f3cd305910..007c53a0a8 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -49,6 +49,16 @@ public:
      * @param[in]      activation_info Activation layer parameters.
      */
     void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer
+     *
+     * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
+     *                     of the activation function. Data types supported: QS8/QS16/F16/F32.
+     * @param[in] output   Destination tensor info. Data type supported: same as @p input
+     * @param[in] act_info Activation layer information.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
 };
 }
 #endif /* __ARM_COMPUTE_NEACTIVATIONLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index c731bf278f..6d8ce1f6f4 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -68,6 +68,26 @@ public:
      * @param[in]      conv_info Contains padding and stride information described in @ref PadStrideInfo.
      */
     void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayer
+     *
+     * @note DirectConvolution only works in the following configurations:
+     *    1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = QS8/QS16/F16/F32
+     *    3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = QS8/F16/F32
+     *    5x5 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F32
+     *
+     * @param[in] input     Input tensor. Data types supported: QS8/QS16/F16/F32.
+     * @param[in] weights   Set of kernels to convolve the input volume.
+     *                      Supported sizes: 1x1, 3x3 and 5x5.
+     *                      The 3rd dimension must be the same as the input's volume 3rd dimension.
+     *                      Data type supported: Same as @p input.
+     * @param[in] bias      Set of biases. Data type supported: Same as @p input.
+     * @param[in] output    Output tensor.
+     *                      The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input.
+     * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info);
 
     // Inherited methods overridden:
     void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
index 0d5656d602..4b5ad28706 100644
--- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -55,11 +55,21 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  input     Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
-     *                       and an optional 4th dimension for batch of inputs. Data type supported: QS8/F16/F32
+     *                       and an optional 4th dimension for batch of inputs. Data type supported: QS8/QS16/F16/F32
      * @param[out] output    Destination with the same dimensions, data type and number of channels of  @p input
      * @param[in]  norm_info Normalization layer information like the normalization type, normalization size and other parameters.
      */
     void configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayer
+     *
+     * @param[in] input     Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                      and an optional 4th dimension for batch of inputs. Data type supported: QS8/QS16/F16/F32
+     * @param[in] output    Destination with the same dimensions, data type and number of channels of  @p input
+     * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info);
 
     // Inherited methods overridden:
     void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index 7b038aaa51..0f8abb587d 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -53,6 +53,17 @@ public:
      * @param[in]      pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
      */
     void configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayer
+     *
+     * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
+     *
+     * @param[in] input     Source tensor. (Written to only when padding != 0) Data types supported: QS8/QS16/F16/F32.
+     * @param[in] output    Destination tensor. Data types supported: Same as @p input.
+     * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
 
     // Inherited methods overridden:
     void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index 38a0f2116f..5043f79c23 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -56,6 +56,15 @@ public:
      * @param[in]  beta   (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
      */
     void configure(ITensor *input, ITensor *output, float beta = 1.0f);
+    /** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer
+     *
+     * @param[in] input  Source tensor. Data types supported: QS8/QS16/F16/F32
+     * @param[in] output Destination tensor. Data types supported: same as @p input
+     * @param[in] beta   (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f);
 
     // Inherited methods overridden:
     void run() override;
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 5346dbbe15..eecc94f23c 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -36,7 +36,6 @@
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
 #include "support/ToolchainSupport.h"
 
 #include <cmath>
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 94bd5f15e3..6ea504a173 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -39,6 +39,51 @@
 #include <map>
 
 using namespace arm_compute;
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+{
+    // Static checks on the tensor infos; an error Status is returned for the first constraint that is violated.
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+    // Only the documented QS8/QS16/F16/F32 types: U8 used to pass here and then hit "Unsupported data type." in configure().
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    // A missing or still-empty output is not checked; a configured one must match the input.
+    if((output != nullptr) && (output->total_size() != 0))
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+    }
+    return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+    constexpr unsigned int num_elems_processed_per_iteration = 16;
+    Window                 win                               = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+    bool                   window_changed                    = false;
+    // Returns {status, window}. Out-of-place when a non-empty output info is given; otherwise only the input is accessed.
+    if(output != nullptr && (output->total_size() != 0))
+    {
+        AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+
+        window_changed = update_window_and_padding(win,
+                                                   AccessWindowHorizontal(input, 0, num_elems_processed_per_iteration),
+                                                   output_access);
+
+        output_access.set_valid_region(win, input->valid_region());
+    }
+    else
+    {
+        // In-place computation
+        window_changed = update_window_and_padding(win,
+                                                   AccessWindowHorizontal(input, 0, num_elems_processed_per_iteration));
+    }
+    // A changed window is reported as an error; the message below attributes it to insufficient padding.
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+} // namespace
 
 NEActivationLayerKernel::NEActivationLayerKernel()
     : _input(nullptr), _output(nullptr), _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
@@ -47,7 +92,7 @@ NEActivationLayerKernel::NEActivationLayerKernel()
 
 void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input);
 
     _input    = input;
     _act_info = activation_info;
@@ -56,15 +101,12 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
     if(output != nullptr)
     {
         // Output auto inizialitation if not yet initialized
-        auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
-
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
-
+        auto_init_if_empty(*output->info(), *input->info()->clone());
         _output = output;
     }
 
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr));
+
     // Activation functions : FP32
     static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
     {
@@ -149,29 +191,10 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
             ARM_COMPUTE_ERROR("Unsupported data type.");
     }
 
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
-
     // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
-    if(output != nullptr)
-    {
-        AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-        update_window_and_padding(win,
-                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
-                                  output_access);
-
-        output_access.set_valid_region(win, input->info()->valid_region());
-    }
-    else
-    {
-        // In-place computation
-        update_window_and_padding(win,
-                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
-    }
-
-    ICPPKernel::configure(win);
+    auto win_config = validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr);
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    ICPPKernel::configure(win_config.second);
 }
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -653,6 +676,15 @@ typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type NEActivati
     input, output);
 }
 
+Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+{
+    ARM_COMPUTE_UNUSED(act_info);
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);
+    // The window step takes mutable infos, so it ran on clones above; the caller's const infos are never handed to it.
+    return Status{};
+}
+
 void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
index f00af9f93e..a6585ade12 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
@@ -40,6 +40,62 @@ using namespace arm_compute;
 
 namespace
 {
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, bias);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::QS32, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::QS32, DataType::F32);
+    if(is_data_type_quantized(input->data_type())) // QS8 and QS16 inputs take a QS8 bias, a QS32 input takes a QS16 bias
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS8 && bias->data_type() != DataType::QS8, "Wrong data type for bias");
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS16 && bias->data_type() != DataType::QS8, "Wrong data type for bias");
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS32 && bias->data_type() != DataType::QS16, "Wrong data type for bias");
+    }
+    else
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+    }
+    // Input and bias must agree on the fixed point position
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, bias);
+
+    // Checks performed when output is configured
+    if((output != nullptr) && (output->total_size() != 0))
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::QS16, DataType::F32);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(bias, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(bias, output);
+    }
+    // Only a bias with at most one dimension is accepted
+    ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
+
+    return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output)
+{
+    bool               window_changed                    = false;
+    const unsigned int num_elems_processed_per_iteration = 16 / element_size_from_data_type(input->data_type());
+    // Returns {status, window}. Elements per iteration = 16 divided by the element size of the input data type.
+    // Configure kernel window
+    Window                 win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+    AccessWindowStatic     bias_access(bias, 0, 0, bias->dimension(0), bias->dimension(1));
+    if(output != nullptr && (output->total_size() != 0))
+    {
+        AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+        window_changed = update_window_and_padding(win, input_access, output_access, bias_access);
+        output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+    }
+    else
+    {
+        window_changed = update_window_and_padding(win, input_access, bias_access);
+        input_access.set_valid_region(win, ValidRegion(Coordinates(), input->tensor_shape()));
+    }
+    // A changed window is reported as an error; the message below attributes it to insufficient padding.
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+
 // Internal load
 inline float32x4_t internal_vld1q(const float *in)
 {
@@ -186,40 +242,26 @@ NEDirectConvolutionLayerBiasAccumulateKernel::NEDirectConvolutionLayerBiasAccumu
 
 void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, const ITensor *bias, ITensor *output)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::QS32, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::QS32, DataType::F32);
-    ARM_COMPUTE_ERROR_ON(input->info()->fixed_point_position() != bias->info()->fixed_point_position());
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, bias);
+
     if(output != nullptr)
     {
-        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::QS16, DataType::F32);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(bias, output);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(bias, output);
+        // Output tensor auto initialization if not yet initialized
+        auto_init_if_empty(*output->info(), *input->info());
     }
-    ARM_COMPUTE_ERROR_ON(bias->info()->num_dimensions() > 1);
+
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), bias->info(), (output == nullptr) ? nullptr : output->info()));
 
     _func   = nullptr;
     _bias   = bias;
     _input  = input;
     _output = output;
 
-    const unsigned int num_elems_processed_per_iteration = 16 / element_size_from_data_type(input->info()->data_type());
-
     // Configure kernel window
-    Window                 win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowStatic     bias_access(bias->info(), 0, 0, bias->info()->dimension(0), bias->info()->dimension(1));
-    if(output != nullptr)
-    {
-        AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-        update_window_and_padding(win, input_access, output_access, bias_access);
-        output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-    }
-    else
-    {
-        update_window_and_padding(win, input_access, bias_access);
-        input_access.set_valid_region(win, ValidRegion(Coordinates(), input->info()->tensor_shape()));
-    }
-    INEKernel::configure(win);
+    auto win_config = validate_and_configure_window(input->info(), bias->info(), (output == nullptr) ? nullptr : output->info());
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
 
     // Set appropriate function
     switch(input->info()->data_type())
@@ -266,6 +308,14 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con
     }
 }
 
+Status NEDirectConvolutionLayerBiasAccumulateKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output) // Runs the argument checks and the window checks and returns the first error, if any
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), bias->clone().get(), output == nullptr ? nullptr : output->clone().get()).first); // The window step receives clones of the infos (nullptr is forwarded for a null output); only the Status half of the pair is used
+
+    return Status{};
+}
+
 void NEDirectConvolutionLayerBiasAccumulateKernel::run(const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 78afbc2c20..1ca213b04a 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -251,10 +251,15 @@ inline qint32x4x2_t internal_vmlal(const qint32x4x2_t &x, const qint16x8_t &y, c
     return r;
 }
 
-constexpr int SmallTensorSizeOptim = 8;
+constexpr int small_tensor_size_optim = 8;
+inline bool run_optim_small_tensor_info(const ITensorInfo *t) // ITensorInfo-based form of the small-tensor check, so it can be used where only tensor info is available
+{
+    return t->dimension(Window::DimX) <= small_tensor_size_optim && t->dimension(Window::DimY) <= small_tensor_size_optim; // True when both the X and Y dimensions are at most small_tensor_size_optim
+}
+
 inline bool run_optim_small_tensor(const ITensor *t)
 {
-    return t->info()->dimension(Window::DimX) <= SmallTensorSizeOptim && t->info()->dimension(Window::DimY) <= SmallTensorSizeOptim;
+    return run_optim_small_tensor_info(t->info());
 }
 
 // Optimized convolver for 1x1 kernels used only where input width and height are both <= 8
@@ -266,8 +271,8 @@ class convolver_w1x1_i8x8_f32
 public:
     static void convolve(const Window &window, const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info)
     {
-        ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimX) > SmallTensorSizeOptim);
-        ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimY) > SmallTensorSizeOptim);
+        ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimX) > small_tensor_size_optim);
+        ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimY) > small_tensor_size_optim);
 
         const int          input_stride_y  = input->info()->strides_in_bytes().y();
         const int          input_stride_z  = input->info()->strides_in_bytes().z();
@@ -302,12 +307,12 @@ public:
 
         execute_window_loop(window_out, [&](const Coordinates & id)
         {
-            const uint8_t *input_ptr                    = in.ptr();
-            uint8_t       *out_ptr                      = out.ptr();
-            int            ih                           = 0;
-            int            oh                           = 0;
-            float32x4_t    accum0[SmallTensorSizeOptim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
-            float32x4_t    accum1[SmallTensorSizeOptim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
+            const uint8_t *input_ptr                       = in.ptr();
+            uint8_t       *out_ptr                         = out.ptr();
+            int            ih                              = 0;
+            int            oh                              = 0;
+            float32x4_t    accum0[small_tensor_size_optim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
+            float32x4_t    accum1[small_tensor_size_optim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
             for(int oz = 0; oz < range_z; ++oz)
             {
                 accum0[0] = accum0[1] = accum0[2] = accum0[3] = accum0[4] = accum0[5] = accum0[6] = accum0[7] = vdupq_n_f32(0.f);
@@ -992,121 +997,118 @@ inline void convolve_5x5(const Window &window, unsigned int num_elems_read_per_i
     }
 }
 
-} // namespace
-
-NEDirectConvolutionLayerKernel::NEDirectConvolutionLayerKernel()
-    : _input(nullptr), _weights(nullptr), _output(nullptr), _conv_info(), _border_size(0), _kernel_size(0), _num_weight_elems_read_per_row(0), _num_elems_read_per_iteration(0),
-      _num_elems_written_per_iteration(0)
-{
-}
-
-BorderSize NEDirectConvolutionLayerKernel::border_size() const
+inline TensorShape get_convolved_dimensions(const ITensorInfo *input, const ITensorInfo *weights, const int kernel_size, const PadStrideInfo &conv_info)
 {
-    return _border_size;
-}
-
-void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::QS16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
-    ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(0) == 1 && (std::get<0>(conv_info.pad()) || std::get<1>(conv_info.pad())),
-                             "Pad > 0 not supported for 1x1 weights");
-    ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(0) == 3 && (std::get<0>(conv_info.pad()) > 1 || std::get<1>(conv_info.pad()) > 1),
-                             "Pad > 1 not supported for 3x3 weights");
-    ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(0) == 5 && (std::get<0>(conv_info.pad()) > 2 || std::get<1>(conv_info.pad()) > 2),
-                             "Pad > 2 not supported for 5x5 weights");
-
-    ARM_COMPUTE_ERROR_ON_MSG(std::get<0>(conv_info.stride()) > 3, "Strides larger than 3 not supported.");
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1));
-    ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
-
-    const unsigned int conv_stride_x = std::get<0>(conv_info.stride());
-    const unsigned int conv_pad_x    = std::get<0>(conv_info.pad());
-    const unsigned int conv_pad_y    = std::get<1>(conv_info.pad());
-
-    _input       = input;
-    _weights     = weights;
-    _output      = output;
-    _conv_info   = conv_info;
-    _kernel_size = weights->info()->dimension(0);
-    _border_size = BorderSize(conv_pad_y, conv_pad_x);
-
-    const unsigned int kernel_size = weights->info()->dimension(0);
-
-    // Get convolved dimensions
     unsigned int output_width  = 0;
     unsigned int output_height = 0;
-    std::tie(output_width, output_height) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_size, kernel_size, conv_info);
+    std::tie(output_width, output_height) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_size, kernel_size, conv_info);
 
-    TensorShape output_shape = input->info()->tensor_shape();
+    TensorShape output_shape = input->tensor_shape();
     output_shape.set(0, output_width);
     output_shape.set(1, output_height);
-    output_shape.set(2, weights->info()->dimension(3));
+    output_shape.set(2, weights->dimension(3));
 
-    DataType data_type = input->info()->data_type();
+    return output_shape;
+}
 
-    if(is_data_type_fixed_point(data_type))
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(0) == 1 && (std::get<0>(conv_info.pad()) || std::get<1>(conv_info.pad())),
+                                    "Pad > 0 not supported for 1x1 weights");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(0) == 3 && (std::get<0>(conv_info.pad()) > 1 || std::get<1>(conv_info.pad()) > 1),
+                                    "Pad > 1 not supported for 3x3 weights");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(0) == 5 && (std::get<0>(conv_info.pad()) > 2 || std::get<1>(conv_info.pad()) > 2),
+                                    "Pad > 2 not supported for 5x5 weights");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(std::get<0>(conv_info.stride()) > 3, "Strides larger than 3 not supported.");
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != input->dimension(2));
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
+
+    // Checks performed when output is configured
+    if(output->total_size() != 0)
     {
-        // Promote data type in case of fixed point
-        data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32);
+        TensorShape output_shape = get_convolved_dimensions(input, weights, weights->dimension(0), conv_info);
+
+        DataType data_type = input->data_type();
+        if(is_data_type_fixed_point(data_type))
+        {
+            // Promote data type in case of fixed point
+            data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32);
+        }
+
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+        ARM_COMPUTE_RETURN_ERROR_ON(output->data_type() != data_type);
     }
 
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, data_type, input->info()->fixed_point_position());
+    return Status{};
+}
 
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, output->info()->data_type());
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int &num_weight_elems_read_per_row,
+                                                        unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration)
+{
+    // Calculate right and bottom border
+    unsigned int       kernel_size   = weights->dimension(0);
+    const unsigned int conv_pad_x    = std::get<0>(conv_info.pad());
+    const unsigned int conv_pad_y    = std::get<1>(conv_info.pad());
+    const unsigned int conv_stride_x = std::get<0>(conv_info.stride());
+    const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
+    BorderSize         border_size   = BorderSize(conv_pad_y, conv_pad_x);
+    const int          input_width   = input->dimension(0);
+    const int          input_height  = input->dimension(1);
 
-    switch(_kernel_size)
+    switch(kernel_size)
     {
         case 1:
         {
-            switch(input->info()->data_type())
+            switch(input->data_type())
             {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F16:
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
                 case DataType::QS8:
                 case DataType::QS16:
-                    _num_elems_written_per_iteration = 8;
+                    num_elems_written_per_iteration = 8;
                     break;
                 case DataType::F32:
-                    if(run_optim_small_tensor(input))
+                    if(run_optim_small_tensor_info(input))
                     {
-                        _num_elems_written_per_iteration = 8;
+                        num_elems_written_per_iteration = 8;
                     }
                     else
                     {
-                        _num_elems_written_per_iteration = 4;
+                        num_elems_written_per_iteration = 4;
                     }
                     break;
                 default:
                     ARM_COMPUTE_ERROR("Data type not supported.");
                     break;
             }
-            _num_weight_elems_read_per_row = kernel_size;
-            _num_elems_read_per_iteration  = conv_stride_x * _num_elems_written_per_iteration;
+            num_weight_elems_read_per_row = kernel_size;
+            num_elems_read_per_iteration  = conv_stride_x * num_elems_written_per_iteration;
             break;
         }
         case 3:
         case 5:
         {
-            switch(input->info()->data_type())
+            switch(input->data_type())
             {
                 case DataType::F32:
-                    _num_weight_elems_read_per_row   = 4 + _kernel_size - 1;
-                    _num_elems_read_per_iteration    = 12;
-                    _num_elems_written_per_iteration = 16 >> conv_stride_x;
+                    num_weight_elems_read_per_row   = 4 + kernel_size - 1;
+                    num_elems_read_per_iteration    = 12;
+                    num_elems_written_per_iteration = 16 >> conv_stride_x;
                     break;
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F16:
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
                 case DataType::QS8:
                 case DataType::QS16:
-                    _num_weight_elems_read_per_row   = 8 + _kernel_size - 1;
-                    _num_elems_read_per_iteration    = 24;
-                    _num_elems_written_per_iteration = 32 >> conv_stride_x;
+                    num_weight_elems_read_per_row   = 8 + kernel_size - 1;
+                    num_elems_read_per_iteration    = 24;
+                    num_elems_written_per_iteration = 32 >> conv_stride_x;
                     break;
                 default:
                     ARM_COMPUTE_ERROR("Data type not supported.");
@@ -1121,22 +1123,81 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
         }
     }
 
-    // Calculate right and bottom border
-    const unsigned int conv_stride_y = std::get<1>(_conv_info.stride());
-    const int          input_width   = input->info()->dimension(0);
-    const int          input_height  = input->info()->dimension(1);
-    const int          upper_bound_w = ceil_to_multiple(((output->info()->dimension(0) - 1) * conv_stride_x + _kernel_size), _num_elems_read_per_iteration) - conv_pad_x - input_width;
-    const int          upper_bound_h = ((output->info()->dimension(1) - 1) * conv_stride_y - conv_pad_y + _kernel_size) - input_height;
-    _border_size.right               = std::max(upper_bound_w, static_cast<int>(_kernel_size));
-    _border_size.bottom              = std::max(upper_bound_h, static_cast<int>(_kernel_size));
-    Window                 win       = calculate_max_window(*output->info(), Steps(_num_elems_written_per_iteration));
-    AccessWindowStatic     input_access(input->info(), -conv_pad_x, -conv_pad_y, input_width + _border_size.right, input_height + _border_size.bottom);
-    AccessWindowStatic     weights_access(weights->info(), 0, 0, _num_weight_elems_read_per_row, _kernel_size);
-    AccessWindowHorizontal output_access(output->info(), 0, _num_elems_written_per_iteration);
-    update_window_and_padding(win, input_access, weights_access, output_access);
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    INEKernel::configure(win);
+    const int upper_bound_w    = ceil_to_multiple(((output->dimension(0) - 1) * conv_stride_x + kernel_size), num_elems_read_per_iteration) - conv_pad_x - input_width;
+    const int upper_bound_h    = ((output->dimension(1) - 1) * conv_stride_y - conv_pad_y + kernel_size) - input_height;
+    border_size.right          = std::max(upper_bound_w, static_cast<int>(kernel_size));
+    border_size.bottom         = std::max(upper_bound_h, static_cast<int>(kernel_size));
+    Window                 win = calculate_max_window(*output, Steps(num_elems_written_per_iteration));
+    AccessWindowStatic     input_access(input, -conv_pad_x, -conv_pad_y, input_width + border_size.right, input_height + border_size.bottom);
+    AccessWindowStatic     weights_access(weights, 0, 0, num_weight_elems_read_per_row, kernel_size);
+    AccessWindowHorizontal output_access(output, 0, num_elems_written_per_iteration);
+    bool                   window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
+    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+} // namespace
+
+NEDirectConvolutionLayerKernel::NEDirectConvolutionLayerKernel() // Default constructor: tensor pointers start as nullptr, numeric members as 0, _conv_info is default-constructed
+    : _input(nullptr), _weights(nullptr), _output(nullptr), _conv_info(), _border_size(0), _kernel_size(0), _num_weight_elems_read_per_row(0), _num_elems_read_per_iteration(0),
+      _num_elems_written_per_iteration(0)
+{
+}
+
+BorderSize NEDirectConvolutionLayerKernel::border_size() const // Accessor for the border stored by the kernel
+{
+    return _border_size; // Returns the _border_size member as-is
+}
+
+void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) // Stores the tensors and convolution info, initialises an empty output, validates, then configures the kernel window
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+    const unsigned int conv_pad_x = std::get<0>(conv_info.pad());
+    const unsigned int conv_pad_y = std::get<1>(conv_info.pad());
+
+    _input       = input; // Note: members are assigned before the validation step further down
+    _weights     = weights;
+    _output      = output;
+    _conv_info   = conv_info;
+    _kernel_size = weights->info()->dimension(0); // Kernel size is read from dimension 0 of the weights
+    _border_size = BorderSize(conv_pad_y, conv_pad_x); // NOTE(review): validate_and_configure_window computes border right/bottom on its own local BorderSize, so this member keeps the pad-only value — confirm that is intended
+
+    // Get convolved dimensions
+    TensorShape output_shape = get_convolved_dimensions(input->info(), weights->info(), _kernel_size, conv_info);
+
+    DataType data_type = input->info()->data_type();
+
+    if(is_data_type_fixed_point(data_type))
+    {
+        // Promote data type in case of fixed point
+        data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32); // QS8 becomes QS16; any other fixed point type becomes QS32
+    }
+
+    // Output auto initialization if not yet initialized
+    auto_init_if_empty(*output->info(), output_shape, 1, data_type, input->info()->fixed_point_position());
+
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), output->info(), conv_info)); // Runs after the auto-initialization call above
+
+    // Configure kernel window
+    auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, _num_weight_elems_read_per_row, // The three element-count members are passed by reference and filled in by the helper
+                                                    _num_elems_read_per_iteration, _num_elems_written_per_iteration);
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
+}
+
+Status NEDirectConvolutionLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info) // Runs the argument checks and the window checks and returns the first error, if any
+{
+    unsigned int num_weight_elems_read_per_row   = 0; // Scratch outputs required by validate_and_configure_window; their values are not used afterwards
+    unsigned int num_elems_read_per_iteration    = 0;
+    unsigned int num_elems_written_per_iteration = 0;
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, output, conv_info)); // NOTE(review): validate_arguments accepts an output with total_size() == 0, yet the window step below reads output->dimension(0) and dimension(1) — confirm an unconfigured output is handled
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info, num_weight_elems_read_per_row, num_elems_read_per_iteration,
+                                                              num_elems_written_per_iteration)
+                                .first);
+
+    return Status{};
 }
 
 void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo &info)
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index b983609e49..776cb27d7a 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -34,6 +34,67 @@
 
 using namespace arm_compute;
 
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, const NormalizationLayerInfo &norm_info) // Checks the tensor infos and normalization info; returns an error Status on the first failed check
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_squared, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd"); // Fails when norm_size() is even
+
+    if(is_data_type_fixed_point(input->data_type())) // Extra checks that only apply to fixed point inputs
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, input_squared);
+        ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.beta(), input);
+        ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.kappa(), input);
+        ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.scale_coeff(), input);
+    }
+
+    // Checks performed when output is configured
+    if(output->total_size() != 0) // An output with zero total size skips the comparisons against input
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+    }
+
+    return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *input_squared, ITensorInfo *output, const NormalizationLayerInfo &norm_info) // Builds the execution window for the normalization kernel and returns it together with a status
+{
+    unsigned int       num_elems_processed_per_iteration = 16 / input->element_size(); // 16 divided by the input element size
+    const unsigned int num_elems_read_per_iteration      = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2); // Processed elements plus 2 * (norm_size() / 2) additional elements
+    const unsigned int num_rows                          = (norm_info.type() == NormType::IN_MAP_2D) ? norm_info.norm_size() : 1; // norm_size() rows for IN_MAP_2D, a single row otherwise
+    const unsigned int border_width                      = (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U); // No border for cross-map; otherwise norm_size() / 2 capped at 3
+    BorderSize         border_size                       = BorderSize(0, border_width);
+    bool               window_changed                    = false;
+
+    // Configure window
+    Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+
+    AccessWindowRectangle input_access(input, -border_size.left, 0, num_elems_read_per_iteration, num_rows); // Access starts border_size.left elements before the window position in X
+    AccessWindowRectangle input_squared_access(input_squared, -border_size.left, 0, num_elems_read_per_iteration, num_rows); // Same access pattern as the input
+
+    if(output->total_size() != 0) // Output has a non-zero total size: include it in the window/padding update
+    {
+        AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+        window_changed = update_window_and_padding(win, input_access, input_squared_access, output_access);
+        output_access.set_valid_region(win, input->valid_region()); // Output valid region is taken from the input's valid region
+    }
+    else // Output with zero total size: only the two inputs take part
+    {
+        window_changed = update_window_and_padding(win, input_access, input_squared_access);
+    }
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; // A true result from update_window_and_padding is reported as a runtime error
+    return std::make_pair(err, win);
+}
+} // namespace
+
 NENormalizationLayerKernel::NENormalizationLayerKernel()
     : _func(nullptr), _input(nullptr), _input_squared(nullptr), _output(nullptr), _norm_info(NormType::IN_MAP_1D), _border_size()
 {
@@ -46,20 +107,12 @@ BorderSize NENormalizationLayerKernel::border_size() const
 
 void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_squared, output);
     // Output tensor auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, input_squared, output);
-    ARM_COMPUTE_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
-    if(is_data_type_fixed_point(input->info()->data_type()))
-    {
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, input_squared, output);
-        ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.beta(), input);
-        ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.kappa(), input);
-        ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.scale_coeff(), input);
-    }
+    auto_init_if_empty(*output->info(), *input->info());
+
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), input_squared->info(), output->info(), norm_info));
 
     const unsigned int border_width = (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U);
 
@@ -69,14 +122,10 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
     _norm_info     = norm_info;
     _border_size   = BorderSize(0, border_width);
 
-    unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
-    ARM_COMPUTE_UNUSED(num_elems_processed_per_iteration);
-
     switch(_input->info()->data_type())
     {
         case DataType::F32:
         {
-            num_elems_processed_per_iteration = 4;
             switch(norm_info.type())
             {
                 case NormType::IN_MAP_1D:
@@ -90,14 +139,12 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
                     _func = &NENormalizationLayerKernel::normalize_float<DataType::F32, 2, false>;
                     break;
                 default:
-                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
         }
         case DataType::F16:
         {
-            num_elems_processed_per_iteration = 8;
             switch(norm_info.type())
             {
                 case NormType::IN_MAP_1D:
@@ -111,14 +158,12 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
                     _func = &NENormalizationLayerKernel::normalize_float<DataType::F16, 2, false>;
                     break;
                 default:
-                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
         }
         case DataType::QS8:
         {
-            num_elems_processed_per_iteration = 16;
             switch(norm_info.type())
             {
                 case NormType::IN_MAP_1D:
@@ -132,14 +177,12 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
                     _func = &NENormalizationLayerKernel::normalize_fixed_point<DataType::QS8, 2, false>;
                     break;
                 default:
-                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
         }
         case DataType::QS16:
         {
-            num_elems_processed_per_iteration = 8;
             switch(norm_info.type())
             {
                 case NormType::IN_MAP_1D:
@@ -153,7 +196,6 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
                     _func = &NENormalizationLayerKernel::normalize_fixed_point<DataType::QS16, 2, false>;
                     break;
                 default:
-                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
@@ -162,21 +204,10 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
             ARM_COMPUTE_ERROR("NOT SUPPORTED!");
     }
 
-    const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
-    const unsigned int num_rows                     = (norm_info.type() == NormType::IN_MAP_2D) ? norm_info.norm_size() : 1;
-
-    // Configure window
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowRectangle  input_access(input->info(), -_border_size.left, 0, num_elems_read_per_iteration, num_rows);
-    AccessWindowRectangle  input_squared_access(input_squared->info(), -_border_size.left, 0, num_elems_read_per_iteration, num_rows);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input_access, input_squared_access, output_access);
-
-    output_access.set_valid_region(win, input->info()->valid_region());
-
-    INEKernel::configure(win);
+    // Configure kernel window
+    auto win_config = validate_and_configure_window(input->info(), input_squared->info(), output->info(), norm_info);
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
 }
 
 template <DataType dt, unsigned int dim, bool do_2D_norm>
@@ -374,6 +405,14 @@ void NENormalizationLayerKernel::normalize_fixed_point(const Window &window)
     }
 }
 
+Status NENormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, const NormalizationLayerInfo norm_info)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, input_squared, output, norm_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), input_squared->clone().get(), output->clone().get(), norm_info).first);
+
+    return Status{};
+}
+
 void NENormalizationLayerKernel::run(const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 0e06704666..47372c2d5d 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -47,6 +47,15 @@ using namespace arm_compute;
 
 namespace
 {
+void auto_init(const ITensorInfo *input, ITensorInfo *output, unsigned int pooled_w, unsigned int pooled_h)
+{
+    TensorShape output_shape{ input->tensor_shape() };
+    output_shape.set(0, pooled_w);
+    output_shape.set(1, pooled_h);
+
+    auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
+}
+
 template <bool exclude_padding>
 inline float calculate_avg_scale(const Coordinates &id, const int pool_size, const int upper_bound_w, const int upper_bound_h,
                                  const int pad_x, const int pad_y, const int stride_x, const int stride_y)
@@ -88,75 +97,77 @@ inline qint16_t calculate_avg_scale_q16(const Coordinates &id, int pool_size, in
     const int val     = ((end_y - start_y) * (end_x - start_x));
     return sshr_qs16(scale_values_q16[val], (15 - fixed_point_position));
 }
-} // namespace
-
-NEPoolingLayerKernel::NEPoolingLayerKernel()
-    : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _num_elems_processed_per_iteration(0), _border_size(0)
-{
-}
 
-BorderSize NEPoolingLayerKernel::border_size() const
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &pooled_w, unsigned int pooled_h, int pool_size)
 {
-    return _border_size;
-}
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
 
-void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
-{
     int                 pool_pad_x        = 0;
     int                 pool_pad_y        = 0;
     int                 pool_stride_x     = 0;
     int                 pool_stride_y     = 0;
-    unsigned int        pooled_w          = 0;
-    unsigned int        pooled_h          = 0;
     PoolingType         pool_type         = pool_info.pool_type();
-    int                 pool_size         = pool_info.pool_size();
     const PadStrideInfo pad_stride_info   = pool_info.pad_stride_info();
     const bool          exclude_padding   = pool_info.exclude_padding();
     const bool          is_global_pooling = pool_info.is_global_pooling();
     std::tie(pool_pad_x, pool_pad_y)       = pad_stride_info.pad();
     std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
-
     static const std::set<int> supported_pool_sizes = { 2, 3 };
-    ARM_COMPUTE_UNUSED(supported_pool_sizes);
 
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_fixed_point(input->info()->data_type()));
-    ARM_COMPUTE_ERROR_ON((supported_pool_sizes.find(pool_size) == supported_pool_sizes.end()) && (input->info()->data_type() != DataType::F32));
-    ARM_COMPUTE_ERROR_ON(!is_global_pooling && (pool_pad_x >= pool_size || pool_pad_y >= pool_size));
-    ARM_COMPUTE_ERROR_ON(is_global_pooling && (input->info()->tensor_shape().x() != input->info()->tensor_shape().y()));
-    ARM_COMPUTE_ERROR_ON(is_data_type_fixed_point(input->info()->data_type()) && pool_stride_x > 2);
-    ARM_COMPUTE_ERROR_ON(exclude_padding && is_data_type_fixed_point(input->info()->data_type()));
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_fixed_point(input->data_type()));
+    ARM_COMPUTE_RETURN_ERROR_ON((supported_pool_sizes.find(pool_size) == supported_pool_sizes.end()) && (input->data_type() != DataType::F32));
+    ARM_COMPUTE_RETURN_ERROR_ON(!is_global_pooling && (pool_pad_x >= pool_size || pool_pad_y >= pool_size));
+    ARM_COMPUTE_RETURN_ERROR_ON(is_global_pooling && (input->tensor_shape().x() != input->tensor_shape().y()));
+    ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_fixed_point(input->data_type()) && pool_stride_x > 2);
+    ARM_COMPUTE_RETURN_ERROR_ON(exclude_padding && is_data_type_fixed_point(input->data_type()));
 
-    // Update pool size in case of global pooling
-    pool_size = is_global_pooling ? input->info()->dimension(0) : pool_size;
+    if(output->total_size() != 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(0) != pooled_w) || (output->dimension(1) != pooled_h));
+    }
 
-    // Check output dimensions
-    std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0),
-                                                     input->info()->dimension(1),
-                                                     pool_size,
-                                                     pool_size,
-                                                     pool_info.pad_stride_info());
+    return Status{};
+}
 
-    // Output auto initialization if not yet initialized
-    {
-        TensorShape output_shape{ input->info()->tensor_shape() };
-        output_shape.set(0, pooled_w);
-        output_shape.set(1, pooled_h);
+Status validate_arguments_pool_info(const ITensorInfo *input, const PoolingLayerInfo &pool_info, const unsigned int pool_size)
+{
+    const bool is_global_pooling = pool_info.is_global_pooling();
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_global_pooling && (input->tensor_shape().x() != input->tensor_shape().y()),
+                                    "Global pooling is supported only with rectangular inputs!");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_global_pooling && ((pool_info.pad_stride_info().pad().first >= pool_size) || (pool_info.pad_stride_info().pad().second >= pool_size)),
+                                    "Invalid pool size and pool pad combination!");
 
-        auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
-    }
+    return Status{};
+}
 
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
-    ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) != pooled_w) || (output->info()->dimension(1) != pooled_h));
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &num_elems_processed_per_iteration,
+                                                        BorderSize &border_size,
+                                                        unsigned int pooled_w, unsigned int pooled_h, int pool_size)
+{
+    unsigned int        num_elems_read_per_iteration = 0;
+    unsigned int        num_elems_horizontal_window  = 0;
+    int                 pool_pad_x                   = 0;
+    int                 pool_pad_y                   = 0;
+    int                 pool_stride_x                = 0;
+    int                 pool_stride_y                = 0;
+    const int           input_width                  = input->dimension(0);
+    const int           input_height                 = input->dimension(1);
+    const PadStrideInfo pad_stride_info              = pool_info.pad_stride_info();
+    std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
+    std::tie(pool_pad_x, pool_pad_y)       = pad_stride_info.pad();
 
-    unsigned int num_elems_read_per_iteration      = 0;
-    unsigned int num_elems_processed_per_iteration = 0;
-    unsigned int num_elems_horizontal_window       = 0;
+    // Check output dimensions
+    std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0),
+                                                     input->dimension(1),
+                                                     pool_size,
+                                                     pool_size,
+                                                     pad_stride_info);
 
     // Select element size
-    switch(input->info()->data_type())
+    switch(input->data_type())
     {
         case DataType::QS8:
             num_elems_read_per_iteration = 16;
@@ -233,19 +244,89 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
             break;
     }
 
-    _num_elems_processed_per_iteration = num_elems_processed_per_iteration;
-    const int input_width              = input->info()->dimension(0);
-    const int input_height             = input->info()->dimension(1);
-    const int upper_bound_w            = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
-    const int upper_bound_h            = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
+    const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
+    const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
+
+    border_size         = BorderSize(pool_pad_y, pool_pad_x);
+    border_size.right   = std::max(upper_bound_w, pool_pad_x);
+    border_size.bottom  = std::max(upper_bound_h, pool_pad_y);
+    bool window_changed = false;
+
+    TensorShape output_shape{ input->tensor_shape() };
+    output_shape.set(0, pooled_w);
+    output_shape.set(1, pooled_h);
+    TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
+
+    Window             win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
+    AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + border_size.right, input_height + border_size.bottom);
+
+    if(output->total_size() != 0)
+    {
+        AccessWindowHorizontal output_access(output, 0, num_elems_horizontal_window);
+        window_changed = update_window_and_padding(win, input_access, output_access);
+        output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+    }
+    else
+    {
+        window_changed = update_window_and_padding(win, input_access);
+    }
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+} // namespace
+
+NEPoolingLayerKernel::NEPoolingLayerKernel()
+    : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _num_elems_processed_per_iteration(0), _border_size(0)
+{
+}
+
+BorderSize NEPoolingLayerKernel::border_size() const
+{
+    return _border_size;
+}
+
+void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    int                 pool_pad_x        = 0;
+    int                 pool_pad_y        = 0;
+    int                 pool_stride_x     = 0;
+    int                 pool_stride_y     = 0;
+    unsigned int        pooled_w          = 0;
+    unsigned int        pooled_h          = 0;
+    PoolingType         pool_type         = pool_info.pool_type();
+    int                 pool_size         = pool_info.pool_size();
+    const PadStrideInfo pad_stride_info   = pool_info.pad_stride_info();
+    const bool          exclude_padding   = pool_info.exclude_padding();
+    const bool          is_global_pooling = pool_info.is_global_pooling();
+    std::tie(pool_pad_x, pool_pad_y)       = pad_stride_info.pad();
+    std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
+
+    // Update pool size in case of global pooling
+    pool_size = is_global_pooling ? input->info()->dimension(0) : pool_size;
+
+    // Validate pool info before calling scaled_dimensions
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(input->info(), pool_info, pool_size));
+
+    // Check output dimensions
+    std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0),
+                                                     input->info()->dimension(1),
+                                                     pool_size,
+                                                     pool_size,
+                                                     pool_info.pad_stride_info());
+
+    // Output auto initialization if not yet initialized
+    auto_init(input->info(), output->info(), pooled_w, pooled_h);
+
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h, pool_size));
 
     // Set instance variables
-    _input              = input;
-    _output             = output;
-    _pool_info          = pool_info;
-    _border_size        = BorderSize(pool_pad_y, pool_pad_x);
-    _border_size.right  = std::max(upper_bound_w, pool_pad_x);
-    _border_size.bottom = std::max(upper_bound_h, pool_pad_y);
+    _input     = input;
+    _output    = output;
+    _pool_info = pool_info;
 
     // Select appropriate function
     switch(pool_size)
@@ -413,12 +494,9 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
     }
 
     // Configure kernel window
-    Window                 win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-    AccessWindowStatic     input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right, input_height + _border_size.bottom);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_horizontal_window);
-    update_window_and_padding(win, input_access, output_access);
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-    INEKernel::configure(win);
+    auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size);
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
 }
 
 template <PoolingType pooling_type>
@@ -1154,6 +1232,34 @@ void NEPoolingLayerKernel::poolingN_f32(const Window &window_input, const Window
     input, output);
 }
 
+Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+
+    unsigned int pooled_w                          = 0;
+    unsigned int pooled_h                          = 0;
+    unsigned int num_elems_processed_per_iteration = 0;
+    BorderSize   border_size(0);
+
+    const bool         is_global_pooling = pool_info.is_global_pooling();
+    const unsigned int pool_size         = is_global_pooling ? input->tensor_shape().x() : pool_info.pool_size();
+
+    // Validate pool info before calling scaled_dimensions
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_pool_info(input, pool_info, pool_size));
+
+    // Check output dimensions
+    std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0),
+                                                     input->dimension(1),
+                                                     pool_size,
+                                                     pool_size,
+                                                     pool_info.pad_stride_info());
+
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, pooled_w, pooled_h, pool_size));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h, pool_size).first);
+
+    return Status{};
+}
+
 void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index a8a0f59a41..b13fb0e87c 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -42,6 +42,149 @@ using namespace arm_compute;
 
 namespace
 {
+Status validate_arguments_logits_1d_max(const ITensorInfo *input, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+
+    // Checks performed when output is configured
+    if(output->total_size() != 0)
+    {
+        // Softmax across the x dimension
+        TensorShape output_shape{ input->tensor_shape() };
+        output_shape.set(0, 1);
+
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+    }
+
+    return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window_logits_1d_max(ITensorInfo *input, ITensorInfo *output)
+{
+    // Configure kernel window
+    constexpr unsigned int num_elems_written_per_row = 1;
+    const int              input_width               = input->valid_region().shape.x();
+
+    unsigned int           num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type());
+    Window                 win                               = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+    bool                   window_changed = false;
+
+    if(output->total_size() != 0)
+    {
+        AccessWindowHorizontal output_access(output, 0, num_elems_written_per_row, 1.f / input_width);
+        window_changed = update_window_and_padding(win, input_access, output_access);
+        output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+    }
+    else
+    {
+        window_changed = update_window_and_padding(win, input_access);
+    }
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+
+Status validate_arguments_logits_1d_shift_exp_sum(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum, float beta)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, max, sum, output);
+    ARM_COMPUTE_RETURN_ERROR_ON((beta != 1.0f) && is_data_type_fixed_point(input->data_type()));
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+
+    // Checks performed when output is configured
+    if(output->total_size() != 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
+    }
+
+    // Checks performed when sum is configured
+    if(sum->total_size() != 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, max, sum);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(max, sum);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, max, sum);
+    }
+
+    return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window_logits_1d_shift_exp_sum(ITensorInfo *input, ITensorInfo *max, ITensorInfo *output, ITensorInfo *sum)
+{
+    unsigned int num_elems_processed_per_iteration = input->valid_region().shape.x();
+
+    // Configure kernel window
+    Window                 win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal max_access(max, 0, 1);
+    AccessWindowHorizontal sum_access(sum, 0, 1);
+    bool                   window_changed = false;
+
+    if(output->total_size() != 0)
+    {
+        AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+        window_changed = update_window_and_padding(win, input_access, max_access, output_access, sum_access);
+        output_access.set_valid_region(win, input->valid_region());
+    }
+    else
+    {
+        window_changed = update_window_and_padding(win, input_access, max_access, sum_access);
+    }
+
+    sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->tensor_shape()));
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+
+Status validate_arguments_logits_1d_norm(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::S32, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, sum);
+
+    // Checks performed when output is configured
+    if(output->total_size() != 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
+    }
+
+    return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window_logits_1d_norm(ITensorInfo *input, ITensorInfo *sum, ITensorInfo *output)
+{
+    // Configure kernel window
+    unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type());
+    Window       win                               = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+
+    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+    AccessWindowStatic     sum_access(sum, 0, 0, 1, sum->dimension(1));
+    bool                   window_changed = false;
+
+    if(output->total_size() != 0)
+    {
+        AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+
+        window_changed = update_window_and_padding(win, input_access, sum_access, output_access);
+
+        output_access.set_valid_region(win, input->valid_region());
+    }
+    else
+    {
+        window_changed = update_window_and_padding(win, input_access, sum_access);
+    }
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+
 void logits_1d_max_qs8(const ITensor *in, ITensor *out, const Window &window)
 {
     Window in_slice = window.first_slice_window_1D();
@@ -184,8 +327,7 @@ BorderSize NELogits1DMaxKernel::border_size() const
 
 void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     // Softmax across the x dimension
     TensorShape output_shape{ input->info()->tensor_shape() };
@@ -194,9 +336,8 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output)
     // Output auto initialization if not yet initialized
     auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
 
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_max(input->info(), output->info()));
 
     const int    input_width                       = input->info()->valid_region().shape.x();
     unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->info()->data_type());
@@ -226,17 +367,17 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output)
     _border_size = BorderSize(0, num_elems_processed_per_iteration - (input_width % num_elems_processed_per_iteration), 0, 0);
 
     // Configure kernel window
-    constexpr unsigned int num_elems_written_per_row = 1;
-
-    Window                 win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_row, 1.f / input_width);
-
-    update_window_and_padding(win, input_access, output_access);
+    auto win_config = validate_and_configure_window_logits_1d_max(input->info(), output->info());
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
+}
 
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+Status NELogits1DMaxKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_max(input, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_logits_1d_max(input->clone().get(), output->clone().get()).first);
 
-    INEKernel::configure(win);
+    return Status{};
 }
 
 void NELogits1DMaxKernel::run(const Window &window, const ThreadInfo &info)
@@ -512,20 +653,14 @@ NELogits1DShiftExpSumKernel::NELogits1DShiftExpSumKernel()
 
 void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum, float beta)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
-    ARM_COMPUTE_ERROR_ON((beta != 1.0f) && is_data_type_fixed_point(input->info()->data_type()));
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, max, sum, output);
 
     // Output auto initialization if not yet initialized
     auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
     auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
 
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, max, sum);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output, max, sum);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(max, sum);
-
-    unsigned int num_elems_processed_per_iteration = input->info()->valid_region().shape.x();
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_shift_exp_sum(input->info(), max->info(), output->info(), sum->info(), beta));
 
     switch(input->info()->data_type())
     {
@@ -555,18 +690,17 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor
     _beta   = beta;
 
     // Configure kernel window
-    Window                 win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal max_access(max->info(), 0, 1);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal sum_access(sum->info(), 0, 1);
-
-    update_window_and_padding(win, input_access, max_access, output_access, sum_access);
+    auto win_config = validate_and_configure_window_logits_1d_shift_exp_sum(input->info(), max->info(), output->info(), sum->info());
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
+}
 
-    output_access.set_valid_region(win, input->info()->valid_region());
-    sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->info()->tensor_shape()));
+Status NELogits1DShiftExpSumKernel::validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum, float beta)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_shift_exp_sum(input, max, output, sum, beta));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_logits_1d_shift_exp_sum(input->clone().get(), max->clone().get(), output->clone().get(), sum->clone().get()).first);
 
-    INEKernel::configure(win);
+    return Status{};
 }
 
 void NELogits1DShiftExpSumKernel::run(const Window &window, const ThreadInfo &info)
@@ -717,23 +851,18 @@ NELogits1DNormKernel::NELogits1DNormKernel()
 
 void NELogits1DNormKernel::configure(const ITensor *input, const ITensor *sum, ITensor *output)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(sum, output);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
 
     // Output auto initialization if not yet initialized
     auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
 
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, sum, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_norm(input->info(), sum->info(), output->info()));
 
     _input  = input;
     _sum    = sum;
     _output = output;
 
-    // Configure kernel window
-    unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->info()->data_type());
-
     switch(input->info()->data_type())
     {
         case DataType::QS8:
@@ -755,17 +884,18 @@ void NELogits1DNormKernel::configure(const ITensor *input, const ITensor *sum, I
             break;
     }
 
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowStatic     sum_access(sum->info(), 0, 0, 1, sum->info()->dimension(1));
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input_access, sum_access, output_access);
+    // Configure kernel window
+    auto win_config = validate_and_configure_window_logits_1d_norm(input->info(), sum->info(), output->info());
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
+}
 
-    output_access.set_valid_region(win, input->info()->valid_region());
+Status NELogits1DNormKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_norm(input, sum, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_logits_1d_norm(input->clone().get(), sum->clone().get(), output->clone().get()).first);
 
-    INEKernel::configure(win);
+    return Status{};
 }
 
 void NELogits1DNormKernel::run(const Window &window, const ThreadInfo &info)
diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp
index 57a1738f85..cdf1b54659 100644
--- a/src/runtime/NEON/functions/NEActivationLayer.cpp
+++ b/src/runtime/NEON/functions/NEActivationLayer.cpp
@@ -34,3 +34,8 @@ void NEActivationLayer::configure(ITensor *input, ITensor *output, ActivationLay
     k->configure(input, output, activation_info);
     _kernel = std::move(k);
 }
+
+Status NEActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+{ // Function-level validation entry point for the activation layer
+    return NEActivationLayerKernel::validate(input, output, act_info); // Forwarded unchanged to the kernel-level validate()
+}
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index 52a4cc158f..2eabe459a5 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -85,6 +85,30 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights,
     _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
 }
 
+Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, bias, output);
+
+    DataType data_type = output->data_type();
+    if(is_data_type_fixed_point(data_type))
+    {
+        // Promote data type in case of fixed point (QS8 -> QS16, any other fixed point type -> QS32)
+        data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32);
+    }
+    TensorInfo accumulator(output->clone()->set_is_resizable(true).reset_padding().set_data_type(data_type)); // Intermediate info: output of the convolution kernel, input of the bias accumulation kernel
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->dimension(0) != weights->dimension(3),
+                                    "Biases size and number of output feature maps should match");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->num_dimensions() > 1,
+                                    "Biases should be one dimensional");
+
+    ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerKernel::validate(input, weights, &accumulator, conv_info)); // Convolution writes into the accumulator info
+    ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerBiasAccumulateKernel::validate(&accumulator, bias, output)); // Bias stage reads the accumulator info and writes the final output
+
+    return Status{};
+}
+
 void NEDirectConvolutionLayer::run()
 {
     NEScheduler::get().schedule(&_input_border_handler, Window::DimZ);
diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp
index da4314b5ed..af98ac1f17 100644
--- a/src/runtime/NEON/functions/NENormalizationLayer.cpp
+++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp
@@ -39,7 +39,7 @@ NENormalizationLayer::NENormalizationLayer(std::shared_ptr<IMemoryManager> memor
 
 void NENormalizationLayer::configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info)
 {
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     TensorInfo tensor_info(input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
     _input_squared.allocator()->init(tensor_info);
@@ -56,6 +56,17 @@ void NENormalizationLayer::configure(const ITensor *input, ITensor *output, cons
     _input_squared.allocator()->allocate();
 }
 
+Status NENormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info)
+{
+    // Reject null tensor infos, then validate the normalization and multiplication kernels in turn
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+
+    ARM_COMPUTE_RETURN_ON_ERROR(NENormalizationLayerKernel::validate(input, input, output, norm_info)); // NOTE(review): the second 'input' presumably stands in for the squared-input info, which configure() builds from input's shape and data type - confirm
+    ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(input, input, output, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)); // NOTE(review): validated against 'output' rather than an info describing the squared-input tensor - confirm this is intended
+
+    return Status{};
+}
+
 void NENormalizationLayer::run()
 {
     _memory_group.acquire();
diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
index f8a85b9897..530c7fca4a 100644
--- a/src/runtime/NEON/functions/NEPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp
@@ -48,6 +48,11 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
     _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, PixelValue(static_cast<float>(0.f)));
 }
 
+Status NEPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+{ // Function-level validation entry point for the pooling layer
+    return NEPoolingLayerKernel::validate(input, output, pool_info); // Only the pooling kernel is validated; the border handler set up in configure() is not checked here
+}
+
 void NEPoolingLayer::run()
 {
     // Fill border
diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
index 84ecfdaf33..8e6773c5b1 100644
--- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp
+++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
@@ -38,7 +38,7 @@ NESoftmaxLayer::NESoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
 
 void NESoftmaxLayer::configure(ITensor *input, ITensor *output, float beta)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     // Create intermediate tensors shapes
     TensorInfo tensor_info_tmp(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position());
@@ -67,6 +67,23 @@ void NESoftmaxLayer::configure(ITensor *input, ITensor *output, float beta)
     _sum.allocator()->allocate();
 }
 
+Status NESoftmaxLayer::validate(const ITensorInfo *input, const ITensorInfo *output, float beta)
+{
+    // Reject null tensor infos, then validate the three softmax kernels in turn
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+
+    TensorShape max_sum_shape = input->tensor_shape();
+    max_sum_shape.set(0, 1); // Same shape as the input with dimension 0 set to 1
+
+    TensorInfo tensor_info_max_sum(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(max_sum_shape)); // One info object is used for both the max and the sum tensors
+
+    ARM_COMPUTE_RETURN_ON_ERROR(NELogits1DMaxKernel::validate(input, &tensor_info_max_sum));
+    ARM_COMPUTE_RETURN_ON_ERROR(NELogits1DShiftExpSumKernel::validate(input, &tensor_info_max_sum, input, &tensor_info_max_sum, beta)); // NOTE(review): the second 'input' presumably stands in for the intermediate tensor that configure() creates with the input's shape and type - confirm
+    ARM_COMPUTE_RETURN_ON_ERROR(NELogits1DNormKernel::validate(input, &tensor_info_max_sum, output)); // Arguments are (input, sum, output)
+
+    return Status{};
+}
+
 void NESoftmaxLayer::run()
 {
     _memory_group.acquire();
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index b13cb59b9a..6ed482e4e7 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -137,6 +137,36 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(conca
     }
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching data types
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Matching input/output; expected valid
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Default-constructed output info; expected valid
+                                          }),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 3),
+                                            TensorInfo(),
+                                          })),
+    framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                               })),
+    framework::dataset::make("Expected", { false, true, false, false, true })), // One entry per case, in the same order as the datasets above
+    input_info, output_info, act_info, expected)
+{
+    bool is_valid = bool(NEActivationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), act_info)); // Status is converted to bool for the comparison below
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NEActivationLayerFixture = ActivationValidationFixture<Tensor, Accessor, NEActivationLayer, T>;
 
diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp
index 52e2b2c034..cd23ce4bc3 100644
--- a/tests/validation/NEON/DirectConvolutionLayer.cpp
+++ b/tests/validation/NEON/DirectConvolutionLayer.cpp
@@ -91,6 +91,68 @@ const auto data_qs16 = combine(datasets::SmallDirectConvolutionShapes(),
 TEST_SUITE(NEON)
 TEST_SUITE(DirectConvolutionLayer)
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
+        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/weights
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching input feature maps
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Unsupported kernel width
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non-rectangular weights dimensions
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid weights dimensions
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid stride
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases size
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases dimensions
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid output size
+                                              }),
+        framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(9U, 9U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                              })),
+        framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(3U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U, 2U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                              })),
+        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32, 0),
+                                              })),
+        framework::dataset::make("ConvInfo",  { PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(3, 3, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                               })),
+        framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false })), // All nine cases are negative: each configuration must be rejected
+        input_info, weights_info, biases_info, output_info, conv_info, expected)
+{
+        bool is_valid = bool(NEDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)); // Status is converted to bool for the comparison below
+        ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 //TODO(COMPMID-415): Configuration tests?
 
 template <typename T>
diff --git a/tests/validation/NEON/NormalizationLayer.cpp b/tests/validation/NEON/NormalizationLayer.cpp
index 3afa52cb4c..4d504a8972 100644
--- a/tests/validation/NEON/NormalizationLayer.cpp
+++ b/tests/validation/NEON/NormalizationLayer.cpp
@@ -66,6 +66,42 @@ TEST_SUITE(NormalizationLayer)
 
 //TODO(COMPMID-415): Missing configuration?
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/output
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching shapes
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Even normalization
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non implemented IN_MAP_2D
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4), // Mismatching fixed point position
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0), // Matching input/output with CROSS_MAP; expected valid
+                                          }),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 3),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("NormInfo",  { NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                            NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                            NormalizationLayerInfo(NormType::IN_MAP_1D, 4),
+                                            NormalizationLayerInfo(NormType::IN_MAP_2D, 5),
+                                            NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                            NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                            NormalizationLayerInfo(NormType::CROSS_MAP, 1),
+                                           })),
+    framework::dataset::make("Expected", { false, false, false, false, false, false, true })), // One entry per case, in the same order as the datasets above
+    input_info, output_info, norm_info, expected)
+{
+    bool is_valid = bool(NENormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), norm_info)); // Status is converted to bool for the comparison below
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NENormalizationLayerFixture = NormalizationValidationFixture<Tensor, Accessor, NENormalizationLayer, T>;
 
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index 78c326975c..79a732a59a 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -66,6 +66,48 @@ TEST_SUITE(PoolingLayer)
 
 //TODO(COMPMID-415): Configuration tests?
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Mismatching data type
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Window shrink
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4),     // Mismatching fixed point position
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS16, 11),   // Window shrink
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
+                                            TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32, 0),     // Non-rectangular Global Pooling
+                                            TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0),     // Invalid output Global Pooling
+                                            TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0),     // Global pooling with an unrelated output shape; expected invalid
+                                          }),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::QS8, 5),
+                                            TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::QS16, 11),
+                                            TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("PoolInfo",  { PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 2, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 0, 2)),
+                                            PoolingLayerInfo(PoolingType::AVG),
+                                            PoolingLayerInfo(PoolingType::MAX),
+                                            PoolingLayerInfo(PoolingType::AVG),
+                                           })),
+    framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false })), // Exactly one entry per case above (nine); all are expected to be rejected
+    input_info, output_info, pool_info, expected)
+{
+    bool is_valid = bool(NEPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)); // Status is converted to bool for the comparison below
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NEPoolingLayerFixture = PoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T>;
 
diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp
index 1a303e14a3..0b688dfd1b 100644
--- a/tests/validation/NEON/SoftmaxLayer.cpp
+++ b/tests/validation/NEON/SoftmaxLayer.cpp
@@ -93,6 +93,28 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datase
     validate(dst.info()->padding(), padding);
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching shapes
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point
+                                            TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),    // Matching input/output; expected valid
+                                           }),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+                                            TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 3),
+                                            TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
+                                          })),
+    framework::dataset::make("Expected", { false, false, false, true })), // One entry per case, in the same order as the datasets above
+    input_info, output_info, expected)
+{
+    bool is_valid = bool(NESoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))); // beta is not passed here; Status is converted to bool for the comparison below
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NESoftmaxLayerFixture = SoftmaxValidationFixture<Tensor, Accessor, NESoftmaxLayer, T>;
author	Michalis Spyrou <michalis.spyrou@arm.com>	2017-11-30 14:25:57 +0000
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:42:17 +0000
commit	afa5d817b1d083837cd7ea30d32f845d82620c12 (patch)
tree	1ca2a27ab7108b7137b96fc1547a8b5ac5d9c8f7
parent	631c41a4e3645a948b0f597caa77e8fa91ca0efc (diff)
download	ComputeLibrary-afa5d817b1d083837cd7ea30d32f845d82620c12.tar.gz