From 27c08abe6947b1ee5b266799f2bb2bf0a05d0def Mon Sep 17 00:00:00 2001
From: Alex Gilday
Date: Thu, 22 Feb 2018 11:36:16 +0000
Subject: COMPMID-754: Add validation to LocallyConnected and NEDeconv layers

Change-Id: Ifed8713f4d7f1315af684b30d11323db2b533f10
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/121783
Reviewed-by: Anthony Barbier
Tested-by: Jenkins
Reviewed-by: Michalis Spyrou
---
 .../CLLocallyConnectedMatrixMultiplyKernel.cpp     |  63 +++++++---
 .../NELocallyConnectedMatrixMultiplyKernel.cpp     |  57 ++++++---
 .../CL/functions/CLLocallyConnectedLayer.cpp       | 132 ++++++++++++++-------
 .../NEON/functions/NEDeconvolutionLayer.cpp        |  69 +++++++++--
 .../NEON/functions/NELocallyConnectedLayer.cpp     | 132 ++++++++++++++-------
 5 files changed, 322 insertions(+), 131 deletions(-)

(limited to 'src')

diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
index a3af5b00ba..84f2e0cbac 100644
--- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,13 +46,44 @@ CLLocallyConnectedMatrixMultiplyKernel::CLLocallyConnectedMatrixMultiplyKernel()
 {
 }
 
+namespace
+{
+Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1));
+
+    return Status{};
+}
+
+std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output)
+{
+    const unsigned int num_elems_processed_per_iteration_x = max_cl_vector_width / data_size_from_type(input0->data_type());
+
+    Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x));
+
+    AccessWindowHorizontal input0_access(input0, 0, num_elems_processed_per_iteration_x);
+    AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration_x);
+    AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration_x);
+
+    bool window_changed = update_window_and_padding(win, input0_access, input1_access, output_access);
+
+    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+
+    return std::make_tuple(err, win);
+}
+} // namespace
+
 void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
-    ARM_COMPUTE_ERROR_ON(input0->info()->dimension(0) != input1->info()->dimension(1));
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info()));
 
     _input0 = input0;
     _input1 = input1;
@@ -77,20 +108,20 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0,
     std::string data_type_name = lower_string(string_from_data_type(input0->info()->data_type()));
     _kernel                    = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(("gemm_lc_vm_" + data_type_name), build_opts));
 
-    // Configure window kernel
-    const unsigned int num_elems_processed_per_iteration_x = max_cl_vector_width / data_size_from_type(input0->info()->data_type());
-
-    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration_x));
+    // Configure kernel window
+    auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info());
 
-    AccessWindowRectangle input0_access(input0->info(), 0, 0, num_elems_processed_per_iteration_x, 1);
-    AccessWindowRectangle input1_access(input1->info(), 0, 0, num_elems_processed_per_iteration_x, 1);
-    AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration_x, 1);
+    ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    update_window_and_padding(win, input0_access, input1_access, output_access);
+    ICLKernel::configure(std::get<1>(win_config));
+}
 
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+Status CLLocallyConnectedMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input0->clone().get(), input1->clone().get(), output->clone().get())));
 
-    ICLKernel::configure(win);
+    return Status{};
 }
 
 void CLLocallyConnectedMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
index 35beb82689..099626d259 100644
--- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
@@ -302,6 +302,37 @@ void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
     },
     ina, out);
 }
+
+Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1));
+
+    return Status{};
+}
+
+std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output)
+{
+    const unsigned int num_elems_processed_per_iteration_x = 16;
+
+    Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x));
+
+    AccessWindowHorizontal input0_access(input0, 0, num_elems_processed_per_iteration_x);
+    AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration_x);
+    AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration_x);
+
+    bool window_changed = update_window_and_padding(win, input0_access, input1_access, output_access);
+
+    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+
+    return std::make_tuple(err, win);
+}
 } // namespace
 
 NELocallyConnectedMatrixMultiplyKernel::NELocallyConnectedMatrixMultiplyKernel()
@@ -311,31 +342,27 @@ NELocallyConnectedMatrixMultiplyKernel::NELocallyConnectedMatrixMultiplyKernel()
 
 void NELocallyConnectedMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor *input1, ITensor *output)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON(input0->info()->dimension(0) != input1->info()->dimension(1));
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info()));
 
     _input0 = input0;
     _input1 = input1;
     _output = output;
 
-    const unsigned int num_elems_processed_per_iteration_x = 16;
-    // Configure kernel window
-    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration_x));
+    // Configure kernel window
+    auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info());
 
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration_x);
+    ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    update_window_and_padding(win,
-                              AccessWindowHorizontal(input0->info(), 0, num_elems_processed_per_iteration_x),
-                              AccessWindowHorizontal(input1->info(), 0, num_elems_processed_per_iteration_x),
-                              output_access);
+    INEKernel::configure(std::get<1>(win_config));
+}
 
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+Status NELocallyConnectedMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input0->clone().get(), input1->clone().get(), output->clone().get())));
 
-    INEKernel::configure(win);
+    return Status{};
 }
 
 void NELocallyConnectedMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &info)
diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
index d284949323..a3eb5010bd 100644
--- a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
@@ -33,40 +33,102 @@
 
 using namespace arm_compute;
 
+namespace
+{
+void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                      TensorShape &shape_wr, TensorShape &shape_im2col, TensorShape &shape_gemm)
+{
+    ARM_COMPUTE_UNUSED(output);
+
+    const unsigned int kernel_width  = weights->dimension(0);
+    const unsigned int kernel_height = weights->dimension(1);
+
+    bool has_bias = (biases != nullptr);
+
+    // Get convolved dimensions
+    unsigned int conv_w = 0;
+    unsigned int conv_h = 0;
+    std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height,
+                                                 conv_info);
+
+    const size_t mat_weights_cols = weights->dimension(3);
+    const size_t mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + ((has_bias) ? 1 : 0);
+    const size_t mat_weights_num  = weights->dimension(4);
+
+    shape_wr = TensorShape(mat_weights_cols, mat_weights_rows, mat_weights_num);
+
+    const size_t mat_input_cols = mat_weights_rows;
+    const size_t mat_input_rows = conv_w * conv_h;
+
+    shape_im2col = input->tensor_shape();
+    shape_im2col.set(0, mat_input_cols);
+    shape_im2col.set(1, mat_input_rows);
+    shape_im2col.set(2, 1);
+
+    shape_gemm = shape_im2col;
+    shape_gemm.set(0, mat_weights_cols);
+    shape_gemm.set(1, mat_input_rows);
+}
+} // namespace
+
 CLLocallyConnectedLayer::CLLocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
       _is_first_run(false)
 {
 }
 
-void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+Status CLLocallyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
-    ARM_COMPUTE_ERROR_ON(!conv_info.padding_is_symmetric());
-
-    if(biases != nullptr)
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != input->dimension(2));
+    ARM_COMPUTE_RETURN_ERROR_ON(!conv_info.padding_is_symmetric());
+
+    bool has_bias = (biases != nullptr);
+
+    if(has_bias)
     {
-        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
-        ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3));
-        ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 2);
+        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3));
+        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 2);
     }
 
+    const unsigned int kernel_width  = weights->dimension(0);
+    const unsigned int kernel_height = weights->dimension(1);
+
+    // Get convolved dimensions
+    unsigned int conv_w = 0;
+    unsigned int conv_h = 0;
+    std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height,
+                                                 conv_info);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != conv_w) || (output->dimension(1) != conv_h), "Output shape does not match the expected one");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(4) != (conv_w * conv_h), "Weights shape does not match the expected one");
+
+    // Calculate intermediate buffer shapes
+    TensorShape shape_wr;
+    TensorShape shape_im2col;
+    TensorShape shape_gemm;
+    calculate_shapes(input, weights, biases, output, conv_info, shape_wr, shape_im2col, shape_gemm);
+
+    TensorInfo weights_reshaped_info(shape_wr, 1, weights->data_type());
+    TensorInfo input_im2col_reshaped_info(shape_im2col, 1, input->data_type());
+    TensorInfo gemm_output_info(shape_gemm, 1, input->data_type());
+
+    ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLWeightsReshapeKernel::validate(weights, biases, &weights_reshaped_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLLocallyConnectedMatrixMultiplyKernel::validate(&input_im2col_reshaped_info, &weights_reshaped_info, &gemm_output_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(&gemm_output_info, output, std::make_pair(conv_w, conv_h)));
+
+    return Status{};
+}
+
+void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_ERROR_THROW_ON(CLLocallyConnectedLayer::validate(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), conv_info));
+
     bool _has_bias = (biases != nullptr);
     _is_first_run = true;
 
-    // Get parameters for conv_info
-    unsigned int stride_x = 0;
-    unsigned int stride_y = 0;
-    unsigned int pad_x    = 0;
-    unsigned int pad_y    = 0;
-    std::tie(stride_x, stride_y) = conv_info.stride();
-    std::tie(pad_x, pad_y)       = conv_info.pad();
-
     const unsigned int kernel_width  = weights->info()->dimension(0);
     const unsigned int kernel_height = weights->info()->dimension(1);
 
@@ -76,32 +138,14 @@ void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor
     std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height,
                                                  conv_info);
 
-    ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one");
-    ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(4) != (conv_w * conv_h), "Weights shape does not match the expected one");
-
-    // Create tensor to store the reshaped weights
-    const size_t mat_weights_cols = weights->info()->dimension(3);
-    const size_t mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + ((_has_bias) ? 1 : 0);
-    const size_t mat_weights_num  = weights->info()->dimension(4);
-
-    const TensorShape shape_wr(mat_weights_cols, mat_weights_rows, mat_weights_num);
+    // Calculate intermediate buffer shapes
+    TensorShape shape_wr;
+    TensorShape shape_im2col;
+    TensorShape shape_gemm;
+    calculate_shapes(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), conv_info, shape_wr, shape_im2col, shape_gemm);
 
     _weights_reshaped.allocator()->init(TensorInfo(shape_wr, 1, weights->info()->data_type()));
-
-    // Create tensor to store im2col reshaped inputs
-    const size_t mat_input_cols = mat_weights_rows;
-    const size_t mat_input_rows = conv_w * conv_h;
-    TensorShape shape_im2col = input->info()->tensor_shape();
-    shape_im2col.set(0, mat_input_cols);
-    shape_im2col.set(1, mat_input_rows);
-    shape_im2col.set(2, 1);
-
     _input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, input->info()->data_type()));
-
-    // Create locally connected layer output tensor
-    TensorShape shape_gemm = _input_im2col_reshaped.info()->tensor_shape();
-    shape_gemm.set(0, mat_weights_cols);
-    shape_gemm.set(1, mat_input_rows);
     _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type()));
 
     // Manage intermediate buffers
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 693d7a4f70..14d54d2f73 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -42,13 +42,64 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
 {
 }
 
+Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info,
+                                      unsigned int inner_border_right, unsigned int inner_border_top)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
+    ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
+
+    const unsigned int stride_x = info.stride().first;
+    const unsigned int stride_y = info.stride().second;
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_right > stride_x - 1, "inner_border_right must be smaller than stride_x");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_top > stride_y - 1, "inner_border_top must be smaller than stride_y");
+
+    auto out_dims = deconvolution_output_dimensions(input->dimension(0), input->dimension(1), weights->dimension(0), weights->dimension(1),
+                                                    info.pad().first, info.pad().second, inner_border_right, inner_border_top, stride_x, stride_y);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, bias);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights, bias);
+
+    if(bias != nullptr)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, bias);
+    }
+
+    if(output->tensor_shape().total_size() > 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+
+        const TensorShape output_shape = deconvolution_output_shape(out_dims, input->tensor_shape(), weights->tensor_shape());
+
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid.");
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid.");
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid.");
+    }
+
+    TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_deconvolution_shape(*input, stride_x, stride_y, inner_border_right, inner_border_top,
+                                                                                                                                  info)));
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+    for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != scale_out_info.dimension(i));
+    }
+
+    ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, info, WeightsInfo()));
+
+    return Status{};
+}
+
 void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info,
                                      unsigned int inner_border_right, unsigned int inner_border_top)
 {
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1));
-    ARM_COMPUTE_ERROR_ON(!info.padding_is_symmetric());
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 
     _input = input;
     _info = info;
@@ -56,15 +107,9 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
     const unsigned int stride_x = info.stride().first;
     const unsigned int stride_y = info.stride().second;
 
-    auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
-                                                    info.pad().first, info.pad().second, inner_border_right, inner_border_top, stride_x, stride_y);
-
-    const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape());
-    ARM_COMPUTE_UNUSED(output_shape);
-    ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid.");
-    ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid.");
-    ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid.");
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(NEDeconvolutionLayer::validate(input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info, inner_border_right, inner_border_top));
 
     _memory_group.manage(&_scaled_output);
diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
index 45ddb70ff5..973559441f 100644
--- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
@@ -33,40 +33,102 @@
 
 using namespace arm_compute;
 
+namespace
+{
+void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                      TensorShape &shape_wr, TensorShape &shape_im2col, TensorShape &shape_gemm)
+{
+    ARM_COMPUTE_UNUSED(output);
+
+    const unsigned int kernel_width  = weights->dimension(0);
+    const unsigned int kernel_height = weights->dimension(1);
+
+    bool has_bias = (biases != nullptr);
+
+    // Get convolved dimensions
+    unsigned int conv_w = 0;
+    unsigned int conv_h = 0;
+    std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height,
+                                                 conv_info);
+
+    const size_t mat_weights_cols = weights->dimension(3);
+    const size_t mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + ((has_bias) ? 1 : 0);
+    const size_t mat_weights_num  = weights->dimension(4);
+
+    shape_wr = TensorShape(mat_weights_cols, mat_weights_rows, mat_weights_num);
+
+    const size_t mat_input_cols = mat_weights_rows;
+    const size_t mat_input_rows = conv_w * conv_h;
+
+    shape_im2col = input->tensor_shape();
+    shape_im2col.set(0, mat_input_cols);
+    shape_im2col.set(1, mat_input_rows);
+    shape_im2col.set(2, 1);
+
+    shape_gemm = shape_im2col;
+    shape_gemm.set(0, mat_weights_cols);
+    shape_gemm.set(1, mat_input_rows);
+}
+} // namespace
+
 NELocallyConnectedLayer::NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
       _is_first_run(false)
 {
 }
 
-void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+Status NELocallyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
-    ARM_COMPUTE_ERROR_ON(!conv_info.padding_is_symmetric());
-
-    if(biases != nullptr)
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != input->dimension(2));
+    ARM_COMPUTE_RETURN_ERROR_ON(!conv_info.padding_is_symmetric());
+
+    bool has_bias = (biases != nullptr);
+
+    if(has_bias)
     {
-        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
-        ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3));
-        ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 2);
+        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3));
+        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 2);
     }
 
+    const unsigned int kernel_width  = weights->dimension(0);
+    const unsigned int kernel_height = weights->dimension(1);
+
+    // Get convolved dimensions
+    unsigned int conv_w = 0;
+    unsigned int conv_h = 0;
+    std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height,
+                                                 conv_info);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != conv_w) || (output->dimension(1) != conv_h), "Output shape does not match the expected one");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(4) != (conv_w * conv_h), "Weights shape does not match the expected one");
+
+    // Calculate intermediate buffer shapes
+    TensorShape shape_wr;
+    TensorShape shape_im2col;
+    TensorShape shape_gemm;
+    calculate_shapes(input, weights, biases, output, conv_info, shape_wr, shape_im2col, shape_gemm);
+
+    TensorInfo weights_reshaped_info(shape_wr, 1, weights->data_type());
+    TensorInfo input_im2col_reshaped_info(shape_im2col, 1, input->data_type());
+    TensorInfo gemm_output_info(shape_gemm, 1, input->data_type());
+
+    ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias, false));
+    ARM_COMPUTE_RETURN_ON_ERROR(NEWeightsReshapeKernel::validate(weights, biases, &weights_reshaped_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(NELocallyConnectedMatrixMultiplyKernel::validate(&input_im2col_reshaped_info, &weights_reshaped_info, &gemm_output_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h)));
+
+    return Status{};
+}
+
+void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_ERROR_THROW_ON(NELocallyConnectedLayer::validate(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), conv_info));
+
     bool _has_bias = (biases != nullptr);
     _is_first_run = true;
 
-    // Get parameters for conv_info
-    unsigned int stride_x = 0;
-    unsigned int stride_y = 0;
-    unsigned int pad_x    = 0;
-    unsigned int pad_y    = 0;
-    std::tie(stride_x, stride_y) = conv_info.stride();
-    std::tie(pad_x, pad_y)       = conv_info.pad();
-
     const unsigned int kernel_width  = weights->info()->dimension(0);
     const unsigned int kernel_height = weights->info()->dimension(1);
 
@@ -76,32 +138,14 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei
     std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height,
                                                  conv_info);
 
-    ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one");
-    ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(4) != (conv_w * conv_h), "Weights shape does not match the expected one");
-
-    // Create tensor to store the reshaped weights
-    const size_t mat_weights_cols = weights->info()->dimension(3);
-    const size_t mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + ((_has_bias) ? 1 : 0);
-    const size_t mat_weights_num  = weights->info()->dimension(4);
-
-    const TensorShape shape_wr(mat_weights_cols, mat_weights_rows, mat_weights_num);
+    // Calculate intermediate buffer shapes
+    TensorShape shape_wr;
+    TensorShape shape_im2col;
+    TensorShape shape_gemm;
+    calculate_shapes(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), conv_info, shape_wr, shape_im2col, shape_gemm);
 
     _weights_reshaped.allocator()->init(TensorInfo(shape_wr, 1, weights->info()->data_type()));
-
-    // Create tensor to store im2col reshaped inputs
-    const size_t mat_input_cols = mat_weights_rows;
-    const size_t mat_input_rows = conv_w * conv_h;
-    TensorShape shape_im2col = input->info()->tensor_shape();
-    shape_im2col.set(0, mat_input_cols);
-    shape_im2col.set(1, mat_input_rows);
-    shape_im2col.set(2, 1);
-
    _input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, input->info()->data_type()));
-
-    // Create locally connected layer output tensor
-    TensorShape shape_gemm = _input_im2col_reshaped.info()->tensor_shape();
-    shape_gemm.set(0, mat_weights_cols);
-    shape_gemm.set(1, mat_input_rows);
     _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type()));
 
     // Manage intermediate buffers
-- 
cgit v1.2.1
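
Usage sketch (illustrative, not part of the commit above): the change adds static validate() entry points that mirror configure(), so arguments can be checked without allocating tensors or configuring kernels. The snippet below shows one way the new NELocallyConnectedLayer::validate() might be called; the tensor shapes, the main() wrapper and the exact include paths are assumptions for illustration rather than code from this commit.

    // Illustrative only: exercise the new static validate() API before calling configure().
    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h"

    #include <iostream>

    using namespace arm_compute;

    int main()
    {
        // Assumed shapes: 16x16 input with 8 channels, 3x3 kernels, stride 1, no padding -> 14x14 output.
        const TensorInfo input(TensorShape(16U, 16U, 8U), 1, DataType::F32);
        // Locally connected weights are 5D: (kernel_w, kernel_h, ifm, ofm, conv_w * conv_h).
        const TensorInfo weights(TensorShape(3U, 3U, 8U, 16U, 14U * 14U), 1, DataType::F32);
        const TensorInfo output(TensorShape(14U, 14U, 16U), 1, DataType::F32);
        const PadStrideInfo conv_info(1, 1, 0, 0);

        // New in this patch: check the configuration without configuring anything.
        const Status status = NELocallyConnectedLayer::validate(&input, &weights, nullptr /* biases */, &output, conv_info);
        if(status.error_code() != ErrorCode::OK)
        {
            std::cout << "Validation failed: " << status.error_description() << std::endl;
            return 1;
        }

        std::cout << "Arguments are valid; configure() can be called safely" << std::endl;
        return 0;
    }

CLLocallyConnectedLayer::validate() and NEDeconvolutionLayer::validate() added by the same patch follow the same pattern, taking ITensorInfo pointers and returning a Status instead of asserting inside configure().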