Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/CL/functions/CLFlattenLayer.cpp          | 32
-rw-r--r--  src/runtime/NEON/functions/NEFlattenLayer.cpp        | 24
-rw-r--r--  src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 11
-rw-r--r--  src/runtime/NEON/functions/NERNNLayer.cpp            |  1
4 files changed, 46 insertions, 22 deletions
diff --git a/src/runtime/CL/functions/CLFlattenLayer.cpp b/src/runtime/CL/functions/CLFlattenLayer.cpp
index c10e91bf96..b2860ea24a 100644
--- a/src/runtime/CL/functions/CLFlattenLayer.cpp
+++ b/src/runtime/CL/functions/CLFlattenLayer.cpp
@@ -23,11 +23,16 @@
*/
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
-
-using namespace arm_compute;
+#include "src/core/helpers/AutoConfiguration.h"
+namespace arm_compute
+{
void CLFlattenLayer::configure(const ICLTensor *input, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output);
@@ -35,13 +40,24 @@ void CLFlattenLayer::configure(const ICLTensor *input, ICLTensor *output)
void CLFlattenLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
- auto k = std::make_unique<CLFlattenLayerKernel>();
- k->configure(compile_context, input, output);
- _kernel = std::move(k);
- CLScheduler::get().tune_kernel_static(*_kernel);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input->info())));
+ _reshape.configure(compile_context, input, output);
}
Status CLFlattenLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
{
- return CLFlattenLayerKernel::validate(input, output);
+ // Checks performed when output is configured
+ if(output->total_size() != 0)
+ {
+ const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
+ }
+ return CLReshapeLayer::validate(input, output);
+}
+
+void CLFlattenLayer::run()
+{
+ _reshape.run();
}
+} // namespace arm_compute
\ No newline at end of file
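For reference, a minimal usage sketch of the CLFlattenLayer API exercised above (configure, allocate, run); the tensor shape, the main() wrapper and the initialisation calls are illustrative assumptions, not part of the patch:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // 7x7x64 feature map with a batch of 2 (illustrative shape).
    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(7U, 7U, 64U, 2U), 1, DataType::F32));
    // dst is left empty: configure() auto-initialises it to the flattened
    // shape computed by compute_flatten_shape(), i.e. (3136, 2).

    CLFlattenLayer flatten;
    flatten.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    flatten.run(); // with this patch, simply forwards to CLReshapeLayer::run()
    CLScheduler::get().sync();
    return 0;
}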
diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp
index 21e55665cd..c5aa162f0a 100644
--- a/src/runtime/NEON/functions/NEFlattenLayer.cpp
+++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp
@@ -23,20 +23,32 @@
*/
#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
-#include "arm_compute/core/Size2D.h"
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
void NEFlattenLayer::configure(const ITensor *input, ITensor *output)
{
- auto k = std::make_unique<NEFlattenLayerKernel>();
- k->configure(input, output);
- _kernel = std::move(k);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input->info())));
+ _reshape.configure(input, output);
}
Status NEFlattenLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
{
- return NEFlattenLayerKernel::validate(input, output);
+ // Checks performed when output is configured
+ if(output->total_size() != 0)
+ {
+ const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
+ }
+ return NEReshapeLayer::validate(input, output);
+}
+void NEFlattenLayer::run()
+{
+ _reshape.run();
}
} // namespace arm_compute
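Similarly, a small sketch (not from the patch) of what the new NEON validate() path accepts and rejects; the concrete shapes are assumptions chosen for illustration:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"

using namespace arm_compute;

int main()
{
    const TensorInfo input(TensorShape(7U, 7U, 64U, 2U), 1, DataType::F32);
    const TensorInfo good_output(TensorShape(3136U, 2U), 1, DataType::F32); // 7 * 7 * 64 = 3136
    const TensorInfo bad_output(TensorShape(3136U, 3U), 1, DataType::F32);  // wrong batch count

    // Matches compute_flatten_shape(input), so the check falls through to
    // NEReshapeLayer::validate().
    const Status ok = NEFlattenLayer::validate(&input, &good_output);

    // Caught by ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES before the
    // reshape validation is reached.
    const Status err = NEFlattenLayer::validate(&input, &bad_output);

    return (bool(ok) && !bool(err)) ? 0 : 1;
}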
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index f12c410a59..ec782fc163 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -31,8 +31,6 @@
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
@@ -159,7 +157,7 @@ Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c
NEFullyConnectedLayer::~NEFullyConnectedLayer() = default;
NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
- : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten_kernel(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
+ : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
_reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(),
_original_weights(nullptr), _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized_asymmetric(false), _is_prepared(false)
{
@@ -213,8 +211,7 @@ void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITenso
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
- _flatten_kernel = std::make_unique<NEFlattenLayerKernel>();
- _flatten_kernel->configure(input, &_flatten_output);
+ _flatten.configure(input, &_flatten_output);
// Configure matrix multiply kernel
configure_mm(&_flatten_output, weights, biases, output, act);
@@ -392,7 +389,7 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
input_to_use = &flatten_input;
}
else
@@ -415,7 +412,7 @@ void NEFullyConnectedLayer::run()
// Linearize input if it comes from a convolutional layer
if(_is_fc_after_conv)
{
- NEScheduler::get().schedule(_flatten_kernel.get(), Window::DimY);
+ _flatten.run();
}
// Run matrix multiply
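The fully connected path above relies on the same shape collapse as the flatten functions; a quick sketch of what misc::shape_calculator::compute_flatten_shape() yields for a hypothetical convolutional output (the 7x7x64x2 shape is an assumption for illustration):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"

#include <cstdio>

using namespace arm_compute;

int main()
{
    // Convolutional output feeding the fully connected layer: W=7, H=7, C=64, N=2.
    const TensorInfo conv_out(TensorShape(7U, 7U, 64U, 2U), 1, DataType::F32);

    // W, H and C are collapsed into a single dimension; batch dimensions are kept.
    const TensorShape flat = misc::shape_calculator::compute_flatten_shape(&conv_out);

    std::printf("flattened shape: %zu x %zu\n", flat[0], flat[1]); // 3136 x 2
    return 0;
}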
diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp
index c16d09f60c..93e37cc000 100644
--- a/src/runtime/NEON/functions/NERNNLayer.cpp
+++ b/src/runtime/NEON/functions/NERNNLayer.cpp
@@ -33,7 +33,6 @@
#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
#include "src/core/NEON/kernels/NECopyKernel.h"
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"