COMPMID-1097: Port mobilenet to NHWC

Change-Id: I789065bfa0d4ef133388e1904c5caf31e450f80f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129495
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-04-27 19:07:19 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:51:17 +0000
commit: cac13b1cfd593889271f8e2191be2039b8d88f36 (patch)
tree: d1c5196877d7fbd5dcfbb9f9003faf6035f82a33
parent: ad0c7388f6261989a268ffb2d042f2bd80736e3f (diff)
download: ComputeLibrary-cac13b1cfd593889271f8e2191be2039b8d88f36.tar.gz
55 files changed, 474 insertions, 312 deletions
diff --git a/arm_compute/graph/INode.h b/arm_compute/graph/INode.h
index 5d9c36e098..f8101d7df2 100644
--- a/arm_compute/graph/INode.h
+++ b/arm_compute/graph/INode.h
@@ -60,7 +60,7 @@ public:
      *
      * @return Status containing any errors
      */
-    virtual Status validate() = 0;
+    virtual Status validate() const;
     /** Returns node's type
      *
      * @return Node's type
diff --git a/arm_compute/graph/TensorDescriptor.h b/arm_compute/graph/TensorDescriptor.h
index 785c493cbc..704f015672 100644
--- a/arm_compute/graph/TensorDescriptor.h
+++ b/arm_compute/graph/TensorDescriptor.h
@@ -26,29 +26,89 @@
 
 #include "arm_compute/graph/Types.h"
 
+#include "arm_compute/core/utils/misc/ICloneable.h"
+
+#include <memory>
+
 namespace arm_compute
 {
 namespace graph
 {
 /** Tensor metadata class */
-struct TensorDescriptor final
+struct TensorDescriptor final : public misc::ICloneable<TensorDescriptor>
 {
     /** Default Constructor **/
     TensorDescriptor() = default;
     /** Constructor
      *
-     * @param[in] tensor_shape      Tensor shape
-     * @param[in] tensor_data_type  Tensor data type
-     * @param[in] tensor_quant_info Tensor quantization info
-     * @param[in] tensor_target     Target to allocate the tensor for
+     * @param[in] tensor_shape       Tensor shape
+     * @param[in] tensor_data_type   Tensor data type
+     * @param[in] tensor_quant_info  Tensor quantization info
+     * @param[in] tensor_data_layout Tensor data layout
+     * @param[in] tensor_target      Target to allocate the tensor for
+     */
+    TensorDescriptor(TensorShape      tensor_shape,
+                     DataType         tensor_data_type,
+                     QuantizationInfo tensor_quant_info  = QuantizationInfo(),
+                     DataLayout       tensor_data_layout = DataLayout::NCHW,
+                     Target           tensor_target      = Target::UNSPECIFIED)
+        : shape(tensor_shape), data_type(tensor_data_type), layout(tensor_data_layout), quant_info(tensor_quant_info), target(tensor_target)
+    {
+    }
+    /** Sets tensor descriptor shape
+     *
+     * @param[in] tensor_shape Tensor shape to set (copied into the descriptor)
+     *
+     * @return This tensor descriptor
      */
-    TensorDescriptor(TensorShape tensor_shape, DataType tensor_data_type, QuantizationInfo tensor_quant_info = QuantizationInfo(), Target tensor_target = Target::UNSPECIFIED)
-        : shape(tensor_shape), data_type(tensor_data_type), quant_info(tensor_quant_info), target(tensor_target)
+    TensorDescriptor &set_shape(const TensorShape &tensor_shape)
+    {
+        shape = tensor_shape;
+        return *this;
+    }
+    /** Sets tensor descriptor data type
+     *
+     * @param[in] tensor_data_type Data type
+     *
+     * @return This tensor descriptor
+     */
+    TensorDescriptor &set_data_type(DataType tensor_data_type)
+    {
+        data_type = tensor_data_type;
+        return *this;
+    }
+    /** Sets tensor descriptor data layout
+     *
+     * @param[in] data_layout Data layout
+     *
+     * @return This tensor descriptor
+     */
+    TensorDescriptor &set_layout(DataLayout data_layout)
+    {
+        layout = data_layout;
+        return *this;
+    }
+    /** Sets tensor descriptor quantization info
+     *
+     * @param[in] tensor_quant_info Quantization information
+     *
+     * @return This tensor descriptor
+     */
+    TensorDescriptor &set_quantization_info(QuantizationInfo tensor_quant_info)
+    {
+        quant_info = tensor_quant_info;
+        return *this;
+    }
+
+    // Inherited methods overridden:
+    std::unique_ptr<TensorDescriptor> clone() const override
     {
+        return support::cpp14::make_unique<TensorDescriptor>(*this);
     }
 
     TensorShape      shape{};                        /**< Tensor shape */
     DataType         data_type{ DataType::UNKNOWN }; /**< Data type */
+    DataLayout       layout{ DataLayout::NCHW };     /**< Data layout */
     QuantizationInfo quant_info{};                   /**< Quantization info */
     Target           target{ Target::UNSPECIFIED };  /**< Target */
 };
diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h
index ed578307b5..0ecd57de9d 100644
--- a/arm_compute/graph/TypePrinter.h
+++ b/arm_compute/graph/TypePrinter.h
@@ -138,6 +138,24 @@ inline ::std::ostream &operator<<(::std::ostream &os, const Target &target)
     return os;
 }
 
+/** Formatted output of the DataLayout */
+inline ::std::ostream &operator<<(::std::ostream &os, const DataLayout &data_layout)
+{
+    switch(data_layout)
+    {
+        case DataLayout::NCHW:
+            os << "NCHW";
+            break;
+        case DataLayout::NHWC:
+            os << "NHWC";
+            break;
+        default:
+            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+    }
+
+    return os;
+}
+
 /** Formatted output of the activation function type. */
 inline ::std::ostream &operator<<(::std::ostream &os, const ActivationLayerInfo::ActivationFunction &act_function)
 {
diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h
index 35f701284b..02e5d92983 100644
--- a/arm_compute/graph/Types.h
+++ b/arm_compute/graph/Types.h
@@ -40,6 +40,8 @@ using arm_compute::Status;
 
 using arm_compute::Coordinates;
 using arm_compute::DataType;
+using arm_compute::DataLayout;
+using arm_compute::DataLayoutDimension;
 using arm_compute::TensorShape;
 using arm_compute::Size2D;
 
@@ -80,13 +82,6 @@ struct GraphConfig
     int  num_threads{ -1 };                      /**< Number of threads to use (thread capable backends), if 0 the backend will auto-initialize, if -1 the backend will stay as it is. */
 };
 
-/**< Data layout format */
-enum class DataLayout
-{
-    NCHW, /** N(Batches), C(Channels), H(Height), W(Width) from slow to fast moving dimension */
-    NHWC  /** N(Batches), H(Height), W(Width), C(Channels) from slow to fast moving dimension */
-};
-
 /**< Device target types */
 enum class Target
 {
diff --git a/arm_compute/graph/Utils.h b/arm_compute/graph/Utils.h
index 83deb70348..582d47e406 100644
--- a/arm_compute/graph/Utils.h
+++ b/arm_compute/graph/Utils.h
@@ -94,6 +94,22 @@ PassManager create_default_pass_manager(Target target);
  * @param[in] ctx Graph Context
  */
 void setup_default_graph_context(GraphContext &ctx);
+/** Get size of a tensor's given dimension depending on its layout
+ *
+ * @param[in] descriptor            Descriptor
+ * @param[in] data_layout_dimension Tensor data layout dimension
+ *
+ * @return Size of requested dimension
+ */
+size_t get_dimension_size(const TensorDescriptor &descriptor, const DataLayoutDimension data_layout_dimension);
+/** Get index of a tensor's given dimension depending on its layout
+ *
+ * @param[in] descriptor            Descriptor
+ * @param[in] data_layout_dimension Tensor data layout dimension
+ *
+ * @return Idx of given dimension
+ */
+size_t get_dimension_idx(const TensorDescriptor &descriptor, const DataLayoutDimension data_layout_dimension);
 } // namespace graph
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_GRAPH_UTILS_H__ */
diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h
index c1b87ee0c0..237d4ae2a4 100644
--- a/arm_compute/graph/backends/ValidateHelpers.h
+++ b/arm_compute/graph/backends/ValidateHelpers.h
@@ -30,6 +30,7 @@
 #include "arm_compute/graph/nodes/Nodes.h"
 
 #include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensorInfo.h"
 
 namespace arm_compute
@@ -138,7 +139,7 @@ Status validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
 
     // TODO (geopin01) : Switch when validation is implemented
     // Validate function
-    if((dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3) && (weights->tensor_shape().x() != 3))
+    if((dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3) && (weights->tensor_shape()[get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH)] != 3))
     {
         ARM_COMPUTE_LOG_GRAPH_INFO("Switched DepthwiseConvolutionLayer method of node with ID : "
                                    << node.id() << " and Name: " << node.name() << std::endl);
diff --git a/arm_compute/graph/detail/ExecutionHelpers.h b/arm_compute/graph/detail/ExecutionHelpers.h
index a868df8a5d..27cae4b8ab 100644
--- a/arm_compute/graph/detail/ExecutionHelpers.h
+++ b/arm_compute/graph/detail/ExecutionHelpers.h
@@ -41,6 +41,11 @@ namespace detail
 {
 /** Initializes the available backends **/
 void default_initialize_backends();
+/** Validates all nodes
+ *
+ * @param[in] g Graph to validate
+ */
+void validate_all_nodes(Graph &g);
 /** Configures all nodes of a graph
  *
  * @param[in] g Graph to configure
@@ -66,11 +71,6 @@ void allocate_const_tensors(Graph &g);
  * @param[in] g Graph to allocate the tensors
  */
 void allocate_all_tensors(Graph &g);
-/** Validates all nodes
- *
- * @param[in] g Graph to validate
- */
-void validate_all_nodes(Graph &g);
 /** Configures all nodes of graph
  *
  * @param[in] g   Graph to configure the nodes
diff --git a/arm_compute/graph/frontend/Types.h b/arm_compute/graph/frontend/Types.h
index 78644e66af..6cf7460900 100644
--- a/arm_compute/graph/frontend/Types.h
+++ b/arm_compute/graph/frontend/Types.h
@@ -34,6 +34,7 @@ namespace frontend
 {
 // Import types for graph
 using graph::DataType;
+using graph::DataLayout;
 using graph::TensorShape;
 
 using graph::ActivationLayerInfo;
diff --git a/arm_compute/graph/nodes/ActivationLayerNode.h b/arm_compute/graph/nodes/ActivationLayerNode.h
index 985e10a8d8..570351bb94 100644
--- a/arm_compute/graph/nodes/ActivationLayerNode.h
+++ b/arm_compute/graph/nodes/ActivationLayerNode.h
@@ -46,7 +46,6 @@ public:
     ActivationLayerInfo activation_info() const;
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/BatchNormalizationLayerNode.h b/arm_compute/graph/nodes/BatchNormalizationLayerNode.h
index b36d66993b..a364d1c5ae 100644
--- a/arm_compute/graph/nodes/BatchNormalizationLayerNode.h
+++ b/arm_compute/graph/nodes/BatchNormalizationLayerNode.h
@@ -57,7 +57,6 @@ public:
     void set_fused_activation(ActivationLayerInfo fused_activation);
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/ConstNode.h b/arm_compute/graph/nodes/ConstNode.h
index 346a3c82e7..3216a3a035 100644
--- a/arm_compute/graph/nodes/ConstNode.h
+++ b/arm_compute/graph/nodes/ConstNode.h
@@ -41,7 +41,6 @@ public:
     ConstNode(TensorDescriptor desc);
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h
index d029895609..d1186a8eae 100644
--- a/arm_compute/graph/nodes/ConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h
@@ -59,18 +59,19 @@ public:
      * @return Convolution information
      */
     PadStrideInfo convolution_info() const;
-    /** Computes convolution output shape
+    /** Computes convolution output descriptor
      *
-     * @param[in] input_shape   Input shape
-     * @param[in] weights_shape Weights shape
-     * @param[in] info          Convolution operation attributes
+     * @param[in] input_descriptor   Input descriptor
+     * @param[in] weights_descriptor Weights descriptor
+     * @param[in] info               Convolution operation attributes
      *
-     * @return Output shape
+     * @return Output descriptor
      */
-    static TensorShape compute_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo info);
+    static TensorDescriptor compute_output_descriptor(const TensorDescriptor &input_descriptor,
+                                                      const TensorDescriptor &weights_descriptor,
+                                                      const PadStrideInfo    &info);
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/DepthConcatenateLayerNode.h b/arm_compute/graph/nodes/DepthConcatenateLayerNode.h
index cb309f38c1..ffdec709ef 100644
--- a/arm_compute/graph/nodes/DepthConcatenateLayerNode.h
+++ b/arm_compute/graph/nodes/DepthConcatenateLayerNode.h
@@ -39,13 +39,13 @@ public:
      * @param[in] total_nodes Number of nodes that will get concatenated
      */
     DepthConcatenateLayerNode(unsigned int total_nodes);
-    /** Computes depth concatenations output shape
+    /** Computes depth concatenations output descriptor
      *
-     * @param input_shapes   Shapes of the inputs
+     * @param[in] input_descriptors Input descriptors
      *
-     * @return Expected output shape
+     * @return Expected output descriptor
      */
-    static TensorShape compute_output_shape(const std::vector<TensorShape> &input_shapes);
+    static TensorDescriptor compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors);
     /** Disables or not the depth concatenate node
      *
      * @warning This is used when depth concatenate is performed with sub-tensors,
@@ -63,7 +63,6 @@ public:
     bool is_enabled() const;
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
index b4cf9b4d03..df6f456ac9 100644
--- a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
@@ -58,18 +58,19 @@ public:
      * @return Convolution information
      */
     PadStrideInfo convolution_info() const;
-    /** Computes depthwise convolution output shape
+    /** Computes depthwise convolution output descriptor
      *
-     * @param[in] input_shape   Input shape
-     * @param[in] weights_shape Weights shape
-     * @param[in] info          Convolution operation attributes
+     * @param[in] input_descriptor   Input descriptor
+     * @param[in] weights_descriptor Weights descriptor
+     * @param[in] info               Convolution operation attributes
      *
-     * @return Output shape
+     * @return Output descriptor
      */
-    static TensorShape compute_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo info);
+    static TensorDescriptor compute_output_descriptor(const TensorDescriptor &input_descriptor,
+                                                      const TensorDescriptor &weights_descriptor,
+                                                      const PadStrideInfo    &info);
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/EltwiseLayerNode.h b/arm_compute/graph/nodes/EltwiseLayerNode.h
index 9da88d75b5..5b9fa84bbb 100644
--- a/arm_compute/graph/nodes/EltwiseLayerNode.h
+++ b/arm_compute/graph/nodes/EltwiseLayerNode.h
@@ -46,7 +46,6 @@ public:
     EltwiseOperation eltwise_operation() const;
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/FlattenLayerNode.h b/arm_compute/graph/nodes/FlattenLayerNode.h
index f0dde1fab1..18a96ab787 100644
--- a/arm_compute/graph/nodes/FlattenLayerNode.h
+++ b/arm_compute/graph/nodes/FlattenLayerNode.h
@@ -38,7 +38,6 @@ public:
     FlattenLayerNode();
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/FullyConnectedLayerNode.h b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
index 166751b8fa..3d1b68909a 100644
--- a/arm_compute/graph/nodes/FullyConnectedLayerNode.h
+++ b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
@@ -39,29 +39,28 @@ public:
      * @param[in] num_outputs Number of neurons in the layer
      */
     FullyConnectedLayerNode(unsigned int num_outputs);
-    /** Computes weights shape
+    /** Computes weights descriptor
      *
      * @warning Works for inputs with 1D batch space
      *
-     * @param[in] input_shape Input shape
-     * @param[in] num_outputs Number of output neurons
+     * @param[in] input_descriptor Input descriptor
+     * @param[in] num_outputs      Number of output neurons
      *
-     * @return Weights shape
+     * @return Weights descriptor
      */
-    static TensorShape compute_weights_shape(TensorShape input_shape, unsigned int num_outputs);
-    /** Computes fully connected layer output shape
+    static TensorDescriptor compute_weights_descriptor(const TensorDescriptor &input_descriptor, unsigned int num_outputs);
+    /** Computes fully connected layer output descriptor
      *
      * @warning Works for inputs with 1D batch space
      *
-     * @param[in] input_shape Input shape
-     * @param[in] num_outputs Number of output neurons
+     * @param[in] input_descriptor Input descriptor
+     * @param[in] num_outputs      Number of output neurons
      *
-     * @return Output shape
+     * @return Output descriptor
      */
-    static TensorShape compute_output_shape(TensorShape input_shape, unsigned int num_outputs);
+    static TensorDescriptor compute_output_descriptor(const TensorDescriptor &input_descriptor, unsigned int num_outputs);
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/InputNode.h b/arm_compute/graph/nodes/InputNode.h
index cacea95ab8..4297c8aba5 100644
--- a/arm_compute/graph/nodes/InputNode.h
+++ b/arm_compute/graph/nodes/InputNode.h
@@ -41,7 +41,6 @@ public:
     InputNode(TensorDescriptor desc);
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/NormalizationLayerNode.h b/arm_compute/graph/nodes/NormalizationLayerNode.h
index 34dc3ccf8f..43040e15a2 100644
--- a/arm_compute/graph/nodes/NormalizationLayerNode.h
+++ b/arm_compute/graph/nodes/NormalizationLayerNode.h
@@ -46,7 +46,6 @@ public:
     NormalizationLayerInfo normalization_info() const;
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/OutputNode.h b/arm_compute/graph/nodes/OutputNode.h
index 46988cf969..03d41eba6e 100644
--- a/arm_compute/graph/nodes/OutputNode.h
+++ b/arm_compute/graph/nodes/OutputNode.h
@@ -38,7 +38,6 @@ public:
     OutputNode();
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/PoolingLayerNode.h b/arm_compute/graph/nodes/PoolingLayerNode.h
index e250eb247a..d037ea25ab 100644
--- a/arm_compute/graph/nodes/PoolingLayerNode.h
+++ b/arm_compute/graph/nodes/PoolingLayerNode.h
@@ -44,17 +44,16 @@ public:
      * @return Pooling Layer info
      */
     PoolingLayerInfo pooling_info() const;
-    /** Computes pooling output shape
+    /** Computes pooling output descriptor
      *
-     * @param[in] input_shape Input shape
-     * @param[in] info        Pooling operation attributes
+     * @param[in] input_descriptor Input descriptor
+     * @param[in] info             Pooling operation attributes
      *
-     * @return Output shape
+     * @return Output descriptor
      */
-    static TensorShape compute_output_shape(TensorShape input_shape, PoolingLayerInfo info);
+    static TensorDescriptor compute_output_descriptor(const TensorDescriptor &input_descriptor, PoolingLayerInfo info);
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/ReshapeLayerNode.h b/arm_compute/graph/nodes/ReshapeLayerNode.h
index ded344e041..5161af866d 100644
--- a/arm_compute/graph/nodes/ReshapeLayerNode.h
+++ b/arm_compute/graph/nodes/ReshapeLayerNode.h
@@ -41,7 +41,6 @@ public:
     ReshapeLayerNode(TensorShape shape);
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/SoftmaxLayerNode.h b/arm_compute/graph/nodes/SoftmaxLayerNode.h
index 8b716047ff..6ace58d89b 100644
--- a/arm_compute/graph/nodes/SoftmaxLayerNode.h
+++ b/arm_compute/graph/nodes/SoftmaxLayerNode.h
@@ -46,7 +46,6 @@ public:
     float beta() const;
 
     // Inherited overridden methods:
-    Status           validate() override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/arm_compute/graph/nodes/SplitLayerNode.h b/arm_compute/graph/nodes/SplitLayerNode.h
index 923b3d1fa6..abd28ae5e3 100644
--- a/arm_compute/graph/nodes/SplitLayerNode.h
+++ b/arm_compute/graph/nodes/SplitLayerNode.h
@@ -42,16 +42,17 @@ public:
      * @param[in] axis       (Optional) Axis to split on. Supported axis >= 2. Defaults to 0
      */
     SplitLayerNode(unsigned int num_splits, unsigned int axis = 0);
-    /** Computes split layer output shape
+    /** Computes split layer output descriptor
      *
-     * @param[in] input_shape Shape of the input
-     * @param[in] num_splits  Number of splits
-     * @param[in] axis        Axis to perform the split on
-     * @param[in] idx         Index of the split
+     * @param[in] input_descriptor Descriptor of the input tensor
+     * @param[in] num_splits       Number of splits
+     * @param[in] axis             Axis to perform the split on
+     * @param[in] idx              Index of the split
      *
-     * @return  A pair with the shape of the split and the starting coordinates
+     * @return  A pair with the descriptor of the split and the starting coordinates
      */
-    static std::pair<TensorShape, Coordinates> compute_output_shape(TensorShape input_shape, unsigned int num_splits, unsigned int axis, unsigned int idx);
+    static std::pair<TensorDescriptor, Coordinates> compute_output_descriptor(const TensorDescriptor &input_descriptor,
+                                                                              unsigned int num_splits, unsigned int axis, unsigned int idx);
     /** Number of splits accessor
      *
      * @return Number of splits
@@ -64,7 +65,7 @@ public:
     unsigned int axis() const;
 
     // Inherited overridden methods:
-    Status           validate() override;
+    Status           validate() const override;
     NodeType         type() const override;
     bool             forward_descriptors() override;
     TensorDescriptor configure_output(size_t idx) const override;
diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp
index 743dd1a50c..6e2921a8a6 100644
--- a/examples/graph_mobilenet.cpp
+++ b/examples/graph_mobilenet.cpp
@@ -58,47 +58,62 @@ public:
         // Set model to execute. 0 (MobileNetV1_1.0_224), 1 (MobileNetV1_0.75_160)
         int model_id = (argc > 2) ? std::strtol(argv[2], nullptr, 10) : 0;
         ARM_COMPUTE_ERROR_ON_MSG(model_id > 1, "Invalid model ID. Model must be 0 (MobileNetV1_1.0_224) or 1 (MobileNetV1_0.75_160)");
-        float        depth_scale  = (model_id == 0) ? 1.f : 0.75;
-        unsigned int spatial_size = (model_id == 0) ? 224 : 160;
-        std::string  model_path   = (model_id == 0) ? "/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/";
+        int layout_id = (argc > 3) ? std::strtol(argv[3], nullptr, 10) : 0;
+        ARM_COMPUTE_ERROR_ON_MSG(layout_id > 1, "Invalid layout ID. Layout must be 0 (NCHW) or 1 (NHWC)");
+
+        float            depth_scale           = (model_id == 0) ? 1.f : 0.75;
+        unsigned int     spatial_size          = (model_id == 0) ? 224 : 160;
+        std::string      model_path            = (model_id == 0) ? "/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/";
+        TensorDescriptor input_descriptor_nchw = TensorDescriptor(TensorShape(spatial_size, spatial_size, 3U, 1U), DataType::F32);
+        TensorDescriptor input_descriptor_nhwc = TensorDescriptor(TensorShape(3U, spatial_size, spatial_size, 1U), DataType::F32).set_layout(DataLayout::NHWC);
+        TensorDescriptor input_descriptor      = (layout_id == 0) ? input_descriptor_nchw : input_descriptor_nhwc;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [model] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [model] [layout] [path_to_data] [image] [labels]\n\n";
             std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n";
+            std::cout << "No data layout provided: using NCHW\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [model] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [model] [layout] [path_to_data] [image] [labels]\n\n";
             std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n";
+            std::cout << "No data layout provided: using NCHW\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [layout] [path_to_data] [image] [labels]\n\n";
+            std::cout << "No data layout provided: using NCHW\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 4)
         {
-            data_path = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [image] [labels]\n\n";
-            std::cout << "No image provided: using random values\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 5)
         {
-            data_path = argv[3];
-            image     = argv[4];
+            data_path = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [image] [labels]\n\n";
+            std::cout << "No image provided: using random values\n\n";
+            std::cout << "No text file with labels provided: skipping output accessor\n\n";
+        }
+        else if(argc == 6)
+        {
+            data_path = argv[4];
+            image     = argv[5];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " " << argv[5] << " [labels]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
         else
         {
-            data_path = argv[3];
-            image     = argv[4];
-            label     = argv[5];
+            data_path = argv[4];
+            image     = argv[5];
+            label     = argv[6];
         }
 
         // Add model path to data path
@@ -110,11 +125,11 @@ public:
         graph << target_hint
               << convolution_hint
               << depthwise_convolution_hint
-              << InputLayer(TensorDescriptor(TensorShape(spatial_size, spatial_size, 3U, 1U), DataType::F32),
+              << InputLayer(input_descriptor,
                             get_input_accessor(image, std::move(preprocessor), false))
               << ConvolutionLayer(
                   3U, 3U, 32U * depth_scale,
-                  get_weights_accessor(data_path, "Conv2d_0_weights.npy"),
+                  get_weights_accessor(data_path, "Conv2d_0_weights.npy", DataLayout::NCHW),
                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                   PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))
               << BatchNormalizationLayer(
@@ -140,7 +155,7 @@ public:
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
               << ConvolutionLayer(
                   1U, 1U, 1001U,
-                  get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"),
+                  get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW),
                   get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ReshapeLayer(TensorShape(1001U))
@@ -170,7 +185,7 @@ private:
         SubStream   sg(graph);
         sg << DepthwiseConvolutionLayer(
                3U, 3U,
-               get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy"),
+               get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW),
                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                dwc_pad_stride_info)
            << BatchNormalizationLayer(
@@ -182,7 +197,7 @@ private:
            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))
            << ConvolutionLayer(
                1U, 1U, conv_filt,
-               get_weights_accessor(data_path, total_path + "pointwise_weights.npy"),
+               get_weights_accessor(data_path, total_path + "pointwise_weights.npy", DataLayout::NCHW),
                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                conv_pad_stride_info)
            << BatchNormalizationLayer(
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index ffb6d08993..be706e2e83 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -2719,7 +2719,7 @@ void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
     }
     else
     {
-        window_input.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _num_elems_processed_per_iteration));
+        window_input.set(Window::DimX, Window::Dimension(window.x().start(), window.x().end(), _num_elems_processed_per_iteration));
         window_input.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), pool_stride_x));
         window_input.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), pool_stride_y));
     }
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 4ad34e789c..56b31c7844 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -63,7 +63,7 @@ Status set_accessor_on_node(Graph &g, NodeID nid, bool is_output, size_t idx, IT
 NodeID add_const_node_with_name(Graph &g, NodeParams params, const std::string &name, TensorDescriptor desc, ITensorAccessorUPtr accessor)
 {
     params.name = params.name.empty() ? "" : params.name + name;
-    auto nid    = GraphBuilder::add_const_node(g, params, desc, std::move(accessor));
+    auto nid    = GraphBuilder::add_const_node(g, params, std::move(desc), std::move(accessor));
     set_node_params(g, nid, params);
     return nid;
 }
@@ -165,7 +165,7 @@ NodeID GraphBuilder::add_batch_normalization_node(Graph &g, NodeParams params, N
 
     // Calculate Common Descriptor
     TensorDescriptor common_desc = input_tensor_desc;
-    common_desc.shape            = TensorShape(common_desc.shape.z());
+    common_desc.shape            = TensorShape(get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL));
 
     // Create mean and nodes
     auto mean_nid = add_const_node_with_name(g, params, "Mean", common_desc, std::move(mean_accessor));
@@ -221,8 +221,11 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa
 
     // Create weights node
     TensorDescriptor w_desc = input_tensor_desc;
-    w_desc.shape            = TensorShape(kernel_spatial_extend.width, kernel_spatial_extend.height, w_desc.shape.z() / num_groups, depth);
-
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::WIDTH), kernel_spatial_extend.width);
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::HEIGHT), kernel_spatial_extend.height);
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL),
+                     get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL) / num_groups);
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::BATCHES), depth);
     if(!weights_quant_info.empty())
     {
         w_desc.quant_info = weights_quant_info;
@@ -290,8 +293,10 @@ NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params,
 
     // Create weights node
     TensorDescriptor w_desc = input_tensor_desc;
-    w_desc.shape            = TensorShape(kernel_spatial_extend.width, kernel_spatial_extend.height, w_desc.shape.z());
-
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::WIDTH), kernel_spatial_extend.width);
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::HEIGHT), kernel_spatial_extend.height);
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL),
+                     get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL));
     if(!quant_info.empty())
     {
         w_desc.quant_info = quant_info;
@@ -353,9 +358,8 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node
     const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
 
     // Create weights node
-    TensorDescriptor w_desc = input_tensor_desc;
-    w_desc.shape            = FullyConnectedLayerNode::compute_weights_shape(input_tensor_desc.shape, num_outputs);
-    NodeID w_nid            = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
+    TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs);
+    NodeID           w_nid  = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
 
     // Create bias nodes
     NodeID b_nid = EmptyNodeID;
diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp
index fa7dfdf8f8..aac6488311 100644
--- a/src/graph/GraphManager.cpp
+++ b/src/graph/GraphManager.cpp
@@ -62,8 +62,6 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &
     // Apply all mutating passes
     pm.run_all(graph);
 
-    // TODO (geopin01): Perform a graph validation
-
     // Perform topological sort
     // FIXME : Sort nodes and pass sorted indices in configure all nodes
 
diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp
index c1c18e5853..cd9a46ac40 100644
--- a/src/graph/INode.cpp
+++ b/src/graph/INode.cpp
@@ -42,6 +42,11 @@ INode::INode()
 // clang-format on
 // *INDENT-ON*
 
+Status INode::validate() const
+{
+    return Status{};
+}
+
 void INode::set_graph(Graph *g)
 {
     ARM_COMPUTE_ERROR_ON(g == nullptr);
diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp
index 8537bbfb2a..030fa2df59 100644
--- a/src/graph/Utils.cpp
+++ b/src/graph/Utils.cpp
@@ -89,10 +89,6 @@ PassManager create_default_pass_manager(Target target)
     return pm;
 }
 
-/** Default setups a graph Context
- *
- * @param[in] ctx Context to default initialize
- */
 void setup_default_graph_context(GraphContext &ctx)
 {
     for(const auto &backend : backends::BackendRegistry::get().backends())
@@ -100,5 +96,40 @@ void setup_default_graph_context(GraphContext &ctx)
         backend.second->setup_backend_context(ctx);
     }
 }
+
+size_t get_dimension_size(const TensorDescriptor &descriptor, const DataLayoutDimension data_layout_dimension)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(descriptor.layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
+    return descriptor.shape[get_dimension_idx(descriptor, data_layout_dimension)];
+}
+
+size_t get_dimension_idx(const TensorDescriptor &descriptor, const DataLayoutDimension data_layout_dimension)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(descriptor.layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
+
+    /* Return the shape index of the requested dimension for the descriptor's layout
+     * Index : [3 2 1 0]
+     * NCHW  : [N C H W]
+     * NHWC  : [N H W C]
+     */
+    switch(data_layout_dimension)
+    {
+        case DataLayoutDimension::CHANNEL:
+            return (descriptor.layout == DataLayout::NCHW) ? 2 : 0;
+            break;
+        case DataLayoutDimension::HEIGHT:
+            return (descriptor.layout == DataLayout::NCHW) ? 1 : 2;
+            break;
+        case DataLayoutDimension::WIDTH:
+            return (descriptor.layout == DataLayout::NCHW) ? 0 : 1;
+            break;
+        case DataLayoutDimension::BATCHES:
+            return 3;
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Data layout index not supported!");
+            break;
+    }
+}
 } // namespace graph
 } // namespace arm_compute
 \ No newline at end of file
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index 92cb6936c3..37cbcd72d7 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -127,7 +127,8 @@ std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tens
 
     // Create backend tensor handle
     TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
-    auto       backend_tensor_handle = support::cpp14::make_unique<CLTensorHandle>(info);
+    info.set_data_layout(tensor_desc.layout);
+    auto backend_tensor_handle = support::cpp14::make_unique<CLTensorHandle>(info);
 
     return std::move(backend_tensor_handle);
 }
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
index a55215f058..0185598965 100644
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ b/src/graph/backends/GLES/GCDeviceBackend.cpp
@@ -88,7 +88,8 @@ std::unique_ptr<ITensorHandle> GCDeviceBackend::create_tensor(const Tensor &tens
 
     // Create backend tensor handle
     TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
-    auto       backend_tensor_handle = support::cpp14::make_unique<GCTensorHandle>(info);
+    info.set_data_layout(tensor_desc.layout);
+    auto backend_tensor_handle = support::cpp14::make_unique<GCTensorHandle>(info);
 
     return std::move(backend_tensor_handle);
 }
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index 9123196540..def6c39003 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -94,7 +94,8 @@ std::unique_ptr<ITensorHandle> NEDeviceBackend::create_tensor(const Tensor &tens
 
     // Create backend tensor handle
     TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
-    auto       backend_tensor_handle = support::cpp14::make_unique<NETensorHandle>(info);
+    info.set_data_layout(tensor_desc.layout);
+    auto backend_tensor_handle = support::cpp14::make_unique<NETensorHandle>(info);
 
     return std::move(backend_tensor_handle);
 }
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index 0bb47f2b33..c1304436f6 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -43,6 +43,24 @@ void default_initialize_backends()
     }
 }
 
+void validate_all_nodes(Graph &g)
+{
+    auto &nodes = g.nodes();
+
+    // Validate every node using the backend of its assigned target
+    for(auto &node : nodes)
+    {
+        if(node != nullptr)
+        {
+            Target assigned_target = node->assigned_target();
+            auto   backend         = backends::BackendRegistry::get().find_backend(assigned_target);
+            ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
+            Status status = backend->validate_node(*node);
+            ARM_COMPUTE_ERROR_ON_MSG(!bool(status), status.error_description().c_str());
+        }
+    }
+}
+
 void configure_all_tensors(Graph &g)
 {
     auto &tensors = g.tensors();
@@ -121,24 +139,6 @@ void allocate_all_tensors(Graph &g)
     }
 }
 
-void validate_all_nodes(Graph &g)
-{
-    auto &nodes = g.nodes();
-
-    // Create tasks
-    for(auto &node : nodes)
-    {
-        if(node != nullptr)
-        {
-            Target assigned_target = node->assigned_target();
-            auto   backend         = backends::BackendRegistry::get().find_backend(assigned_target);
-            ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
-            Status status = backend->validate_node(*node);
-            ARM_COMPUTE_ERROR_ON_MSG(!bool(status), status.error_description().c_str());
-        }
-    }
-}
-
 ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx)
 {
     ExecutionWorkload workload;
diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
index 179a6c35fb..2a8c029843 100644
--- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp
+++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
@@ -75,7 +75,7 @@ void SplitLayerSubTensorMutator::mutate(Graph &g)
                     Tensor           *output_tensor = node->output(i);
                     const TensorShape output_shape  = output_tensor->desc().shape;
                     Coordinates       coords;
-                    std::tie(std::ignore, coords) = SplitLayerNode::compute_output_shape(input_tensor->desc().shape, num_splits, axis, i);
+                    std::tie(std::ignore, coords) = SplitLayerNode::compute_output_descriptor(input_tensor->desc(), num_splits, axis, i);
 
                     backends::IDeviceBackend      *backend = backends::BackendRegistry::get().find_backend(output_tensor->desc().target);
                     std::unique_ptr<ITensorHandle> handle  = backend->create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent);
diff --git a/src/graph/nodes/ActivationLayerNode.cpp b/src/graph/nodes/ActivationLayerNode.cpp
index 9996d2ce3f..414684cf30 100644
--- a/src/graph/nodes/ActivationLayerNode.cpp
+++ b/src/graph/nodes/ActivationLayerNode.cpp
@@ -65,11 +65,6 @@ TensorDescriptor ActivationLayerNode::configure_output(size_t idx) const
     return src->desc();
 }
 
-Status ActivationLayerNode::validate()
-{
-    return Status{};
-}
-
 NodeType ActivationLayerNode::type() const
 {
     return NodeType::ActivationLayer;
diff --git a/src/graph/nodes/BatchNormalizationLayerNode.cpp b/src/graph/nodes/BatchNormalizationLayerNode.cpp
index f7b041c828..3ae11fc24d 100644
--- a/src/graph/nodes/BatchNormalizationLayerNode.cpp
+++ b/src/graph/nodes/BatchNormalizationLayerNode.cpp
@@ -76,11 +76,6 @@ TensorDescriptor BatchNormalizationLayerNode::configure_output(size_t idx) const
     return src->desc();
 }
 
-Status BatchNormalizationLayerNode::validate()
-{
-    return Status{};
-}
-
 NodeType BatchNormalizationLayerNode::type() const
 {
     return NodeType::BatchNormalizationLayer;
diff --git a/src/graph/nodes/ConstNode.cpp b/src/graph/nodes/ConstNode.cpp
index 631971c98f..2f3cd142af 100644
--- a/src/graph/nodes/ConstNode.cpp
+++ b/src/graph/nodes/ConstNode.cpp
@@ -31,7 +31,7 @@ namespace arm_compute
 namespace graph
 {
 ConstNode::ConstNode(TensorDescriptor desc)
-    : _desc(desc)
+    : _desc(std::move(desc))
 {
     _outputs.resize(1, NullTensorID);
 }
@@ -54,11 +54,6 @@ TensorDescriptor ConstNode::configure_output(size_t idx) const
     return _desc;
 }
 
-Status ConstNode::validate()
-{
-    return Status{};
-}
-
 NodeType ConstNode::type() const
 {
     return NodeType::Const;
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index eb0c6a1c1a..eaf1f7f035 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -26,6 +26,7 @@
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
 
 namespace arm_compute
 {
@@ -53,18 +54,26 @@ PadStrideInfo ConvolutionLayerNode::convolution_info() const
     return _info;
 }
 
-TensorShape ConvolutionLayerNode::compute_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo info)
+TensorDescriptor ConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+                                                                 const TensorDescriptor &weights_descriptor,
+                                                                 const PadStrideInfo    &info)
 {
     unsigned int output_width  = 0;
     unsigned int output_height = 0;
-    std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), info);
 
-    TensorShape output_shape{ input_shape };
-    output_shape.set(0, output_width);
-    output_shape.set(1, output_height);
-    output_shape.set(2, weights_shape[3]);
+    const unsigned int input_width   = get_dimension_size(input_descriptor, DataLayoutDimension::WIDTH);
+    const unsigned int input_height  = get_dimension_size(input_descriptor, DataLayoutDimension::HEIGHT);
+    const unsigned int kernel_width  = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
+    const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
 
-    return output_shape;
+    std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
+
+    TensorDescriptor output_descriptor = input_descriptor;
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::WIDTH), output_width);
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::HEIGHT), output_height);
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::CHANNEL), weights_descriptor.shape[3]);
+
+    return output_descriptor;
 }
 
 bool ConvolutionLayerNode::forward_descriptors()
@@ -87,10 +96,7 @@ TensorDescriptor ConvolutionLayerNode::configure_output(size_t idx) const
 
     ARM_COMPUTE_ERROR_ON(src == nullptr || weights == nullptr);
 
-    TensorDescriptor output_info  = src->desc();
-    TensorShape      output_shape = compute_output_shape(src->desc().shape, weights->desc().shape, _info);
-    output_info.shape             = output_shape;
-
+    TensorDescriptor output_info = compute_output_descriptor(src->desc(), weights->desc(), _info);
     if(!_out_quant_info.empty())
     {
         output_info.quant_info = _out_quant_info;
@@ -99,11 +105,6 @@ TensorDescriptor ConvolutionLayerNode::configure_output(size_t idx) const
     return output_info;
 }
 
-Status ConvolutionLayerNode::validate()
-{
-    return Status{};
-}
-
 NodeType ConvolutionLayerNode::type() const
 {
     return NodeType::ConvolutionLayer;
diff --git a/src/graph/nodes/DepthConcatenateLayerNode.cpp b/src/graph/nodes/DepthConcatenateLayerNode.cpp
index 1c0539744f..08cccc1ff1 100644
--- a/src/graph/nodes/DepthConcatenateLayerNode.cpp
+++ b/src/graph/nodes/DepthConcatenateLayerNode.cpp
@@ -34,7 +34,7 @@ namespace graph
 DepthConcatenateLayerNode::DepthConcatenateLayerNode(unsigned int total_nodes)
     : _total_nodes(total_nodes), _is_enabled(true)
 {
-    _input_edges.resize(total_nodes, EmptyEdgeID);
+    _input_edges.resize(_total_nodes, EmptyEdgeID);
     _outputs.resize(1, NullTensorID);
 }
 
@@ -48,28 +48,28 @@ bool DepthConcatenateLayerNode::is_enabled() const
     return _is_enabled;
 }
 
-TensorShape DepthConcatenateLayerNode::compute_output_shape(const std::vector<TensorShape> &input_shapes)
+TensorDescriptor DepthConcatenateLayerNode::compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors)
 {
-    ARM_COMPUTE_ERROR_ON(input_shapes.size() == 0);
+    ARM_COMPUTE_ERROR_ON(input_descriptors.size() == 0);
 
-    TensorShape output_shape = input_shapes[0];
+    TensorDescriptor output_descriptor = input_descriptors[0];
 
     size_t max_x = 0;
     size_t max_y = 0;
     size_t depth = 0;
 
-    for(const auto &shape : input_shapes)
+    for(const auto &input_descriptor : input_descriptors)
     {
-        max_x = std::max(shape.x(), max_x);
-        max_y = std::max(shape.y(), max_y);
-        depth += shape.z();
+        max_x = std::max(input_descriptor.shape.x(), max_x);
+        max_y = std::max(input_descriptor.shape.y(), max_y);
+        depth += input_descriptor.shape.z();
     }
 
-    output_shape.set(0, max_x);
-    output_shape.set(1, max_y);
-    output_shape.set(2, depth);
+    output_descriptor.shape.set(0, max_x);
+    output_descriptor.shape.set(1, max_y);
+    output_descriptor.shape.set(2, depth);
 
-    return output_shape;
+    return output_descriptor;
 }
 
 bool DepthConcatenateLayerNode::forward_descriptors()
@@ -99,27 +99,19 @@ TensorDescriptor DepthConcatenateLayerNode::configure_output(size_t idx) const
 
     if(are_all_inputs_set)
     {
-        std::vector<TensorShape> inputs_shapes;
+        std::vector<TensorDescriptor> inputs_descriptors;
         for(unsigned int i = 0; i < _input_edges.size(); ++i)
         {
             const Tensor *t = _graph->tensor(input_id(i));
             ARM_COMPUTE_ERROR_ON(t == nullptr);
-            inputs_shapes.push_back(t->desc().shape);
+            inputs_descriptors.push_back(t->desc());
         }
-        output_info              = input(0)->desc();
-        TensorShape output_shape = compute_output_shape(inputs_shapes);
-        output_info.shape        = output_shape;
+        output_info = compute_output_descriptor(inputs_descriptors);
     }
 
     return output_info;
 }
 
-Status DepthConcatenateLayerNode::validate()
-{
-    ARM_COMPUTE_UNUSED(_total_nodes);
-    return Status{};
-}
-
 NodeType DepthConcatenateLayerNode::type() const
 {
     return NodeType::DepthConcatenateLayer;
diff --git a/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp b/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
index 67a39029e6..1a6f8d398d 100644
--- a/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
+++ b/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
@@ -26,6 +26,7 @@
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
 
 namespace arm_compute
 {
@@ -53,17 +54,25 @@ PadStrideInfo DepthwiseConvolutionLayerNode::convolution_info() const
     return _info;
 }
 
-TensorShape DepthwiseConvolutionLayerNode::compute_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo info)
+TensorDescriptor DepthwiseConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+                                                                          const TensorDescriptor &weights_descriptor,
+                                                                          const PadStrideInfo    &info)
 {
     unsigned int output_width  = 0;
     unsigned int output_height = 0;
-    std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), info);
 
-    TensorShape output_shape{ input_shape };
-    output_shape.set(0, output_width);
-    output_shape.set(1, output_height);
+    const unsigned int input_width   = get_dimension_size(input_descriptor, DataLayoutDimension::WIDTH);
+    const unsigned int input_height  = get_dimension_size(input_descriptor, DataLayoutDimension::HEIGHT);
+    const unsigned int kernel_width  = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
+    const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
 
-    return output_shape;
+    std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
+
+    TensorDescriptor output_descriptor = input_descriptor;
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::WIDTH), output_width);
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::HEIGHT), output_height);
+
+    return output_descriptor;
 }
 
 bool DepthwiseConvolutionLayerNode::forward_descriptors()
@@ -86,15 +95,7 @@ TensorDescriptor DepthwiseConvolutionLayerNode::configure_output(size_t idx) con
 
     ARM_COMPUTE_ERROR_ON(src == nullptr || weights == nullptr);
 
-    TensorDescriptor output_info  = src->desc();
-    TensorShape      output_shape = compute_output_shape(src->desc().shape, weights->desc().shape, _info);
-    output_info.shape             = output_shape;
-    return output_info;
-}
-
-Status DepthwiseConvolutionLayerNode::validate()
-{
-    return Status{};
+    return compute_output_descriptor(src->desc(), weights->desc(), _info);
 }
 
 NodeType DepthwiseConvolutionLayerNode::type() const
diff --git a/src/graph/nodes/EltwiseLayerNode.cpp b/src/graph/nodes/EltwiseLayerNode.cpp
index b794043f2f..6f1e0eecd9 100644
--- a/src/graph/nodes/EltwiseLayerNode.cpp
+++ b/src/graph/nodes/EltwiseLayerNode.cpp
@@ -65,11 +65,6 @@ TensorDescriptor EltwiseLayerNode::configure_output(size_t idx) const
     return src->desc();
 }
 
-Status EltwiseLayerNode::validate()
-{
-    return Status{};
-}
-
 NodeType EltwiseLayerNode::type() const
 {
     return NodeType::EltwiseLayer;
diff --git a/src/graph/nodes/FlattenLayerNode.cpp b/src/graph/nodes/FlattenLayerNode.cpp
index 8b847c7056..78b45dc305 100644
--- a/src/graph/nodes/FlattenLayerNode.cpp
+++ b/src/graph/nodes/FlattenLayerNode.cpp
@@ -62,11 +62,6 @@ TensorDescriptor FlattenLayerNode::configure_output(size_t idx) const
     return output_desc;
 }
 
-Status FlattenLayerNode::validate()
-{
-    return Status{};
-}
-
 NodeType FlattenLayerNode::type() const
 {
     return NodeType::FlattenLayer;
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index cbf2b35ddd..d94a7851ff 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -38,10 +38,11 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs)
     _outputs.resize(1, NullTensorID);
 }
 
-TensorShape FullyConnectedLayerNode::compute_weights_shape(TensorShape input_shape, unsigned int num_outputs)
+TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor,
+                                                                     unsigned int            num_outputs)
 {
     unsigned int num_weights    = 1;
-    unsigned int num_dimensions = input_shape.num_dimensions();
+    unsigned int num_dimensions = input_descriptor.shape.num_dimensions();
     // Ignore the batch dimension if there is one:
     if(num_dimensions == 2 || num_dimensions == 4)
     {
@@ -49,20 +50,29 @@ TensorShape FullyConnectedLayerNode::compute_weights_shape(TensorShape input_sha
     }
     for(unsigned int i = 0; i < num_dimensions; i++)
     {
-        num_weights *= input_shape[i];
+        num_weights *= input_descriptor.shape[i];
     }
-    return TensorShape(num_weights, num_outputs);
+
+    TensorDescriptor weights_descriptor = input_descriptor;
+    weights_descriptor.shape            = TensorShape(num_weights, num_outputs);
+
+    return weights_descriptor;
 }
 
-TensorShape FullyConnectedLayerNode::compute_output_shape(TensorShape input_shape, unsigned int num_outputs)
+TensorDescriptor FullyConnectedLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+                                                                    unsigned int            num_outputs)
 {
     // Note: Only 1D batch space is supported at the moment
-    unsigned int batches = input_shape[1];
-    if(input_shape.num_dimensions() > 2)
+    unsigned int batches = input_descriptor.shape[1];
+    if(input_descriptor.shape.num_dimensions() > 2)
     {
-        batches = input_shape[3];
+        batches = input_descriptor.shape[3];
     }
-    return TensorShape(num_outputs, batches);
+
+    TensorDescriptor output_descriptor = input_descriptor;
+    output_descriptor.shape            = TensorShape(num_outputs, batches);
+
+    return output_descriptor;
 }
 
 bool FullyConnectedLayerNode::forward_descriptors()
@@ -83,15 +93,7 @@ TensorDescriptor FullyConnectedLayerNode::configure_output(size_t idx) const
     const Tensor *src = input(0);
     ARM_COMPUTE_ERROR_ON(src == nullptr);
 
-    TensorDescriptor output_info  = src->desc();
-    TensorShape      output_shape = compute_output_shape(src->desc().shape, _num_outputs);
-    output_info.shape             = output_shape;
-    return output_info;
-}
-
-Status FullyConnectedLayerNode::validate()
-{
-    return Status{};
+    return compute_output_descriptor(src->desc(), _num_outputs);
 }
 
 NodeType FullyConnectedLayerNode::type() const
diff --git a/src/graph/nodes/InputNode.cpp b/src/graph/nodes/InputNode.cpp
index e912633a66..709eaae14c 100644
--- a/src/graph/nodes/InputNode.cpp
+++ b/src/graph/nodes/InputNode.cpp
@@ -31,7 +31,7 @@ namespace arm_compute
 namespace graph
 {
 InputNode::InputNode(TensorDescriptor desc)
-    : _desc(desc)
+    : _desc(std::move(desc))
 {
     _outputs.resize(1, NullTensorID);
 }
@@ -54,11 +54,6 @@ TensorDescriptor InputNode::configure_output(size_t idx) const
     return _desc;
 }
 
-Status InputNode::validate()
-{
-    return Status{};
-}
-
 NodeType InputNode::type() const
 {
     return NodeType::Input;
diff --git a/src/graph/nodes/NormalizationLayerNode.cpp b/src/graph/nodes/NormalizationLayerNode.cpp
index a9f2fbd066..a7b373860e 100644
--- a/src/graph/nodes/NormalizationLayerNode.cpp
+++ b/src/graph/nodes/NormalizationLayerNode.cpp
@@ -66,11 +66,6 @@ TensorDescriptor NormalizationLayerNode::configure_output(size_t idx) const
     return src->desc();
 }
 
-Status NormalizationLayerNode::validate()
-{
-    return Status{};
-}
-
 NodeType NormalizationLayerNode::type() const
 {
     return NodeType::NormalizationLayer;
diff --git a/src/graph/nodes/OutputNode.cpp b/src/graph/nodes/OutputNode.cpp
index 4c63bfa20c..8aa249bc2a 100644
--- a/src/graph/nodes/OutputNode.cpp
+++ b/src/graph/nodes/OutputNode.cpp
@@ -48,11 +48,6 @@ TensorDescriptor OutputNode::configure_output(size_t idx) const
     return TensorDescriptor();
 }
 
-Status OutputNode::validate()
-{
-    return Status{};
-}
-
 NodeType OutputNode::type() const
 {
     return NodeType::Output;
diff --git a/src/graph/nodes/PoolingLayerNode.cpp b/src/graph/nodes/PoolingLayerNode.cpp
index a7b6b3679a..26c145ae31 100644
--- a/src/graph/nodes/PoolingLayerNode.cpp
+++ b/src/graph/nodes/PoolingLayerNode.cpp
@@ -26,6 +26,7 @@
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
 
 namespace arm_compute
 {
@@ -43,20 +44,24 @@ PoolingLayerInfo PoolingLayerNode::pooling_info() const
     return _info;
 }
 
-TensorShape PoolingLayerNode::compute_output_shape(TensorShape input_shape, PoolingLayerInfo info)
+TensorDescriptor PoolingLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+                                                             PoolingLayerInfo        info)
 {
-    const int pool_size_x = info.is_global_pooling() ? input_shape.x() : info.pool_size().width;
-    const int pool_size_y = info.is_global_pooling() ? input_shape.y() : info.pool_size().height;
-
     unsigned int pooled_width  = 0;
     unsigned int pooled_height = 0;
-    std::tie(pooled_width, pooled_height) = scaled_dimensions(input_shape.x(), input_shape.y(), pool_size_x, pool_size_y, info.pad_stride_info());
 
-    TensorShape output_shape{ input_shape };
-    output_shape.set(0, pooled_width);
-    output_shape.set(1, pooled_height);
+    const unsigned int input_width  = get_dimension_size(input_descriptor, DataLayoutDimension::WIDTH);
+    const unsigned int input_height = get_dimension_size(input_descriptor, DataLayoutDimension::HEIGHT);
+    const unsigned int pool_size_x  = info.is_global_pooling() ? input_width : info.pool_size().width;
+    const unsigned int pool_size_y  = info.is_global_pooling() ? input_height : info.pool_size().height;
+
+    std::tie(pooled_width, pooled_height) = scaled_dimensions(input_width, input_height, pool_size_x, pool_size_y, info.pad_stride_info());
+
+    TensorDescriptor output_descriptor = input_descriptor;
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::WIDTH), pooled_width);
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::HEIGHT), pooled_height);
 
-    return output_shape;
+    return output_descriptor;
 }
 
 bool PoolingLayerNode::forward_descriptors()
@@ -79,15 +84,7 @@ TensorDescriptor PoolingLayerNode::configure_output(size_t idx) const
     const Tensor *src = input(0);
     ARM_COMPUTE_ERROR_ON(src == nullptr);
 
-    TensorDescriptor output_info  = src->desc();
-    TensorShape      output_shape = compute_output_shape(src->desc().shape, _info);
-    output_info.shape             = output_shape;
-    return output_info;
-}
-
-Status PoolingLayerNode::validate()
-{
-    return Status{};
+    return compute_output_descriptor(src->desc(), _info);
 }
 
 NodeType PoolingLayerNode::type() const
diff --git a/src/graph/nodes/ReshapeLayer.cpp b/src/graph/nodes/ReshapeLayer.cpp
index 2757f06bd3..58610e9b1c 100644
--- a/src/graph/nodes/ReshapeLayer.cpp
+++ b/src/graph/nodes/ReshapeLayer.cpp
@@ -63,11 +63,6 @@ TensorDescriptor ReshapeLayerNode::configure_output(size_t idx) const
     return output_desc;
 }
 
-Status ReshapeLayerNode::validate()
-{
-    return Status{};
-}
-
 NodeType ReshapeLayerNode::type() const
 {
     return NodeType::ReshapeLayer;
diff --git a/src/graph/nodes/SoftmaxLayerNode.cpp b/src/graph/nodes/SoftmaxLayerNode.cpp
index b6241e6654..57e556160f 100644
--- a/src/graph/nodes/SoftmaxLayerNode.cpp
+++ b/src/graph/nodes/SoftmaxLayerNode.cpp
@@ -69,11 +69,6 @@ TensorDescriptor SoftmaxLayerNode::configure_output(size_t idx) const
     return out_desc;
 }
 
-Status SoftmaxLayerNode::validate()
-{
-    return Status{};
-}
-
 NodeType SoftmaxLayerNode::type() const
 {
     return NodeType::SoftmaxLayer;
diff --git a/src/graph/nodes/SplitLayerNode.cpp b/src/graph/nodes/SplitLayerNode.cpp
index c8fb43c2a1..5d46c9dcc9 100644
--- a/src/graph/nodes/SplitLayerNode.cpp
+++ b/src/graph/nodes/SplitLayerNode.cpp
@@ -48,26 +48,25 @@ unsigned int SplitLayerNode::axis() const
     return _axis;
 }
 
-std::pair<TensorShape, Coordinates> SplitLayerNode::compute_output_shape(TensorShape input_shape, unsigned int num_splits, unsigned int axis, unsigned int idx)
+std::pair<TensorDescriptor, Coordinates> SplitLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+                                                                                   unsigned int num_splits, unsigned int axis, unsigned int idx)
 {
-    ARM_COMPUTE_ERROR_ON(axis >= input_shape.num_dimensions());
-    ARM_COMPUTE_ERROR_ON_MSG(input_shape[axis] % num_splits, "Split should be exact");
+    const unsigned int split_size = input_descriptor.shape[axis] / num_splits;
 
-    const unsigned int split_size = input_shape[axis] / num_splits;
-
-    TensorShape output_shape = input_shape;
-    output_shape.set(axis, split_size);
+    TensorDescriptor output_descriptor = input_descriptor;
+    output_descriptor.shape.set(axis, split_size);
 
     Coordinates coords;
     coords.set(axis, idx * split_size);
 
-    return std::make_pair(output_shape, coords);
+    return std::make_pair(output_descriptor, coords);
 }
 
 bool SplitLayerNode::forward_descriptors()
 {
     if(input_id(0) != NullTensorID)
     {
+        validate();
         for(unsigned int i = 0; i < _outputs.size(); ++i)
         {
             if(output_id(i) != NullTensorID)
@@ -90,17 +89,19 @@ TensorDescriptor SplitLayerNode::configure_output(size_t idx) const
     const Tensor *src = input(0);
     ARM_COMPUTE_ERROR_ON(src == nullptr);
 
-    TensorShape output_shape;
-
-    TensorDescriptor output_info = src->desc();
-    std::tie(output_shape, std::ignore) = compute_output_shape(src->desc().shape, _num_splits, _axis, idx);
-    output_info.shape = output_shape;
+    TensorDescriptor output_info;
+    std::tie(output_info, std::ignore) = compute_output_descriptor(src->desc(), _num_splits, _axis, idx);
 
     return output_info;
 }
 
-Status SplitLayerNode::validate()
+Status SplitLayerNode::validate() const
 {
+    const Tensor *src = input(0);
+    ARM_COMPUTE_RETURN_ERROR_ON(src == nullptr);
+    ARM_COMPUTE_RETURN_ERROR_ON(_axis >= src->desc().shape.num_dimensions());
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->desc().shape[_axis] % _num_splits, "Split should be exact");
+
     return Status{};
 }
 
diff --git a/src/graph/printers/DotGraphPrinter.cpp b/src/graph/printers/DotGraphPrinter.cpp
index 47b1bb56bf..61cf42356f 100644
--- a/src/graph/printers/DotGraphPrinter.cpp
+++ b/src/graph/printers/DotGraphPrinter.cpp
@@ -164,7 +164,7 @@ void DotGraphPrinter::print_edges(const Graph &g, std::ostream &os)
             os << source_node_id << " -> " << sink_node_id << " ";
             const Tensor *t = e->tensor();
             ARM_COMPUTE_ERROR_ON(t == nullptr);
-            os << R"([label = ")" << t->desc().shape << R"( \n )" << t->desc().data_type << R"("])";
+            os << R"([label = ")" << t->desc().shape << R"( \n )" << t->desc().data_type << R"( \n )" << t->desc().layout << R"("])";
             os << ";\n";
         }
     }
diff --git a/utils/GraphUtils.cpp b/utils/GraphUtils.cpp
index 7912fd6b7d..145e44950b 100644
--- a/utils/GraphUtils.cpp
+++ b/utils/GraphUtils.cpp
@@ -24,6 +24,8 @@
 
 #include "utils/GraphUtils.h"
 
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/SubTensor.h"
 #include "utils/Utils.h"
 
@@ -31,6 +33,27 @@
 
 using namespace arm_compute::graph_utils;
 
+namespace
+{
+std::pair<arm_compute::TensorShape, arm_compute::PermutationVector> compute_permutation_paramaters(const arm_compute::TensorShape &shape,
+                                                                                                   arm_compute::DataLayout data_layout)
+{
+    // Set permutation parameters if needed
+    arm_compute::TensorShape       permuted_shape = shape;
+    arm_compute::PermutationVector perm;
+    // Permute only if num_dimensions greater than 2
+    if(shape.num_dimensions() > 2)
+    {
+        perm = (data_layout == arm_compute::DataLayout::NHWC) ? arm_compute::PermutationVector(2U, 0U, 1U) : arm_compute::PermutationVector(1U, 2U, 0U);
+
+        arm_compute::PermutationVector perm_shape = (data_layout == arm_compute::DataLayout::NCHW) ? arm_compute::PermutationVector(2U, 0U, 1U) : arm_compute::PermutationVector(1U, 2U, 0U);
+        arm_compute::permute(permuted_shape, perm_shape);
+    }
+
+    return std::make_pair(permuted_shape, perm);
+}
+} // namespace
+
 void TFPreproccessor::preprocess(ITensor &tensor)
 {
     Window window;
@@ -118,8 +141,15 @@ bool PPMAccessor::access_tensor(ITensor &tensor)
     // Open PPM file
     ppm.open(_ppm_path);
 
-    ARM_COMPUTE_ERROR_ON_MSG(ppm.width() != tensor.info()->dimension(0) || ppm.height() != tensor.info()->dimension(1),
-                             "Failed to load image file: dimensions [%d,%d] not correct, expected [%d,%d].", ppm.width(), ppm.height(), tensor.info()->dimension(0), tensor.info()->dimension(1));
+    // Get permuted shape and permutation parameters
+    TensorShape                    permuted_shape = tensor.info()->tensor_shape();
+    arm_compute::PermutationVector perm;
+    if(tensor.info()->data_layout() != DataLayout::NCHW)
+    {
+        std::tie(permuted_shape, perm) = compute_permutation_paramaters(tensor.info()->tensor_shape(), tensor.info()->data_layout());
+    }
+    ARM_COMPUTE_ERROR_ON_MSG(ppm.width() != permuted_shape.x() || ppm.height() != permuted_shape.y(),
+                             "Failed to load image file: dimensions [%d,%d] not correct, expected [%d,%d].", ppm.width(), ppm.height(), permuted_shape.x(), permuted_shape.y());
 
     // Fill the tensor with the PPM content (BGR)
     ppm.fill_planar_tensor(tensor, _bgr);
@@ -320,8 +350,8 @@ bool RandomAccessor::access_tensor(ITensor &tensor)
     return true;
 }
 
-NumPyBinLoader::NumPyBinLoader(std::string filename)
-    : _filename(std::move(filename))
+NumPyBinLoader::NumPyBinLoader(std::string filename, DataLayout file_layout)
+    : _filename(std::move(filename)), _file_layout(file_layout)
 {
 }
 
@@ -366,30 +396,57 @@ bool NumPyBinLoader::access_tensor(ITensor &tensor)
         }
     }
 
+    bool are_layouts_different = (_file_layout != tensor.info()->data_layout());
+
     // Validate tensor ranks
     ARM_COMPUTE_ERROR_ON_MSG(shape.size() != tensor_shape.num_dimensions(), "Tensor ranks mismatch");
 
+    // Set permutation parameters if needed
+    TensorShape                    permuted_shape = tensor_shape;
+    arm_compute::PermutationVector perm;
+    if(are_layouts_different)
+    {
+        std::tie(permuted_shape, perm) = compute_permutation_paramaters(tensor_shape, tensor.info()->data_layout());
+    }
+
     // Validate shapes
     for(size_t i = 0; i < shape.size(); ++i)
     {
-        ARM_COMPUTE_ERROR_ON_MSG(tensor_shape[i] != shape[i], "Tensor dimensions mismatch");
+        ARM_COMPUTE_ERROR_ON_MSG(permuted_shape[i] != shape[i], "Tensor dimensions mismatch");
     }
 
-    // Read data
-    if(tensor.info()->padding().empty() && (dynamic_cast<SubTensor *>(&tensor) == nullptr))
+    // Copy tensor data; coordinates are permuted only when the layouts differ and a permutation is set
+    if(!are_layouts_different || perm.num_dimensions() <= 2)
     {
-        // If tensor has no padding read directly from stream.
-        stream.read(reinterpret_cast<char *>(tensor.buffer()), tensor.info()->total_size());
+        // Read data
+        if(tensor.info()->padding().empty() && (dynamic_cast<SubTensor *>(&tensor) == nullptr))
+        {
+            // If tensor has no padding read directly from stream.
+            stream.read(reinterpret_cast<char *>(tensor.buffer()), tensor.info()->total_size());
+        }
+        else
+        {
+            // If tensor has padding accessing tensor elements through execution window.
+            Window window;
+            window.use_tensor_dimensions(tensor_shape);
+
+            execute_window_loop(window, [&](const Coordinates & id)
+            {
+                stream.read(reinterpret_cast<char *>(tensor.ptr_to_element(id)), tensor.info()->element_size());
+            });
+        }
     }
     else
     {
         // If tensor has padding accessing tensor elements through execution window.
         Window window;
-        window.use_tensor_dimensions(tensor_shape);
+        window.use_tensor_dimensions(permuted_shape);
 
         execute_window_loop(window, [&](const Coordinates & id)
         {
-            stream.read(reinterpret_cast<char *>(tensor.ptr_to_element(id)), tensor.info()->element_size());
+            Coordinates coords(id);
+            arm_compute::permute(coords, perm);
+            stream.read(reinterpret_cast<char *>(tensor.ptr_to_element(coords)), tensor.info()->element_size());
         });
     }
     return true;
diff --git a/utils/GraphUtils.h b/utils/GraphUtils.h
index 11f1e0590a..a8507b1ac7 100644
--- a/utils/GraphUtils.h
+++ b/utils/GraphUtils.h
@@ -201,9 +201,10 @@ class NumPyBinLoader final : public graph::ITensorAccessor
 public:
     /** Default Constructor
      *
-     * @param filename Binary file name
+     * @param[in] filename    Binary file name
+     * @param[in] file_layout (Optional) Layout of the numpy tensor data. Defaults to NCHW
      */
-    NumPyBinLoader(std::string filename);
+    NumPyBinLoader(std::string filename, DataLayout file_layout = DataLayout::NCHW);
     /** Allows instances to move constructed */
     NumPyBinLoader(NumPyBinLoader &&) = default;
 
@@ -212,6 +213,7 @@ public:
 
 private:
     const std::string _filename;
+    const DataLayout  _file_layout;
 };
 
 /** Generates appropriate random accessor
@@ -231,12 +233,15 @@ inline std::unique_ptr<graph::ITensorAccessor> get_random_accessor(PixelValue lo
  *
  * @note If path is empty will generate a DummyAccessor else will generate a NumPyBinLoader
  *
- * @param[in] path      Path to the data files
- * @param[in] data_file Relative path to the data files from path
+ * @param[in] path        Path to the data files
+ * @param[in] data_file   Relative path to the data files from path
+ * @param[in] file_layout (Optional) Layout of file. Defaults to NCHW
  *
  * @return An appropriate tensor accessor
  */
-inline std::unique_ptr<graph::ITensorAccessor> get_weights_accessor(const std::string &path, const std::string &data_file)
+inline std::unique_ptr<graph::ITensorAccessor> get_weights_accessor(const std::string &path,
+                                                                    const std::string &data_file,
+                                                                    DataLayout         file_layout = DataLayout::NCHW)
 {
     if(path.empty())
     {
@@ -244,7 +249,7 @@ inline std::unique_ptr<graph::ITensorAccessor> get_weights_accessor(const std::s
     }
     else
     {
-        return arm_compute::support::cpp14::make_unique<NumPyBinLoader>(path + data_file);
+        return arm_compute::support::cpp14::make_unique<NumPyBinLoader>(path + data_file, file_layout);
     }
 }
 
diff --git a/utils/Utils.h b/utils/Utils.h
index 6241562a28..cadba3a088 100644
--- a/utils/Utils.h
+++ b/utils/Utils.h
@@ -406,7 +406,14 @@ public:
     {
         ARM_COMPUTE_ERROR_ON(!is_open());
         ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&tensor, 1, DataType::U8, DataType::F32);
-        ARM_COMPUTE_ERROR_ON(tensor.info()->dimension(0) != _width || tensor.info()->dimension(1) != _height || tensor.info()->dimension(2) != 3);
+
+        const DataLayout  data_layout  = tensor.info()->data_layout();
+        const TensorShape tensor_shape = tensor.info()->tensor_shape();
+
+        ARM_COMPUTE_UNUSED(tensor_shape);
+        ARM_COMPUTE_ERROR_ON(tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)] != _width);
+        ARM_COMPUTE_ERROR_ON(tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)] != _height);
+        ARM_COMPUTE_ERROR_ON(tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)] != 3);
 
         try
         {
@@ -423,11 +430,25 @@ public:
                                      "Not enough data in file");
             ARM_COMPUTE_UNUSED(end_position);
 
+            // Stride across channels
+            size_t stride_z = 0;
+
             // Iterate through every pixel of the image
             arm_compute::Window window;
-            window.set(arm_compute::Window::DimX, arm_compute::Window::Dimension(0, _width, 1));
-            window.set(arm_compute::Window::DimY, arm_compute::Window::Dimension(0, _height, 1));
-            window.set(arm_compute::Window::DimZ, arm_compute::Window::Dimension(0, 1, 1));
+            if(data_layout == DataLayout::NCHW)
+            {
+                window.set(arm_compute::Window::DimX, arm_compute::Window::Dimension(0, _width, 1));
+                window.set(arm_compute::Window::DimY, arm_compute::Window::Dimension(0, _height, 1));
+                window.set(arm_compute::Window::DimZ, arm_compute::Window::Dimension(0, 1, 1));
+                stride_z = tensor.info()->strides_in_bytes()[2];
+            }
+            else
+            {
+                window.set(arm_compute::Window::DimX, arm_compute::Window::Dimension(0, 1, 1));
+                window.set(arm_compute::Window::DimY, arm_compute::Window::Dimension(0, _width, 1));
+                window.set(arm_compute::Window::DimZ, arm_compute::Window::Dimension(0, _height, 1));
+                stride_z = tensor.info()->strides_in_bytes()[0];
+            }
 
             arm_compute::Iterator out(&tensor, window);
 
@@ -435,8 +456,6 @@ public:
             unsigned char green = 0;
             unsigned char blue  = 0;
 
-            size_t stride_z = tensor.info()->strides_in_bytes()[2];
-
             arm_compute::execute_window_loop(window, [&](const arm_compute::Coordinates & id)
             {
                 red   = _fs.get();
author	Georgios Pinitas <georgios.pinitas@arm.com>	2018-04-27 19:07:19 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:51:17 +0000
commit	cac13b1cfd593889271f8e2191be2039b8d88f36 (patch)
tree	d1c5196877d7fbd5dcfbb9f9003faf6035f82a33
parent	ad0c7388f6261989a268ffb2d042f2bd80736e3f (diff)
download	ComputeLibrary-cac13b1cfd593889271f8e2191be2039b8d88f36.tar.gz