Diffstat (limited to 'arm_compute/core/Types.h')
-rw-r--r-- | arm_compute/core/Types.h | 1134
1 file changed, 472 insertions, 662 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 48c87cd8ac..f2f60c150e 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,17 +21,52 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TYPES_H
-#define ARM_COMPUTE_TYPES_H
-
+#ifndef ACL_ARM_COMPUTE_CORE_TYPES_H
+#define ACL_ARM_COMPUTE_CORE_TYPES_H
+
+/** The following symbols have been moved to:
+ * half
+ * PermutationVector
+ * Format
+ * DataType
+ * DataLayout
+ * DataLayoutDimension
+ * PadStrideInfo
+ * WeightFormat
+ * Channel
+ * DimensionRoundingType
+ */
+#include "arm_compute/core/CoreTypes.h"
+/** The following symbols have been moved to:
+ * ActivationFunction
+ * ActivationLayerInfo
+ */
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+/** The following symbols have been moved to:
+ * ConvolutionInfo
+ */
+#include "arm_compute/function_info/ConvolutionInfo.h"
+/** The following symbols have been moved to:
+ * FullyConnectedLayerInfo
+ */
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
+/** The following symbols have been moved to:
+ * GEMMLowpOutputStageType
+ * GEMMLowpOutputStageInfo
+ * GEMMInfo
+ */
+#include "arm_compute/function_info/GEMMInfo.h"
+/** The following symbols have been moved to:
+ * MatMulInfo
+ */
 #include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/QuantizationInfo.h"
 #include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Strides.h"
+#include "arm_compute/core/Size3D.h"
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/utils/misc/Macros.h"
+#include "arm_compute/function_info/MatMulInfo.h"
+
 #include "support/Bfloat16.h"
-#include "support/Half.h"
 
 #include <cmath>
 #include <cstddef>
@@ -42,62 +77,9 @@
 namespace arm_compute
 {
-/** 16-bit floating point type */
-using half = half_float::half;
-
-/** Permutation vector */
-using PermutationVector = Strides;
 /** Bidirectional strides */
 using BiStrides = Coordinates;
 
-/** Image colour formats */
-enum class Format
-{
-    UNKNOWN,  /**< Unknown image format */
-    U8,       /**< 1 channel, 1 U8 per channel */
-    S16,      /**< 1 channel, 1 S16 per channel */
-    U16,      /**< 1 channel, 1 U16 per channel */
-    S32,      /**< 1 channel, 1 S32 per channel */
-    U32,      /**< 1 channel, 1 U32 per channel */
-    BFLOAT16, /**< 16-bit brain floating-point number */
-    F16,      /**< 1 channel, 1 F16 per channel */
-    F32,      /**< 1 channel, 1 F32 per channel */
-    UV88,     /**< 2 channel, 1 U8 per channel */
-    RGB888,   /**< 3 channels, 1 U8 per channel */
-    RGBA8888, /**< 4 channels, 1 U8 per channel */
-    YUV444,   /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */
-    YUYV422,  /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
-    NV12,     /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
-    NV21,     /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
-    IYUV,     /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
-    UYVY422   /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */
-};
-
-/** Available data types */
-enum class DataType
-{
-    UNKNOWN,            /**< Unknown data type */
-    U8,                 /**< unsigned 8-bit number */
-    S8,                 /**< signed 8-bit number */
-    QSYMM8,             /**< quantized, symmetric fixed-point 8-bit number */
-    QASYMM8,            /**< quantized, asymmetric fixed-point 8-bit number unsigned */
-    QASYMM8_SIGNED,     /**< quantized, asymmetric fixed-point 8-bit number signed */
-    QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
-    U16,                /**< unsigned 16-bit number */
-    S16,                /**< signed 16-bit number */
-    QSYMM16,            /**< quantized, symmetric fixed-point 16-bit number */
-    QASYMM16,           /**< quantized, asymmetric fixed-point 16-bit number */
-    U32,                /**< unsigned 32-bit number */
-    S32,                /**< signed 32-bit number */
-    U64,                /**< unsigned 64-bit number */
-    S64,                /**< signed 64-bit number */
-    BFLOAT16,           /**< 16-bit brain floating-point number */
-    F16,                /**< 16-bit floating-point number */
-    F32,                /**< 32-bit floating-point number */
-    F64,                /**< 64-bit floating-point number */
-    SIZET               /**< size_t */
-};
-
 /** Available Sampling Policies */
 enum class SamplingPolicy
 {
@@ -105,32 +87,13 @@ enum class SamplingPolicy
     TOP_LEFT /**< Samples are taken at pixel top left corner */
 };
 
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layouts */
-enum class DataLayout
-{
-    UNKNOWN, /**< Unknown data layout */
-    NCHW,    /**< Num samples, channels, height, width */
-    NHWC     /**< Num samples, height, width, channels */
-};
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layout dimensions */
-enum class DataLayoutDimension
-{
-    CHANNEL, /**< channel */
-    HEIGHT,  /**< height */
-    WIDTH,   /**< width */
-    BATCHES  /**< batches */
-};
-
 /** Available ConvolutionMethod*/
 enum class ConvolutionMethod
 {
     GEMM,        /**< Convolution using GEMM */
     GEMM_CONV2D, /**< Direct 2D GEMM convolution */
     DIRECT,      /**< Direct convolution */
+    INDIRECT,    /**< Indirect convolution */
     WINOGRAD,    /**< Convolution using Winograd */
     FFT          /**< Convolution using FFT */
 };
@@ -145,8 +108,9 @@ enum class DepthwiseConvolutionFunction
 /** Available DeconvolutionMethod*/
 enum class DeconvolutionMethod
 {
-    GEMM,   /**< Deconvolution using GEMM */
-    DIRECT, /**< Direct deconvolution */
+    GEMM,          /**< Deconvolution using GEMM */
+    DIRECT,        /**< Direct deconvolution */
+    UPSCALE_CONV2D /**< Deconvolution with Upscaling */
 };
 
 /** Available FuseBatchNormalizationType*/
@@ -179,8 +143,7 @@ enum class ComparisonOperation
 struct ValidRegion
 {
     /** Default constructor */
-    ValidRegion()
-        : anchor{}, shape{}
+    ValidRegion() : anchor{}, shape{}
     {
     }
 
@@ -201,8 +164,7 @@ struct ValidRegion
      * @param[in] a_shape   Shape of the valid region.
     *
     */
-    ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape)
-        : anchor{ an_anchor }, shape{ a_shape }
+    ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) : anchor{an_anchor}, shape{a_shape}
     {
         anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions()));
     }
@@ -215,7 +177,7 @@ struct ValidRegion
     *
     */
     ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions)
-        : anchor{ an_anchor }, shape{ a_shape }
+        : anchor{an_anchor}, shape{a_shape}
     {
         ARM_COMPUTE_ERROR_ON(num_dimensions < std::max(anchor.num_dimensions(), shape.num_dimensions()));
         anchor.set_num_dimensions(num_dimensions);
@@ -248,9 +210,22 @@ struct ValidRegion
         return *this;
     }
 
+    /** Check whether two valid regions are equal.
+     *
+     * @param[in] lhs LHS valid region
+     * @param[in] rhs RHS valid region
+     *
+     * @return True if the valid regions are the same.
+     */
+    inline friend bool operator==(const ValidRegion &lhs, const ValidRegion &rhs);
+
     Coordinates anchor; /**< Anchor for the start of the valid region. */
     TensorShape shape;  /**< Shape of the valid region. */
 };
+inline bool operator==(const ValidRegion &lhs, const ValidRegion &rhs)
+{
+    return (lhs.anchor == rhs.anchor) && (lhs.shape == rhs.shape);
+}
 
 /** Methods available to handle borders */
 enum class BorderMode
@@ -264,32 +239,24 @@ enum class BorderMode
 struct BorderSize
 {
     /** Empty border, i.e. no border */
-    constexpr BorderSize() noexcept
-        : top{ 0 },
-          right{ 0 },
-          bottom{ 0 },
-          left{ 0 }
+    constexpr BorderSize() noexcept : top{0}, right{0}, bottom{0}, left{0}
     {
     }
 
     /** Border with equal size around the 2D plane */
-    explicit constexpr BorderSize(unsigned int size) noexcept
-        : top{ size },
-          right{ size },
-          bottom{ size },
-          left{ size }
+    explicit constexpr BorderSize(unsigned int size) noexcept : top{size}, right{size}, bottom{size}, left{size}
    {
    }
 
     /** Border with same size for top/bottom and left/right */
     constexpr BorderSize(unsigned int top_bottom, unsigned int left_right)
-        : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right }
+        : top{top_bottom}, right{left_right}, bottom{top_bottom}, left{left_right}
     {
     }
 
     /** Border with different sizes */
     constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left)
-        : top{ top }, right{ right }, bottom{ bottom }, left{ left }
+        : top{top}, right{right}, bottom{bottom}, left{left}
     {
     }
 
@@ -341,7 +308,7 @@ struct BorderSize
      *
     * @return true if they are equal
     */
-    bool operator==(const BorderSize &rhs)
+    bool operator==(const BorderSize &rhs) const
     {
         return (top == rhs.top) && (right == rhs.right) && (bottom == rhs.bottom) && (left == rhs.left);
     }
@@ -352,7 +319,7 @@ struct BorderSize
     *
     * @return true if they are different
     */
-    bool operator!=(const BorderSize &rhs)
+    bool operator!=(const BorderSize &rhs) const
     {
         return !(*this == rhs);
     }
@@ -378,7 +345,11 @@ struct BorderSize
 /** Container for 2D padding size */
 using PaddingSize = BorderSize;
 
-/** Policy to handle overflow */
+/** Policy to handle integer overflow
+ * @note: This is ignored by floating point operations where the overflow behavior adheres to the IEEE-754 standard
+ *        which states that in case of overflow ±infinity is returned for the round-to-nearest modes (and follows the
+ *        rounding rules for the directed rounding modes) by default.
+ */
 enum class ConvertPolicy
 {
     WRAP, /**< Wrap around */
@@ -390,7 +361,7 @@ enum class InterpolationPolicy
 {
     NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */
     BILINEAR,         /**< Output values are defined by bilinear interpolation between the pixels */
-    AREA,             /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
+    AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
 };
 
 /** Bilinear Interpolation method used by LKTracker */
@@ -433,23 +404,6 @@ using PaddingList = std::vector<PaddingInfo>;
 /** Information to produce a tiled version of a Tensor */
 using Multiples = std::vector<uint32_t>;
 
-/** Available channels */
-enum class Channel
-{
-    UNKNOWN, /** Unknown channel format */
-    C0,      /**< First channel (used by formats with unknown channel types). */
-    C1,      /**< Second channel (used by formats with unknown channel types). */
-    C2,      /**< Third channel (used by formats with unknown channel types). */
-    C3,      /**< Fourth channel (used by formats with unknown channel types). */
-    R,       /**< Red channel. */
-    G,       /**< Green channel. */
-    B,       /**< Blue channel. */
-    A,       /**< Alpha channel. */
-    Y,       /**< Luma channel. */
-    U,       /**< Cb/U channel. */
-    V        /**< Cr/V/Value channel. */
-};
-
 /** Available reduction operations */
 enum class ReductionOperation
@@ -514,21 +468,12 @@ enum class NormType
 */
 struct DetectionWindow
 {
-    uint16_t x{ 0 };         /**< Top-left x coordinate */
-    uint16_t y{ 0 };         /**< Top-left y coordinate */
-    uint16_t width{ 0 };     /**< Width of the detection window */
-    uint16_t height{ 0 };    /**< Height of the detection window */
-    uint16_t idx_class{ 0 }; /**< Index of the class */
-    float    score{ 0.f };   /**< Confidence value for the detection window */
-};
-
-/** Dimension rounding type when down-scaling on CNNs
- * @note Used in pooling and convolution layer
- */
-enum class DimensionRoundingType
-{
-    FLOOR, /**< Floor rounding */
-    CEIL   /**< Ceil rounding */
+    uint16_t x{0};         /**< Top-left x coordinate */
+    uint16_t y{0};         /**< Top-left y coordinate */
+    uint16_t width{0};     /**< Width of the detection window */
+    uint16_t height{0};    /**< Height of the detection window */
+    uint16_t idx_class{0}; /**< Index of the class */
+    float    score{0.f};   /**< Confidence value for the detection window */
 };
 
 /** Available pooling types */
@@ -565,12 +510,28 @@ public:
     * @param[in] im_width                  (Optional) Boxes whose centers (on the x axis) is beyond im_width will be filtered. Defaults to 1
     * @param[in] im_height                 (Optional) Boxes whose centers (on the y axis) is beyond im_height will be filtered. Defaults to 1
     */
-    BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f,
-                    int detections = 100, bool soft_nms_enabled = false,
-                    NMSType soft_nms_method = NMSType::LINEAR,
-                    float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f)
-        : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma),
-          _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height)
+    BoxNMSLimitInfo(float   score_thresh             = 0.05f,
+                    float   nms                      = 0.3f,
+                    int     detections               = 100,
+                    bool    soft_nms_enabled         = false,
+                    NMSType soft_nms_method          = NMSType::LINEAR,
+                    float   soft_nms_sigma           = 0.5f,
+                    float   soft_nms_min_score_thres = 0.001f,
+                    bool    suppress_size            = false,
+                    float   min_size                 = 1.0f,
+                    float   im_width                 = 1.0f,
+                    float   im_height                = 1.0f)
+        : _score_thresh(score_thresh),
+          _nms(nms),
+          _detections_per_im(detections),
+          _soft_nms_enabled(soft_nms_enabled),
+          _soft_nms_method(soft_nms_method),
+          _soft_nms_sigma(soft_nms_sigma),
+          _soft_nms_min_score_thres(soft_nms_min_score_thres),
+          _suppress_size(suppress_size),
+          _min_size(min_size),
+          _im_width(im_width),
+          _im_height(im_height)
     {
     }
     /** Get the score threshold */
@@ -644,120 +605,42 @@ private:
 };
 
 /** Padding and stride information class */
-class PadStrideInfo
+/** Padding information for 2D operations like Conv2d */
+struct Padding2D
 {
-public:
-    /** Constructor
-     *
-     * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
-     * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
-     * @param[in] pad_x    (Optional) Padding, in elements, across x. Defaults to 0.
-     * @param[in] pad_y    (Optional) Padding, in elements, across y. Defaults to 0.
-     * @param[in] round    (Optional) Dimensions rounding. Defaults to @ref FLOOR.
-     */
-    PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
-                  unsigned int pad_x = 0, unsigned int pad_y = 0,
-                  DimensionRoundingType round = DimensionRoundingType::FLOOR)
-        : _stride(std::make_pair(stride_x, stride_y)),
-          _pad_left(pad_x),
-          _pad_top(pad_y),
-          _pad_right(pad_x),
-          _pad_bottom(pad_y),
-          _round_type(round)
-    {
-    }
-    /** Constructor
-     *
-     * @param[in] stride_x   Stride, in elements, across x.
-     * @param[in] stride_y   Stride, in elements, across y.
-     * @param[in] pad_left   Padding across x on the left, in elements.
-     * @param[in] pad_top    Padding across y on the top, in elements.
-     * @param[in] pad_right  Padding across x on the right, in elements.
-     * @param[in] pad_bottom Padding across y on the bottom, in elements.
-     * @param[in] round      Dimensions rounding.
-     */
-    PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
-                  unsigned int pad_left, unsigned int pad_right,
-                  unsigned int pad_top, unsigned int pad_bottom,
-                  DimensionRoundingType round)
-        : _stride(std::make_pair(stride_x, stride_y)),
-          _pad_left(pad_left),
-          _pad_top(pad_top),
-          _pad_right(pad_right),
-          _pad_bottom(pad_bottom),
-          _round_type(round)
-    {
-    }
-    /** Get the stride.
-     *
-     * @return a pair: stride x, stride y.
-     */
-    std::pair<unsigned int, unsigned int> stride() const
-    {
-        return _stride;
-    }
-    /** Check whether the padding is symmetric.
-     *
-     * @return True if the padding is symmetric.
-     */
-    bool padding_is_symmetric() const
-    {
-        return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
-    }
-    /** Get the padding.
-     *
-     * @note This should only be used when the padding is symmetric.
-     *
-     * @return a pair: padding left/right, padding top/bottom
-     */
-    std::pair<unsigned int, unsigned int> pad() const
+    Padding2D() = default;
+    Padding2D(size_t left, size_t right, size_t top, size_t bottom) : left(left), right(right), top(top), bottom(bottom)
     {
-        //this accessor should be used only when padding is symmetric
-        ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
-        return std::make_pair(_pad_left, _pad_top);
     }
+    size_t left   = {0}; /**< Padding across the width dimension on the left, in elements. */
+    size_t right  = {0}; /**< Padding across the width dimension on the right, in elements. */
+    size_t top    = {0}; /**< Padding across the height dimension on the top, in elements. */
+    size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */
+};
 
-    /** Get the left padding */
-    unsigned int pad_left() const
-    {
-        return _pad_left;
-    }
-    /** Get the right padding */
-    unsigned int pad_right() const
-    {
-        return _pad_right;
-    }
-    /** Get the top padding */
-    unsigned int pad_top() const
-    {
-        return _pad_top;
-    }
-    /** Get the bottom padding */
-    unsigned int pad_bottom() const
+/** Padding information for 3D operations like Conv3d */
+struct Padding3D
+{
+    Padding3D() noexcept
     {
     }
-    /** Get the rounding type */
-    DimensionRoundingType round() const
+    Padding3D(size_t pad_x, size_t pad_y, size_t pad_z)
+        : left(pad_x), right(pad_x), top(pad_y), bottom(pad_y), front(pad_z), back(pad_z)
     {
-        return _round_type;
     }
-    /** Check whether this has any padding */
-    bool has_padding() const
+    Padding3D(size_t left, size_t right, size_t top, size_t bottom, size_t front, size_t back)
+        : left(left), right(right), top(top), bottom(bottom), front(front), back(back)
     {
-        return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
     }
 
-private:
-    std::pair<unsigned int, unsigned int> _stride;
-    unsigned int _pad_left;
-    unsigned int _pad_top;
-    unsigned int _pad_right;
-    unsigned int _pad_bottom;
-
-    DimensionRoundingType _round_type;
+    size_t left   = {0}; /**< Padding across the width dimension on the left, in elements. */
+    size_t right  = {0}; /**< Padding across the width dimension on the right, in elements. */
+    size_t top    = {0}; /**< Padding across the height dimension on the top, in elements. */
+    size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */
+    size_t front  = {0}; /**< Padding across the depth dimension on the front, in elements. */
+    size_t back   = {0}; /**< Padding across the depth dimension on the back, in elements. */
 };
 
 /** PriorBox layer info */
@@ -789,9 +672,15 @@ public:
     * @param[in] img_size      (Optional) Image size.
     * @param[in] steps         (Optional) Step values.
     */
-    PriorBoxLayerInfo(const std::vector<float> &min_sizes, const std::vector<float> &variances, float offset, bool flip = true, bool clip = false,
-                      const std::vector<float> &max_sizes = {}, const std::vector<float> &aspect_ratios = {},
-                      const Coordinates2D &img_size = Coordinates2D{ 0, 0 }, const std::array<float, 2> &steps = { { 0.f, 0.f } })
+    PriorBoxLayerInfo(const std::vector<float>   &min_sizes,
+                      const std::vector<float>   &variances,
+                      float                       offset,
+                      bool                        flip          = true,
+                      bool                        clip          = false,
+                      const std::vector<float>   &max_sizes     = {},
+                      const std::vector<float>   &aspect_ratios = {},
+                      const Coordinates2D        &img_size      = Coordinates2D{0, 0},
+                      const std::array<float, 2> &steps         = {{0.f, 0.f}})
         : _min_sizes(min_sizes),
          _variances(variances),
          _offset(offset),
@@ -803,22 +692,22 @@ public:
          _steps(steps)
     {
         _aspect_ratios.push_back(1.);
-        for(unsigned int i = 0; i < aspect_ratios.size(); ++i)
+        for (unsigned int i = 0; i < aspect_ratios.size(); ++i)
         {
             float ar = aspect_ratios[i];
             bool already_exist = false;
-            for(auto ar_new : _aspect_ratios)
+            for (auto ar_new : _aspect_ratios)
             {
-                if(fabs(ar - ar_new) < 1e-6)
+                if (fabs(ar - ar_new) < 1e-6)
                 {
                     already_exist = true;
                     break;
                 }
             }
-            if(!already_exist)
+            if (!already_exist)
             {
                 _aspect_ratios.push_back(ar);
-                if(flip)
+                if (flip)
                 {
                     _aspect_ratios.push_back(1.f / ar);
@@ -872,14 +761,14 @@ public:
     }
 
 private:
-    std::vector<float> _min_sizes;
-    std::vector<float> _variances;
-    float              _offset;
-    bool               _flip;
-    bool               _clip;
-    std::vector<float> _max_sizes;
-    std::vector<float> _aspect_ratios;
-    Coordinates2D      _img_size;
+    std::vector<float>   _min_sizes;
+    std::vector<float>   _variances;
+    float                _offset;
+    bool                 _flip;
+    bool                 _clip;
+    std::vector<float>   _max_sizes;
+    std::vector<float>   _aspect_ratios;
+    Coordinates2D        _img_size;
     std::array<float, 2> _steps;
 };
 
@@ -930,8 +819,16 @@ public:
     * @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly.Default set to false.
     * @param[in] eta                        (Optional) Eta.
     */
-    DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1,
-                             float confidence_threshold = std::numeric_limits<float>::lowest(), bool variance_encoded_in_target = false, float eta = 1)
+    DetectionOutputLayerInfo(int                          num_classes,
+                             bool                         share_location,
+                             DetectionOutputLayerCodeType code_type,
+                             int                          keep_top_k,
+                             float                        nms_threshold,
+                             int                          top_k                      = -1,
+                             int                          background_label_id        = -1,
+                             float                        confidence_threshold       = std::numeric_limits<float>::lowest(),
+                             bool                         variance_encoded_in_target = false,
+                             float                        eta                        = 1)
         : _num_classes(num_classes),
          _share_location(share_location),
         _code_type(code_type),
@@ -1045,8 +942,15 @@ public:
     * @param[in] detection_per_class     (Optional) Number of detection per class. Used in the Regular Non-Max-Suppression. Defaults to 100.
     * @param[in] dequantize_scores       (Optional) If the scores need to be dequantized. Defaults to true.
     */
-    DetectionPostProcessLayerInfo(unsigned int max_detections, unsigned int max_classes_per_detection, float nms_score_threshold, float iou_threshold, unsigned int num_classes,
-                                  std::array<float, 4> scales_values, bool use_regular_nms = false, unsigned int detection_per_class = 100, bool dequantize_scores = true)
+    DetectionPostProcessLayerInfo(unsigned int         max_detections,
+                                  unsigned int         max_classes_per_detection,
+                                  float                nms_score_threshold,
+                                  float                iou_threshold,
+                                  unsigned int         num_classes,
+                                  std::array<float, 4> scales_values,
+                                  bool                 use_regular_nms     = false,
+                                  unsigned int         detection_per_class = 100,
+                                  bool                 dequantize_scores   = true)
         : _max_detections(max_detections),
          _max_classes_per_detection(max_classes_per_detection),
          _nms_score_threshold(nms_score_threshold),
@@ -1124,15 +1028,15 @@ public:
     }
 
 private:
-    unsigned int _max_detections;
-    unsigned int _max_classes_per_detection;
-    float        _nms_score_threshold;
-    float        _iou_threshold;
-    unsigned int _num_classes;
+    unsigned int         _max_detections;
+    unsigned int         _max_classes_per_detection;
+    float                _nms_score_threshold;
+    float                _iou_threshold;
+    unsigned int         _num_classes;
     std::array<float, 4> _scales_values;
-    bool         _use_regular_nms;
-    unsigned int _detection_per_class;
-    bool         _dequantize_scores;
+    bool                 _use_regular_nms;
+    unsigned int         _detection_per_class;
+    bool                 _dequantize_scores;
 };
 
 /** Pooling Layer Information struct*/
@@ -1146,7 +1050,9 @@ struct PoolingLayerInfo
          pad_stride_info(PadStrideInfo()),
         exclude_padding(false),
         is_global_pooling(false),
-          fp_mixed_precision(false)
+          fp_mixed_precision(false),
+          use_inf_as_limit(true),
+          use_kernel_indices(false)
    {
    }
    /** Constructor
@@ -1159,20 +1065,26 @@ struct PoolingLayerInfo
     *                               True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
     *                               Defaults to false;
     * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+     * @param[in] use_inf_as_limit   (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type.
+     * @param[in] use_kernel_indices (Optional) Use kernel indices instead of using source indices while computing indices tensor.
     */
    explicit PoolingLayerInfo(PoolingType   pool_type,
                              unsigned int  pool_size,
                              DataLayout    data_layout,
                              PadStrideInfo pad_stride_info = PadStrideInfo(),
                              bool          exclude_padding = false,
-                              bool          fp_mixed_precision = false)
+                              bool          fp_mixed_precision = false,
+                              bool          use_inf_as_limit   = true,
+                              bool          use_kernel_indices = false)
        : pool_type(pool_type),
         pool_size(Size2D(pool_size, pool_size)),
         data_layout(data_layout),
         pad_stride_info(pad_stride_info),
         exclude_padding(exclude_padding),
         is_global_pooling(false),
-          fp_mixed_precision(fp_mixed_precision)
+          fp_mixed_precision(fp_mixed_precision),
+          use_inf_as_limit(use_inf_as_limit),
+          use_kernel_indices(use_kernel_indices)
    {
    }
 
@@ -1186,20 +1098,26 @@ struct PoolingLayerInfo
     *                               True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
     *                               Defaults to false;
     * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+     * @param[in] use_inf_as_limit   (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type.
+     * @param[in] use_kernel_indices (Optional) Use kernel indices instead of using source indices while computing indices tensor.
     */
    explicit PoolingLayerInfo(PoolingType   pool_type,
                              Size2D        pool_size,
                              DataLayout    data_layout,
                              PadStrideInfo pad_stride_info = PadStrideInfo(),
                              bool          exclude_padding = false,
-                              bool          fp_mixed_precision = false)
+                              bool          fp_mixed_precision = false,
+                              bool          use_inf_as_limit   = true,
+                              bool          use_kernel_indices = false)
        : pool_type(pool_type),
         pool_size(pool_size),
         data_layout(data_layout),
         pad_stride_info(pad_stride_info),
         exclude_padding(exclude_padding),
         is_global_pooling(false),
-          fp_mixed_precision(fp_mixed_precision)
+          fp_mixed_precision(fp_mixed_precision),
+          use_inf_as_limit(use_inf_as_limit),
+          use_kernel_indices(use_kernel_indices)
    {
    }
 
@@ -1217,7 +1135,9 @@ struct PoolingLayerInfo
         pad_stride_info(PadStrideInfo(1, 1, 0, 0)),
         exclude_padding(false),
         is_global_pooling(true),
-          fp_mixed_precision(false)
+          fp_mixed_precision(false),
+          use_inf_as_limit(true),
+          use_kernel_indices(false)
    {
    }
 
@@ -1228,6 +1148,111 @@ struct PoolingLayerInfo
    bool          exclude_padding;
    bool          is_global_pooling;
    bool          fp_mixed_precision;
+    bool          use_inf_as_limit;
+    bool          use_kernel_indices;
+};
+
+/** Pooling Layer Information struct*/
+struct Pooling3dLayerInfo
+{
+    /** Default Constructor */
+    Pooling3dLayerInfo() noexcept
+        : pool_type(PoolingType::MAX),
+          pool_size(Size3D()),
+          stride(Size3D()),
+          padding(Padding3D()),
+          exclude_padding(false),
+          is_global_pooling(false),
+          fp_mixed_precision(false),
+          round_type(DimensionRoundingType::FLOOR)
+    {
+    }
+    /** Constructor
+     *
+     * @param[in] pool_type          Pooling type @ref PoolingType.
+     * @param[in] pool_size          Pooling size, in elements, across x, y and z.
+     * @param[in] stride             (Optional) stride information @ref Size3D
+     * @param[in] padding            (Optional) padding information @ref Padding3D
+     * @param[in] exclude_padding    (Optional) Strategy when accounting padding in calculations.
+     *                               True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
+     *                               Defaults to false;
+     * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+     * @param[in] round_type         (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR
+     */
+    explicit Pooling3dLayerInfo(PoolingType           pool_type,
+                                unsigned int          pool_size,
+                                Size3D                stride             = Size3D(1U, 1U, 1U),
+                                Padding3D             padding            = Padding3D(),
+                                bool                  exclude_padding    = false,
+                                bool                  fp_mixed_precision = false,
+                                DimensionRoundingType round_type         = DimensionRoundingType::FLOOR)
+        : pool_type(pool_type),
+          pool_size(Size3D(pool_size, pool_size, pool_size)),
+          stride(stride),
+          padding(padding),
+          exclude_padding(exclude_padding),
+          is_global_pooling(false),
+          fp_mixed_precision(fp_mixed_precision),
+          round_type(round_type)
+    {
+    }
+
+    /** Constructor
+     *
+     * @param[in] pool_type          Pooling type @ref PoolingType.
+     * @param[in] pool_size          Pooling size, in elements, across x, y and z.
+     * @param[in] stride             (Optional) stride information @ref Size3D
+     * @param[in] padding            (Optional) padding information @ref Padding3D
+     * @param[in] exclude_padding    (Optional) Strategy when accounting padding in calculations.
+     *                               True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
+     *                               Defaults to false;
+     * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+     * @param[in] round_type         (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR
+     */
+    explicit Pooling3dLayerInfo(PoolingType           pool_type,
+                                Size3D                pool_size,
+                                Size3D                stride             = Size3D(1U, 1U, 1U),
+                                Padding3D             padding            = Padding3D(),
+                                bool                  exclude_padding    = false,
+                                bool                  fp_mixed_precision = false,
+                                DimensionRoundingType round_type         = DimensionRoundingType::FLOOR)
+        : pool_type(pool_type),
+          pool_size(pool_size),
+          stride(stride),
+          padding(padding),
+          exclude_padding(exclude_padding),
+          is_global_pooling(false),
+          fp_mixed_precision(fp_mixed_precision),
+          round_type(round_type)
+    {
+    }
+
+    /** Constructor
+     *
+     * @note This constructor is used for global pooling
+     *
+     * @param[in] pool_type Pooling type @ref PoolingType.
+     */
+    explicit Pooling3dLayerInfo(PoolingType pool_type)
+        : pool_type(pool_type),
+          pool_size(Size3D()),
+          stride(Size3D(1U, 1U, 1U)),
+          padding(Padding3D(0, 0, 0)),
+          exclude_padding(false),
+          is_global_pooling(true),
+          fp_mixed_precision(false),
+          round_type(DimensionRoundingType::FLOOR)
+    {
+    }
+
+    PoolingType           pool_type;
+    Size3D                pool_size;
+    Size3D                stride;
+    Padding3D             padding;
+    bool                  exclude_padding;
+    bool                  is_global_pooling;
+    bool                  fp_mixed_precision;
+    DimensionRoundingType round_type;
 };
 
 /** ROI Pooling Layer Information class */
@@ -1241,8 +1266,14 @@ public:
     * @param[in] spatial_scale  Spatial scale to be applied to the ROI coordinates and dimensions.
     * @param[in] sampling_ratio Number of samples to include in each pooling region (if set to zero, a ceil(roi_dims/pooling_dims))
     */
-    ROIPoolingLayerInfo(unsigned int pooled_width, unsigned int pooled_height, float spatial_scale, unsigned int sampling_ratio = 0)
-        : _pooled_width(pooled_width), _pooled_height(pooled_height), _spatial_scale(spatial_scale), _sampling_ratio(sampling_ratio)
+    ROIPoolingLayerInfo(unsigned int pooled_width,
+                        unsigned int pooled_height,
+                        float        spatial_scale,
+                        unsigned int sampling_ratio = 0)
+        : _pooled_width(pooled_width),
+          _pooled_height(pooled_height),
+          _spatial_scale(spatial_scale),
+          _sampling_ratio(sampling_ratio)
    {
    }
    /** Get the pooled width of the layer */
@@ -1289,10 +1320,24 @@ public:
     * @param[in] min_size       (Optional)Size used to validate the anchors produced. Defaults to 16.
     * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region of interest). Defaults to 4.
     */
-    GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0,
+    GenerateProposalsInfo(float  im_width,
+                          float  im_height,
+                          float  im_scale,
+                          float  spatial_scale = 1.0,
+                          int    pre_nms_topN  = 6000,
+                          int    post_nms_topN = 300,
+                          float  nms_thres     = 0.7,
+                          float  min_size      = 16.0,
                          size_t values_per_roi = 4)
-        : _im_height(im_height), _im_width(im_width), _im_scale(im_scale), _spatial_scale(spatial_scale), _pre_nms_topN(pre_nms_topN), _post_nms_topN(post_nms_topN), _nms_thres(nms_thres),
-          _min_size(min_size), _values_per_roi(values_per_roi)
+        : _im_height(im_height),
+          _im_width(im_width),
+          _im_scale(im_scale),
+          _spatial_scale(spatial_scale),
+          _pre_nms_topN(pre_nms_topN),
+          _post_nms_topN(post_nms_topN),
+          _nms_thres(nms_thres),
+          _min_size(min_size),
+          _values_per_roi(values_per_roi)
    {
    }
 
@@ -1418,11 +1463,20 @@ public:
     * @param[in] correct_transform_coords (Optional)Correct bounding box transform coordinates. Defaults to false
     * @param[in] bbox_xform_clip          (Optional)Minimum bounding box width and height after bounding box transformation in log-space. Defaults to log(1000/16)
     */
-    BoundingBoxTransformInfo(float img_width, float img_height, float scale, bool apply_scale = false, const std::array<float, 4> weights = { { 1.f, 1.f, 1.f, 1.f } }, bool correct_transform_coords =
-                                 false,
-                             float bbox_xform_clip =
-                                 4.135166556742356f)
-        : _img_width(img_width), _img_height(img_height), _scale(scale), _apply_scale(apply_scale), _correct_transform_coords(correct_transform_coords), _weights(weights), _bbox_xform_clip(bbox_xform_clip)
+    BoundingBoxTransformInfo(float                      img_width,
+                             float                      img_height,
+                             float                      scale,
+                             bool                       apply_scale              = false,
+                             const std::array<float, 4> weights                  = {{1.f, 1.f, 1.f, 1.f}},
+                             bool                       correct_transform_coords = false,
+                             float                      bbox_xform_clip          = 4.135166556742356f)
+        : _img_width(img_width),
+          _img_height(img_height),
+          _scale(scale),
+          _apply_scale(apply_scale),
+          _correct_transform_coords(correct_transform_coords),
+          _weights(weights),
+          _bbox_xform_clip(bbox_xform_clip)
    {
    }
 
@@ -1462,114 +1516,13 @@ public:
    }
 
 private:
-    float _img_width;
-    float _img_height;
-    float _scale;
-    bool _apply_scale;
-    bool _correct_transform_coords;
+    float                _img_width;
+    float                _img_height;
+    float                _scale;
+    bool                 _apply_scale;
+    bool                 _correct_transform_coords;
     std::array<float, 4> _weights;
-    float _bbox_xform_clip;
-};
-
-/** Activation Layer Information class */
-class ActivationLayerInfo
-{
-public:
-    /** Available activation functions */
-    enum class ActivationFunction
-    {
-        LOGISTIC,        /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */
-        TANH,            /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */
-        RELU,            /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */
-        BOUNDED_RELU,    /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */
-        LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */
-        LEAKY_RELU,      /**< Leaky Rectifier ( \f$ f(x) = \begin{cases} \alpha x & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
-        SOFT_RELU,       /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */
-        ELU,             /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases} \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
-        ABS,             /**< Absolute ( \f$ f(x)= |x| \f$ ) */
-        SQUARE,          /**< Square ( \f$ f(x)= x^2 \f$ )*/
-        SQRT,            /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/
-        LINEAR,          /**< Linear ( \f$ f(x)= ax + b \f$ ) */
-        IDENTITY,        /**< Identity ( \f$ f(x)= x \f$ ) */
-        HARD_SWISH       /**< Hard-swish ( \f$ f(x) = (x * relu6(x+3))/6 \f$ ) */
-    };
-
-    ActivationLayerInfo() = default;
-    /** Default Constructor
-     *
-     * @param[in] f The activation function to use.
-     * @param[in] a (Optional) The alpha parameter used by some activation functions
-     *              (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH).
-     * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH).
-     */
-    ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f)
-        : _act(f), _a(a), _b(b), _enabled(true)
-    {
-    }
-    /** Get the type of activation function */
-    ActivationFunction activation() const
-    {
-        return _act;
-    }
-    /** Get the alpha value */
-    float a() const
-    {
-        return _a;
-    }
-    /** Get the beta value */
-    float b() const
-    {
-        return _b;
-    }
-    /** Check if initialised */
-    bool enabled() const
-    {
-        return _enabled;
-    }
-
-private:
-    ActivationFunction _act     = { ActivationLayerInfo::ActivationFunction::IDENTITY };
-    float              _a       = {};
-    float              _b       = {};
-    bool               _enabled = { false };
-};
-
-/** Fully connected layer info */
-struct FullyConnectedLayerInfo
-{
-    /* Fused-activation parameters */
-    ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. */
-    /* Information about weights */
-    DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
-    bool       transpose_weights{ true };                  /**< Transpose weights if true. */
-    bool       are_weights_reshaped{ false };              /**< Reshape the weights tensor if false. */
-    bool       retain_internal_weights{ false };           /**< Retain internal reshaped weights. */
-    bool       constant_weights{ true };                   /**< If false, weights can vary between runs. */
-    /* Other parameters */
-    bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
-
-    /** Sets the weights trained data layout
-     *
-     * @param[in] layout Data layout that the weights were trained with
-     *
-     * @return Updated object
-     */
-    FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
-    {
-        weights_trained_layout = layout;
-        return *this;
-    }
-    /** Sets the transpose weights flag
-     *
-     * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
-     *
-     * @return Updated object
-     */
-    FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
-    {
-        transpose_weights = should_transpose_weights;
-        return *this;
-    }
+    float                _bbox_xform_clip;
 };
 
 /** Normalization Layer Information class */
@@ -1586,7 +1539,12 @@ public:
     * @param[in] is_scaled (Optional) Boolean that specifies if alpha will be scaled by the normalization size or not.
     *                      Should be false to follow [Krizhevsky 2012].
     */
-    NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f, bool is_scaled = true)
+    NormalizationLayerInfo(NormType type,
+                           uint32_t norm_size = 5,
+                           float    alpha     = 0.0001f,
+                           float    beta      = 0.5f,
+                           float    kappa     = 1.f,
+                           bool     is_scaled = true)
        : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled)
    {
    }
@@ -1690,13 +1648,36 @@ private:
    int32_t _shrink_axis_mask;
 };
 
+// OHWIo<interleave_by>i<block_by>
+inline int interleave_by(const WeightFormat wf)
+{
+    return (static_cast<int>(wf) >> 8) & 0xFFF;
+}
+inline int block_by(const WeightFormat wf)
+{
+    return (static_cast<int>(wf) >> 20) & 0xF;
+}
+inline bool is_fixed_format(const WeightFormat &wf)
+{
+    return wf != WeightFormat::UNSPECIFIED && wf != WeightFormat::ANY;
+}
+inline bool is_fixed_format_fast_math(const WeightFormat &wf)
+{
+    return (static_cast<int>(wf) >> 4) & 0x1;
+}
+
 /** Convolution Layer Weights Information class. This class stores the necessary information to compute convolution layer when the weights are already reshaped */
 class WeightsInfo
 {
 public:
    /** Default constructor */
    WeightsInfo()
-        : _are_reshaped(false), _kernel_width(0), _kernel_height(0), _num_kernels(0), _retain_internal_weights(false)
+        : _are_reshaped(false),
+          _kernel_width(0),
+          _kernel_height(0),
+          _num_kernels(0),
+          _retain_internal_weights(false),
+          _weight_format(arm_compute::WeightFormat::UNSPECIFIED)
    {
    }
    /** Constructor
@@ -1706,9 +1687,20 @@ public:
     * @param[in] kernel_height           Kernel height.
     * @param[in] num_kernels             Number of convolution kernels.
     * @param[in] retain_internal_weights (Optional) True if internal reshaped weights must be retained. Used for reconfiguration purposes. Default is false.
+     * @param[in] weight_format           (Optional) arm_gemm::WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED.
     */
-    WeightsInfo(bool are_reshaped, unsigned int kernel_width, unsigned int kernel_height, unsigned int num_kernels, bool retain_internal_weights = false)
-        : _are_reshaped(are_reshaped), _kernel_width(kernel_width), _kernel_height(kernel_height), _num_kernels(num_kernels), _retain_internal_weights(retain_internal_weights)
+    WeightsInfo(bool                      are_reshaped,
+                unsigned int              kernel_width,
+                unsigned int              kernel_height,
+                unsigned int              num_kernels,
+                bool                      retain_internal_weights = false,
+                arm_compute::WeightFormat weight_format           = arm_compute::WeightFormat::UNSPECIFIED)
+        : _are_reshaped(are_reshaped),
+          _kernel_width(kernel_width),
+          _kernel_height(kernel_height),
+          _num_kernels(num_kernels),
+          _retain_internal_weights(retain_internal_weights),
+          _weight_format(weight_format)
    {
    }
    /** Flag which specifies if the weights tensor has been reshaped.
@@ -1739,21 +1731,39 @@ public:
    {
        return _retain_internal_weights;
    }
+    arm_compute::WeightFormat weight_format() const
+    {
+        return _weight_format;
+    }
+    void set_weight_format(arm_compute::WeightFormat weight_format)
+    {
+        _weight_format = weight_format;
+    }
+
+    unsigned int kernel_width() const
+    {
+        return _kernel_width;
+    }
+    unsigned int kernel_height() const
+    {
+        return _kernel_height;
+    }
 
 private:
-    bool         _are_reshaped;
-    unsigned int _kernel_width;
-    unsigned int _kernel_height;
-    unsigned int _num_kernels;
-    bool         _retain_internal_weights;
+    bool                      _are_reshaped;
+    unsigned int              _kernel_width;
+    unsigned int              _kernel_height;
+    unsigned int              _num_kernels;
+    bool                      _retain_internal_weights;
+    arm_compute::WeightFormat _weight_format;
 };
 
 /** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape.
 *
- * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref NEGEMMInterleave4x4Kernel
+ * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref cpu::kernels::CpuGemmInterleave4x4Kernel
 * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block
 *
- * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref NEGEMMTranspose1xWKernel
+ * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref cpu::kernels::CpuGemmTranspose1xWKernel
 * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block
 *
 */
 class GEMMReshapeInfo final
 {
 public:
    /** Default constructor */
    GEMMReshapeInfo()
-        : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _broadcast_bias(false)
+        : _m(1),
+          _n(1),
+          _k(1),
+          _mult_transpose1xW_width(1),
+          _mult_interleave4x4_height(1),
+          _depth_output_gemm3d(0),
+          _reinterpret_input_as_3d(false),
+          _broadcast_bias(false)
    {
    }
    /** Constructor
     *
     * @param[in] m                         Number of matrix A rows
     * @param[in] n                         Number of matrix B columns
     * @param[in] k                         Number of matrix A columns or matrix B rows
     * @param[in] mult_transpose1xW_width   (Optional) Multiplication factor for the width of the 1xW transposed block
     * @param[in] mult_interleave4x4_height (Optional) Multiplication factor for the height of the 4x4 interleaved block
     * @param[in] depth_output_gemm3d       (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel
     *                                      If 0 the output will not be reinterpreted as 3D. Default 0
     * @param[in] reinterpret_input_as_3d   (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used
     *                                      to perform 1x1 convolutions with the NHWC data layout)
     * @param[in] broadcast_bias            (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
     */
-    GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool broadcast_bias = false)
-        : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d),
-          _reinterpret_input_as_3d(reinterpret_input_as_3d), _broadcast_bias(broadcast_bias)
+    GEMMReshapeInfo(int  m,
+                    int  n,
+                    int  k,
+                    int  mult_transpose1xW_width   = 1,
+                    int  mult_interleave4x4_height = 1,
+                    int  depth_output_gemm3d       = 0,
+                    bool reinterpret_input_as_3d   = false,
+                    bool broadcast_bias            = false)
+        : _m(m),
+          _n(n),
+          _k(k),
+          _mult_transpose1xW_width(mult_transpose1xW_width),
+          _mult_interleave4x4_height(mult_interleave4x4_height),
+          _depth_output_gemm3d(depth_output_gemm3d),
+          _reinterpret_input_as_3d(reinterpret_input_as_3d),
+          _broadcast_bias(broadcast_bias)
    {
    }
    /** Number of matrix A rows
@@ -1862,44 +1892,6 @@ private:
    bool _broadcast_bias;
 };
 
-struct ConvolutionInfo
-{
-    ConvolutionInfo() = default;
-    ConvolutionInfo(const PadStrideInfo &pad_stride_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
-        : pad_stride_info(pad_stride_info), depth_multiplier(depth_multiplier), act_info(act_info), dilation(dilation)
-    {
-    }
-    PadStrideInfo       pad_stride_info{};        /**< Convolution info (Pads, strides,...) */
-    unsigned int        depth_multiplier{ 1 };    /**< Multiplier to apply to input's depth to retrieve the output depth. Defaults to 1 */
-    ActivationLayerInfo act_info{};               /**< Fused activation to apply after convolution. */
-    Size2D              dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */
-};
-
-/** GEMMLowp output stage type */
-enum class GEMMLowpOutputStageType
-{
-    NONE,                     /**< No quantization */
-    QUANTIZE_DOWN,            /**< Quantize using an integer multiplication */
-    QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */
-    QUANTIZE_DOWN_FLOAT       /**< Quantize using a floating point multiplication */
-};
-
-/** GEMMLowp output stage info */
-struct GEMMLowpOutputStageInfo
-{
-    GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE };                        /**< GEMMLowp output stage type */
-    int32_t                 gemmlowp_offset{ 0 };                                         /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
-    int32_t                 gemmlowp_multiplier{ 0 };                                     /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    int32_t                 gemmlowp_shift{ 0 };                                          /**< GEMMLowp output stage shift used for quantizing to uint8 */
-    int32_t                 gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
-    int32_t                 gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() };    /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
-    std::vector<int32_t>    gemmlowp_multipliers{};                                       /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    std::vector<int32_t>    gemmlowp_shifts{};                                            /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    float                   gemmlowp_real_multiplier{ 0 };                                /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */
-    bool                    is_quantized_per_channel{ false };                            /**< GEMMLowp quantized per-channel flag */
-    DataType                output_data_type{ DataType::UNKNOWN };                        /**< Output tensor data type to use if the output is not initialized */
-};
-
 /** GEMM LHS (Left Hand Side) matrix information */
 struct GEMMLHSMatrixInfo
 {
@@ -1908,11 +1900,11 @@ struct GEMMLHSMatrixInfo
        : m0(m), k0(k), v0(v), transpose(trans), interleave(inter)
    {
    }
-    unsigned int m0{ 1 };            /**< Number of rows processed by the matrix multiplication */
-    unsigned int k0{ 1 };            /**< Number of partial accumulations performed by the matrix multiplication */
-    unsigned int v0{ 1 };            /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
-    bool         transpose{ true };  /**< True if the (m0xk0) block has to be transposed before been stored */
-    bool         interleave{ true }; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */
+    unsigned int m0{1};            /**< Number of rows processed by the matrix multiplication */
+    unsigned int k0{1};            /**< Number of partial accumulations performed by the matrix multiplication */
+    unsigned int v0{1};            /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
+    bool         transpose{true};  /**< True if the (m0xk0) block has to be transposed before being stored */
+    bool         interleave{true}; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */
 };
 
 /** GEMM RHS (Right Hand Side) matrix information */
@@ -1923,208 +1915,16 @@ struct GEMMRHSMatrixInfo
        : n0(n), k0(k), h0(h), transpose(trans), interleave(inter), export_to_cl_image(export_to_cl_img)
    {
    }
-    unsigned int n0{ 1 };                 /**< Number of columns processed by the matrix multiplication */
-    unsigned int k0{ 1 };                 /**< Number of partial accumulations performed by the matrix multiplication */
-    unsigned int h0{ 1 };                 /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
-    bool         transpose{ true };       /**< True if the (k0xn0) block has to be transposed before been stored */
-    bool         interleave{ true };      /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
-    bool         export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
+    unsigned int n0{1};            /**< Number of columns processed by the matrix multiplication */
+    unsigned int k0{1};            /**< Number of partial accumulations performed by the matrix multiplication */
+    unsigned int h0{1};            /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+    bool         transpose{true};  /**< True if the (k0xn0) block has to be transposed before being stored */
+    bool         interleave{true}; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
+    bool         export_to_cl_image{
+        false}; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
 };
 
-/** GEMM information class. This class stores the necessary information to compute GEMM functions
- *
- * This object also contains the information about how matrix A and matrix B have been reshaped
- *
- */
-class GEMMInfo
-{
-public:
-    /** Default constructor */
-    GEMMInfo() noexcept
-        : _is_a_reshaped(false),
-          _is_b_reshaped(false),
-          _reshape_b_only_on_first_run(true),
-          _depth_output_gemm3d(0),
-          _reinterpret_input_as_3d(false),
-          _retain_internal_weights(false),
-          _gemmlowp_output_stage(),
-          _fp_mixed_precision(false),
-          _broadcast_bias(false),
-          _pretranpose_B(true),
-          _activation_info(),
-          _constant_weights(true)
-    {
-    }
-    /** Constructor
-     *
-     * @param[in] is_a_reshaped               True if the matrix A has been reshaped
-     * @param[in] is_b_reshaped               True if the matrix B has been reshaped
-     * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run
-     * @param[in] depth_output_gemm3d         (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel
-     *                                        If 0 the output will not be reinterpreted as 3D. Default 0
-     * @param[in] reinterpret_input_as_3d     (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used
-     *                                        to perform 1x1 convolutions with the NHWC data layout)
-     * @param[in] retain_internal_weights     (Optional) Retain the weights tensor from previous run
-     * @param[in] gemmlowp_output_stage       (Optional) GEMMLowp Output stage info
-     * @param[in] fp_mixed_precision          (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
-     * @param[in] broadcast_bias              (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
-     * @param[in] activation_info             (Optional) Activation to apply after the matrix multiplication
-     * @param[in] constant_weights            (Optional) Weights have constant values throughout multiple executions
-     */
-    GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
-             GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool broadcast_bias = false,
-             const ActivationLayerInfo &activation_info = ActivationLayerInfo(), bool constant_weights = true) noexcept
-        : _is_a_reshaped(is_a_reshaped),
-          _is_b_reshaped(is_b_reshaped),
-          _reshape_b_only_on_first_run(reshape_b_only_on_first_run),
-          _depth_output_gemm3d(depth_output_gemm3d),
-          _reinterpret_input_as_3d(reinterpret_input_as_3d),
-          _retain_internal_weights(retain_internal_weights),
-          _gemmlowp_output_stage(gemmlowp_output_stage),
-          _fp_mixed_precision(fp_mixed_precision),
-          _broadcast_bias(broadcast_bias),
-          _pretranpose_B(reshape_b_only_on_first_run),
-          _activation_info(activation_info),
-          _constant_weights(constant_weights)
-    {
-    }
-    /** Flag which specifies if the matrix A has been reshaped
-     *
-     * @return True if the matrix A has been reshaped
-     */
-    bool is_a_reshaped() const
-    {
-        return _is_a_reshaped;
-    };
-    /** Flag which specifies if the matrix B has been reshaped
-     *
-     * @return True if the matrix B has been reshaped
-     */
-    bool is_b_reshaped() const
-    {
-        return _is_b_reshaped;
-    };
-    /** Flag which specifies if the reshape of matrix B should executed only for the first
-     *
-     * @note This flag could be set to TRUE when GEMM is used to accelerate convolution layer
-     *
-     * @return True if the reshaped of matrix B happens only for the first run
-     */
-    bool reshape_b_only_on_first_run() const
-    {
-        return _reshape_b_only_on_first_run;
-    };
-    /** Depth of the output when GEMM output is reinterpreted as 3D tensor
-     *
-     * @return the depth of the output tensor
-     */
-    int depth_output_gemm3d() const
-    {
-        return _depth_output_gemm3d;
-    };
-    /** Flag which specifies if the input tensor has to be reinterpreted as 3D
-     *
-     * @return True if the input tensor has to be reinterpreted as 3D tensor
-     */
-    bool reinterpret_input_as_3d() const
-    {
-        return _reinterpret_input_as_3d;
-    };
-    /** Flag which specifies if the weights tensor has to be retained from previous run
-     *
-     * @return True if the weights tensor has to be retained
-     */
-    bool retain_internal_weights() const
-    {
-        return _retain_internal_weights;
-    };
-    /** GEMMLowp output stage
-     *
-     * @return the GEMMLowp output stage info
-     */
-    GEMMLowpOutputStageInfo gemmlowp_output_stage() const
-    {
-        return _gemmlowp_output_stage;
-    };
-    /** Sets GEMMLowp output stage
-     *
-     * @param[in] output_stage Output stage to set
-     */
-    void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage)
-    {
-        _gemmlowp_output_stage = output_stage;
-    };
-    /** Flag which specifies if a wider accumulator should be used.
-     *
-     * @return True if a wider accumulator has to be used
-     */
-    bool fp_mixed_precision() const
-    {
-        return _fp_mixed_precision;
-    };
-    /** Flag which specifies whether to broadcast the shape of the bias tensor.
-     *
-     * @return True if the shape of the bias tensor is to be broadcasted.
-     */
-    bool broadcast_bias() const
-    {
-        return _broadcast_bias;
-    };
-    /** Flag which specifies whether b should be pre-transposed if supported.
-     *
-     * @return True if b should be pre-transposed else false.
-     */
-    bool pretranpose_B() const
-    {
-        return _pretranpose_B;
-    };
-    /** Set pre-transpose b flag
-     *
-     * @param[in] flag Flag to set
-     */
-    void set_pretranpose_B(bool flag)
-    {
-        _pretranpose_B = flag;
-    }
-    /** Activation layer to apply after the matrix multiplication
-     *
-     * @return ActivationLayerInfo object
-     */
-    ActivationLayerInfo activation_info() const
-    {
-        return _activation_info;
-    }
-    /** Set activation layer info
-     *
-     * @param[in] activation_info ActivationLayerInfo object to set
-     */
-    void set_activation_info(const ActivationLayerInfo &activation_info)
-    {
-        _activation_info = activation_info;
-    }
-    /** Flag which specifies if the values of the weights tensor are constant throughout multiple executions or not
-     *
-     * @return True if the weights tensor is constant
-     */
-    bool constant_weights() const
-    {
-        return _constant_weights;
-    };
-
-private:
-    bool                    _is_a_reshaped;
-    bool                    _is_b_reshaped;
-    bool                    _reshape_b_only_on_first_run;
-    int                     _depth_output_gemm3d;
-    bool                    _reinterpret_input_as_3d;
-    bool                    _retain_internal_weights;
-    GEMMLowpOutputStageInfo _gemmlowp_output_stage;
-    bool                    _fp_mixed_precision;
-    bool                    _broadcast_bias;
-    bool                    _pretranpose_B;
-    ActivationLayerInfo     _activation_info;
-    bool                    _constant_weights;
-};
+class ITensorInfo;
 
 /** Winograd information */
 struct WinogradInfo
@@ -2137,16 +1937,23 @@ struct WinogradInfo
     * @param[in] conv_info      Convolution info (Pads, strides)
     * @param[in] data_layout    Data layout to use for the output tensor once the convolution has been applied
     */
-    WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
-        : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout)
-    {
-    }
-
-    Size2D        output_tile_size{};                     /**< Width and height of the output tile */
-    Size2D        kernel_size{};                          /**< Width and height of the kernel*/
-    Size2D        input_dimensions{};                     /**< Width and height of the input tensor before the convolution is applied */
-    PadStrideInfo convolution_info{};                     /**< Convolution info (Pads, strides,...) */
-    DataLayout    output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
+    WinogradInfo(
+        Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
+        : output_tile_size(output_tile_sz),
+          kernel_size(kernel_sz),
+          input_dimensions(input_dims),
+          convolution_info(conv_info),
+          output_data_layout(data_layout)
+    {
+    }
+
+    Size2D        output_tile_size{}; /**< Width and height of the output tile */
+    Size2D        kernel_size{};      /**< Width and height of the kernel*/
+    Size2D        input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */
+    PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */
+    DataLayout    output_data_layout{
+        DataLayout::
+            NCHW}; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
 };
 
 /** IO formatting information class*/
 struct IOFormatInfo
@@ -2205,5 +2012,8 @@ struct IOFormatInfo
    /** Align columns */
    bool align_columns;
 };
+
+/** Class for holding information related to cropping */
+using CropInfo = Padding2D;
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TYPES_H */
+#endif // ACL_ARM_COMPUTE_CORE_TYPES_H
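A quick illustration of the fixed-format weight helpers introduced above. The helper bodies are copied from the diff; the OHWIo4i2 enumerator value below is a hypothetical stand-in, since the real WeightFormat enumerators now live in arm_compute/core/CoreTypes.h and are not part of this change.

// Minimal sketch: decoding the packed OHWIo<interleave_by>i<block_by> encoding.
#include <cstdio>

enum class WeightFormat : int
{
    UNSPECIFIED = 0x1,
    ANY         = 0x2,
    // Assumed example encoding: block_by = 2 in bits 20..23, interleave_by = 4 in bits 8..19.
    OHWIo4i2 = (2 << 20) | (4 << 8)
};

// Helper bodies as added by this change.
inline int interleave_by(const WeightFormat wf)
{
    return (static_cast<int>(wf) >> 8) & 0xFFF;
}
inline int block_by(const WeightFormat wf)
{
    return (static_cast<int>(wf) >> 20) & 0xF;
}
inline bool is_fixed_format(const WeightFormat &wf)
{
    return wf != WeightFormat::UNSPECIFIED && wf != WeightFormat::ANY;
}

int main()
{
    const WeightFormat wf = WeightFormat::OHWIo4i2;
    // Prints: interleave_by=4 block_by=2 fixed=1
    std::printf("interleave_by=%d block_by=%d fixed=%d\n", interleave_by(wf), block_by(wf),
                static_cast<int>(is_fixed_format(wf)));
    return 0;
}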
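The new Pooling3dLayerInfo and Padding3D descriptors are plain value types, so configuring a 3D pooling operation reduces to filling in one of the constructors shown in the diff. A minimal sketch, assuming the arm_compute headers are on the include path:

#include "arm_compute/core/Types.h"

using namespace arm_compute;

int main()
{
    // 3x3x3 average pooling with stride 2 and a symmetric one-element border,
    // excluding the padded area from the averaging denominator.
    const Pooling3dLayerInfo avg_info(PoolingType::AVG,
                                      3U,                    // pool_size (cubic)
                                      Size3D(2U, 2U, 2U),    // stride
                                      Padding3D(1U, 1U, 1U), // pad_x, pad_y, pad_z
                                      true);                 // exclude_padding

    // Global max pooling: the single-argument constructor sets is_global_pooling.
    const Pooling3dLayerInfo global_info(PoolingType::MAX);

    (void)avg_info;
    (void)global_info;
    return 0;
}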
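WeightsInfo now carries the requested weight format alongside the kernel geometry. A sketch of how a caller might request a backend-chosen fixed format and inspect the outcome; the negotiation step (a kernel replacing ANY with a concrete format via set_weight_format()) is an assumption about the surrounding query flow, not something this header enforces:

#include "arm_compute/core/Types.h"

using namespace arm_compute;

int main()
{
    // 64 un-reshaped 3x3 kernels; ask the backend to pick any fixed format.
    WeightsInfo winfo(false, 3U, 3U, 64U, false, WeightFormat::ANY);

    // After a (hypothetical) negotiation step the field may hold a concrete
    // OHWIo<i>i<b> value; UNSPECIFIED and ANY are treated as non-fixed.
    if (is_fixed_format(winfo.weight_format()))
    {
        const int i = interleave_by(winfo.weight_format());
        (void)i;
    }
    return 0;
}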
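The ValidRegion equality operator added by this change makes regions directly comparable, which is handy in validation code. A tiny sketch, assuming the arm_compute headers are available:

#include "arm_compute/core/Types.h"

using namespace arm_compute;

int main()
{
    const ValidRegion a(Coordinates(0, 0), TensorShape(16U, 16U));
    const ValidRegion b(Coordinates(0, 0), TensorShape(16U, 16U));
    const ValidRegion c(Coordinates(1, 1), TensorShape(15U, 15U));

    // operator== compares both the anchor and the shape.
    const bool same      = (a == b); // true
    const bool different = (a == c); // false
    (void)same;
    (void)different;
    return 0;
}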