41 files changed, 503 insertions, 460 deletions
diff --git a/arm_compute/BUILD.bazel b/arm_compute/BUILD.bazel
index 641a327d51..d1219015b7 100644
--- a/arm_compute/BUILD.bazel
+++ b/arm_compute/BUILD.bazel
@@ -28,6 +28,7 @@ cc_library(
         "*.h",
         "*.hpp",
         "dynamic_fusion/**/*.h",
+        "function_info/**/*.h",
     ]),
     visibility = ["//visibility:public"],
 )
diff --git a/arm_compute/core/CoreTypes.h b/arm_compute/core/CoreTypes.h
new file mode 100644
index 0000000000..4a48a36651
--- /dev/null
+++ b/arm_compute/core/CoreTypes.h
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_CORE_CORETYPES
+#define ACL_ARM_COMPUTE_CORE_CORETYPES
+
+#include "arm_compute/core/Strides.h"
+#include "support/Half.h"
+
+/** CoreTypes.h groups together essential small types that are used across functions */
+
+namespace arm_compute
+{
+/** 16-bit floating point type */
+using half = half_float::half;
+/** Permutation vector */
+using PermutationVector = Strides;
+
+/** Available channels */
+enum class Channel
+{
+    UNKNOWN, /**< Unknown channel format. */
+    C0,      /**< First channel (used by formats with unknown channel types). */
+    C1,      /**< Second channel (used by formats with unknown channel types). */
+    C2,      /**< Third channel (used by formats with unknown channel types). */
+    C3,      /**< Fourth channel (used by formats with unknown channel types). */
+    R,       /**< Red channel. */
+    G,       /**< Green channel. */
+    B,       /**< Blue channel. */
+    A,       /**< Alpha channel. */
+    Y,       /**< Luma channel. */
+    U,       /**< Cb/U channel. */
+    V        /**< Cr/V/Value channel. */
+};
+
+/** Image colour formats */
+enum class Format
+{
+    UNKNOWN,  /**< Unknown image format */
+    U8,       /**< 1 channel, 1 U8 per channel */
+    S16,      /**< 1 channel, 1 S16 per channel */
+    U16,      /**< 1 channel, 1 U16 per channel */
+    S32,      /**< 1 channel, 1 S32 per channel */
+    U32,      /**< 1 channel, 1 U32 per channel */
+    S64,      /**< 1 channel, 1 S64 per channel */
+    U64,      /**< 1 channel, 1 U64 per channel */
+    BFLOAT16, /**< 16-bit brain floating-point number */
+    F16,      /**< 1 channel, 1 F16 per channel */
+    F32,      /**< 1 channel, 1 F32 per channel */
+    UV88,     /**< 2 channels, 1 U8 per channel */
+    RGB888,   /**< 3 channels, 1 U8 per channel */
+    RGBA8888, /**< 4 channels, 1 U8 per channel */
+    YUV444,   /**< A 3-plane format of 8-bit 4:4:4 sampled Y, U, V planes */
+    YUYV422,  /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
+    NV12,     /**< A 2-plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
+    NV21,     /**< A 2-plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
+    IYUV,     /**< A 3-plane format of 8-bit 4:2:0 sampled Y, U, V planes */
+    UYVY422   /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 bytes */
+};
+
+/** Available data types */
+enum class DataType
+{
+    UNKNOWN,            /**< Unknown data type */
+    U8,                 /**< unsigned 8-bit number */
+    S8,                 /**< signed 8-bit number */
+    QSYMM8,             /**< quantized, symmetric fixed-point 8-bit number */
+    QASYMM8,            /**< quantized, asymmetric fixed-point 8-bit number (unsigned) */
+    QASYMM8_SIGNED,     /**< quantized, asymmetric fixed-point 8-bit number (signed) */
+    QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
+    U16,                /**< unsigned 16-bit number */
+    S16,                /**< signed 16-bit number */
+    QSYMM16,            /**< quantized, symmetric fixed-point 16-bit number */
+    QASYMM16,           /**< quantized, asymmetric fixed-point 16-bit number */
+    U32,                /**< unsigned 32-bit number */
+    S32,                /**< signed 32-bit number */
+    U64,                /**< unsigned 64-bit number */
+    S64,                /**< signed 64-bit number */
+    BFLOAT16,           /**< 16-bit brain floating-point number */
+    F16,                /**< 16-bit floating-point number */
+    F32,                /**< 32-bit floating-point number */
+    F64,                /**< 64-bit floating-point number */
+    SIZET               /**< size_t */
+};
+
+/** [DataLayout enum definition] **/
+
+/** Supported tensor data layouts */
+enum class DataLayout
+{
+    UNKNOWN, /**< Unknown data layout */
+    NCHW,    /**< Number of samples, channels, height, width */
+    NHWC,    /**< Number of samples, height, width, channels */
+    NCDHW,   /**< Number of samples, channels, depth, height, width */
+    NDHWC    /**< Number of samples, depth, height, width, channels */
+};
+/** [DataLayout enum definition] **/
+
+/** Supported tensor data layout dimensions */
+enum class DataLayoutDimension
+{
+    CHANNEL, /**< Channel dimension */
+    HEIGHT,  /**< Height dimension */
+    WIDTH,   /**< Width dimension */
+    DEPTH,   /**< Depth dimension */
+    BATCHES  /**< Batches dimension */
+};
+
+/** Dimension rounding type when down-scaling on CNNs
+ * @note Used in pooling and convolution layers
+ */
+enum class DimensionRoundingType
+{
+    FLOOR, /**< Floor rounding (round down) */
+    CEIL   /**< Ceil rounding (round up) */
+};
+/** Padding and stride information class */
+class PadStrideInfo
+{
+public:
+    /** Constructor for symmetric padding
+     *
+     * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
+     * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
+     * @param[in] pad_x    (Optional) Padding, in elements, applied to both the left and the right. Defaults to 0.
+     * @param[in] pad_y    (Optional) Padding, in elements, applied to both the top and the bottom. Defaults to 0.
+     * @param[in] round    (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR.
+     */
+    PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
+                  unsigned int pad_x = 0, unsigned int pad_y = 0,
+                  DimensionRoundingType round = DimensionRoundingType::FLOOR)
+        : _stride(std::make_pair(stride_x, stride_y)),
+          _pad_left(pad_x),
+          _pad_top(pad_y),
+          _pad_right(pad_x),
+          _pad_bottom(pad_y),
+          _round_type(round)
+    {
+    }
+    /** Constructor with an independent padding value for each side
+     *
+     * @param[in] stride_x   Stride, in elements, across x.
+     * @param[in] stride_y   Stride, in elements, across y.
+     * @param[in] pad_left   Padding across x on the left, in elements.
+     * @param[in] pad_right  Padding across x on the right, in elements.
+     * @param[in] pad_top    Padding across y on the top, in elements.
+     * @param[in] pad_bottom Padding across y on the bottom, in elements.
+     * @param[in] round      Dimensions rounding.
+     */
+    PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
+                  unsigned int pad_left, unsigned int pad_right,
+                  unsigned int pad_top, unsigned int pad_bottom,
+                  DimensionRoundingType round)
+        : _stride(std::make_pair(stride_x, stride_y)),
+          _pad_left(pad_left),
+          _pad_top(pad_top),
+          _pad_right(pad_right),
+          _pad_bottom(pad_bottom),
+          _round_type(round)
+    {
+    }
+    /** Get the stride.
+     *
+     * @return a pair: stride x, stride y.
+     */
+    std::pair<unsigned int, unsigned int> stride() const
+    {
+        return _stride;
+    }
+    /** Check whether the padding is symmetric.
+     *
+     * @return True if left equals right padding and top equals bottom padding.
+     */
+    bool padding_is_symmetric() const
+    {
+        return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
+    }
+    /** Get the padding.
+     *
+     * @note This should only be used when the padding is symmetric.
+     *
+     * @return a pair: padding left/right, padding top/bottom
+     */
+    std::pair<unsigned int, unsigned int> pad() const
+    {
+        // Only the left and top values are returned, so the result is meaningful only for symmetric padding
+        ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
+        return std::make_pair(_pad_left, _pad_top);
+    }
+
+    /** Get the left padding, in elements */
+    unsigned int pad_left() const
+    {
+        return _pad_left;
+    }
+    /** Get the right padding, in elements */
+    unsigned int pad_right() const
+    {
+        return _pad_right;
+    }
+    /** Get the top padding, in elements */
+    unsigned int pad_top() const
+    {
+        return _pad_top;
+    }
+    /** Get the bottom padding, in elements */
+    unsigned int pad_bottom() const
+    {
+        return _pad_bottom;
+    }
+
+    /** Get the dimension rounding type */
+    DimensionRoundingType round() const
+    {
+        return _round_type;
+    }
+
+    /** Check whether any of the four padding values is non-zero */
+    bool has_padding() const
+    {
+        return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
+    }
+
+private:
+    std::pair<unsigned int, unsigned int> _stride; /**< Stride as a pair (x, y) */
+    unsigned int _pad_left;                        /**< Padding on the left */
+    unsigned int _pad_top;                         /**< Padding on the top */
+    unsigned int _pad_right;                       /**< Padding on the right */
+    unsigned int _pad_bottom;                      /**< Padding on the bottom */
+
+    DimensionRoundingType _round_type; /**< Dimension rounding type */
+};
+
+/** Memory layouts for the weights tensor.
+ *
+ * * UNSPECIFIED is used to select kernels that do not run in
+ *   variable weights mode.
+ *
+ * * ANY is used to query the kernel database to retrieve any of the
+ *   kernels that run in variable weights mode. Once a kernel is
+ *   found, the specific format expected by the kernel can be
+ *   retrieved by the user for reordering the weights tensor
+ *   accordingly.
+ *
+ * The other values OHWIo{interleave_by}i{block_by} describe the
+ * memory layout of a 4D tensor with layout OHWI that has been
+ * transformed into a 4D tensor with dimensions O'HWI' where:
+ *
+ * O' = first multiple of {interleave_by} s.t. O<=O'
+ * I' = first multiple of {block_by} s.t. I<=I'
+ *
+ * The total size of the dst tensor is O' x H x W x I'
+ *
+ * The access function of the tensor with layout
+ * OHWIo{interleave_by}i{block_by} and size O'HWI' is a 6-parameter
+ * access function, where the 6 parameters are computed as follows:
+ *
+ * x5 = floor(o/{interleave_by}) RANGE [0, O'/{interleave_by} -1] SIZE: O'/{interleave_by}
+ *
+ * x4 = h                        RANGE [0, H-1]                   SIZE: H
+ * x3 = w                        RANGE [0, W-1]                   SIZE: W
+ * x2 = floor(i/{block_by})      RANGE [0, I'/{block_by} -1]      SIZE: I'/{block_by}
+ * x1 = o%{interleave_by}        RANGE [0, {interleave_by} -1]    SIZE: {interleave_by}
+ * x0 = i%{block_by}             RANGE [0, {block_by} -1]         SIZE: {block_by}
+ *                                                          TOTAL SIZE: O' * H * W * I'
+ *
+ *        4D                       6D
+ * -----------------   -----------------------------------
+ * value(o, h, w, i) =   x5 * H * W * I' * {interleave_by}
+ *                     + x4 * W * I' * {interleave_by}
+ *                     + x3 * I' * {interleave_by}
+ *                     + x2 * {interleave_by} * {block_by}
+ *                     + x1 * {block_by}
+ *                     + x0
+ *
+ * Notice that in arm_gemm the 4D tensor of dimension O'HWI' created
+ * for the OHWIo{interleave_by}i{block_by} format is in reality seen
+ * as a 2D tensor, where the number of rows is O'/{interleave_by}
+ * and the number of columns is {interleave_by} * H * W * I'.
+ *
+ * The postfix *_bf16 is for the memory layout needed for the
+ * fast-mode kernels, in which the weights are passed in bfloat16
+ * format.
+ */
+enum class WeightFormat
+{
+    UNSPECIFIED    = 0x1,
+    ANY            = 0x2,
+    OHWI           = 0x100100,
+    OHWIo2         = 0x100200,
+    OHWIo4         = 0x100400,
+    OHWIo8         = 0x100800,
+    OHWIo16        = 0x101000,
+    OHWIo32        = 0x102000,
+    OHWIo64        = 0x104000,
+    OHWIo128       = 0x108000,
+    OHWIo4i2       = 0x200400,
+    OHWIo4i2_bf16  = 0x200410,
+    OHWIo8i2       = 0x200800,
+    OHWIo8i2_bf16  = 0x200810,
+    OHWIo16i2      = 0x201000,
+    OHWIo16i2_bf16 = 0x201010,
+    OHWIo32i2      = 0x202000,
+    OHWIo32i2_bf16 = 0x202010,
+    OHWIo64i2      = 0x204000,
+    OHWIo64i2_bf16 = 0x204010,
+    OHWIo4i4       = 0x400400,
+    OHWIo4i4_bf16  = 0x400410,
+    OHWIo8i4       = 0x400800,
+    OHWIo8i4_bf16  = 0x400810,
+    OHWIo16i4      = 0x401000,
+    OHWIo16i4_bf16 = 0x401010,
+    OHWIo32i4      = 0x402000,
+    OHWIo32i4_bf16 = 0x402010,
+    OHWIo64i4      = 0x404000,
+    OHWIo64i4_bf16 = 0x404010,
+    OHWIo2i8       = 0x800200,
+    OHWIo4i8       = 0x800400,
+    OHWIo8i8       = 0x800800,
+    OHWIo16i8      = 0x801000,
+    OHWIo32i8      = 0x802000,
+    OHWIo64i8      = 0x804000
+};
+
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_CORE_CORETYPES */
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 1ce37d31c1..305766e825 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -26,8 +26,8 @@
 
 #include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/experimental/IPostOp.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 
 namespace arm_compute
 {
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index a69177ed80..12d860205e 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -21,18 +21,53 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TYPES_H
-#define ARM_COMPUTE_TYPES_H
+#ifndef ACL_ARM_COMPUTE_CORE_TYPES
+#define ACL_ARM_COMPUTE_CORE_TYPES
+
+/** The following symbols have been moved to arm_compute/core/CoreTypes.h:
+ * half
+ * PermutationVector
+ * Format
+ * DataType
+ * DataLayout
+ * DataLayoutDimension
+ * PadStrideInfo
+ * WeightFormat
+ * Channel
+ * DimensionRoundingType
+ */
+#include "arm_compute/core/CoreTypes.h"
+/** The following symbols have been moved to arm_compute/function_info/ActivationLayerInfo.h:
+ * ActivationFunction
+ * ActivationLayerInfo
+ */
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+/** The following symbols have been moved to arm_compute/function_info/ConvolutionInfo.h:
+ * ConvolutionInfo
+ */
+#include "arm_compute/function_info/ConvolutionInfo.h"
+/** The following symbols have been moved to arm_compute/function_info/FullyConnectedLayerInfo.h:
+ * FullyConnectedLayerInfo
+ */
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
+/** The following symbols have been moved to arm_compute/function_info/GEMMInfo.h:
+ * GEMMLowpOutputStageType
+ * GEMMLowpOutputStageInfo
+ * GEMMInfo
+ */
+#include "arm_compute/function_info/GEMMInfo.h"
+/** The following symbols have been moved to arm_compute/function_info/MatMulInfo.h:
+ * MatMulInfo
+ */
+#include "arm_compute/function_info/MatMulInfo.h"
 
 #include "arm_compute/core/Coordinates.h"
 #include "arm_compute/core/Size2D.h"
 #include "arm_compute/core/Size3D.h"
-#include "arm_compute/core/Strides.h"
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/experimental/IPostOp.h"
 #include "arm_compute/core/utils/misc/Macros.h"
 #include "support/Bfloat16.h"
-#include "support/Half.h"
 
 #include <cmath>
 #include <cstddef>
@@ -43,85 +78,9 @@
 
 namespace arm_compute
 {
-/** 16-bit floating point type */
-using half = half_float::half;
-
-/** Permutation vector */
-using PermutationVector = Strides;
 /** Bidirectional strides */
 using BiStrides = Coordinates;
 
-/** Available activation functions */
-enum class ActivationFunction
-{
-    LOGISTIC,        /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */
-    TANH,            /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */
-    RELU,            /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */
-    BOUNDED_RELU,    /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */
-    LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */
-    LEAKY_RELU,      /**< Leaky Rectifier ( \f$ f(x) = \begin{cases}  \alpha x & \quad \text{if } x \text{ < 0}\\  x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
-    SOFT_RELU,       /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */
-    ELU,             /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases}  \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\  x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
-    ABS,             /**< Absolute ( \f$ f(x)= |x| \f$ ) */
-    SQUARE,          /**< Square ( \f$ f(x)= x^2 \f$ )*/
-    SQRT,            /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/
-    LINEAR,          /**< Linear ( \f$ f(x)= ax + b \f$ ) */
-    IDENTITY,        /**< Identity ( \f$ f(x)= x \f$ ) */
-    HARD_SWISH,      /**< Hard-swish ( \f$ f(x) = (x \text{ReLU6}(x+3))/6 = x \min(\max(0,x+3),6)/6 \f$ ) */
-    SWISH,           /**< Swish ( \f$ f(x) = \frac{x}{1 + e^{-ax}} = x \text{logistic}(ax) \f$ ) */
-    GELU             /**< GELU ( \f$ f(x) = x * 1/2 * 1 + erf(x / \sqrt{2}) \f$ ) */
-};
-
-/** Image colour formats */
-enum class Format
-{
-    UNKNOWN,  /**< Unknown image format */
-    U8,       /**< 1 channel, 1 U8 per channel */
-    S16,      /**< 1 channel, 1 S16 per channel */
-    U16,      /**< 1 channel, 1 U16 per channel */
-    S32,      /**< 1 channel, 1 S32 per channel */
-    U32,      /**< 1 channel, 1 U32 per channel */
-    S64,      /**< 1 channel, 1 S64 per channel */
-    U64,      /**< 1 channel, 1 U64 per channel */
-    BFLOAT16, /**< 16-bit brain floating-point number */
-    F16,      /**< 1 channel, 1 F16 per channel */
-    F32,      /**< 1 channel, 1 F32 per channel */
-    UV88,     /**< 2 channel, 1 U8 per channel */
-    RGB888,   /**< 3 channels, 1 U8 per channel */
-    RGBA8888, /**< 4 channels, 1 U8 per channel */
-    YUV444,   /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */
-    YUYV422,  /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
-    NV12,     /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
-    NV21,     /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
-    IYUV,     /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
-    UYVY422   /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */
-};
-
-/** Available data types */
-enum class DataType
-{
-    UNKNOWN,            /**< Unknown data type */
-    U8,                 /**< unsigned 8-bit number */
-    S8,                 /**< signed 8-bit number */
-    QSYMM8,             /**< quantized, symmetric fixed-point 8-bit number */
-    QASYMM8,            /**< quantized, asymmetric fixed-point 8-bit number unsigned */
-    QASYMM8_SIGNED,     /**< quantized, asymmetric fixed-point 8-bit number signed */
-    QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
-    U16,                /**< unsigned 16-bit number */
-    S16,                /**< signed 16-bit number */
-    QSYMM16,            /**< quantized, symmetric fixed-point 16-bit number */
-    QASYMM16,           /**< quantized, asymmetric fixed-point 16-bit number */
-    U32,                /**< unsigned 32-bit number */
-    S32,                /**< signed 32-bit number */
-    U64,                /**< unsigned 64-bit number */
-    S64,                /**< signed 64-bit number */
-    BFLOAT16,           /**< 16-bit brain floating-point number */
-    F16,                /**< 16-bit floating-point number */
-    F32,                /**< 32-bit floating-point number */
-    F64,                /**< 64-bit floating-point number */
-    SIZET               /**< size_t */
-};
-
 /** Available Sampling Policies */
 enum class SamplingPolicy
 {
@@ -129,29 +88,6 @@ enum class SamplingPolicy
     TOP_LEFT /**< Samples are taken at pixel top left corner */
 };
 
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layouts */
-enum class DataLayout
-{
-    UNKNOWN, /**< Unknown data layout */
-    NCHW,    /**< Num samples, channels, height, width */
-    NHWC,    /**< Num samples, height, width, channels */
-    NCDHW,   /**< Num samples, channels, depth, height, width */
-    NDHWC    /**< Num samples, depth, height, width, channels */
-};
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layout dimensions */
-enum class DataLayoutDimension
-{
-    CHANNEL, /**< channel */
-    HEIGHT,  /**< height */
-    WIDTH,   /**< width */
-    DEPTH,   /**< depth */
-    BATCHES  /**< batches */
-};
-
 /** Available ConvolutionMethod*/
 enum class ConvolutionMethod
 {
@@ -479,23 +415,6 @@ using PaddingList = std::vector<PaddingInfo>;
 /** Information to produce a tiled version of a Tensor */
 using Multiples = std::vector<uint32_t>;
 
-/** Available channels */
-enum class Channel
-{
-    UNKNOWN, /** Unknown channel format */
-    C0,      /**< First channel (used by formats with unknown channel types). */
-    C1,      /**< Second channel (used by formats with unknown channel types). */
-    C2,      /**< Third channel (used by formats with unknown channel types). */
-    C3,      /**< Fourth channel (used by formats with unknown channel types). */
-    R,       /**< Red channel. */
-    G,       /**< Green channel. */
-    B,       /**< Blue channel. */
-    A,       /**< Alpha channel. */
-    Y,       /**< Luma channel. */
-    U,       /**< Cb/U channel. */
-    V        /**< Cr/V/Value channel. */
-};
-
 /** Available reduction operations */
 enum class ReductionOperation
 {
@@ -568,15 +487,6 @@ struct DetectionWindow
     float    score{ 0.f };   /**< Confidence value for the detection window */
 };
 
-/** Dimension rounding type when down-scaling on CNNs
- * @note Used in pooling and convolution layer
- */
-enum class DimensionRoundingType
-{
-    FLOOR, /**< Floor rounding */
-    CEIL   /**< Ceil rounding */
-};
-
 /** Available pooling types */
 enum class PoolingType
 {
@@ -690,122 +600,6 @@ private:
 };
 
 /** Padding and stride information class */
-class PadStrideInfo
-{
-public:
-    /** Constructor
-     *
-     * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
-     * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
-     * @param[in] pad_x    (Optional) Padding, in elements, across x. Defaults to 0.
-     * @param[in] pad_y    (Optional) Padding, in elements, across y. Defaults to 0.
-     * @param[in] round    (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR.
-     */
-    PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
-                  unsigned int pad_x = 0, unsigned int pad_y = 0,
-                  DimensionRoundingType round = DimensionRoundingType::FLOOR)
-        : _stride(std::make_pair(stride_x, stride_y)),
-          _pad_left(pad_x),
-          _pad_top(pad_y),
-          _pad_right(pad_x),
-          _pad_bottom(pad_y),
-          _round_type(round)
-    {
-    }
-    /** Constructor
-     *
-     * @param[in] stride_x   Stride, in elements, across x.
-     * @param[in] stride_y   Stride, in elements, across y.
-     * @param[in] pad_left   Padding across x on the left, in elements.
-     * @param[in] pad_right  Padding across x on the right, in elements.
-     * @param[in] pad_top    Padding across y on the top, in elements.
-     * @param[in] pad_bottom Padding across y on the bottom, in elements.
-     * @param[in] round      Dimensions rounding.
-     */
-    PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
-                  unsigned int pad_left, unsigned int pad_right,
-                  unsigned int pad_top, unsigned int pad_bottom,
-                  DimensionRoundingType round)
-        : _stride(std::make_pair(stride_x, stride_y)),
-          _pad_left(pad_left),
-          _pad_top(pad_top),
-          _pad_right(pad_right),
-          _pad_bottom(pad_bottom),
-          _round_type(round)
-    {
-    }
-    /** Get the stride.
-     *
-     * @return a pair: stride x, stride y.
-     */
-    std::pair<unsigned int, unsigned int> stride() const
-    {
-        return _stride;
-    }
-    /** Check whether the padding is symmetric.
-     *
-     * @return True if the padding is symmetric.
-     */
-    bool padding_is_symmetric() const
-    {
-        return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
-    }
-    /** Get the padding.
-     *
-     * @note This should only be used when the padding is symmetric.
-     *
-     * @return a pair: padding left/right, padding top/bottom
-     */
-    std::pair<unsigned int, unsigned int> pad() const
-    {
-        //this accessor should be used only when padding is symmetric
-        ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
-        return std::make_pair(_pad_left, _pad_top);
-    }
-
-    /** Get the left padding */
-    unsigned int pad_left() const
-    {
-        return _pad_left;
-    }
-    /** Get the right padding */
-    unsigned int pad_right() const
-    {
-        return _pad_right;
-    }
-    /** Get the top padding */
-    unsigned int pad_top() const
-    {
-        return _pad_top;
-    }
-    /** Get the bottom padding */
-    unsigned int pad_bottom() const
-    {
-        return _pad_bottom;
-    }
-
-    /** Get the rounding type */
-    DimensionRoundingType round() const
-    {
-        return _round_type;
-    }
-
-    /** Check whether this has any padding */
-    bool has_padding() const
-    {
-        return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
-    }
-
-private:
-    std::pair<unsigned int, unsigned int> _stride;
-    unsigned int _pad_left;
-    unsigned int _pad_top;
-    unsigned int _pad_right;
-    unsigned int _pad_bottom;
-
-    DimensionRoundingType _round_type;
-};
-
 /** Padding information for 2D operations like Conv2d */
 struct Padding2D
 {
@@ -1795,96 +1589,6 @@ private:
     int32_t _shrink_axis_mask;
 };
 
-/** Memory layouts for the weights tensor.
-  *
-  * * UNSPECIFIED is used to select kernels that do not run in
-  *    variable weights mode.
-  *
-  * * ANY is used to query the kernel database to retrieve any of the
-  *   kernels that runs in variable weights mode. Once a kernel is
-  *   found, the specific format expected by the kernel can be
-  *   retrieved by the user for reordering the weights tensor
-  *   accordingly.
-  *
-  * The other values OHWIo{interleave_by}i{block_by} describe the
-  * memory layout of a 4D tensor with layout OHWI that has been
-  * transformed into a 4D tensor with dimensions O'HWI' where:
-  *
-  * O' = first multiple of {interleave_by} s.t. O<=O'
-  * I' = first multiple of {block_by} s.t. I<=I'
-  *
-  * The total size of the dst tensor is O' x H x W x I'
-  *
-  * The access function of the tensor with layout
-  * OHWIo{interleave_by}i{block_by} and size O'HWI' is a 6-parameter
-  * access function, where the 6 parameters are computed as follows:
-  *
-  * x5 = floor(o/{interleave_by}) RANGE [0, O'/{interleave_by} -1] SIZE: O'/{interleave_by}
-  *
-  * x4 = h                        RANGE [0, H-1]                   SIZE: H
-  * x3 = w                        RANGE [0, W-1]                   SIZE: W
-  * x2 = floor(i/{block_by})      RANGE [0, I'/{block_by} -1]      SIZE: I'/{block_by}
-  * x1 = o%{interleave_by}        RANGE [0, {interleave_by} -1]    SIZE: {interleave_by}
-  * x0 = i%{block_by}             RANGE [0, {block_by} -1]         SIZE: {block_by}
-  *                                                          TOTAL SIZE: O' * H * W * I'
-  *
-  *        4D                       6D
-  * -----------------   -----------------------------------
-  * value(o, h, w, i) =   x5 * H * W * I' * {interleave_by}
-  *                     + x4 * W * I' * {interleave_by}
-  *                     + x3 * I' * {interleave_by}
-  *                     + x2 * {interleave_by} * {block_by}
-  *                     + x1 * {block_by}
-  *                     + x0
-  *
-  * Notice that in arm_gemm the 4D tensor of dimension O'HWI' created
-  * for the OHWIo{interleave_by}i{block_by} format is in reality seen
-  * as a 2D tensor, where the number of rows is O'/{interleave_by}
-  * and the number of columns is {interleave_by} * H * W * I'.
-  *
-  * The postfix *_bf16 is for the memory layout needed for the
-  * fast-mode kernels, in which the weights are passed in bfloat16
-  * format.
-  */
-enum class WeightFormat
-{
-    UNSPECIFIED    = 0x1,
-    ANY            = 0x2,
-    OHWI           = 0x100100,
-    OHWIo2         = 0x100200,
-    OHWIo4         = 0x100400,
-    OHWIo8         = 0x100800,
-    OHWIo16        = 0x101000,
-    OHWIo32        = 0x102000,
-    OHWIo64        = 0x104000,
-    OHWIo128       = 0x108000,
-    OHWIo4i2       = 0x200400,
-    OHWIo4i2_bf16  = 0x200410,
-    OHWIo8i2       = 0x200800,
-    OHWIo8i2_bf16  = 0x200810,
-    OHWIo16i2      = 0x201000,
-    OHWIo16i2_bf16 = 0x201010,
-    OHWIo32i2      = 0x202000,
-    OHWIo32i2_bf16 = 0x202010,
-    OHWIo64i2      = 0x204000,
-    OHWIo64i2_bf16 = 0x204010,
-    OHWIo4i4       = 0x400400,
-    OHWIo4i4_bf16  = 0x400410,
-    OHWIo8i4       = 0x400800,
-    OHWIo8i4_bf16  = 0x400810,
-    OHWIo16i4      = 0x401000,
-    OHWIo16i4_bf16 = 0x401010,
-    OHWIo32i4      = 0x402000,
-    OHWIo32i4_bf16 = 0x402010,
-    OHWIo64i4      = 0x404000,
-    OHWIo64i4_bf16 = 0x404010,
-    OHWIo2i8       = 0x800200,
-    OHWIo4i8       = 0x800400,
-    OHWIo8i8       = 0x800800,
-    OHWIo16i8      = 0x801000,
-    OHWIo32i8      = 0x802000,
-    OHWIo64i8      = 0x804000
-};
 // OHWIo<interleave_by>i<block_by>
 inline int interleave_by(const WeightFormat wf)
 {
@@ -2095,31 +1799,6 @@ private:
     bool _broadcast_bias;
 };
 
-/** GEMMLowp output stage type */
-enum class GEMMLowpOutputStageType
-{
-    NONE,                     /**< No quantization */
-    QUANTIZE_DOWN,            /**< Quantize using an integer multiplication */
-    QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */
-    QUANTIZE_DOWN_FLOAT       /**< Quantize using a floating point multiplication */
-};
-
-/** GEMMLowp output stage info */
-struct GEMMLowpOutputStageInfo
-{
-    GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE };                        /**< GEMMLowp output stage type */
-    int32_t                 gemmlowp_offset{ 0 };                                         /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
-    int32_t                 gemmlowp_multiplier{ 0 };                                     /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    int32_t                 gemmlowp_shift{ 0 };                                          /**< GEMMLowp output stage shift used for quantizing to uint8 */
-    int32_t                 gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
-    int32_t                 gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() };    /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
-    std::vector<int32_t>    gemmlowp_multipliers{};                                       /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    std::vector<int32_t>    gemmlowp_shifts{};                                            /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    float                   gemmlowp_real_multiplier{ 0 };                                /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */
-    bool                    is_quantized_per_channel{ false };                            /**< GEMMLowp quantized per-channel flag */
-    DataType                output_data_type{ DataType::UNKNOWN };                        /**< Output tensor data type to use if the output is not initialized */
-};
-
 /** GEMM LHS (Left Hand Side) matrix information */
 struct GEMMLHSMatrixInfo
 {
@@ -2236,4 +1915,4 @@ struct IOFormatInfo
 /** Class for holding information related to cropping */
 using CropInfo = Padding2D;
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TYPES_H */
+#endif /* ACL_ARM_COMPUTE_CORE_TYPES */
diff --git a/arm_compute/core/experimental/PostOps.h b/arm_compute/core/experimental/PostOps.h
index c70df841b8..a5585bab5d 100644
--- a/arm_compute/core/experimental/PostOps.h
+++ b/arm_compute/core/experimental/PostOps.h
@@ -26,8 +26,8 @@
 
 #include "arm_compute/core/experimental/IPostOp.h"
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 
 #include <vector>
 
diff --git a/arm_compute/core/utils/FormatUtils.h b/arm_compute/core/utils/FormatUtils.h
index 10e6f747f2..afb0f78255 100644
--- a/arm_compute/core/utils/FormatUtils.h
+++ b/arm_compute/core/utils/FormatUtils.h
@@ -24,7 +24,8 @@
 #ifndef ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H
 #define ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H
 
-#include "arm_compute/core/Types.h"
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/Error.h"
 
 namespace arm_compute
 {
@@ -339,6 +340,5 @@ inline size_t num_channels_from_format(Format format)
  * @return The string describing the format.
  */
 const std::string &string_from_format(Format format);
-
 }
 #endif /*ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H */
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 2a4aa4d7db..77ad33910b 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -24,11 +24,11 @@
 #ifndef ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR
 #define ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR
 
-#include "arm_compute/core/ConvolutionInfo.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensorInfo.h"
 #include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Utils.h"
+#include "arm_compute/function_info/ConvolutionInfo.h"
 #include "arm_compute/runtime/FunctionDescriptors.h"
 
 #include "arm_compute/core/utils/helpers/tensor_transform.h"
@@ -433,8 +433,8 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
     const int        weights_width_idx   = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH);
     const int        weights_height_idx  = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT);
 
-    unsigned int output_width             = 0;
-    unsigned int output_height            = 0;
+    unsigned int output_width  = 0;
+    unsigned int output_height = 0;
     std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx],
                                                               weights_shape[weights_width_idx], weights_shape[weights_height_idx],
                                                               info.pad_stride_info, info.dilation);
@@ -684,8 +684,8 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in
     const DataLayout    data_layout      = winograd_info.output_data_layout;
 
     // Compute output shape
-    unsigned int output_width             = 0;
-    unsigned int output_height            = 0;
+    unsigned int output_width  = 0;
+    unsigned int output_height = 0;
     std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height,
                                                               kernel_size.width, kernel_size.height, conv_info);
 
@@ -725,7 +725,7 @@ inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape
     const unsigned int weights_out_channel = weights_shape[3];
     unsigned int       output_width        = 0;
     unsigned int       output_height       = 0;
-    std::tie(output_width, output_height)  = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);
+    std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);
 
     TensorShape output_shape{ input_shape };
     output_shape.set(idx_width, output_width);
diff --git a/arm_compute/core/ActivationLayerInfo.h b/arm_compute/function_info/ActivationLayerInfo.h
index d9dc0a0702..84e962cb3a 100644
--- a/arm_compute/core/ActivationLayerInfo.h
+++ b/arm_compute/function_info/ActivationLayerInfo.h
@@ -21,30 +21,36 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_ACTIVATIONLAYERINFO_H
-#define ARM_COMPUTE_ACTIVATIONLAYERINFO_H
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_ACTIVATIONLAYERINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_ACTIVATIONLAYERINFO
 
-#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/CoreTypes.h"
 #include "arm_compute/core/QuantizationInfo.h"
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Size3D.h"
-#include "arm_compute/core/Strides.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/experimental/IPostOp.h"
-#include "arm_compute/core/utils/misc/Macros.h"
-#include "support/Bfloat16.h"
-#include "support/Half.h"
 
-#include <cmath>
-#include <cstddef>
-#include <cstdint>
-#include <map>
-#include <string>
-#include <utility>
+#include <array>
 
 namespace arm_compute
 {
+/** Available activation functions */
+enum class ActivationFunction
+{
+    LOGISTIC,        /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */
+    TANH,            /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */
+    RELU,            /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */
+    BOUNDED_RELU,    /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */
+    LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */
+    LEAKY_RELU,      /**< Leaky Rectifier ( \f$ f(x) = \begin{cases}  \alpha x & \quad \text{if } x \text{ < 0}\\  x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
+    SOFT_RELU,       /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */
+    ELU,             /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases}  \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\  x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
+    ABS,             /**< Absolute ( \f$ f(x)= |x| \f$ ) */
+    SQUARE,          /**< Square ( \f$ f(x)= x^2 \f$ )*/
+    SQRT,            /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/
+    LINEAR,          /**< Linear ( \f$ f(x)= ax + b \f$ ) */
+    IDENTITY,        /**< Identity ( \f$ f(x)= x \f$ ) */
+    HARD_SWISH,      /**< Hard-swish ( \f$ f(x) = (x \text{ReLU6}(x+3))/6 = x \min(\max(0,x+3),6)/6 \f$ ) */
+    SWISH,           /**< Swish ( \f$ f(x) = \frac{x}{1 + e^{-ax}} = x \text{logistic}(ax) \f$ ) */
+    GELU             /**< GELU ( \f$ f(x) = x * 1/2 * (1 + erf(x / \sqrt{2})) \f$ ) */
+};
 /** Activation Layer Information class */
 class ActivationLayerInfo
 {
@@ -108,4 +114,4 @@ private:
 #endif // __aarch64__
 };
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_ACTIVATIONLAYERINFO_H */
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_ACTIVATIONLAYERINFO */
diff --git a/arm_compute/core/ConvolutionInfo.h b/arm_compute/function_info/ConvolutionInfo.h
index 1b5e5d197b..c27dc523c8 100644
--- a/arm_compute/core/ConvolutionInfo.h
+++ b/arm_compute/function_info/ConvolutionInfo.h
@@ -21,11 +21,12 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CONVOLUTIONINFO_H
-#define ARM_COMPUTE_CONVOLUTIONINFO_H
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_CONVOLUTIONINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_CONVOLUTIONINFO
 
-#include "arm_compute/core/ActivationLayerInfo.h"
-#include "arm_compute/core/Types.h"
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/Size2D.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 
 namespace arm_compute
 {
@@ -42,4 +43,4 @@ struct ConvolutionInfo
     Size2D              dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */
 };
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CONVOLUTIONINFO_H */
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_CONVOLUTIONINFO */
diff --git a/arm_compute/core/FullyConnectedLayerInfo.h b/arm_compute/function_info/FullyConnectedLayerInfo.h
index f699cb2792..5f5578eadd 100644
--- a/arm_compute/core/FullyConnectedLayerInfo.h
+++ b/arm_compute/function_info/FullyConnectedLayerInfo.h
@@ -21,11 +21,11 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_FULLYCONNECTEDLAYERINFO_H
-#define ARM_COMPUTE_FULLYCONNECTEDLAYERINFO_H
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_FULLYCONNECTEDLAYERINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_FULLYCONNECTEDLAYERINFO
 
-#include "arm_compute/core/ActivationLayerInfo.h"
-#include "arm_compute/core/Types.h"
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 
 namespace arm_compute
 {
@@ -68,4 +68,4 @@ struct FullyConnectedLayerInfo
 };
 
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_FULLYCONNECTEDLAYERINFO_H */
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_FULLYCONNECTEDLAYERINFO */
diff --git a/arm_compute/core/GEMMInfo.h b/arm_compute/function_info/GEMMInfo.h
index 4c8e94a315..daaf86243a 100644
--- a/arm_compute/core/GEMMInfo.h
+++ b/arm_compute/function_info/GEMMInfo.h
@@ -21,14 +21,41 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_GEMMINFO_H
-#define ARM_COMPUTE_GEMMINFO_H
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_GEMMINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_GEMMINFO
 
-#include "arm_compute/core/ActivationLayerInfo.h"
-#include "arm_compute/core/Types.h"
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/experimental/IPostOp.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include <vector>
 
 namespace arm_compute
 {
+class ITensorInfo;
+/** GEMMLowp output stage type */
+enum class GEMMLowpOutputStageType
+{
+    NONE,                     /**< No quantization */
+    QUANTIZE_DOWN,            /**< Quantize using an integer multiplication */
+    QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */
+    QUANTIZE_DOWN_FLOAT       /**< Quantize using a floating point multiplication */
+};
+
+/** GEMMLowp output stage info */
+struct GEMMLowpOutputStageInfo
+{
+    GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE };                        /**< GEMMLowp output stage type */
+    int32_t                 gemmlowp_offset{ 0 };                                         /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
+    int32_t                 gemmlowp_multiplier{ 0 };                                     /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
+    int32_t                 gemmlowp_shift{ 0 };                                          /**< GEMMLowp output stage shift used for quantizing to uint8 */
+    int32_t                 gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
+    int32_t                 gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() };    /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
+    std::vector<int32_t>    gemmlowp_multipliers{};                                       /**< GEMMLowp output stage multipliers used for quantizing to QASYMM8 */
+    std::vector<int32_t>    gemmlowp_shifts{};                                            /**< GEMMLowp output stage shifts used for quantizing to QASYMM8 */
+    float                   gemmlowp_real_multiplier{ 0 };                                /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */
+    bool                    is_quantized_per_channel{ false };                            /**< GEMMLowp quantized per-channel flag */
+    DataType                output_data_type{ DataType::UNKNOWN };                        /**< Output tensor data type to use if the output is not initialized */
+};
 /** GEMM information class. This class stores the necessary information to compute GEMM functions
  *
  * This object also contains the information about how matrix A and matrix B have been reshaped
@@ -311,4 +338,4 @@ private:
     arm_compute::WeightFormat               _weight_format;
 };
 } //namespace arm_compute
-#endif /* ARM_COMPUTE_GEMMINFO_H */
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_GEMMINFO */
diff --git a/arm_compute/core/MatMulInfo.h b/arm_compute/function_info/MatMulInfo.h
index 01b9b47761..cd9ef1f4d9 100644
--- a/arm_compute/core/MatMulInfo.h
+++ b/arm_compute/function_info/MatMulInfo.h
@@ -21,25 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_MATMULINFO_H
-#define ARM_COMPUTE_MATMULINFO_H
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Size3D.h"
-#include "arm_compute/core/Strides.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/experimental/IPostOp.h"
-#include "arm_compute/core/utils/misc/Macros.h"
-#include "support/Bfloat16.h"
-#include "support/Half.h"
-
-#include <cmath>
-#include <cstddef>
-#include <cstdint>
-#include <map>
-#include <string>
-#include <utility>
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_MATMULINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_MATMULINFO
 
 namespace arm_compute
 {
@@ -72,8 +55,8 @@ public:
     }
 
 private:
-    bool                _adj_lhs{ false };
-    bool                _adj_rhs{ false };
+    bool _adj_lhs{ false };
+    bool _adj_rhs{ false };
 };
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_MATMULINFO_H */
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_MATMULINFO */
diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h
index 644f12b6a4..167f7388d4 100644
--- a/arm_compute/graph/Types.h
+++ b/arm_compute/graph/Types.h
@@ -27,10 +27,10 @@
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/core/ActivationLayerInfo.h"
-#include "arm_compute/core/ConvolutionInfo.h"
-#include "arm_compute/core/FullyConnectedLayerInfo.h"
-#include "arm_compute/core/GEMMInfo.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/function_info/ConvolutionInfo.h"
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
+#include "arm_compute/function_info/GEMMInfo.h"
 #include "arm_compute/runtime/CL/CLTunerTypes.h"
 #include "arm_compute/runtime/CL/CLTypes.h"
 
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index 34e47f56f5..4a718ab4b6 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -26,8 +26,8 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/CL/CLRuntimeContext.h"
 
 namespace arm_compute
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index 2acdfc37ab..37a0680709 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -26,8 +26,8 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 
 #include <memory>
 
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 4b8c550442..8c9e45d753 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -24,10 +24,10 @@
 #ifndef ARM_COMPUTE_CLCONVOLUTIONLAYER_H
 #define ARM_COMPUTE_CLCONVOLUTIONLAYER_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/CL/CLCompileContext.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/experimental/IPostOp.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 9613caa10a..2798449100 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
 #define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/functions/CLPermute.h"
 #include "arm_compute/runtime/IFunction.h"
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index bc6f34f2a9..462a3ac07e 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H
 #define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
 #include "arm_compute/runtime/IFunction.h"
 
diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
index ecebac435a..9de362d2b2 100644
--- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
+++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONS_H
 #define ARM_COMPUTE_CLELEMENTWISEOPERATIONS_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/CL/ICLOperator.h"
 #include "arm_compute/runtime/IFunction.h"
 
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 19243e473a..b784226a2f 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H
 #define ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H
 
-#include "arm_compute/core/FullyConnectedLayerInfo.h"
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 
 #include "arm_compute/runtime/CL/CLTensor.h"
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index b72ffa0357..3a39aca692 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_CLGEMM_H
 #define ARM_COMPUTE_CLGEMM_H
 
-#include "arm_compute/core/GEMMInfo.h"
+#include "arm_compute/function_info/GEMMInfo.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTypes.h"
 #include "arm_compute/runtime/IFunction.h"
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index f07fbb4cc9..9827340382 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H
 #define ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/experimental/IPostOp.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTypes.h"
 #include "arm_compute/runtime/IFunction.h"
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 1532060293..8b8d9f235f 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
 #define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
 
-#include "arm_compute/core/GEMMInfo.h"
+#include "arm_compute/function_info/GEMMInfo.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/MemoryGroup.h"
diff --git a/arm_compute/runtime/CL/functions/CLIndirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLIndirectConvolutionLayer.h
index 11677fb83a..12b83ea25b 100644
--- a/arm_compute/runtime/CL/functions/CLIndirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLIndirectConvolutionLayer.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_CLINDIRECTCONVOLUTIONLAYER_H
 #define ARM_COMPUTE_CLINDIRECTCONVOLUTIONLAYER_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 
 #include <memory>
diff --git a/arm_compute/runtime/CL/functions/CLMatMul.h b/arm_compute/runtime/CL/functions/CLMatMul.h
index a11c1ed6a2..9d54bab868 100644
--- a/arm_compute/runtime/CL/functions/CLMatMul.h
+++ b/arm_compute/runtime/CL/functions/CLMatMul.h
@@ -24,8 +24,8 @@
 #ifndef ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL
 #define ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include <memory>
 
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index f9081cfe25..62b6d96ad5 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H
 #define ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Rounding.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/CL/ICLOperator.h"
 #include "arm_compute/runtime/IFunction.h"
 
diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
index 74ffe46690..adf5f18626 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
 #define ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 
diff --git a/arm_compute/runtime/FunctionDescriptors.h b/arm_compute/runtime/FunctionDescriptors.h
index 241359519f..630f533244 100644
--- a/arm_compute/runtime/FunctionDescriptors.h
+++ b/arm_compute/runtime/FunctionDescriptors.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H
 #define ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 
 #include <utility>
 
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 7a1e532cf7..9992de2af8 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -26,8 +26,8 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IRuntimeContext.h"
 
 #include <memory>
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index e55c6d94fc..b0d710d517 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_NEARITHMETICADDITION_H
 #define ARM_COMPUTE_NEARITHMETICADDITION_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include <memory>
 
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index 483d81bdf6..6fbe9ad450 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_NEARITHMETICSUBTRACTION_H
 #define ARM_COMPUTE_NEARITHMETICSUBTRACTION_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/NEON/INEOperator.h"
 
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index 8b9b157918..4dd76d082b 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -26,9 +26,9 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/ITensorInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 
 #include <memory>
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index 02eb3ac7d1..8db7e6596b 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H
 #define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index c91ae203bb..bfcd221e17 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H
 #define ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/NEON/INEOperator.h"
 
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index d0367b79fd..05b7ce3735 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H
 #define ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H
 
-#include "arm_compute/core/FullyConnectedLayerInfo.h"
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/IWeightsManager.h"
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 2ad3746718..c6ff2dfb92 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_NEGEMM_H
 #define ARM_COMPUTE_NEGEMM_H
 
-#include "arm_compute/core/GEMMInfo.h"
+#include "arm_compute/function_info/GEMMInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/IWeightsManager.h"
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 1aca1df8eb..72309e464e 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -26,8 +26,8 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/IWeightsManager.h"
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index ccd2fb5a49..addb13cdfa 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -24,8 +24,8 @@
 #ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
 #define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
 
-#include "arm_compute/core/GEMMInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/GEMMInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/IWeightsManager.h"
diff --git a/arm_compute/runtime/NEON/functions/NEMatMul.h b/arm_compute/runtime/NEON/functions/NEMatMul.h
index 81fec19f86..e961f860c1 100644
--- a/arm_compute/runtime/NEON/functions/NEMatMul.h
+++ b/arm_compute/runtime/NEON/functions/NEMatMul.h
@@ -24,8 +24,8 @@
 #ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL
 #define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 #include <memory>
 
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index d09899c3bb..634e8e0c39 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -24,9 +24,9 @@
 #ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H
 #define ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H
 
-#include "arm_compute/core/ActivationLayerInfo.h"
-#include "arm_compute/core/Types.h"
 #include "arm_compute/core/Rounding.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/IFunction.h"
 
 #include <memory>
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index b3d3f9e30a..f6f0185e7d 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -26,8 +26,8 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/ActivationLayerInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
 #include "arm_compute/runtime/Tensor.h"
 
 #include <memory>