diff options
Diffstat (limited to 'src/core/helpers')
-rw-r--r-- | src/core/helpers/AutoConfiguration.h | 183 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.cpp | 79 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.h | 73 | ||||
-rw-r--r-- | src/core/helpers/MemoryHelpers.h | 139 | ||||
-rw-r--r-- | src/core/helpers/NormalizationHelpers.h | 47 | ||||
-rw-r--r-- | src/core/helpers/PoolingHelpers.h | 219 | ||||
-rw-r--r-- | src/core/helpers/ScaleHelpers.h | 207 | ||||
-rw-r--r-- | src/core/helpers/SoftmaxHelpers.cpp | 45 | ||||
-rw-r--r-- | src/core/helpers/SoftmaxHelpers.h | 50 | ||||
-rw-r--r-- | src/core/helpers/Utils.cpp | 49 | ||||
-rw-r--r-- | src/core/helpers/Utils.h | 120 | ||||
-rw-r--r-- | src/core/helpers/WindowHelpers.cpp | 349 | ||||
-rw-r--r-- | src/core/helpers/WindowHelpers.h | 230 |
13 files changed, 1790 insertions, 0 deletions
diff --git a/src/core/helpers/AutoConfiguration.h b/src/core/helpers/AutoConfiguration.h new file mode 100644 index 0000000000..9df2a76983 --- /dev/null +++ b/src/core/helpers/AutoConfiguration.h @@ -0,0 +1,183 @@ +/* +* Copyright (c) 2020, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_AUTOCONFIGURATION_H +#define SRC_CORE_HELPERS_AUTOCONFIGURATION_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/DataTypeUtils.h" + +namespace arm_compute +{ +/** Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * @param[in] num_channels New number of channels. 
+ * @param[in] data_type New data type + * @param[in] quantization_info (Optional) New quantization info + * + * @return True if the tensor info has been initialized + */ +inline bool auto_init_if_empty(ITensorInfo &info, + const TensorShape &shape, + int num_channels, + DataType data_type, + QuantizationInfo quantization_info = QuantizationInfo()) +{ + if (info.tensor_shape().total_size() == 0) + { + info.set_data_type(data_type); + info.set_num_channels(num_channels); + info.set_tensor_shape(shape); + info.set_quantization_info(quantization_info); + return true; + } + + return false; +} + +/** Auto initialize the tensor info using another tensor info. + * + * (COMPMID-6012) This method should remain in sync with the fields of ITensorInfo that have setters. + * + * + * @param info_sink Tensor info used to check and assign + * @param info_source Tensor info used to assign + * + * + * @return True if the tensor info has been initialized + */ +inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source) +{ + if (info_sink.tensor_shape().total_size() == 0) + { + info_sink.set_data_type(info_source.data_type()); + info_sink.set_num_channels(info_source.num_channels()); + info_sink.set_tensor_shape(info_source.tensor_shape()); + info_sink.set_quantization_info(info_source.quantization_info()); + info_sink.set_data_layout(info_source.data_layout()); + info_sink.set_are_values_constant(info_source.are_values_constant()); + return true; + } + + return false; +} + +/** Set the shape to the specified value if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * + * @return True if the shape has been changed. 
+ */ +inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) +{ + if (info.tensor_shape().total_size() == 0) + { + info.set_tensor_shape(shape); + return true; + } + + return false; +} + +/** Set the format, data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] format New format. + * + * @return True if the format has been changed. + */ +inline bool set_format_if_unknown(ITensorInfo &info, Format format) +{ + if (info.data_type() == DataType::UNKNOWN) + { + info.set_format(format); + return true; + } + + return false; +} + +/** Set the data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] data_type New data type. + * + * @return True if the data type has been changed. + */ +inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) +{ + if (info.data_type() == DataType::UNKNOWN) + { + info.set_data_type(data_type); + return true; + } + + return false; +} + +/** Set the data layout to the specified value if + * the current data layout is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] data_layout New data layout. + * + * @return True if the data type has been changed. + */ +inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout) +{ + if (info.data_layout() == DataLayout::UNKNOWN) + { + info.set_data_layout(data_layout); + return true; + } + + return false; +} + +/** Set the quantization info to the specified value if + * the current quantization info is empty and the data type of asymmetric quantized type + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] quantization_info Quantization info + * + * @return True if the quantization info has been changed. 
+ */ +inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info) +{ + if (info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) + { + info.set_quantization_info(quantization_info); + return true; + } + + return false; +} +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_AUTOCONFIGURATION_H */ diff --git a/src/core/helpers/LUTManager.cpp b/src/core/helpers/LUTManager.cpp new file mode 100644 index 0000000000..06e35eed8c --- /dev/null +++ b/src/core/helpers/LUTManager.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "src/core/helpers/LUTManager.h" + +namespace arm_compute +{ +#ifdef __aarch64__ +namespace +{ + +void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut) +{ + union Element + { + uint16_t i = 0; + float16_t fp; + } item; + // Fill lut by iterating over all 16 bit values using the union. + while (true) + { + (*lut)[item.i] = 1.f / (1.f + std::exp(-item.fp)); + if (item.i == 65535) + break; + item.i++; + } +} +} // namespace + +std::shared_ptr<ActivationLayerInfo::LookupTable65536> LUTManager::get_lut_table(LUTInfo info) +{ + const auto itr = map_fp16.find(info); + auto s_ptr = (itr != map_fp16.end()) ? itr->second.lock() : nullptr; // nullptr if invalid or not found. + if (s_ptr != nullptr) + { + // Found and valid + return s_ptr; // Return weak ptr as shared ptr + } + else + { + // Not found, or pointer not valid + // We do not use make_shared to prevent the weak_ptr keeping the control block alive + std::shared_ptr<ActivationLayerInfo::LookupTable65536> ptr(new ActivationLayerInfo::LookupTable65536); + init_lut_fp16(ptr.get()); + map_fp16[info] = ptr; + return ptr; + } +} +#endif // __aarch64__ + +// Static function to get LutManager instance +LUTManager &LUTManager::get_instance() +{ + static auto inst_ = std::make_unique<LUTManager>(); // The one, single instance. + return *inst_; +} + +} // namespace arm_compute diff --git a/src/core/helpers/LUTManager.h b/src/core/helpers/LUTManager.h new file mode 100644 index 0000000000..4e13ead7e3 --- /dev/null +++ b/src/core/helpers/LUTManager.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ACL_SRC_CORE_HELPERS_LUTMANAGER_H +#define ACL_SRC_CORE_HELPERS_LUTMANAGER_H + +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" + +#include <map> +#include <memory> + +namespace arm_compute +{ + +struct LUTInfo +{ + ActivationLayerInfo::ActivationFunction act; + DataType dt; + QuantizationInfo qinfo; + // Operators enable use of map with LUTInfo as key + friend bool operator<(const LUTInfo &l, const LUTInfo &r) + { + return (l.act < r.act) || ((l.act == r.act) && (l.dt < r.dt)) || + ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() < r.qinfo.scale())) || + ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() == r.qinfo.scale()) && + (l.qinfo.offset() < r.qinfo.offset())); + } + bool operator==(const LUTInfo &l) + { + return this->act == l.act && this->dt == l.dt && this->qinfo == l.qinfo; + } +}; + +/* Class to handle getting look up table */ +class LUTManager +{ +public: + LUTManager() = default; + + static LUTManager &get_instance(); +#ifdef __aarch64__ + std::shared_ptr<ActivationLayerInfo::LookupTable65536> get_lut_table(LUTInfo info); + +private: + std::map<LUTInfo, std::weak_ptr<ActivationLayerInfo::LookupTable65536>> map_fp16{}; +#endif // __aarch64__ +}; + +} // namespace arm_compute +#endif // ACL_SRC_CORE_HELPERS_LUTMANAGER_H diff --git a/src/core/helpers/MemoryHelpers.h b/src/core/helpers/MemoryHelpers.h new file mode 100644 index 0000000000..dd094b414c --- /dev/null +++ b/src/core/helpers/MemoryHelpers.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_COMMON_MEMORY_HELPERS_H +#define SRC_COMMON_MEMORY_HELPERS_H + +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/MemoryGroup.h" + +#include <memory> +#include <utility> +#include <vector> + +namespace arm_compute +{ +inline int offset_int_vec(int offset) +{ + return ACL_INT_VEC + offset; +} + +template <typename TensorType> +struct WorkspaceDataElement +{ + int slot{-1}; + experimental::MemoryLifetime lifetime{experimental::MemoryLifetime::Temporary}; + std::unique_ptr<TensorType> tensor{nullptr}; +}; + +template <typename TensorType> +using WorkspaceData = std::vector<WorkspaceDataElement<TensorType>>; + +template <typename TensorType> +WorkspaceData<TensorType> +manage_workspace(const experimental::MemoryRequirements &mem_reqs, MemoryGroup &mgroup, ITensorPack &run_pack) +{ + ITensorPack dummy_pack = ITensorPack(); + return manage_workspace<TensorType>(mem_reqs, mgroup, run_pack, dummy_pack); +} + +template <typename TensorType> +WorkspaceData<TensorType> manage_workspace(const experimental::MemoryRequirements &mem_reqs, + MemoryGroup &mgroup, + ITensorPack &run_pack, + ITensorPack &prep_pack) +{ + WorkspaceData<TensorType> workspace_memory; + for (const auto &req : mem_reqs) + { + if (req.size == 0) + { + continue; + } + + const auto aux_info = TensorInfo{TensorShape(req.size), 1, DataType::U8}; + workspace_memory.emplace_back( + WorkspaceDataElement<TensorType>{req.slot, req.lifetime, std::make_unique<TensorType>()}); + + auto aux_tensor = workspace_memory.back().tensor.get(); + ARM_COMPUTE_ERROR_ON_NULLPTR(aux_tensor); + aux_tensor->allocator()->init(aux_info, req.alignment); + + if (req.lifetime == experimental::MemoryLifetime::Temporary) + { + mgroup.manage(aux_tensor); + } + else + { + prep_pack.add_tensor(req.slot, aux_tensor); + } + run_pack.add_tensor(req.slot, aux_tensor); + } + + for (auto &mem : workspace_memory) + { + 
auto tensor = mem.tensor.get(); + tensor->allocator()->allocate(); + } + + return workspace_memory; +} + +template <typename TensorType> +void release_prepare_tensors(WorkspaceData<TensorType> &workspace, ITensorPack &prep_pack) +{ + workspace.erase(std::remove_if(workspace.begin(), workspace.end(), + [&prep_pack](auto &wk) + { + const bool to_erase = wk.lifetime == experimental::MemoryLifetime::Prepare; + if (to_erase) + { + prep_pack.remove_tensor(wk.slot); + } + return to_erase; + }), + workspace.end()); +} + +/** Utility function to release tensors with lifetime marked as Prepare */ +template <typename TensorType> +void release_temporaries(const experimental::MemoryRequirements &mem_reqs, WorkspaceData<TensorType> &workspace) +{ + for (auto &ws : workspace) + { + const int slot = ws.slot; + for (auto &m : mem_reqs) + { + if (m.slot == slot && m.lifetime == experimental::MemoryLifetime::Prepare) + { + auto tensor = ws.tensor.get(); + tensor->allocator()->free(); + break; + } + } + } +} +} // namespace arm_compute +#endif /* SRC_COMMON_MEMORY_HELPERS_H */ diff --git a/src/core/helpers/NormalizationHelpers.h b/src/core/helpers/NormalizationHelpers.h new file mode 100644 index 0000000000..d94d5e3602 --- /dev/null +++ b/src/core/helpers/NormalizationHelpers.h @@ -0,0 +1,47 @@ +/* +* Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H +#define SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** Calculate the normalization dimension index for a given normalization type + * + * @param[in] layout Data layout of the input and output tensor + * @param[in] info Normalization info + * + * @return Normalization dimension index + */ +inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info) +{ + const unsigned int width_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH); + const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL); + + return info.is_in_map() ? width_idx : channel_idx; +} +} // namespace arm_compute +#endif /* SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H */ diff --git a/src/core/helpers/PoolingHelpers.h b/src/core/helpers/PoolingHelpers.h new file mode 100644 index 0000000000..9ef045f472 --- /dev/null +++ b/src/core/helpers/PoolingHelpers.h @@ -0,0 +1,219 @@ +/* +* Copyright (c) 2022 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_CORE_HELPERS_POOLINGHELPERS_H +#define SRC_CORE_HELPERS_POOLINGHELPERS_H + +#include "src/core/NEON/NEAsymm.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace +{ + +inline float calculate_avg_scale_pool3d(bool exclude_padding, + const Coordinates &id, + const int pool_size_x, + const int pool_size_y, + const int pool_size_z, + const int upper_bound_w, + const int upper_bound_h, + const int upper_bound_d, + const int pad_x, + const int pad_y, + const int pad_z, + const int stride_x, + const int stride_y, + const int stride_z) +{ + // Based on NDHWC + int start_x = id[1] * stride_x - pad_x; + int start_y = id[2] * stride_y - pad_y; + int start_z = id[3] * stride_z - pad_z; + + const int end_x = std::min(start_x + pool_size_x, upper_bound_w); + const int end_y = std::min(start_y + pool_size_y, upper_bound_h); + const int end_z = std::min(start_z + pool_size_z, upper_bound_d); + if (exclude_padding) + { + start_x = std::max(0, start_x); + start_y = std::max(0, start_y); + start_z = std::max(0, start_z); + } + return 1.f / ((end_y - start_y) * (end_x - start_x) * (end_z - start_z)); +} + +inline float calculate_avg_scale_pool2d(bool exclude_padding, + DataLayout data_layout, + const Coordinates &id, + const int pool_size_x, + const int pool_size_y, + const int upper_bound_w, + const int upper_bound_h, + const int pad_x, + const int pad_y, + const int stride_x, + const int stride_y) +{ + const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + int start_x = id[idx_width] * stride_x - pad_x; + int start_y = id[idx_height] * stride_y - pad_y; + + const int end_x = std::min(start_x + pool_size_x, upper_bound_w); + const int end_y = std::min(start_y + pool_size_y, upper_bound_h); + if (exclude_padding) + { + start_x = std::max(0, start_x); + start_y = std::max(0, start_y); + } + return 1.f 
/ ((end_y - start_y) * (end_x - start_x)); +} + +template <typename T> +inline typename std::enable_if<std::is_same<T, int8_t>::value, int8_t>::type +quantize(float val, const UniformQuantizationInfo &info) +{ + return quantize_qasymm8_signed(val, info); +} + +template <typename T> +inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8_t>::type +quantize(float val, const UniformQuantizationInfo &info) +{ + return quantize_qasymm8(val, info); +} + +template <typename T> +inline T vcvtq_q32_f32(float32x4_t values); + +template <> +inline uint32x4_t vcvtq_q32_f32(float32x4_t values) +{ + return vcvtq_u32_f32(values); +} + +template <> +inline int32x4_t vcvtq_q32_f32(float32x4_t values) +{ + return vcvtq_s32_f32(values); +} + +template <typename T> +inline float32x4_t vcvtq_f32_q32(T values); + +template <> +inline float32x4_t vcvtq_f32_q32(uint32x4_t values) +{ + return vcvtq_f32_u32(values); +} + +template <> +inline float32x4_t vcvtq_f32_q32(int32x4_t values) +{ + return vcvtq_f32_s32(values); +} + +template <typename Tout> +inline Tout vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset); + +template <> +inline uint8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset) +{ + const float new_scale = quant_rescale / scale_pooling; + return vquantize(acc, UniformQuantizationInfo(new_scale, new_offset)); +} + +template <> +inline int8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset) +{ + const float new_scale = quant_rescale / scale_pooling; + return vquantize_signed(acc, UniformQuantizationInfo(new_scale, new_offset)); +} + +template <typename Tin, typename Tout> +inline Tout vrequantize_pooling(Tin vec1, Tin vec2, const UniformQuantizationInfo &requant_qinfo); + +template 
<> +inline uint8x16_t vrequantize_pooling(uint8x8_t vec1, uint8x8_t vec2, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x4_t acc = {{ + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec1))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec1))))), + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec2))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec2))))), + }}; + return vquantize(acc, requant_qinfo); +} + +template <> +inline int8x16_t vrequantize_pooling(int8x8_t vec1, int8x8_t vec2, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x4_t acc = {{ + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec1))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec1))))), + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec2))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec2))))), + }}; + return vquantize_signed(acc, requant_qinfo); +} + +template <typename T> +inline T vrequantize_pooling(T &vec, const UniformQuantizationInfo &requant_qinfo); + +template <> +inline uint8x8_t vrequantize_pooling(uint8x8_t &vec, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x2_t acc = {{ + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec))))), + }}; + return vquantize(acc, requant_qinfo); +} + +template <> +inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x2_t acc = {{ + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec))))), + }}; + return vquantize_signed(acc, requant_qinfo); +} + +} // namespace +} // namespace cpu +} // namespace arm_compute +#endif /* SRC_CORE_HELPERS_POOLINGHELPERS_H */ diff --git a/src/core/helpers/ScaleHelpers.h b/src/core/helpers/ScaleHelpers.h new file mode 100644 index 0000000000..47605e7385 --- /dev/null +++ b/src/core/helpers/ScaleHelpers.h @@ -0,0 +1,207 @@ +/* +* Copyright (c) 2020-2021 
Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_SCALEHELPERS_H +#define SRC_CORE_HELPERS_SCALEHELPERS_H + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/QuantizationInfo.h" + +#include <algorithm> +#include <cmath> +#include <cstddef> +#include <cstdint> + +namespace arm_compute +{ +namespace scale_helpers +{ +/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * @param[in] iq_info Input QuantizationInfo + * @param[in] oq_info Output QuantizationInfo + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, + size_t stride, + float dx, + float dy, + UniformQuantizationInfo iq_info, + UniformQuantizationInfo oq_info) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = dequantize_qasymm8(*pixel_ptr, iq_info); + const float a01 = dequantize_qasymm8(*(pixel_ptr + 1), iq_info); + const float a10 = dequantize_qasymm8(*(pixel_ptr + stride), iq_info); + const float a11 = dequantize_qasymm8(*(pixel_ptr + stride + 1), iq_info); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; + return static_cast<uint8_t>(quantize_qasymm8(res, oq_info)); +} + +/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8_SIGNED and in single channel format. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * @param[in] iq_info Input QuantizationInfo + * @param[in] oq_info Output QuantizationInfo + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, + size_t stride, + float dx, + float dy, + UniformQuantizationInfo iq_info, + UniformQuantizationInfo oq_info) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = dequantize_qasymm8_signed(*pixel_ptr, iq_info); + const float a01 = dequantize_qasymm8_signed(*(pixel_ptr + 1), iq_info); + const float a10 = dequantize_qasymm8_signed(*(pixel_ptr + stride), iq_info); + const float a11 = dequantize_qasymm8_signed(*(pixel_ptr + stride + 1), iq_info); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; + return static_cast<int8_t>(quantize_qasymm8_signed(res, oq_info)); +} + +/** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8 + * + * @note The interpolation area depends on the width and height ration of the input and output images + * @note Currently average of the contributing pixels is calculated + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] wr Width ratio among the input image width and output image width. 
+ * @param[in] hr Height ratio among the input image height and output image height. + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using area interpolation. + */ +inline uint8_t pixel_area_c1u8_clamp( + const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + // Calculate sampling position + float in_x = (x + 0.5f) * wr - 0.5f; + float in_y = (y + 0.5f) * hr - 0.5f; + + // Get bounding box offsets + int x_from = std::floor(x * wr - 0.5f - in_x); + int y_from = std::floor(y * hr - 0.5f - in_y); + int x_to = std::ceil((x + 1) * wr - 0.5f - in_x); + int y_to = std::ceil((y + 1) * hr - 0.5f - in_y); + + // Clamp position to borders + in_x = std::max(-1.f, std::min(in_x, static_cast<float>(width))); + in_y = std::max(-1.f, std::min(in_y, static_cast<float>(height))); + + // Clamp bounding box offsets to borders + x_from = ((in_x + x_from) < -1) ? -1 : x_from; + y_from = ((in_y + y_from) < -1) ? -1 : y_from; + x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to; + y_to = ((in_y + y_to) > height) ? 
(height - in_y) : y_to; + + // Get pixel index + const int xi = std::floor(in_x); + const int yi = std::floor(in_y); + + // Bounding box elements in each dimension + const int x_elements = (x_to - x_from + 1); + const int y_elements = (y_to - y_from + 1); + ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0); + + // Sum pixels in area + int sum = 0; + for (int j = yi + y_from, je = yi + y_to; j <= je; ++j) + { + const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from; + sum = std::accumulate(ptr, ptr + x_elements, sum); + } + + // Return average + return sum / (x_elements * y_elements); +} + +/** Computes bilinear interpolation using the top-left, top-right, bottom-left, bottom-right pixels and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. + * + * @param[in] a00 The top-left pixel value. + * @param[in] a01 The top-right pixel value. + * @param[in] a10 The bottom-left pixel value. + * @param[in] a11 The bottom-right pixel value. 
+ * @param[in] dx_val Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy_val Pixel's distance between the Y real coordinate and the smallest Y following integer + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline float delta_bilinear(float a00, float a01, float a10, float a11, float dx_val, float dy_val) +{ + const float dx1_val = 1.0f - dx_val; + const float dy1_val = 1.0f - dy_val; + + const float w1 = dx1_val * dy1_val; + const float w2 = dx_val * dy1_val; + const float w3 = dx1_val * dy_val; + const float w4 = dx_val * dy_val; + return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; +} +} // namespace scale_helpers +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_SCALEHELPERS_H */ diff --git a/src/core/helpers/SoftmaxHelpers.cpp b/src/core/helpers/SoftmaxHelpers.cpp new file mode 100644 index 0000000000..8184991ab5 --- /dev/null +++ b/src/core/helpers/SoftmaxHelpers.cpp @@ -0,0 +1,45 @@ +/* +* Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/helpers/SoftmaxHelpers.h" + +namespace arm_compute +{ +namespace softmax_helpers +{ +PermutationVector get_permutation_vector_from_softmax_axis(size_t axis) +{ + switch (axis) + { + case 1: + return PermutationVector(1U, 0U, 2U, 3U); + case 2: + return PermutationVector(2U, 1U, 0U, 3U); + case 3: + return PermutationVector(3U, 1U, 2U, 0U); + default: + ARM_COMPUTE_ERROR("Axis not supported"); + } +} +} // namespace softmax_helpers +} // namespace arm_compute diff --git a/src/core/helpers/SoftmaxHelpers.h b/src/core/helpers/SoftmaxHelpers.h new file mode 100644 index 0000000000..de5490a14d --- /dev/null +++ b/src/core/helpers/SoftmaxHelpers.h @@ -0,0 +1,50 @@ +/* +* Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_SOFTMAXHELPERS_H
+#define SRC_CORE_HELPERS_SOFTMAXHELPERS_H
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace softmax_helpers
+{
+/** Given a softmax axis, this function returns the permutation vector required to put the axis to the front
+ *
+ * @note This function assumes a tensor rank <= 4
+ *
+ * Axis selects the dimension on which softmax is performed.
+ * E.g. For input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5.
+ * Internally softmax is always performed on the first dimension (front dimension), therefore permutation is
+ * required to put the dimension specified by @p axis to the first dimension.
+ *
+ * @param[in] axis Axis on which to perform softmax. Supported: 1, 2, 3 (0 implies no permutation needed)
+ *
+ * @return the permutation vector
+ */
+PermutationVector get_permutation_vector_from_softmax_axis(size_t axis);
+} // namespace softmax_helpers
+} // namespace arm_compute
+
+#endif /* SRC_CORE_HELPERS_SOFTMAXHELPERS_H */
diff --git a/src/core/helpers/Utils.cpp b/src/core/helpers/Utils.cpp
new file mode 100644
index 0000000000..f8895d8a3c
--- /dev/null
+++ b/src/core/helpers/Utils.cpp
@@ -0,0 +1,49 @@
+/*
+* Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/helpers/Utils.h" + +namespace arm_compute +{ +bool has_holes(const ITensorInfo &info) +{ + return has_holes(info, info.num_dimensions() - 1); +} + +bool has_holes(const ITensorInfo &info, size_t dimension) +{ + const auto &shape = info.tensor_shape(); + const auto &strides = info.strides_in_bytes(); + size_t squashed_bytes = info.element_size(); + + for (size_t dim = 0; dim <= dimension; ++dim) + { + if (strides[dim] != squashed_bytes) + { + return true; + } + squashed_bytes *= shape[dim]; + } + return false; +} +} // namespace arm_compute diff --git a/src/core/helpers/Utils.h b/src/core/helpers/Utils.h new file mode 100644 index 0000000000..a17a78f7ee --- /dev/null +++ b/src/core/helpers/Utils.h @@ -0,0 +1,120 @@ +/* +* Copyright (c) 2020-2021, 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CORE_HELPERS_UTILS_H +#define ACL_SRC_CORE_HELPERS_UTILS_H + +#include "arm_compute/core/ITensorInfo.h" + +namespace arm_compute +{ +/** Create a strides object based on the provided strides and the tensor dimensions. + * + * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides. + * @param[in] stride_x Stride to be used in X dimension (in bytes). + * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes). + * + * @return Strides object based on the specified strides. Missing strides are + * calculated based on the tensor shape and the strides of lower dimensions. + */ +template <typename T, typename... 
 Ts>
+inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&...fixed_strides)
+{
+    const TensorShape &shape = info.tensor_shape();
+
+    // Create strides object
+    Strides strides(stride_x, fixed_strides...);
+
+    for (size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
+    {
+        strides.set(i, shape[i - 1] * strides[i - 1]);
+    }
+
+    return strides;
+}
+
+/** Create a strides object based on the tensor dimensions.
+ *
+ * @param[in] info Tensor info object used to compute the strides.
+ *
+ * @return Strides object based on element size and tensor shape.
+ */
+template <typename... Ts>
+inline Strides compute_strides(const ITensorInfo &info)
+{
+    return compute_strides(info, info.element_size());
+}
+
+/** Given an integer value, this function returns the next power of two
+ *
+ * @param[in] x Input value
+ *
+ * @return the next power of two
+ */
+inline unsigned int get_next_power_two(unsigned int x)
+{
+    // Decrement by 1
+    x--;
+
+    // Shift right by 1
+    x |= x >> 1u;
+    // Shift right by 2
+    x |= x >> 2u;
+    // Shift right by 4
+    x |= x >> 4u;
+    // Shift right by 8
+    x |= x >> 8u;
+    // Shift right by 16
+    x |= x >> 16u;
+
+    // Increment by 1
+    x++;
+
+    return x;
+}
+
+/** Check if the tensor has any holes.
+ *
+ * A hole is defined as any gap in the tensor between two consecutive values. This can be a result of extending
+ * the paddings or manipulating the strides of the tensor
+ *
+ * @param[in] info Tensor info object defining the shape of the input tensor.
+ *
+ * @note This function checks for holes in all dimensions.
+ *
+ */
+bool has_holes(const ITensorInfo &info);
+
+/** Check if the tensor has any holes.
+ *
+ * @param[in] info Tensor info object defining the shape of the input tensor.
+ * @param[in] dimension Highest dimension to check.
+ *
+ * @note This function checks for holes in all the dimensions up to and including the highest dimension. 
+ * + */ +bool has_holes(const ITensorInfo &info, size_t dimension); + +} // namespace arm_compute + +#endif // ACL_SRC_CORE_HELPERS_UTILS_H diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp new file mode 100644 index 0000000000..30a55fcbc6 --- /dev/null +++ b/src/core/helpers/WindowHelpers.cpp @@ -0,0 +1,349 @@ +/* +* Copyright (c) 2020-2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/core/helpers/WindowHelpers.h" + +namespace arm_compute +{ +Window +calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) +{ + if (!skip_border) + { + border_size = BorderSize(0); + } + + const Coordinates &anchor = valid_region.anchor; + const TensorShape &shape = valid_region.shape; + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); + + size_t n = 1; + + if (anchor.num_dimensions() > 1) + { + window.set(1, + Window::Dimension( + // Skip the border above the image + anchor[1] + border_size.top, + // Skip the border below the image + anchor[1] + border_size.top + + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - + static_cast<int>(border_size.bottom)), + steps[1]), + steps[1])); + + ++n; + } + + if (anchor.num_dimensions() > 2) + { + window.set(2, Window::Dimension(anchor[2], std::max<size_t>(1, shape[2]), steps[2])); + + ++n; + } + + for (; n < anchor.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); + } + + for (; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +Window calculate_max_window(const TensorShape &shape, const Steps &steps, bool skip_border, BorderSize border_size) +{ + if (!skip_border) + { + border_size = BorderSize(0); + } + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step 
size + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); + + size_t n = 1; + + if (shape.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Skip the border above the image + border_size.top, + // Skip the border below the image + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - + static_cast<int>(border_size.top) - + static_cast<int>(border_size.bottom)), + steps[1]), + steps[1])); + + ++n; + } + + if (shape.num_dimensions() > 2) + { + window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[2]), steps[2])); + + ++n; + } + + for (; n < shape.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(0, std::max<size_t>(1, shape[n]))); + } + + for (; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps, BorderSize border_size) +{ + const Coordinates &anchor = valid_region.anchor; + const TensorShape &shape = valid_region.shape; + + Window window; + + window.set(0, Window::Dimension( + // move the anchor to the start from the border + anchor[0] - border_size.left, + // move the anchor to include the right end border + // Make sure the window width is a multiple of the step size + anchor[0] - border_size.left + + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]), + steps[0])); + + size_t n = 1; + + if (anchor.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Include the border above the image + anchor[1] - border_size.top, + // Include the border below the image + anchor[1] - border_size.top + + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]), + steps[1])); + + ++n; + } + + if (anchor.num_dimensions() > 2) + { + window.set(2, Window::Dimension(0, std::max<size_t>(1, 
shape[n]), steps[2])); + + ++n; + } + + for (; n < anchor.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); + } + + for (; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +Window calculate_max_window_horizontal(const ValidRegion &valid_region, + const Steps &steps, + bool skip_border, + BorderSize border_size) +{ + if (skip_border) + { + border_size.top = 0; + border_size.bottom = 0; + } + else + { + border_size.left = 0; + border_size.right = 0; + } + + const Coordinates &anchor = valid_region.anchor; + const TensorShape &shape = valid_region.shape; + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); + + size_t n = 1; + + if (anchor.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Skip the border above the image + anchor[1] - border_size.top, + // Skip the border below the image + anchor[1] + shape[1] + border_size.bottom, 1)); + + ++n; + } + + for (; n < anchor.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); + } + + for (; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1) +{ + const auto &shape0 = src0.tensor_shape(); + const auto &shape1 = src1.tensor_shape(); + const auto &strides0 = src0.strides_in_bytes(); + const auto &strides1 = src1.strides_in_bytes(); + const auto num_dimensions = std::max(src0.num_dimensions(), 
src1.num_dimensions()); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + + size_t squashed_bytes = src0.element_size(); + + // Try to squash the low dimensions together. + for (; dim < num_dimensions; ++dim) + { + if (shape0[dim] != shape1[dim] || strides0[dim] != squashed_bytes || strides1[dim] != squashed_bytes) + { + break; + } + + squashed_bytes *= shape0[dim]; + } + + if (dim == num_dimensions) + { + auto squashed_elements = squashed_bytes / src0.element_size(); + + split_dimension = Window::DimX; + + // The input tensors can be interpreted as 1D array. + win.set(0, Window::Dimension(0, squashed_elements, 1)); + + for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generates the max window. + for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, std::max(shape0[dim], shape1[dim]), 1)); + } + } + + return std::make_pair(win, split_dimension); +} + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src) +{ + const auto &shape = src.tensor_shape(); + const auto &strides = src.strides_in_bytes(); + const auto num_dimensions = src.num_dimensions(); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + size_t squashed_bytes = src.element_size(); + + // Try to squash the low dimensions together. + for (; dim < num_dimensions; ++dim) + { + if (strides[dim] != squashed_bytes) + { + break; + } + squashed_bytes *= shape[dim]; + } + if (dim == num_dimensions) + { + const auto squashed_elements = squashed_bytes / src.element_size(); + split_dimension = Window::DimX; + // The input tensor can be interpreted as 1D array. + win.set(0, Window::Dimension(0, squashed_elements, 1)); + for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generate the max window. 
+ for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, shape[dim], 1)); + } + } + return std::make_pair(win, split_dimension); +} + +} // namespace arm_compute diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h new file mode 100644 index 0000000000..e404c18e8a --- /dev/null +++ b/src/core/helpers/WindowHelpers.h @@ -0,0 +1,230 @@ +/* +* Copyright (c) 2020-2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_WINDOWHELPERS_H +#define SRC_CORE_HELPERS_WINDOWHELPERS_H + +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/Steps.h" +#include "arm_compute/core/Window.h" + +namespace arm_compute +{ +/** Update window and padding size for each of the access patterns. 
+ * + * First the window size is reduced based on all access patterns that are not + * allowed to modify the padding of the underlying tensor. Then the padding of + * the remaining tensors is increased to match the window. + * + * @param[in] win Window that is used by the kernel. + * @param[in] patterns Access patterns used to calculate the final window and padding. + * + * @return True if the window has been changed. Changes to the padding do not + * influence the returned value. + */ +template <typename... Ts> +bool update_window_and_padding(Window &win, Ts &&...patterns) +{ + bool window_changed = false; + + utility::for_each([&](const IAccessWindow &w) { window_changed |= w.update_window_if_needed(win); }, patterns...); + + utility::for_each([&](IAccessWindow &w) { w.update_padding_if_needed(win); }, patterns...); + + return window_changed; +} + +/** Intersect multiple valid regions. + * + * @param[in] regions Valid regions. + * + * @return Intersection of all regions. + */ +template <typename... Ts> +ValidRegion intersect_valid_regions(const Ts &...regions) +{ + auto intersect = [](const ValidRegion &r1, const ValidRegion &r2) -> ValidRegion + { + ValidRegion region; + + for (size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) + { + region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d])); + } + + for (size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) + { + region.shape.set(d, std::min(r1.shape[d], r2.shape[d])); + } + + return region; + }; + + return utility::foldl(intersect, regions...); +} + +#ifndef DOXYGEN_SKIP_THIS +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. 
+ * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window(const ValidRegion &valid_region, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] shape Shape of the tensor space + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window(const TensorShape &shape, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +inline Window calculate_max_window(const ITensorInfo &info, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()) +{ + return calculate_max_window(info.tensor_shape(), steps, skip_border, border_size); +} + +/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting + * + * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. 
+ * @param[in] border_size (Optional) Border size. The border region will be excluded from the window. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window_horizontal(const ValidRegion &valid_region, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); + +/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +inline Window calculate_max_window_horizontal(const ITensorInfo &info, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()) +{ + return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size); +} + +/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. + * + * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] border_size (Optional) Border size. The border region will be included in the window. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_enlarged_window(const ValidRegion &valid_region, + const Steps &steps = Steps(), + BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. 
+ * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] border_size (Optional) Border size. The border region will be included in the window. + * + * @return The maximum window the kernel can be executed on. + */ +inline Window calculate_max_enlarged_window(const ITensorInfo &info, + const Steps &steps = Steps(), + BorderSize border_size = BorderSize()) +{ + return calculate_max_enlarged_window(info.valid_region(), steps, border_size); +} + +/** Calculate the squashed or maximum window for the given tensor shape. + * + * If the tensor data resides continuously in the memory, the tensor can be interpreted + * as 1D array and all the dimensions can be squashed together into the x-dimension. + * Otherwise, generate the max window for the given tensor shape. + * + * @param[in] src Tensor info object defining the shape of the input tensor. + * + * @return The maximum window the kernel can be executed on and the preferred split dimension. + */ +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src); + +/** Calculate the squashed or maximum window for the given tensor shapes. + * + * If the tensor data resides continuously in the memory, the tensor can be interpreted + * as 1D array and all the dimensions can be squashed together into the x-dimension. + * Otherwise, generate the max window for the given tensor shapes. + * + * @param[in] src0 Tensor info object defining the shape of the first input tensor. + * @param[in] src1 Tensor info object defining the shape of the second input tensor. + * + * @return The squashed or maximum window the kernel can be executed on and the preferred split dimension. 
+ */ +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1); + +/** Function to compute the shape of output and window for the given inputs + * + * @param[in] infos Input tensor informations + * + * @return A pair of the shape and window + */ +template <typename... Shapes> +std::pair<TensorShape, Window> compute_output_shape_and_window(const Shapes &...shapes) +{ + const TensorShape out_shape = TensorShape::broadcast_shape(shapes...); + return std::make_pair(out_shape, calculate_max_window(out_shape)); +} +#endif /* DOXYGEN_SKIP_THIS */ +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_WINDOWHELPERS_H */ |