From 68dd25fbe6e4d3c3513fa5993863419769aa08fc Mon Sep 17 00:00:00 2001
From: Sang-Hoon Park <sang-hoon.park@arm.com>
Date: Mon, 19 Oct 2020 16:00:11 +0100
Subject: COMPMID-3637: Move utility headers from arm_compute to src

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: If9d6fa8c900b68c4b6fd373f2fc1f9abb83ea917
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4145
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/AccessWindowAutoPadding.h         |  85 ---
 arm_compute/core/AccessWindowStatic.h              | 101 ---
 arm_compute/core/AccessWindowTranspose.h           |  48 --
 arm_compute/core/CL/CLValidate.h                   |  61 --
 arm_compute/core/CL/ICLGEMMKernelConfiguration.h   |  68 --
 arm_compute/core/CL/ICLKernel.h                    |   1 +
 arm_compute/core/CL/gemm/CLGEMMHelpers.h           |  73 --
 .../gemm/native/CLGEMMNativeKernelConfiguration.h  |  65 --
 .../CLGEMMNativeKernelConfigurationBifrost.h       |  56 --
 .../CLGEMMNativeKernelConfigurationMidgard.h       |  51 --
 .../CLGEMMNativeKernelConfigurationValhall.h       |  53 --
 .../reshaped/CLGEMMReshapedKernelConfiguration.h   |  63 --
 .../CLGEMMReshapedKernelConfigurationBifrost.h     |  56 --
 .../CLGEMMReshapedKernelConfigurationValhall.h     |  53 --
 .../CLGEMMReshapedOnlyRHSKernelConfiguration.h     |  63 --
 ...GEMMReshapedOnlyRHSKernelConfigurationBifrost.h |  59 --
 ...GEMMReshapedOnlyRHSKernelConfigurationValhall.h |  53 --
 arm_compute/core/CPP/Validate.h                    | 117 ----
 arm_compute/core/GPUTarget.h                       |   4 +-
 arm_compute/core/Helpers.h                         | 615 +----------------
 arm_compute/core/Helpers.inl                       | 134 ----
 arm_compute/core/ITensorInfo.h                     |   4 +-
 .../NEDepthwiseConvolutionLayerNativeKernel.h      |   1 +
 .../NEON/kernels/assembly/INEGEMMWrapperKernel.h   | 108 ---
 .../NEDepthwiseConvolutionAssemblyKernelWrapper.h  |  88 ---
 .../NEON/kernels/convolution/common/activation.hpp |  37 -
 .../core/NEON/kernels/convolution/common/alloc.hpp |  31 -
 .../core/NEON/kernels/convolution/common/arm.hpp   |  39 --
 .../kernels/convolution/common/convolution.hpp     |  29 -
 .../NEON/kernels/convolution/common/padding.hpp    |  91 ---
 .../core/NEON/kernels/convolution/common/perf.h    |  32 -
 .../NEON/kernels/convolution/common/qasymm8.hpp    |  54 --
 .../NEON/kernels/convolution/common/qsymm8.hpp     |  76 ---
 .../core/NEON/kernels/convolution/common/shims.hpp | 749 ---------------------
 .../NEON/kernels/convolution/common/tensor.hpp     | 178 -----
 .../kernels/convolution/common/tensor_utils.hpp    |  46 --
 .../core/NEON/kernels/convolution/common/utils.hpp |  60 --
 .../kernels/convolution/depthwise/depthwise.hpp    | 551 ---------------
 .../convolution/depthwise/depthwise_dilated.hpp    | 156 -----
 .../convolution/depthwise/depthwise_quantized.hpp  | 291 --------
 .../depthwise/depthwise_quantized_dilated.hpp      |  88 ---
 arm_compute/core/SubTensorInfo.h                   |   3 +-
 arm_compute/core/utils/helpers/bit_ops.h           |  52 --
 arm_compute/core/utils/helpers/fft.h               |  55 --
 arm_compute/core/utils/helpers/float_ops.h         | 116 ----
 arm_compute/core/utils/helpers/tensor_info.h       |  57 --
 arm_compute/core/utils/math/SafeOps.h              |   6 +-
 arm_compute/core/utils/misc/CRTP.h                 |  55 --
 arm_compute/core/utils/misc/Cast.h                 | 119 ----
 arm_compute/core/utils/misc/ICloneable.h           |  48 --
 arm_compute/core/utils/misc/Iterable.h             | 108 ---
 arm_compute/core/utils/misc/Random.h               |  98 ---
 arm_compute/core/utils/misc/Requires.h             |  51 --
 arm_compute/core/utils/misc/Rounding.h             | 205 ------
 arm_compute/core/utils/misc/SaturateCast.h         | 218 ------
 55 files changed, 13 insertions(+), 5766 deletions(-)
 delete mode 100644 arm_compute/core/AccessWindowAutoPadding.h
 delete mode 100644 arm_compute/core/AccessWindowStatic.h
 delete mode 100644 arm_compute/core/AccessWindowTranspose.h
 delete mode 100644 arm_compute/core/CL/CLValidate.h
 delete mode 100644 arm_compute/core/CL/ICLGEMMKernelConfiguration.h
 delete mode 100644 arm_compute/core/CL/gemm/CLGEMMHelpers.h
 delete mode 100644 arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
 delete mode 100644 arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
 delete mode 100644 arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h
 delete mode 100644 arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
 delete mode 100644 arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
 delete mode 100644 arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
 delete mode 100644 arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
 delete mode 100644 arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
 delete mode 100644 arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
 delete mode 100644 arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
 delete mode 100644 arm_compute/core/CPP/Validate.h
 delete mode 100644 arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/activation.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/alloc.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/arm.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/convolution.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/padding.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/perf.h
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/shims.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/tensor.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/utils.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp
 delete mode 100644 arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp
 delete mode 100644 arm_compute/core/utils/helpers/bit_ops.h
 delete mode 100644 arm_compute/core/utils/helpers/fft.h
 delete mode 100644 arm_compute/core/utils/helpers/float_ops.h
 delete mode 100644 arm_compute/core/utils/helpers/tensor_info.h
 delete mode 100644 arm_compute/core/utils/misc/CRTP.h
 delete mode 100644 arm_compute/core/utils/misc/Cast.h
 delete mode 100644 arm_compute/core/utils/misc/ICloneable.h
 delete mode 100644 arm_compute/core/utils/misc/Iterable.h
 delete mode 100644 arm_compute/core/utils/misc/Random.h
 delete mode 100644 arm_compute/core/utils/misc/Requires.h
 delete mode 100644 arm_compute/core/utils/misc/Rounding.h
 delete mode 100644 arm_compute/core/utils/misc/SaturateCast.h

(limited to 'arm_compute/core')

diff --git a/arm_compute/core/AccessWindowAutoPadding.h b/arm_compute/core/AccessWindowAutoPadding.h
deleted file mode 100644
index 12d65532cb..0000000000
--- a/arm_compute/core/AccessWindowAutoPadding.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H
-#define ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class Window;
-class ITensorInfo;
-
-/** Dummy access window.
- *
- * This implementation always uses the auto padding of the tensor info and
- * never updates the window. The valid region is always set to cover the entire
- * tensor.
- *
- * @note This access window is only used during the migration to the new
- *       padding system. It will be removed once all kernels have been ported.
- *
- * */
-class AccessWindowAutoPadding : public IAccessWindow
-{
-public:
-    /** Default constructor.
-     *
-     * @param[in,out] info Tensor info of the accessed kernel.
-     */
-    AccessWindowAutoPadding(ITensorInfo *info);
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete;
-    /** Allow instances of this class to be move constructed */
-    AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default;
-    /** Allow instances of this class to be moved */
-    AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default;
-    /** Default destructor */
-    ~AccessWindowAutoPadding() = default;
-
-    /** Set the valid region to match the entire tensor. */
-    void set_valid_region();
-
-    /** Return a valid region that spans across the entire tensor.
-     *
-     * @return a valid region.
-     *
-     */
-    ValidRegion compute_valid_region() const;
-
-    // Inherited methods overridden:
-    bool update_window_if_needed(Window &window) const override;
-    bool update_padding_if_needed(const Window &window) override;
-    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
-
-private:
-    ITensorInfo *_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H*/
diff --git a/arm_compute/core/AccessWindowStatic.h b/arm_compute/core/AccessWindowStatic.h
deleted file mode 100644
index 1f2ca1b470..0000000000
--- a/arm_compute/core/AccessWindowStatic.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IACCESS_WINDOW_STATIC_H
-#define ARM_COMPUTE_IACCESS_WINDOW_STATIC_H
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-
-#include <array>
-
-namespace arm_compute
-{
-class Window;
-class ITensorInfo;
-
-/** Implementation of a static rectangular access pattern.
- *
- * In this implementation the access offsets and sizes are not relative to the
- * current element. Instead they are considered to be absolute coordinates
- * within the accessed tensor's shape.
- *
- * */
-class AccessWindowStatic : public IAccessWindow
-{
-public:
-    /** Constructor for a static access pattern.
-     *
-     * @param[in,out] info    Tensor info of the accessed kernel.
-     * @param[in]     start_x Start of the access in X direction.
-     * @param[in]     start_y Start of the access in Y direction.
-     * @param[in]     end_x   End of the access in X direction.
-     * @param[in]     end_y   End of the access in Y direction.
-     */
-    AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y);
-
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    AccessWindowStatic(const AccessWindowStatic &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    AccessWindowStatic &operator=(const AccessWindowStatic &) = delete;
-    /** Allow instances of this class to be move constructed */
-    AccessWindowStatic(AccessWindowStatic &&) = default;
-    /** Allow instances of this class to be moved */
-    AccessWindowStatic &operator=(AccessWindowStatic &&) = default;
-    /** Default destructor */
-    ~AccessWindowStatic() = default;
-
-    /** Set the valid region based on the static access pattern and valid
-     *  region of the inputs.
-     *
-     * @param[in] window             Execution window of the kernel.
-     * @param[in] input_valid_region Combined valid region of all inputs.
-     */
-    void set_valid_region(const Window &window, const ValidRegion &input_valid_region);
-
-    /** Compute the valid region based on the static access pattern and valid region of the inputs.
-     *
-     * @param[in] window             Execution window of the kernel.
-     * @param[in] input_valid_region Combined valid region of all inputs.
-     *
-     * @return a valid region.
-     *
-     */
-    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const;
-
-    // Inherited methods overriden:
-    bool update_window_if_needed(Window &window) const override;
-    bool update_padding_if_needed(const Window &window) override;
-    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
-
-private:
-    ITensorInfo *_info;
-    int          _start_x;
-    int          _start_y;
-    int          _end_x;
-    int          _end_y;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_IACCESS_WINDOW_STATIC_H*/
diff --git a/arm_compute/core/AccessWindowTranspose.h b/arm_compute/core/AccessWindowTranspose.h
deleted file mode 100644
index 85709092c3..0000000000
--- a/arm_compute/core/AccessWindowTranspose.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H
-#define ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class Window;
-class ITensorInfo;
-
-/** Implementation of a XY-transpose access pattern. */
-class AccessWindowTranspose : public AccessWindowRectangle
-{
-public:
-    using AccessWindowRectangle::AccessWindowRectangle;
-    bool update_window_if_needed(Window &window) const override;
-    bool update_padding_if_needed(const Window &window) override;
-    using AccessWindowRectangle::compute_valid_region;
-    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H*/
diff --git a/arm_compute/core/CL/CLValidate.h b/arm_compute/core/CL/CLValidate.h
deleted file mode 100644
index 3f8b76ba4c..0000000000
--- a/arm_compute/core/CL/CLValidate.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_VALIDATE_H
-#define ARM_COMPUTE_CL_VALIDATE_H
-
-#include "arm_compute/core/Validate.h"
-
-namespace arm_compute
-{
-#define ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(tensor) \
-    ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))
-
-#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor) \
-    ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))
-
-/** Return an error if int64_base_atomics extension is not supported by the device.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file     Name of the file where the error occurred.
- * @param[in] line     Line on which the error occurred.
- *
- * @return Status
- */
-inline arm_compute::Status error_on_unsupported_int64_base_atomics(const char *function, const char *file, const int line)
-{
-    if(!CLKernelLibrary::get().int64_base_atomics_supported())
-    {
-        return ARM_COMPUTE_CREATE_ERROR_LOC(arm_compute::ErrorCode::UNSUPPORTED_EXTENSION_USE, function, file, line, "Atomic functions are not supported");
-    }
-    return arm_compute::Status{};
-}
-
-#define ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
-    ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));
-
-#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
-    ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));
-
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_VALIDATE_H */
diff --git a/arm_compute/core/CL/ICLGEMMKernelConfiguration.h b/arm_compute/core/CL/ICLGEMMKernelConfiguration.h
deleted file mode 100644
index 90600efba5..0000000000
--- a/arm_compute/core/CL/ICLGEMMKernelConfiguration.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H
-#define ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H
-
-#include "arm_compute/core/GPUTarget.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Basic interface for the GEMM kernel configuration */
-class ICLGEMMKernelConfiguration
-{
-public:
-    /** Constructor
-     *
-     * @param[in] arch GPU target
-     */
-    ICLGEMMKernelConfiguration(GPUTarget arch)
-        : _target(arch)
-    {
-    }
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    ICLGEMMKernelConfiguration(const ICLGEMMKernelConfiguration &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    ICLGEMMKernelConfiguration &operator=(const ICLGEMMKernelConfiguration &) = delete;
-    /** Default Move Constructor. */
-    ICLGEMMKernelConfiguration(ICLGEMMKernelConfiguration &&) = default;
-    /** Default move assignment operator */
-    ICLGEMMKernelConfiguration &operator=(ICLGEMMKernelConfiguration &&) = default;
-    /** Virtual destructor */
-    virtual ~ICLGEMMKernelConfiguration() = default;
-    /** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used
-     *
-     * @param[in] m         Number of rows LHS matrix
-     * @param[in] n         Number of columns RHS matrix
-     * @param[in] k         Number of columns LHS matrix or number of rows RHS matrix
-     * @param[in] b         Batch size
-     * @param[in] data_type Data type
-     */
-    virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
-
-protected:
-    GPUTarget _target;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H */
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
index d4990a1dee..a24cd8c798 100644
--- a/arm_compute/core/CL/ICLKernel.h
+++ b/arm_compute/core/CL/ICLKernel.h
@@ -29,6 +29,7 @@
 #include "arm_compute/core/CL/OpenCL.h"
 #include "arm_compute/core/GPUTarget.h"
 #include "arm_compute/core/IKernel.h"
+#include "arm_compute/core/Validate.h"
 #include "arm_compute/core/experimental/Types.h"
 
 #include <string>
diff --git a/arm_compute/core/CL/gemm/CLGEMMHelpers.h b/arm_compute/core/CL/gemm/CLGEMMHelpers.h
deleted file mode 100644
index 013c068cf7..0000000000
--- a/arm_compute/core/CL/gemm/CLGEMMHelpers.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMHELPERS_H
-#define ARM_COMPUTE_CLGEMMHELPERS_H
-
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensorInfo;
-struct GEMMRHSMatrixInfo;
-
-namespace cl_gemm
-{
-/** Configure @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
- *
- * @param[in] m                  Number of rows (M) in the LHS matrix not reshaped
- * @param[in] n                  Number of columns (N) in the RHS matrix not reshaped
- * @param[in] m0                 Number of rows processed by each thread/work-item
- * @param[in] n0                 Number of columns processed by each thread/work-item
- * @param[in] k0                 Number of inner accumulation performed by each thread/work-item
- * @param[in] v0                 Number of vertical blocks of size (m0xk0) stored on the same output row
- * @param[in] h0                 Number of horizontal blocks of size (k0xn0) stored on the same output row
- * @param[in] lhs_interleave     True if the v0 (m0xk0) blocks have to be interleaved in the output row
- * @param[in] rhs_interleave     True if the h0 (k0xn0) blocks have to be interleaved in the output row
- * @param[in] lhs_transpose      True if the (m0xk0) block has to be transposed before been stored
- * @param[in] rhs_transpose      True if the (k0xn0) block has to be transposed before been stored
- * @param[in] export_to_cl_image (Optional) True if the RHS reshaped matrix has to be exported to cl_image
- *
- * @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
- */
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
-                                                                       bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image = false);
-
-/** Update padding required to export the OpenCL buffer to OpenCL image2d
- *
- * @param[in,out] tensor ITensorInfo of the tensor required to be exported to OpenCL image2d
- */
-void update_padding_for_cl_image(ITensorInfo *tensor);
-
-/** Utility function to validate the image2d OpenCL object support on the RHS reshaped matrix
- *
- * @param[in] tensor_reshaped_info TensorInfo for the RHS reshaped matrix
- * @param[in] rhs_info             @ref GEMMRHSMatrixInfo
- *
- * @return Status reporting if we can use the image2d OpenCL object on the RHS reshaped matrix
- */
-Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info);
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMHELPERS_H */
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
deleted file mode 100644
index 7270a8e6db..0000000000
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H
-#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h"
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h"
-#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** CLGEMMNative factory class */
-class CLGEMMNativeKernelConfigurationFactory final
-{
-public:
-    /** Static method to construct CLGEMMNative kernel object accordingly with the GPU target
-     *
-     * @param[in] gpu GPU target
-     *
-     * @return CLGEMMNative kernel configuration class
-     */
-    static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu)
-    {
-        switch(get_arch_from_target(gpu))
-        {
-            case GPUTarget::MIDGARD:
-                return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationMidgard>(gpu);
-            case GPUTarget::BIFROST:
-                return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationBifrost>(gpu);
-            case GPUTarget::VALHALL:
-                return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationValhall>(gpu);
-            default:
-                ARM_COMPUTE_ERROR("Not supported GPU target");
-        }
-    }
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H */
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
deleted file mode 100644
index 1e4989615e..0000000000
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H
-#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Bifrost based OpenCL GEMMNative configuration */
-class CLGEMMNativeKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
-{
-public:
-    /** Constructor
-     *
-     * @param[in] gpu GPU target
-     */
-    CLGEMMNativeKernelConfigurationBifrost(GPUTarget gpu);
-
-    // Inherited overridden method
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H */
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h
deleted file mode 100644
index 4cebfceb75..0000000000
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H
-#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Midgard based OpenCL GEMMNative configuration */
-class CLGEMMNativeKernelConfigurationMidgard final : public ICLGEMMKernelConfiguration
-{
-public:
-    /** Constructor
-     *
-     * @param[in] gpu GPU target
-     */
-    CLGEMMNativeKernelConfigurationMidgard(GPUTarget gpu);
-
-    // Inherited overridden method
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H */
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
deleted file mode 100644
index 07389ea76f..0000000000
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H
-#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Valhall based OpenCL GEMMNative configuration */
-class CLGEMMNativeKernelConfigurationValhall final : public ICLGEMMKernelConfiguration
-{
-public:
-    /** Constructor
-     *
-     * @param[in] gpu GPU target
-     */
-    CLGEMMNativeKernelConfigurationValhall(GPUTarget gpu);
-
-    // Inherited overridden method
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H */
diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
deleted file mode 100644
index b953fd264f..0000000000
--- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h"
-#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** CLGEMMReshaped factory class */
-class CLGEMMReshapedKernelConfigurationFactory final
-{
-public:
-    /** Static method to call the CLGEMMReshaped kernel configuration class accordingly with the GPU target
-     *
-     * @param[in] gpu GPU target
-     *
-     * @return CLGEMMReshaped kernel configuration class
-     */
-    static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu)
-    {
-        switch(get_arch_from_target(gpu))
-        {
-            case GPUTarget::MIDGARD:
-            case GPUTarget::BIFROST:
-                return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationBifrost>(gpu);
-            case GPUTarget::VALHALL:
-                return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationValhall>(gpu);
-            default:
-                ARM_COMPUTE_ERROR("Not supported GPU target");
-        }
-    }
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H */
diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
deleted file mode 100644
index 4df27843aa..0000000000
--- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Bifrost based OpenCL GEMMReshaped configuration */
-class CLGEMMReshapedKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
-{
-public:
-    /** Constructor
-     *
-     * @param[in] gpu GPU target
-     */
-    CLGEMMReshapedKernelConfigurationBifrost(GPUTarget gpu);
-
-    // Inherited overridden method
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H */
diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
deleted file mode 100644
index 7a617e05be..0000000000
--- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Valhall based OpenCL GEMMReshaped configuration */
-class CLGEMMReshapedKernelConfigurationValhall final : public ICLGEMMKernelConfiguration
-{
-public:
-    /** Constructor
-     *
-     * @param[in] gpu GPU target
-     */
-    CLGEMMReshapedKernelConfigurationValhall(GPUTarget gpu);
-
-    // Inherited overridden method
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H */
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
deleted file mode 100644
index 6d5ce8835b..0000000000
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h"
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** CLGEMMReshapedOnlyRHS factory class */
-class CLGEMMReshapedOnlyRHSKernelConfigurationFactory final
-{
-public:
-    /** Static method to call the CLGEMMReshapedOnlyRHS kernel configuration class accordingly with the GPU target
-     *
-     * @param[in] gpu GPU target
-     *
-     * @return CLGEMMReshapedOnlyRHS kernel configuration class
-     */
-    static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu)
-    {
-        switch(get_arch_from_target(gpu))
-        {
-            case GPUTarget::MIDGARD:
-            case GPUTarget::BIFROST:
-                return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationBifrost>(gpu);
-            case GPUTarget::VALHALL:
-                return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationValhall>(gpu);
-            default:
-                ARM_COMPUTE_ERROR("Not supported GPU target");
-        }
-    }
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H */
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
deleted file mode 100644
index 346bfd7b91..0000000000
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Bifrost based OpenCL GEMMReshapedOnlyRHS configuration */
-class CLGEMMReshapedOnlyRHSKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
-{
-public:
-    /** Constructor
-     *
-     * @param[in] gpu GPU target
-     */
-    CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu);
-
-    // Inherited overridden method
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H */
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
deleted file mode 100644
index 2162baf338..0000000000
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H
-#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H
-
-#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Valhall based OpenCL GEMMReshapedOnlyRHS configuration */
-class CLGEMMReshapedOnlyRHSKernelConfigurationValhall final : public ICLGEMMKernelConfiguration
-{
-public:
-    /** Constructor
-     *
-     * @param[in] gpu GPU target
-     */
-    CLGEMMReshapedOnlyRHSKernelConfigurationValhall(GPUTarget gpu);
-
-    // Inherited overridden method
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H */
diff --git a/arm_compute/core/CPP/Validate.h b/arm_compute/core/CPP/Validate.h
deleted file mode 100644
index 9e95f72c3f..0000000000
--- a/arm_compute/core/CPP/Validate.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CPP_VALIDATE_H
-#define ARM_COMPUTE_CPP_VALIDATE_H
-
-#include "arm_compute/core/Validate.h"
-
-namespace arm_compute
-{
-/** Return an error if the data type of the passed tensor info is FP16 and FP16 support is not compiled in.
- *
- * @param[in] function    Function in which the error occurred.
- * @param[in] file        Name of the file where the error occurred.
- * @param[in] line        Line on which the error occurred.
- * @param[in] tensor_info Tensor info to validate.
- *
- * @return Status
- */
-inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line,
-                                            const ITensorInfo *tensor_info)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
-#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-    ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::F16,
-                                        function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above");
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-    return Status {};
-}
-
-/** Return an error if the data type of the passed tensor info is BFLOAT16 and BFLOAT16 support is not compiled in.
- *
- * @param[in] function    Function in which the error occurred.
- * @param[in] file        Name of the file where the error occurred.
- * @param[in] line        Line on which the error occurred.
- * @param[in] tensor_info Tensor info to validate.
- *
- * @return Status
- */
-inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line,
-                                            const ITensorInfo *tensor_info)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
-#if !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16))
-    ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::BFLOAT16,
-                                        function, file, line, "This CPU architecture does not support BFloat16 data type, you need v8.6 or above");
-#endif /* !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) */
-    return Status {};
-}
-
-/** Return an error if the data type of the passed tensor is FP16 and FP16 support is not compiled in.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file     Name of the file where the error occurred.
- * @param[in] line     Line on which the error occurred.
- * @param[in] tensor   Tensor to validate.
- *
- * @return Status
- */
-inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line,
-                                            const ITensor *tensor)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
-    ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info()));
-    return Status{};
-}
-
-/** Return an error if the data type of the passed tensor is BFLOAT16 and BFLOAT16 support is not compiled in.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file     Name of the file where the error occurred.
- * @param[in] line     Line on which the error occurred.
- * @param[in] tensor   Tensor to validate.
- *
- * @return Status
- */
-inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line,
-                                            const ITensor *tensor)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
-    ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(function, file, line, tensor->info()));
-    return Status{};
-}
-
-#define ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \
-    ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor))
-
-#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \
-    ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor))
-
-#define ARM_COMPUTE_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \
-    ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor))
-
-#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \
-    ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor))
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPP_VALIDATE_H */
diff --git a/arm_compute/core/GPUTarget.h b/arm_compute/core/GPUTarget.h
index 06025ca3ae..b8143f8d5c 100644
--- a/arm_compute/core/GPUTarget.h
+++ b/arm_compute/core/GPUTarget.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_GPUTARGET_H
 #define ARM_COMPUTE_GPUTARGET_H
 
-#include "arm_compute/core/Helpers.h"
+#include "support/Traits.h"
 
 #include <string>
 
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index 90dd6082e1..5a8d6efe9d 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -24,23 +24,17 @@
 #ifndef ARM_COMPUTE_HELPERS_H
 #define ARM_COMPUTE_HELPERS_H
 
-#include "arm_compute/core/Coordinates.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/Steps.h"
-#include "arm_compute/core/Strides.h"
-#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
-#include "support/MemorySupport.h"
 
 #include <array>
 #include <cstddef>
 #include <cstdint>
-#include <memory>
 #include <tuple>
-#include <type_traits>
-#include <utility>
 
 namespace arm_compute
 {
@@ -48,307 +42,6 @@ class IKernel;
 class ITensor;
 class ITensorInfo;
 
-/** Disable bitwise operations by default */
-template <typename T>
-struct enable_bitwise_ops
-{
-    static constexpr bool value = false; /**< Disabled */
-};
-
-#ifndef DOXYGEN_SKIP_THIS
-template <typename T>
-typename std::enable_if<enable_bitwise_ops<T>::value, T>::type operator&(T lhs, T rhs)
-{
-    using underlying_type = typename std::underlying_type<T>::type;
-    return static_cast<T>(static_cast<underlying_type>(lhs) & static_cast<underlying_type>(rhs));
-}
-#endif /* DOXYGEN_SKIP_THIS */
-
-/** Helper function to create and return a unique_ptr pointed to a CL/GLES kernel object
- *  It also calls the kernel's configuration.
- *
- * @param[in] args All the arguments that need pass to kernel's configuration.
- *
- * @return A unique pointer pointed to a CL/GLES kernel object
- */
-template <typename Kernel, typename... T>
-std::unique_ptr<Kernel> create_configure_kernel(T &&... args)
-{
-    std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
-    k->configure(std::forward<T>(args)...);
-    return k;
-}
-
-/** Helper function to create and return a unique_ptr pointed to a CL/GLES kernel object
- *
- * @return A unique pointer pointed to a Kernel kernel object
- */
-template <typename Kernel>
-std::unique_ptr<Kernel> create_kernel()
-{
-    std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
-    return k;
-}
-
-namespace traits
-{
-/** Check if a type T is contained in a tuple Tuple of types */
-template <typename T, typename Tuple>
-struct is_contained;
-
-template <typename T>
-struct is_contained<T, std::tuple<>> : std::false_type
-{
-};
-
-template <typename T, typename... Ts>
-struct is_contained<T, std::tuple<T, Ts...>> : std::true_type
-{
-};
-
-template <typename T, typename U, typename... Ts>
-struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...>>
-{
-};
-}
-
-/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
- * @param[in] stride    Stride to access the bottom-left and bottom-right pixel values
- * @param[in] dx        Pixel's distance between the X real coordinate and the smallest X following integer
- * @param[in] dy        Pixel's distance between the Y real coordinate and the smallest Y following integer
- *
- * @note dx and dy must be in the range [0, 1.0]
- *
- * @return The bilinear interpolated pixel value
- */
-template <typename T>
-inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy)
-{
-    ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
-    const float dx1 = 1.0f - dx;
-    const float dy1 = 1.0f - dy;
-
-    const T a00 = *pixel_ptr;
-    const T a01 = *(pixel_ptr + 1);
-    const T a10 = *(pixel_ptr + stride);
-    const T a11 = *(pixel_ptr + stride + 1);
-
-    const float w1 = dx1 * dy1;
-    const float w2 = dx * dy1;
-    const float w3 = dx1 * dy;
-    const float w4 = dx * dy;
-
-    return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
-}
-
-/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
- * @param[in] stride    Stride to access the bottom-left and bottom-right pixel values
- * @param[in] dx        Pixel's distance between the X real coordinate and the smallest X following integer
- * @param[in] dy        Pixel's distance between the Y real coordinate and the smallest Y following integer
- * @param[in] iq_info   Input QuantizationInfo
- * @param[in] oq_info   Output QuantizationInfo
- *
- * @note dx and dy must be in the range [0, 1.0]
- *
- * @return The bilinear interpolated pixel value
- */
-inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info)
-{
-    ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
-    const float dx1 = 1.0f - dx;
-    const float dy1 = 1.0f - dy;
-
-    const float a00 = dequantize_qasymm8(*pixel_ptr, iq_info);
-    const float a01 = dequantize_qasymm8(*(pixel_ptr + 1), iq_info);
-    const float a10 = dequantize_qasymm8(*(pixel_ptr + stride), iq_info);
-    const float a11 = dequantize_qasymm8(*(pixel_ptr + stride + 1), iq_info);
-
-    const float w1  = dx1 * dy1;
-    const float w2  = dx * dy1;
-    const float w3  = dx1 * dy;
-    const float w4  = dx * dy;
-    float       res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
-    return static_cast<uint8_t>(quantize_qasymm8(res, oq_info));
-}
-
-/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8_SIGNED and in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
- * @param[in] stride    Stride to access the bottom-left and bottom-right pixel values
- * @param[in] dx        Pixel's distance between the X real coordinate and the smallest X following integer
- * @param[in] dy        Pixel's distance between the Y real coordinate and the smallest Y following integer
- * @param[in] iq_info   Input QuantizationInfo
- * @param[in] oq_info   Output QuantizationInfo
- *
- * @note dx and dy must be in the range [0, 1.0]
- *
- * @return The bilinear interpolated pixel value
- */
-inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride, float dx, float dy, UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info)
-{
-    ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
-    const float dx1 = 1.0f - dx;
-    const float dy1 = 1.0f - dy;
-
-    const float a00 = dequantize_qasymm8_signed(*pixel_ptr, iq_info);
-    const float a01 = dequantize_qasymm8_signed(*(pixel_ptr + 1), iq_info);
-    const float a10 = dequantize_qasymm8_signed(*(pixel_ptr + stride), iq_info);
-    const float a11 = dequantize_qasymm8_signed(*(pixel_ptr + stride + 1), iq_info);
-
-    const float w1  = dx1 * dy1;
-    const float w2  = dx * dy1;
-    const float w3  = dx1 * dy;
-    const float w4  = dx * dy;
-    float       res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
-    return static_cast<int8_t>(quantize_qasymm8_signed(res, oq_info));
-}
-
-/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top pixel value of a single channel input.
- * @param[in] stride    Stride to access the bottom pixel value
- * @param[in] dy        Pixel's distance between the Y real coordinate and the smallest Y following integer
- *
- * @note dy must be in the range [0, 1.0]
- *
- * @return The linear interpolated pixel value
- */
-template <typename T>
-inline T delta_linear_c1_y(const T *pixel_ptr, size_t stride, float dy)
-{
-    ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
-    const float dy1 = 1.0f - dy;
-
-    const T a00 = *pixel_ptr;
-    const T a10 = *(pixel_ptr + stride);
-
-    const float w1 = dy1;
-    const float w3 = dy;
-
-    return static_cast<T>(a00 * w1 + a10 * w3);
-}
-/** Computes linear interpolation using the pointer to the left pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the left pixel value of a single channel input.
- * @param[in] dx        Pixel's distance between the X real coordinate and the smallest X following integer
- *
- * @note dx must be in the range [0, 1.0]
- *
- * @return The linear interpolated pixel value
- */
-template <typename T>
-inline T delta_linear_c1_x(const T *pixel_ptr, float dx)
-{
-    ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
-    const T a00 = *pixel_ptr;
-    const T a01 = *(pixel_ptr + 1);
-
-    const float dx1 = 1.0f - dx;
-
-    const float w1 = dx1;
-    const float w2 = dx;
-
-    return static_cast<T>(a00 * w1 + a01 * w2);
-}
-/** Return the pixel at (x,y) using bilinear interpolation.
- *
- * @warning Only works if the iterator was created with an IImage
- *
- * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input.
- * @param[in] stride          Stride in bytes of the image;
- * @param[in] x               X position of the wanted pixel
- * @param[in] y               Y position of the wanted pixel
- *
- * @return The pixel at (x, y) using bilinear interpolation.
- */
-template <typename T>
-inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y)
-{
-    ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
-
-    const int32_t xi = std::floor(x);
-    const int32_t yi = std::floor(y);
-
-    const float dx = x - xi;
-    const float dy = y - yi;
-
-    return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy);
-}
-
-/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input
- *
- * @warning Only works if the iterator was created with an IImage
- *
- * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image.
- * @param[in] stride          Stride in bytes of the image
- * @param[in] width           Width of the image
- * @param[in] height          Height of the image
- * @param[in] x               X position of the wanted pixel
- * @param[in] y               Y position of the wanted pixel
- *
- * @return The pixel at (x, y) using bilinear interpolation.
- */
-template <typename T>
-inline uint8_t pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y)
-{
-    ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
-
-    x = std::max(-1.f, std::min(x, static_cast<float>(width)));
-    y = std::max(-1.f, std::min(y, static_cast<float>(height)));
-
-    const float xi = std::floor(x);
-    const float yi = std::floor(y);
-
-    const float dx = x - xi;
-    const float dy = y - yi;
-
-    if(dx == 0.0f)
-    {
-        if(dy == 0.0f)
-        {
-            return static_cast<T>(first_pixel_ptr[static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride]);
-        }
-        return delta_linear_c1_y(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dy);
-    }
-    if(dy == 0.0f)
-    {
-        return delta_linear_c1_x(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, dx);
-    }
-    return delta_bilinear_c1(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dx, dy);
-}
-
-/** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8
- *
- * @note The interpolation area depends on the width and height ration of the input and output images
- * @note Currently average of the contributing pixels is calculated
- *
- * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image.
- * @param[in] stride          Stride in bytes of the image
- * @param[in] width           Width of the image
- * @param[in] height          Height of the image
- * @param[in] wr              Width ratio among the input image width and output image width.
- * @param[in] hr              Height ratio among the input image height and output image height.
- * @param[in] x               X position of the wanted pixel
- * @param[in] y               Y position of the wanted pixel
- *
- * @return The pixel at (x, y) using area interpolation.
- */
-inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y);
-
 /** Iterator updated by @ref execute_window_loop for each window element */
 class Iterator
 {
@@ -421,179 +114,6 @@ private:
 template <typename L, typename... Ts>
 inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators);
 
-/** Update window and padding size for each of the access patterns.
- *
- * First the window size is reduced based on all access patterns that are not
- * allowed to modify the padding of the underlying tensor. Then the padding of
- * the remaining tensors is increased to match the window.
- *
- * @param[in] win      Window that is used by the kernel.
- * @param[in] patterns Access patterns used to calculate the final window and padding.
- *
- * @return True if the window has been changed. Changes to the padding do not
- *         influence the returned value.
- */
-template <typename... Ts>
-bool update_window_and_padding(Window &win, Ts &&... patterns)
-{
-    bool window_changed = false;
-
-    utility::for_each([&](const IAccessWindow & w)
-    {
-        window_changed |= w.update_window_if_needed(win);
-    },
-    patterns...);
-
-    bool padding_changed = false;
-
-    utility::for_each([&](IAccessWindow & w)
-    {
-        padding_changed |= w.update_padding_if_needed(win);
-    },
-    patterns...);
-
-    return window_changed;
-}
-
-/** Calculate the maximum window for a given tensor shape and border setting
- *
- * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
- * @param[in] steps        (Optional) Number of elements processed for each step.
- * @param[in] skip_border  (Optional) If true exclude the border region from the window.
- * @param[in] border_size  (Optional) Border size.
- *
- * @return The maximum window the kernel can be executed on.
- */
-Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
-
-/** Calculate the maximum window for a given tensor shape and border setting
- *
- * @param[in] info        Tensor info object defining the shape of the object for which the window is created.
- * @param[in] steps       (Optional) Number of elements processed for each step.
- * @param[in] skip_border (Optional) If true exclude the border region from the window.
- * @param[in] border_size (Optional) Border size.
- *
- * @return The maximum window the kernel can be executed on.
- */
-inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
-{
-    return calculate_max_window(info.valid_region(), steps, skip_border, border_size);
-}
-
-/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
- *
- * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
- * @param[in] steps        (Optional) Number of elements processed for each step.
- * @param[in] skip_border  (Optional) If true exclude the border region from the window.
- * @param[in] border_size  (Optional) Border size. The border region will be excluded from the window.
- *
- * @return The maximum window the kernel can be executed on.
- */
-Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
-
-/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
- *
- * @param[in] info        Tensor info object defining the shape of the object for which the window is created.
- * @param[in] steps       (Optional) Number of elements processed for each step.
- * @param[in] skip_border (Optional) If true exclude the border region from the window.
- * @param[in] border_size (Optional) Border size.
- *
- * @return The maximum window the kernel can be executed on.
- */
-inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
-{
-    return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size);
-}
-
-/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border.
- *
- * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
- * @param[in] steps        (Optional) Number of elements processed for each step.
- * @param[in] border_size  (Optional) Border size. The border region will be included in the window.
- *
- * @return The maximum window the kernel can be executed on.
- */
-Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize());
-
-/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border.
- *
- * @param[in] info        Tensor info object defining the shape of the object for which the window is created.
- * @param[in] steps       (Optional) Number of elements processed for each step.
- * @param[in] border_size (Optional) Border size. The border region will be included in the window.
- *
- * @return The maximum window the kernel can be executed on.
- */
-inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize())
-{
-    return calculate_max_enlarged_window(info.valid_region(), steps, border_size);
-}
-
-/** Intersect multiple valid regions.
- *
- * @param[in] regions Valid regions.
- *
- * @return Intersection of all regions.
- */
-template <typename... Ts>
-ValidRegion intersect_valid_regions(const Ts &... regions)
-{
-    auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion
-    {
-        ValidRegion region;
-
-        for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d)
-        {
-            region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d]));
-        }
-
-        for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d)
-        {
-            region.shape.set(d, std::min(r1.shape[d], r2.shape[d]));
-        }
-
-        return region;
-    };
-
-    return utility::foldl(intersect, regions...);
-}
-
-/** Create a strides object based on the provided strides and the tensor dimensions.
- *
- * @param[in] info          Tensor info object providing the shape of the tensor for unspecified strides.
- * @param[in] stride_x      Stride to be used in X dimension (in bytes).
- * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes).
- *
- * @return Strides object based on the specified strides. Missing strides are
- *         calculated based on the tensor shape and the strides of lower dimensions.
- */
-template <typename T, typename... Ts>
-inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides)
-{
-    const TensorShape &shape = info.tensor_shape();
-
-    // Create strides object
-    Strides strides(stride_x, fixed_strides...);
-
-    for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
-    {
-        strides.set(i, shape[i - 1] * strides[i - 1]);
-    }
-
-    return strides;
-}
-
-/** Create a strides object based on the tensor dimensions.
- *
- * @param[in] info Tensor info object used to compute the strides.
- *
- * @return Strides object based on element size and tensor shape.
- */
-template <typename... Ts>
-inline Strides compute_strides(const ITensorInfo &info)
-{
-    return compute_strides(info, info.element_size());
-}
-
 /** Permutes given Dimensions according to a permutation vector
  *
  * @warning Validity of permutation is not checked
@@ -629,79 +149,6 @@ inline void permute(TensorShape &shape, const PermutationVector &perm)
     }
 }
 
-/** Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
- *
- * @param[in,out] info              Tensor info used to check and assign.
- * @param[in]     shape             New shape.
- * @param[in]     num_channels      New number of channels.
- * @param[in]     data_type         New data type
- * @param[in]     quantization_info (Optional) New quantization info
- *
- * @return True if the tensor info has been initialized
- */
-bool auto_init_if_empty(ITensorInfo       &info,
-                        const TensorShape &shape,
-                        int num_channels, DataType data_type,
-                        QuantizationInfo quantization_info = QuantizationInfo());
-
-/** Auto initialize the tensor info using another tensor info.
- *
- * @param info_sink   Tensor info used to check and assign
- * @param info_source Tensor info used to assign
- *
- * @return True if the tensor info has been initialized
- */
-bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source);
-
-/** Set the shape to the specified value if the current assignment is empty.
- *
- * @param[in,out] info  Tensor info used to check and assign.
- * @param[in]     shape New shape.
- *
- * @return True if the shape has been changed.
- */
-bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape);
-
-/** Set the format, data type and number of channels to the specified value if
- * the current data type is unknown.
- *
- * @param[in,out] info   Tensor info used to check and assign.
- * @param[in]     format New format.
- *
- * @return True if the format has been changed.
- */
-bool set_format_if_unknown(ITensorInfo &info, Format format);
-
-/** Set the data type and number of channels to the specified value if
- * the current data type is unknown.
- *
- * @param[in,out] info      Tensor info used to check and assign.
- * @param[in]     data_type New data type.
- *
- * @return True if the data type has been changed.
- */
-bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type);
-
-/** Set the data layout to the specified value if
- * the current data layout is unknown.
- *
- * @param[in,out] info        Tensor info used to check and assign.
- * @param[in]     data_layout New data layout.
- *
- * @return True if the data type has been changed.
- */
-bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout);
-
-/** Set the quantization info to the specified value if
- * the current quantization info is empty and the data type of asymmetric quantized type
- *
- * @param[in,out] info              Tensor info used to check and assign.
- * @param[in]     quantization_info Quantization info
- *
- * @return True if the quantization info has been changed.
- */
-bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info);
-
 /** Helper function to calculate the Valid Region for Scale.
  *
  * @param[in] src_info           Input tensor info used to check.
@@ -751,21 +198,6 @@ inline size_t get_data_layout_dimension_index(const DataLayout data_layout, cons
  */
 inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index);
 
-/** Calculate the normalization dimension index for a given normalization type
- *
- * @param[in] layout Data layout of the input and output tensor
- * @param[in] info   Normalization info
- *
- * @return Normalization dimension index
- */
-inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info)
-{
-    const unsigned int width_idx   = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH);
-    const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL);
-
-    return info.is_in_map() ? width_idx : channel_idx;
-}
-
 /** Calculate the number of output tiles required by Winograd Convolution layer. This utility function can be used by the Winograd input transform
  *  to know the number of tiles on the x and y direction
  *
@@ -814,49 +246,6 @@ inline Coordinates &convert_negative_axis(Coordinates &coords, int max_value)
     }
     return coords;
 }
-
-/** Given an integer value, this function returns the next power of two
- *
- * @param[in] x Input value
- *
- * @return the next power of two
- */
-inline unsigned int get_next_power_two(unsigned int x)
-{
-    // Decrement by 1
-    x--;
-
-    // Shift right by 1
-    x |= x >> 1u;
-    // Shift right by 2
-    x |= x >> 2u;
-    // Shift right by 4
-    x |= x >> 4u;
-    // Shift right by 8
-    x |= x >> 8u;
-    // Shift right by 16
-    x |= x >> 16u;
-
-    // Increment by 1
-    x++;
-
-    return x;
-}
-
-/** Given a softmax axis, this function returns the permutation vector required to put the axis to the front
- *
- * @note This function assumes a tensor rank <= 4
- *
- * Axis selects the dimension on which softmax is performed.
- * E.g. For input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5.
- * Interally softmax kernels is always performed on the first dimension (front dimension), therefore permutation is
- * required to put the dimension specified by @p axis to the first dimension.
- *
- * @param[in] axis Axis on which to perform softmax. Supported: 1, 2, 3 (0 implies no permutation needed)
- *
- * @return the permutation vector
- */
-PermutationVector get_permutation_vector_from_softmax_axis(size_t axis);
 } // namespace arm_compute
 
 #include "arm_compute/core/Helpers.inl"
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
index 5613e8c74e..a960876074 100644
--- a/arm_compute/core/Helpers.inl
+++ b/arm_compute/core/Helpers.inl
@@ -22,58 +22,12 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
 
 #include <cmath>
 #include <numeric>
 
 namespace arm_compute
 {
-inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y)
-{
-    ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
-
-    // Calculate sampling position
-    float in_x = (x + 0.5f) * wr - 0.5f;
-    float in_y = (y + 0.5f) * hr - 0.5f;
-
-    // Get bounding box offsets
-    int x_from = std::floor(x * wr - 0.5f - in_x);
-    int y_from = std::floor(y * hr - 0.5f - in_y);
-    int x_to   = std::ceil((x + 1) * wr - 0.5f - in_x);
-    int y_to   = std::ceil((y + 1) * hr - 0.5f - in_y);
-
-    // Clamp position to borders
-    in_x = std::max(-1.f, std::min(in_x, static_cast<float>(width)));
-    in_y = std::max(-1.f, std::min(in_y, static_cast<float>(height)));
-
-    // Clamp bounding box offsets to borders
-    x_from = ((in_x + x_from) < -1) ? -1 : x_from;
-    y_from = ((in_y + y_from) < -1) ? -1 : y_from;
-    x_to   = ((in_x + x_to) > width) ? (width - in_x) : x_to;
-    y_to   = ((in_y + y_to) > height) ? (height - in_y) : y_to;
-
-    // Get pixel index
-    const int xi = std::floor(in_x);
-    const int yi = std::floor(in_y);
-
-    // Bounding box elements in each dimension
-    const int x_elements = (x_to - x_from + 1);
-    const int y_elements = (y_to - y_from + 1);
-    ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0);
-
-    // Sum pixels in area
-    int sum = 0;
-    for(int j = yi + y_from, je = yi + y_to; j <= je; ++j)
-    {
-        const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from;
-        sum                = std::accumulate(ptr, ptr + x_elements, sum);
-    }
-
-    // Return average
-    return sum / (x_elements * y_elements);
-}
-
 template <size_t dimension>
 struct IncrementIterators
 {
@@ -199,94 +153,6 @@ inline void Iterator::reset(const size_t dimension)
     }
 }
 
-inline bool auto_init_if_empty(ITensorInfo       &info,
-                               const TensorShape &shape,
-                               int                num_channels,
-                               DataType           data_type,
-                               QuantizationInfo   quantization_info)
-{
-    if(info.tensor_shape().total_size() == 0)
-    {
-        info.set_data_type(data_type);
-        info.set_num_channels(num_channels);
-        info.set_tensor_shape(shape);
-        info.set_quantization_info(quantization_info);
-        return true;
-    }
-
-    return false;
-}
-
-inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source)
-{
-    if(info_sink.tensor_shape().total_size() == 0)
-    {
-        info_sink.set_data_type(info_source.data_type());
-        info_sink.set_num_channels(info_source.num_channels());
-        info_sink.set_tensor_shape(info_source.tensor_shape());
-        info_sink.set_quantization_info(info_source.quantization_info());
-        info_sink.set_data_layout(info_source.data_layout());
-        return true;
-    }
-
-    return false;
-}
-
-inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
-{
-    if(info.tensor_shape().total_size() == 0)
-    {
-        info.set_tensor_shape(shape);
-        return true;
-    }
-
-    return false;
-}
-
-inline bool set_format_if_unknown(ITensorInfo &info, Format format)
-{
-    if(info.data_type() == DataType::UNKNOWN)
-    {
-        info.set_format(format);
-        return true;
-    }
-
-    return false;
-}
-
-inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
-{
-    if(info.data_type() == DataType::UNKNOWN)
-    {
-        info.set_data_type(data_type);
-        return true;
-    }
-
-    return false;
-}
-
-inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout)
-{
-    if(info.data_layout() == DataLayout::UNKNOWN)
-    {
-        info.set_data_layout(data_layout);
-        return true;
-    }
-
-    return false;
-}
-
-inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info)
-{
-    if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type())))
-    {
-        info.set_quantization_info(quantization_info);
-        return true;
-    }
-
-    return false;
-}
-
 inline Coordinates index2coords(const TensorShape &shape, int index)
 {
     int num_elements = shape.total_size();
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h
index c5f0949196..3eb7239460 100644
--- a/arm_compute/core/ITensorInfo.h
+++ b/arm_compute/core/ITensorInfo.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,8 +29,8 @@
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/misc/ICloneable.h"
 #include "arm_compute/core/utils/misc/Utility.h"
+#include "support/ICloneable.h"
 
 #include <cstddef>
 
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
index 335a70fc2b..eba1737a03 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
@@ -26,6 +26,7 @@
 
 #include "arm_compute/core/NEON/INEKernel.h"
 #include "arm_compute/core/utils/misc/Traits.h"
+#include "support/Requires.h"
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 #include <arm_neon.h>
diff --git a/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h
deleted file mode 100644
index 74161e330e..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_INEGEMMWRAPPERKERNEL_H
-#define ARM_COMPUTE_INEGEMMWRAPPERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Common interface for all the arm_gemm Gemms
- */
-class INEGEMMWrapperKernel : public INEKernel
-{
-public:
-    /** Parameters defining the dimensions of the matrices being multiplied */
-    struct Params
-    {
-        unsigned int M{ 0 };       /**< Rows in output matrix C (and input matrix A). */
-        unsigned int N{ 0 };       /**< Columns in output matrix C (and input matrix B). */
-        unsigned int K{ 0 };       /**< Columns of input matrix A (= rows of input matrix B). */
-        unsigned int batches{ 0 }; /**< Number of "batched" GEMMs (unique A and C, shared B). */
-        unsigned int multis{ 0 };  /**< Number of "multi" GEMMs (unique A, B and C). */
-    };
-
-    static Params extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info);
-
-    /** Constructor */
-    INEGEMMWrapperKernel();
-    /** Prevent instances of this class from being copied */
-    INEGEMMWrapperKernel(const INEGEMMWrapperKernel &) = delete;
-    /** Prevent instances of this class from being copied */
-    INEGEMMWrapperKernel &operator=(const INEGEMMWrapperKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    INEGEMMWrapperKernel(INEGEMMWrapperKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    INEGEMMWrapperKernel &operator=(INEGEMMWrapperKernel &&) = default;
-    /** Initialise the kernel's input and output.
-     *
-     * @note The input and output tensor must have the same dimensions
-     *
-     * @param[in]  a         Input tensor (Matrix A)
-     * @param[in]  b         Input tensor (Matrix B)
-     * @param[out] c         Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
-     * @param[in]  alpha     Scalar multiplier to apply to AB matrix product.
-     * @param[in]  beta      Scalar multiplier to apply to input C matrix before adding product.
-     * @param[in]  gemm_info GEMM meta-data
-     */
-    void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-protected:
-    /** Called as part of configure() after _a, _b, _c and _params have been set.
-     *
-     * @param[in] alpha Scalar multiplier to apply to AB matrix product.
-     * @param[in] beta  Scalar multiplier to apply to input C matrix before adding product.
-     *
-     * @return A 3D execution window.
-     */
-    virtual Window configure_internal(float alpha, float beta) = 0;
-
-    /** Run the kernel from the start to the end offset in window.
-     *
-     * @param[in] window       Window to use for the iteration
-     * @param[in] start_offset Where to start iterating from (In Window coordinates)
-     * @param[in] end_offset   Where to stop iterating (In Window coordinates).
-     * @param[in] info         Info about executing thread and CPU.
-     */
-    virtual void run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) = 0;
-
-    const ITensor *_a;
-    const ITensor *_b;
-    ITensor       *_c;
-    Params         _params;
-    GEMMInfo       _gemm_info;
-
-private:
-    Window      _window3d;
-    TensorShape _window_shape;
-};
-
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_INEGEMMRAPPERKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h
deleted file mode 100644
index 7c10f85824..0000000000
--- a/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
-#define ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-
-#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** This class is a wrapper for the depthwise convolution assembly kernels.  */
-class NEDepthwiseConvolutionAssemblyKernelWrapper final : public INEKernel
-{
-public:
-    const char *name() const override
-    {
-        return "NEDepthwiseConvolutionAssemblyKernelWrapper";
-    }
-
-    /** Default constructor */
-    NEDepthwiseConvolutionAssemblyKernelWrapper()
-        : _kernel(nullptr)
-    {
-    }
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseConvolutionAssemblyKernelWrapper(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete;
-    /** Default Move Constructor. */
-    NEDepthwiseConvolutionAssemblyKernelWrapper(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default;
-    /** Default move assignment operator */
-    NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default;
-
-    /** Initialise the kernel's input and output.
-     *
-     * @param[in] kernel Pointer to an assembly kernel implementation.
-     */
-    void configure(depthwise::IDepthwiseConvolution *kernel)
-    {
-        ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel)));
-        _kernel = kernel;
-        Window win;
-        win.set(Window::DimX, Window::Dimension(0, _kernel->get_window(), 1));
-        INEKernel::configure(win);
-    }
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override
-    {
-        ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
-        ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-        auto first = window.x().start();
-        auto last  = window.x().end();
-        _kernel->run(first, last, info.thread_id);
-    }
-
-private:
-    depthwise::IDepthwiseConvolution *_kernel;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp b/arm_compute/core/NEON/kernels/convolution/common/activation.hpp
deleted file mode 100644
index 0c9b7c1368..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-namespace neon_convolution_kernels
-{
-
-enum class ActivationFunction
-{
-  None,
-  ReLU,
-  ReLU6,
-};
-
-}
diff --git a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp b/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp
deleted file mode 100644
index 7be3cdaaf5..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#ifdef ALLOC_ALIGN
-#define ALLOCATE(x) aligned_alloc(ALLOC_ALIGN, x)
-#else
-#define ALLOCATE(x) malloc(x)
-#endif
diff --git a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp b/arm_compute/core/NEON/kernels/convolution/common/arm.hpp
deleted file mode 100644
index b19bf98252..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/** Sets the macro __arm_any__ if compiling for Aarch32 or Aarch64.
- *  Includes `arm_neon.h` if compiling for either architecture.
- */
-
-#ifdef __arm__
-#define __arm_any__
-#endif  // __arm__
-
-#ifdef __aarch64__
-#define __arm_any__
-#endif  // __aarch64__
-
-#ifdef __arm_any__
-#include <arm_neon.h>
-#endif  // __arm_any__
diff --git a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp b/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp
deleted file mode 100644
index b1413527c3..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-enum PaddingType {
-  PADDING_SAME, PADDING_VALID
-};
diff --git a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp b/arm_compute/core/NEON/kernels/convolution/common/padding.hpp
deleted file mode 100644
index b6f95872c0..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <cstddef>
-
-// Utilities for copying tensor tiles and adding/removing padding.
-namespace padding
-{
-
-/* Copy a tile and apply padding to the output copy.
- */
-template <typename T>
-void copy_and_pad_tile(
-  unsigned int tile_rows,
-  unsigned int tile_cols,
-  unsigned int n_channels,
-  const T *inptr,
-  unsigned int in_row_stride,
-  unsigned int in_col_stride,
-  T* outptr,
-  unsigned int out_row_stride,
-  unsigned int out_col_stride,
-  unsigned int pad_top,
-  unsigned int pad_left,
-  unsigned int pad_bottom,
-  unsigned int pad_right,
-  T pad_value=static_cast<T>(0)
-);
-
-/** Copy a tile and remove padding elements in the output.
- */
-template <unsigned int TileRows, unsigned int TileCols>
-class CopyCropped
-{
-  public:
-    static void execute(
-      size_t size,  // Amount of data to copy
-      const void *inptr,
-      size_t in_row_stride,
-      size_t in_col_stride,
-      void *outptr,
-      size_t out_row_stride,
-      size_t out_col_stride,
-      unsigned int pad_top,
-      unsigned int pad_left,
-      unsigned int pad_bottom,
-      unsigned int pad_right
-    );
-};
-
-template <typename T>
-void crop_and_copy_tile(
-  unsigned int tile_rows,
-  unsigned int tile_cols,
-  unsigned int n_channels,
-  const T *inptr,
-  unsigned int in_row_stride,
-  unsigned int in_col_stride,
-  T *outptr,
-  unsigned int out_row_stride,
-  unsigned int out_col_stride,
-  unsigned int crop_top,
-  unsigned int crop_left,
-  unsigned int crop_bottom,
-  unsigned int crop_right
-);
-
-}
diff --git a/arm_compute/core/NEON/kernels/convolution/common/perf.h b/arm_compute/core/NEON/kernels/convolution/common/perf.h
deleted file mode 100644
index fbae4dcdfa..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/perf.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#pragma once
-
-/* Prototypes from perf.c */
-
-void start_counter(int fd);
-long long get_counter(int fd);
-long long stop_counter(int fd);
-int open_instruction_counter(void);
-int open_cycle_counter(void);
diff --git a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp
deleted file mode 100644
index 88ef7327c0..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include <cstdint>
-
-namespace qasymm8
-{
-
-struct QAsymm8Params
-{
-  uint8_t quantize(float value) const;
-  float dequantize(uint8_t value) const;
-
-  uint8_t offset;
-  float scale;
-};
-
-struct QAsymm8RescaleParams
-{
-  static QAsymm8RescaleParams make_rescale_params(
-    const QAsymm8Params& weight_quant,
-    const QAsymm8Params& input_quant,
-    const QAsymm8Params& output_quant
-  );
-
-  QAsymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale);
-
-  const int32_t shift, multiplier;
-  const float rescale;
-};
-
-}
diff --git a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp
deleted file mode 100644
index 726a02ccfd..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include <cstdint>
-#include <vector>
-#include "qasymm8.hpp"
-
-
-namespace qsymm8 {
-
-struct QSymm8Params {
-  int8_t quantize(float value) const;
-  float dequantize(int8_t value) const;
-
-  float scale;
-};
-
-struct QSymm8RescaleParams {
-  static QSymm8RescaleParams
-  make_rescale_params(const QSymm8Params &weight_quant,
-                      const QSymm8Params &input_quant,
-                      const QSymm8Params &output_quant);
-
-  QSymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale);
-
-  const int32_t shift, multiplier;
-  const float rescale;
-};
-
-struct QSymm8PerChannelParams {
-  int8_t quantize(float value, float scale) const;
-  float dequantize(int8_t value, float scale) const;
-
-  std::vector<float> scales;
-};
-
-struct QSymm8PerChannelRescaleParams {
-  static QSymm8PerChannelRescaleParams
-  make_rescale_params(const QSymm8PerChannelParams &weight_quant,
-                      const QSymm8PerChannelParams &input_quant,
-                      const QSymm8PerChannelParams &output_quant);
-
-  static QSymm8PerChannelRescaleParams
-  make_rescale_params(const QSymm8PerChannelParams &weight_quant,
-                      const qasymm8::QAsymm8Params &input_quant,
-                      const qasymm8::QAsymm8Params &output_quant);
-
-  QSymm8PerChannelRescaleParams(std::vector<int32_t>& shift, std::vector<int32_t>& multiplier, std::vector<float>& rescale);
-
-  std::vector<int32_t>  shifts, multipliers;
-  std::vector<float> rescales;
-};
-
-} // namespace qsymm8
diff --git a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp b/arm_compute/core/NEON/kernels/convolution/common/shims.hpp
deleted file mode 100644
index 310bd47b82..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp
+++ /dev/null
@@ -1,749 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#ifndef DOXYGEN_SKIP_THIS
-#include <cstdint>
-#endif /* DOXYGEN_SKIP_THIS */
-#include "arm.hpp"
-
-namespace reorder {
-/** Re-order a tensor from NCHW format to NHWC.
- *
- * @note The stride parameters are optional and are provided to allow padding in either input or output tensors.
- *
- * @param[in] in Input tensor in NCHW format.
- * @param[out] out Output tensor, to be written in NHWC format.
- * @param n_batches Number of batches in the tensors.
- * @param n_channels Number of channels in the tensors
- * @param n_rows Height of the tensor
- * @param n_cols Width of the tensor
- * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_channels * in_channel_stride`.
- * @param in_channel_stride Stride over channels in the input tensor. If `0` defaults to `n_rows * in_row_stride`.
- * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols`.
- * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_rows * out_row_stride`.
- * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols * out_col_stride`.
- * @param out_col_stride Stride over columns in the output tensor. If `0` defaults to `n_channels`.
- */
-template <typename T>
-inline void nchw_to_nhwc(
-  const T* const in,
-  T* const out,
-  const int n_batches,
-  const int n_channels,
-  const int n_rows,
-  const int n_cols,
-  int in_batch_stride=0,
-  int in_channel_stride=0,
-  int in_row_stride=0,
-  int out_batch_stride=0,
-  int out_row_stride=0,
-  int out_col_stride=0
-);
-
-/** Re-order a tensor from NHWC format to NCHW.
- *
- * @note The stride parameters are optional and are provided to allow padding in either input or output tensors.
- *
- * @param[in] in Input tensor in NHWC format.
- * @param[out] out Output tensor, to be written in NCHW format.
- * @param n_batches Number of batches in the tensors.
- * @param n_rows Height of the tensor
- * @param n_cols Width of the tensor
- * @param n_channels Number of channels in the tensors
- * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_rows * in_row_stride`.
- * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols * in_col_stride`.
- * @param in_col_stride Stride over columns in the input tensor. If `0` defaults to `n_channels`.
- * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_channels * out_channel_stride`.
- * @param out_channel_stride Stride over channels in the output tensor. If `0` defaults to `n_rows * out_row_stride`.
- * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols`.
- */
-template <typename T>
-inline void nhwc_to_nchw(
-  const T* const in,  // Input data in NHWC form
-  T* const out,       // Output data in NCHW form
-  const int n_batches,
-  const int n_rows,
-  const int n_cols,
-  const int n_channels,
-  int in_batch_stride=0,
-  int in_row_stride=0,
-  int in_col_stride=0,
-  int out_batch_stride=0,
-  int out_channel_stride=0,
-  int out_row_stride=0
-);
-
-/** Re-order a weight tensor from [Output feature map x Input feature map x
- *  Height x Width] format to [Height x Width x Input feature map x Output
- *  feature map] format.
- */
-template <typename T>
-inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
-  const T* const in,  // Input in [Output x Input x Height x Width] form
-  T* const out,       // Output in [Height x Width x Input x Output] form
-  const int n_output_feature_maps,
-  const int n_input_feature_maps,
-  const int n_rows,
-  const int n_cols,
-  int in_output_feature_map_stride=0,
-  int in_input_feature_map_stride=0,
-  int in_row_stride=0,
-  int out_row_stride=0,
-  int out_col_stride=0,
-  int out_input_feature_map_stride=0
-);
-
-/** Re-order a weight tensor from [Height x Width x Input feature map x Output
- *  feature map] format to [Output feature map x Input feature map x Height x
- *  Width] format.
- */
-template <typename T>
-inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
-  const T* const in,  // Input in [Height x Width x Input x Output] form
-  T* const out,       // Output in [Output x Input x Height x Width] form
-  const int n_rows,
-  const int n_cols,
-  const int n_input_feature_maps,
-  const int n_output_feature_maps,
-  int in_row_stride=0,
-  int in_col_stride=0,
-  int in_input_feature_map_stride=0,
-  int out_output_feature_map_stride=0,
-  int out_input_feature_map_stride=0,
-  int out_row_stride=0
-);
-
-/*****************************************************************************/
-/* 32-bit implementation : NCHW -> NHWC
- */
-template <>
-inline void nchw_to_nhwc(
-  const int32_t* const in,
-  int32_t* const out,
-  const int n_batches,
-  const int n_channels,
-  const int n_rows,
-  const int n_cols,
-  int in_batch_stride,
-  int in_channel_stride,
-  int in_row_stride,
-  int out_batch_stride,
-  int out_row_stride,
-  int out_col_stride
-)
-{
-  typedef int32_t T;
-
-  // Fill in the stride values
-  in_row_stride = (in_row_stride) ? in_row_stride : n_cols;
-  in_channel_stride = (in_channel_stride) ? in_channel_stride
-                                          : n_rows * in_row_stride;
-  in_batch_stride = (in_batch_stride) ? in_batch_stride
-                                      : n_channels * in_channel_stride;
-
-  out_col_stride = (out_col_stride) ? out_col_stride : n_channels;
-  out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride;
-  out_batch_stride = (out_batch_stride) ? out_batch_stride
-                                        : n_rows * out_row_stride;
-
-  // Perform the re-ordering
-  for (int n = 0; n < n_batches; n++)
-  {
-    const T* const in_batch = in + n*in_batch_stride;
-    T* const out_batch = out + n*out_batch_stride;
-
-    for (int i = 0; i < n_rows; i++)
-    {
-      const T* const in_row = in_batch + i*in_row_stride;
-      T* const out_row = out_batch + i*out_row_stride;
-
-      int j = 0, j_remaining = n_cols;
-#ifdef __arm_any__
-      for (; j_remaining >= 4; j += 4, j_remaining -= 4)
-      {
-        int c = 0, c_remaining = n_channels;
-        for (; c_remaining >= 4; c += 4, c_remaining -= 4)
-        {
-          // Read 4 channels worth of 4 columns, then zip to produce 4 columns
-          // worth of 4 channels.
-          int32x4_t channel_pixels[4];
-          channel_pixels[0] = vld1q_s32(in_row + (c + 0)*in_channel_stride + j);
-          channel_pixels[1] = vld1q_s32(in_row + (c + 1)*in_channel_stride + j);
-          channel_pixels[2] = vld1q_s32(in_row + (c + 2)*in_channel_stride + j);
-          channel_pixels[3] = vld1q_s32(in_row + (c + 3)*in_channel_stride + j);
-
-          const auto zip1 = vzipq_s32(channel_pixels[0], channel_pixels[2]);
-          const auto zip2 = vzipq_s32(channel_pixels[1], channel_pixels[3]);
-          const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]);
-          const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]);
-
-          vst1q_s32(out_row + (j + 0)*out_col_stride + c, out_0.val[0]);
-          vst1q_s32(out_row + (j + 1)*out_col_stride + c, out_0.val[1]);
-          vst1q_s32(out_row + (j + 2)*out_col_stride + c, out_1.val[0]);
-          vst1q_s32(out_row + (j + 3)*out_col_stride + c, out_1.val[1]);
-        }
-        for (; c_remaining; c++, c_remaining--)
-        {
-          for (int _j = 0; _j < 4; _j++)
-          {
-            const T* const in_col = in_row + j + _j;
-            T* const out_col = out_row + (j + _j)*out_col_stride;
-            const T* const in_channel = in_col + c*in_channel_stride;
-            out_col[c] = *(in_channel);
-          }
-        }
-      }
-      for (; j_remaining >= 2; j += 2, j_remaining -= 2)
-      {
-        int c = 0, c_remaining = n_channels;
-        for (; c_remaining >= 2; c += 2, c_remaining -= 2)
-        {
-          // Read 2 channels worth of 2 columns, then zip to produce 2 columns
-          // worth of 2 channels.
-          int32x2_t channel_pixels[2];
-          channel_pixels[0] = vld1_s32(in_row + (c + 0)*in_channel_stride + j);
-          channel_pixels[1] = vld1_s32(in_row + (c + 1)*in_channel_stride + j);
-
-          const auto output = vzip_s32(channel_pixels[0], channel_pixels[1]);
-
-          vst1_s32(out_row + (j + 0)*out_col_stride + c, output.val[0]);
-          vst1_s32(out_row + (j + 1)*out_col_stride + c, output.val[1]);
-        }
-        for (; c_remaining; c++, c_remaining--)
-        {
-          for (int _j = 0; _j < 2; _j++)
-          {
-            const T* const in_col = in_row + j + _j;
-            T* const out_col = out_row + (j + _j)*out_col_stride;
-            const T* const in_channel = in_col + c*in_channel_stride;
-            out_col[c] = *(in_channel);
-          }
-        }
-      }
-#endif  // __arm_any__
-      for (; j_remaining; j++, j_remaining--)
-      {
-        const T* const in_col = in_row + j;
-        T* const out_col = out_row + j*out_col_stride;
-
-        for (int c = 0; c < n_channels; c++)
-        {
-          const T* const in_channel = in_col + c*in_channel_stride;
-          out_col[c] = *(in_channel);
-        }
-      }
-    }
-  }
-}
-
-template <>
-inline void nchw_to_nhwc(
-  const uint32_t* const in,
-  uint32_t* const out,
-  const int n_batches,
-  const int n_channels,
-  const int n_rows,
-  const int n_cols,
-  int in_batch_stride,
-  int in_channel_stride,
-  int in_row_stride,
-  int out_batch_stride,
-  int out_row_stride,
-  int out_col_stride
-)
-{
-  nchw_to_nhwc(
-    reinterpret_cast<const int32_t*>(in),
-    reinterpret_cast<int32_t*>(out),
-    n_batches, n_channels, n_rows, n_cols,
-    in_batch_stride, in_channel_stride, in_row_stride,
-    out_batch_stride, out_row_stride, out_col_stride
-  );
-}
-
-template <>
-inline void nchw_to_nhwc(
-  const float* const in,
-  float* const out,
-  const int n_batches,
-  const int n_channels,
-  const int n_rows,
-  const int n_cols,
-  int in_batch_stride,
-  int in_channel_stride,
-  int in_row_stride,
-  int out_batch_stride,
-  int out_row_stride,
-  int out_col_stride
-)
-{
-  nchw_to_nhwc(
-    reinterpret_cast<const int32_t*>(in),
-    reinterpret_cast<int32_t*>(out),
-    n_batches, n_channels, n_rows, n_cols,
-    in_batch_stride, in_channel_stride, in_row_stride,
-    out_batch_stride, out_row_stride, out_col_stride
-  );
-}
-
-/*****************************************************************************/
-/* Generic implementation : NCHW -> NHWC
- */
-template <typename T>
-inline void nchw_to_nhwc(
-  const T* const in,
-  T* const out,
-  const int n_batches,
-  const int n_channels,
-  const int n_rows,
-  const int n_cols,
-  int in_batch_stride,
-  int in_channel_stride,
-  int in_row_stride,
-  int out_batch_stride,
-  int out_row_stride,
-  int out_col_stride
-)
-{
-  // Fill in the stride values
-  in_row_stride = (in_row_stride) ? in_row_stride : n_cols;
-  in_channel_stride = (in_channel_stride) ? in_channel_stride
-                                          : n_rows * in_row_stride;
-  in_batch_stride = (in_batch_stride) ? in_batch_stride
-                                      : n_channels * in_channel_stride;
-
-  out_col_stride = (out_col_stride) ? out_col_stride : n_channels;
-  out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride;
-  out_batch_stride = (out_batch_stride) ? out_batch_stride
-                                        : n_rows * out_row_stride;
-
-  // Perform the re-ordering
-  for (int n = 0; n < n_batches; n++)
-  {
-    const T* const in_batch = in + n*in_batch_stride;
-    T* const out_batch = out + n*out_batch_stride;
-
-    for (int i = 0; i < n_rows; i++)
-    {
-      const T* const in_row = in_batch + i*in_row_stride;
-      T* const out_row = out_batch + i*out_row_stride;
-
-      for (int j = 0; j < n_cols; j++)
-      {
-        const T* const in_col = in_row + j;
-        T* const out_col = out_row + j*out_col_stride;
-
-        for (int c = 0; c < n_channels; c++)
-        {
-          const T* const in_channel = in_col + c*in_channel_stride;
-          out_col[c] = *(in_channel);
-        }
-      }
-    }
-  }
-}
-
-/*****************************************************************************/
-/* 32-bit implementation : NHWC -> NCHW
- */
-template <>
-inline void nhwc_to_nchw(
-  const int32_t* const in,  // Input data in NHWC form
-  int32_t* const out,       // Output data in NCHW form
-  const int n_batches,
-  const int n_rows,
-  const int n_cols,
-  const int n_channels,
-  int in_batch_stride,
-  int in_row_stride,
-  int in_col_stride,
-  int out_batch_stride,
-  int out_channel_stride,
-  int out_row_stride
-)
-{
-  typedef int32_t T;
-
-  // Fill in stride values
-  in_col_stride = (in_col_stride) ? in_col_stride : n_channels;
-  in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride;
-  in_batch_stride = (in_batch_stride) ? in_batch_stride
-                                      : n_rows * in_row_stride;
-
-  out_row_stride = (out_row_stride) ? out_row_stride : n_cols;
-  out_channel_stride = (out_channel_stride) ? out_channel_stride
-                                            : n_rows * out_row_stride;
-  out_batch_stride = (out_batch_stride) ? out_batch_stride
-                                        : n_channels * out_channel_stride;
-
-  // Perform the re-ordering
-  // For every batch
-  for (int n = 0; n < n_batches; n++)
-  {
-    const T* const in_batch = in + n*in_batch_stride;
-    T* const out_batch = out + n*out_batch_stride;
-
-    // For every row
-    for (int i = 0; i < n_rows; i++)
-    {
-      const T* const in_i = in_batch + i*in_row_stride;
-      T* const out_i = out_batch + i*out_row_stride;
-
-      // For every column, beginning with chunks of 4
-      int j = 0, j_remaining = n_cols;
-#ifdef __arm_any__
-      for (; j_remaining >= 4; j += 4, j_remaining -=4)
-      {
-        // For every channel, beginning with chunks of 4
-        int c = 0, c_remaining = n_channels;
-        for (; c_remaining >= 4; c += 4, c_remaining -= 4)
-        {
-          // Read 4 columns worth of 4 channels then zip to produce 4 channels
-          // worth of 4 columns.
-          int32x4_t pixel_channels[4];
-          pixel_channels[0] = vld1q_s32(in_i + (j + 0)*in_col_stride + c);
-          pixel_channels[1] = vld1q_s32(in_i + (j + 1)*in_col_stride + c);
-          pixel_channels[2] = vld1q_s32(in_i + (j + 2)*in_col_stride + c);
-          pixel_channels[3] = vld1q_s32(in_i + (j + 3)*in_col_stride + c);
-
-          const auto zip1 = vzipq_s32(pixel_channels[0], pixel_channels[2]);
-          const auto zip2 = vzipq_s32(pixel_channels[1], pixel_channels[3]);
-          const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]);
-          const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]);
-
-          vst1q_s32(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]);
-          vst1q_s32(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]);
-          vst1q_s32(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]);
-          vst1q_s32(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]);
-        }
-        for (; c_remaining; c++, c_remaining--)
-        {
-          for (int _j = 0; _j < 4; _j++)
-          {
-            const T* const in_j = in_i + (j + _j)*in_col_stride;
-            T* const out_j = out_i + (j + _j);
-
-            const T* const in_channel = in_j + c;
-            T* const out_channel = out_j + c*out_channel_stride;
-            *(out_channel) = *(in_channel);
-          }
-        }
-      }
-      for (; j_remaining >= 2; j += 2, j_remaining -=2)
-      {
-        int c = 0, c_remaining = n_channels;
-        for (; c_remaining >= 2; c += 2, c_remaining -= 2)
-        {
-          // Read 2 columns worth of 2 channels then zip to produce 2 channels
-          // worth of 2 columns.
-          int32x2_t pixel_channels[2];
-          pixel_channels[0] = vld1_s32(in_i + (j + 0)*in_col_stride + c);
-          pixel_channels[1] = vld1_s32(in_i + (j + 1)*in_col_stride + c);
-
-          const auto output = vzip_s32(pixel_channels[0], pixel_channels[1]);
-
-          vst1_s32(out_i + j + (c + 0)*out_channel_stride, output.val[0]);
-          vst1_s32(out_i + j + (c + 1)*out_channel_stride, output.val[1]);
-        }
-        for (; c_remaining; c++, c_remaining--)
-        {
-          for (int _j = 0; _j < 2; _j++)
-          {
-            const T* const in_j = in_i + (j + _j)*in_col_stride;
-            T* const out_j = out_i + (j + _j);
-
-            const T* const in_channel = in_j + c;
-            T* const out_channel = out_j + c*out_channel_stride;
-            *(out_channel) = *(in_channel);
-          }
-        }
-      }
-#endif  // __arm_any__
-      for (; j_remaining; j++, j_remaining--)
-      {
-        const T* const in_j = in_i + j*in_col_stride;
-        T* const out_j = out_i + j;
-
-        // For every channel
-        for (int c = 0; c < n_channels; c++)
-        {
-          const T* const in_channel = in_j + c;
-          T* const out_channel = out_j + c*out_channel_stride;
-          *(out_channel) = *(in_channel);
-        }
-      }
-    }
-  }
-}
-
-template <>
-inline void nhwc_to_nchw(
-  const uint32_t* const in,  // Input data in NHWC form
-  uint32_t* const out,       // Output data in NCHW form
-  const int n_batches,
-  const int n_rows,
-  const int n_cols,
-  const int n_channels,
-  int in_batch_stride,
-  int in_row_stride,
-  int in_col_stride,
-  int out_batch_stride,
-  int out_channel_stride,
-  int out_row_stride
-)
-{
-  // Redirect to generic 32-bit implementation
-  nhwc_to_nchw(
-    reinterpret_cast<const int32_t*>(in),
-    reinterpret_cast<int32_t*>(out),
-    n_batches, n_rows, n_cols, n_channels,
-    in_batch_stride, in_row_stride, in_col_stride,
-    out_batch_stride, out_channel_stride, out_row_stride
-  );
-}
-
-template <>
-inline void nhwc_to_nchw(
-  const float* const in,  // Input data in NHWC form
-  float* const out,       // Output data in NCHW form
-  const int n_batches,
-  const int n_rows,
-  const int n_cols,
-  const int n_channels,
-  int in_batch_stride,
-  int in_row_stride,
-  int in_col_stride,
-  int out_batch_stride,
-  int out_channel_stride,
-  int out_row_stride
-)
-{
-  // Redirect to generic 32-bit implementation
-  nhwc_to_nchw(
-    reinterpret_cast<const int32_t*>(in),
-    reinterpret_cast<int32_t*>(out),
-    n_batches, n_rows, n_cols, n_channels,
-    in_batch_stride, in_row_stride, in_col_stride,
-    out_batch_stride, out_channel_stride, out_row_stride
-  );
-}
-
-/*****************************************************************************/
-/* Generic implementation : NHWC -> NCHW
- */
-template <typename T>
-inline void nhwc_to_nchw(
-  const T* const in,  // Input data in NHWC form
-  T* const out,       // Output data in NCHW form
-  const int n_batches,
-  const int n_rows,
-  const int n_cols,
-  const int n_channels,
-  int in_batch_stride,
-  int in_row_stride,
-  int in_col_stride,
-  int out_batch_stride,
-  int out_channel_stride,
-  int out_row_stride
-)
-{
-  // Fill in stride values
-  in_col_stride = (in_col_stride) ? in_col_stride : n_channels;
-  in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride;
-  in_batch_stride = (in_batch_stride) ? in_batch_stride
-                                      : n_rows * in_row_stride;
-
-  out_row_stride = (out_row_stride) ? out_row_stride : n_cols;
-  out_channel_stride = (out_channel_stride) ? out_channel_stride
-                                            : n_rows * out_row_stride;
-  out_batch_stride = (out_batch_stride) ? out_batch_stride
-                                        : n_channels * out_channel_stride;
-
-  // Perform the re-ordering
-  // For every batch
-  for (int n = 0; n < n_batches; n++)
-  {
-    const T* const in_batch = in + n*in_batch_stride;
-    T* const out_batch = out + n*out_batch_stride;
-
-    // For every row
-    for (int i = 0; i < n_rows; i++)
-    {
-      const T* const in_i = in_batch + i*in_row_stride;
-      T* const out_i = out_batch + i*out_row_stride;
-
-      // For every column
-      for (int j = 0; j < n_cols; j++)
-      {
-        const T* const in_j = in_i + j*in_col_stride;
-        T* const out_j = out_i + j;
-
-        // For every channel
-        for (int c = 0; c < n_channels; c++)
-        {
-          const T* const in_channel = in_j + c;
-          T* const out_channel = out_j + c*out_channel_stride;
-          *(out_channel) = *(in_channel);
-        }
-      }
-    }
-  }
-}
-
-/*****************************************************************************/
-/* Generic weight re-order implementation.
- */
-template <typename T>
-inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
-  const T* const in,  // Input in [Output x Input x Height x Width] form
-  T* const out,       // Output in [Height x Width x Input x Output] form
-  const int n_output_feature_maps,
-  const int n_input_feature_maps,
-  const int n_rows,
-  const int n_cols,
-  int in_output_feature_map_stride,
-  int in_input_feature_map_stride,
-  int in_row_stride,
-  int out_row_stride,
-  int out_col_stride,
-  int out_input_feature_map_stride
-)
-{
-  // Fill in stride values
-  in_row_stride = (in_row_stride)
-    ? in_row_stride
-    : n_cols;
-  in_input_feature_map_stride = (in_input_feature_map_stride)
-    ? in_input_feature_map_stride
-    : n_rows * in_row_stride;
-  in_output_feature_map_stride = (in_output_feature_map_stride)
-    ? in_output_feature_map_stride
-    : n_input_feature_maps * in_input_feature_map_stride;
-
-  out_input_feature_map_stride = (out_input_feature_map_stride)
-    ? out_input_feature_map_stride
-    : n_output_feature_maps;
-  out_col_stride = (out_col_stride)
-    ? out_col_stride
-    : n_input_feature_maps * out_input_feature_map_stride;
-  out_row_stride = (out_row_stride)
-    ? out_row_stride
-    : n_cols * out_col_stride;
-
-  // Perform the re-ordering
-  for (int i = 0; i < n_rows; i++)
-  {
-    const T* const in_row = in + i * in_row_stride;
-    T* out_row = out + i * out_row_stride;
-
-    for (int j = 0; j < n_cols; j++)
-    {
-      const T* const in_col = in_row + j;
-      T* const out_col = out_row + j * out_col_stride;
-
-      for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
-      {
-        const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
-        T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
-
-        for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
-        {
-          const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride;
-          T* const out_ofm = out_ifm + ofm;
-          *(out_ofm) = *(in_ofm);
-        }
-      }
-    }
-  }
-}
-
-/*****************************************************************************/
-/* Generic weight re-order implementation.
- */
-template <typename T>
-inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
-  const T* const in,  // Input in [Height x Width x Input x Output] form
-  T* const out,       // Output in [Output x Input x Height x Width] form
-  const int n_rows,
-  const int n_cols,
-  const int n_input_feature_maps,
-  const int n_output_feature_maps,
-  int in_row_stride,
-  int in_col_stride,
-  int in_input_feature_map_stride,
-  int out_output_feature_map_stride,
-  int out_input_feature_map_stride,
-  int out_row_stride
-)
-{
-  // Fill in the stride values
-  in_input_feature_map_stride = (in_input_feature_map_stride)
-    ? in_input_feature_map_stride
-    : n_output_feature_maps;
-  in_col_stride = (in_col_stride)
-    ? in_col_stride
-    : n_input_feature_maps * in_input_feature_map_stride;
-  in_row_stride = (in_row_stride)
-    ? in_row_stride
-    : n_cols * in_col_stride;
-
-  out_row_stride = (out_row_stride)
-    ? out_row_stride
-    : n_cols;
-  out_input_feature_map_stride = (out_input_feature_map_stride)
-    ? out_input_feature_map_stride
-    : n_rows * out_row_stride;
-  out_output_feature_map_stride = (out_output_feature_map_stride)
-    ? out_output_feature_map_stride
-    : n_input_feature_maps * out_input_feature_map_stride;
-
-  // Perform the re-ordering
-  for (int i = 0; i < n_rows; i++)
-  {
-    const T* const in_row = in + i * in_row_stride;
-    T* const out_row = out + i * out_row_stride;
-
-    for (int j = 0; j < n_cols; j++)
-    {
-      const T* const in_col = in_row + j * in_col_stride;
-      T* const out_col = out_row + j;
-
-      for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
-      {
-        const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
-        T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
-
-        for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
-        {
-          const T* const in_ofm = in_ifm + ofm;
-          T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride;
-          *(out_ofm) = *(in_ofm);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reorder
diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp
deleted file mode 100644
index 7738cdb349..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include <cstdlib>
-#include <random>
-
-#include "alloc.hpp"
-
-enum TensorOrder
-{
-  NHWC,  ///< [Batch x Height x Width x Channels]
-  NCHW,  ///< [Batch x Channels x Height x Width]
-};
-
-struct Tensor4DShape
-{
-  int n_batches, n_rows, n_cols, n_channels;
-  TensorOrder ordering;
-
-  // Create a new tensor with the default (NHWC) ordering
-  inline Tensor4DShape(
-    const int n_batches,
-    const int n_rows,
-    const int n_cols,
-    const int n_channels,
-    const TensorOrder ordering=NHWC
-  ) : n_batches(n_batches),
-      n_rows(n_rows),
-      n_cols(n_cols),
-      n_channels(n_channels),
-      ordering(ordering)
-  {
-  }
-
-  inline int index(const int n, const int i, const int j, const int c) const
-  {
-    if (this->ordering == NHWC)
-    {
-      return ((n*this->n_rows + i)*this->n_cols + j)*this->n_channels + c;
-    }
-    else  // NCHW
-    {
-      return ((n*this->n_channels + c)*this->n_rows + i)*this->n_cols + j;
-    }
-  }
-
-  inline int size() const
-  {
-    return n_batches * n_rows * n_cols * n_channels;
-  }
-
-  inline bool TestEq(const Tensor4DShape& other) const
-  {
-    return (n_batches == other.n_batches &&
-            n_rows == other.n_rows &&
-            n_cols == other.n_cols &&
-            n_channels == other.n_channels);
-  }
-};
-
-
-enum WeightOrder
-{
-  HWIO,  ///< [Height x Width x Input channels x Output channels]
-  OIHW,  ///< [Output channels x Input channels x Height x Width]
-};
-
-struct KernelShape
-{
-  int n_output_channels, n_rows, n_cols, n_input_channels;
-  WeightOrder ordering;
-
-  inline KernelShape(
-    const int n_output_channels,
-    const int n_rows,
-    const int n_cols,
-    const int n_input_channels,
-    const WeightOrder ordering=HWIO
-  ) : n_output_channels(n_output_channels),
-      n_rows(n_rows),
-      n_cols(n_cols),
-      n_input_channels(n_input_channels),
-      ordering(ordering)
-  {
-  }
-
-  inline int index(int oc, int i, int j, int ic) const
-  {
-    if (this->ordering == HWIO)
-    {
-      return ((i*this->n_cols + j)*this->n_input_channels + ic)*this->n_output_channels + oc;
-    }
-    else  // OIHW
-    {
-      return ((oc*this->n_input_channels + ic)*this->n_rows + i)*this->n_cols + j;
-    }
-  }
-
-  inline int size(void) const
-  {
-    return n_output_channels * n_rows * n_cols * n_input_channels;
-  }
-};
-
-
-template <typename ShapeT, typename T>
-class Tensor4D final
-{
-  public:
-    Tensor4D(ShapeT shape) :
-      shape(shape),
-      _data(reinterpret_cast<T*>(ALLOCATE(size_bytes())))
-    {
-        Clear();
-    }
-
-    Tensor4D(const Tensor4D<ShapeT, T>&) = delete;
-    Tensor4D operator=(const Tensor4D<ShapeT, T>&) = delete;
-
-    ~Tensor4D() {
-      free(_data);
-    }
-
-    inline T* ptr() const {
-      return _data;
-    }
-
-    inline size_t size_bytes() const {
-      return shape.size() * sizeof(T);
-    }
-
-    /* Extract an element of the tensor.
-     *
-     * If the shape is a Tensor4DShape then the index is given as batch, row,
-     * column and channel.  If the shape is a KernelShape then the index is
-     * given as output channel, row, column and input channel.
-     */
-    inline T& element(const int a, const int b, const int c, const int d) const
-    {
-      return _data[shape.index(a, b, c, d)];
-    }
-
-    inline void Clear() {
-      Fill(static_cast<T>(0));
-    }
-
-    inline void Fill(T val) {
-      for (int i = 0; i < shape.size(); i++)
-        _data[i] = val;
-    }
-
-    const ShapeT shape;
-
-  private:
-    T* const _data;
-};
diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp
deleted file mode 100644
index 82619f4799..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include "tensor.hpp"
-
-// Methods to print tensors and weights
-void PrintTensor(const Tensor4D<Tensor4DShape, float>& tensor);
-void PrintWeights(const Tensor4D<KernelShape, float>& weights);
-
-// Test the equivalence of two tensors
-// Counts the instances that |a - b|/|a| > max_err
-bool CmpTensors(
-  const Tensor4D<Tensor4DShape, float>& a,
-  const Tensor4D<Tensor4DShape, float>& b,
-  const float max_err=0.0f
-);
-
-// Fill the tensor with a test pattern
-void TestPattern(Tensor4D<Tensor4DShape, float>& tensor);
-void TestPattern(Tensor4D<KernelShape, float>& weights);
-
-// Fill the tensor with random values
-void Randomise(Tensor4D<Tensor4DShape, float>& tensor, const int seed=0);
-void Randomise(Tensor4D<KernelShape, float>& weights, const int seed=0);
diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
deleted file mode 100644
index b7a9517c65..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <limits>
-
-void PrintMatrix(const float *const m, const int M, const int N, const int row_stride);
-
-constexpr inline int iceildiv(const int a, const int b)
-{
-    return (a + b - 1) / b;
-}
-
-template <typename T>
-inline T roundup(const T a, const T b)
-{
-    return b * iceildiv(a, b);
-}
-
-template<typename T>
-struct TypeBounds
-{
-    static constexpr T lower() noexcept { return std::numeric_limits<T>::has_infinity
-                                                 ? -std::numeric_limits<T>::infinity()
-                                                 : std::numeric_limits<T>::lowest(); };
-    static constexpr T upper() noexcept { return std::numeric_limits<T>::has_infinity
-                                                 ? std::numeric_limits<T>::infinity()
-                                                 : std::numeric_limits<T>::max(); };
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-template<>
-struct TypeBounds<__fp16>
-{
-    static constexpr __fp16 lower() noexcept { return -std::numeric_limits<float>::infinity(); };
-    static constexpr __fp16 upper() noexcept { return std::numeric_limits<float>::infinity(); }
-};
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
deleted file mode 100644
index 70d6689731..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <arm_neon.h>
-#include "activation.hpp"
-#include "padding.hpp"
-
-namespace depthwise
-{
-
-namespace nck = neon_convolution_kernels;
-
-class IDepthwiseConvolution
-{
-  public:
-    virtual ~IDepthwiseConvolution() = default;
-
-    virtual int output_size(
-      int dim_size,
-      unsigned int padding_before,
-      unsigned int padding_after
-    ) const = 0;
-
-    /* Set input tensor and stride. */
-    virtual void set_input(const void *inptr) = 0;
-    virtual void set_input(const void *inptr, int column_stride) = 0;
-    virtual void set_input(const void *inptr, int row_stride, int column_stride) = 0;
-    virtual void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) = 0;
-
-    /* Set output tensor and stride. */
-    virtual void set_output(void *outptr) = 0;
-    virtual void set_output(void *outptr, int column_stride) = 0;
-    virtual void set_output(void *outptr, int row_stride, int column_stride) = 0;
-    virtual void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) = 0;
-
-    /* Weights and biases are re-ordered to improve memory access patterns. Use
-     * these methods to determine the size of the re-pack buffer and to set the
-     * address (and implicitly reorder the weights and biases into) the buffer.
-     */
-    virtual size_t get_packed_params_size(void) const = 0;
-    virtual void set_packed_params_buffer(void *) = 0;
-
-    virtual void pack_params(const void *weights, const void *biases=nullptr) const = 0;
-    virtual void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const = 0;
-    virtual void pack_params(
-      void *buffer,
-      const void* weights,
-      unsigned int weight_row_stride,
-      unsigned int weight_col_stride,
-      const void *biases=nullptr
-    ) const = 0;
-
-    /* Working space is used to pad tensors on the fly. Before running any
-     * inference check the amount of space required, allocate and provide a
-     * pointer to the convolution engine.
-     */
-    virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0;
-    virtual void set_working_space(void *) = 0;
-
-    virtual unsigned int get_window(void) const = 0;
-    virtual void run(
-      unsigned int start,
-      unsigned int stop,
-      unsigned int threadid=0
-    ) = 0;
-};
-
-template <
-  unsigned int OutputTileRows, unsigned int OutputTileCols,
-  unsigned int KernelRows, unsigned int KernelCols,
-  unsigned int StrideRows, unsigned int StrideCols,
-  typename TIn, typename TBias, typename TOut,
-  typename Derived
->
-class DepthwiseConvolutionBase : public IDepthwiseConvolution
-{
-  public:
-    // Information about the specific convolution instance
-    using InputType = TIn;
-    using BiasType = TBias;
-    using OutputType = TOut;
-    static constexpr int output_tile_rows = OutputTileRows;
-    static constexpr int output_tile_cols = OutputTileCols;
-    static constexpr int kernel_rows = KernelRows;
-    static constexpr int kernel_cols = KernelCols;
-    static constexpr int stride_rows = StrideRows;
-    static constexpr int stride_cols = StrideCols;
-    static constexpr int inner_tile_rows = stride_rows * (output_tile_rows - 1) + kernel_rows;
-    static constexpr int inner_tile_cols = stride_cols * (output_tile_cols - 1) + kernel_cols;
-
-    /** Create a new depthwise convolution engine.
-     *
-     * @param[in] n_batches Number of batches tensors.
-     * @param[in] n_input_rows Number of rows in input tensor.
-     * @param[in] n_input_cols Number of columns in input tensor.
-     * @param[in] n_channels Number of channels in input and output tensors.
-     */
-    DepthwiseConvolutionBase(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      nck::ActivationFunction activation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-    /** Create a new depthwise convolution engine.
-     *
-     * @param[in] n_batches Number of batches tensors.
-     * @param[in] n_input_rows Number of rows in input tensor.
-     * @param[in] n_input_cols Number of columns in input tensor.
-     * @param[in] n_channels Number of channels in input and output tensors.
-     */
-    DepthwiseConvolutionBase(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int n_output_rows, int n_output_cols,
-      nck::ActivationFunction activation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-    // Cannot copy or move a DepthwiseConvolution.
-    DepthwiseConvolutionBase(DepthwiseConvolutionBase&) = delete;
-    DepthwiseConvolutionBase operator=(DepthwiseConvolutionBase&) = delete;
-
-    /* Set input tensor and stride. */
-    void set_input(const void *inptr) override;
-    void set_input(const void *inptr, int column_stride) override;
-    void set_input(const void *inptr, int row_stride, int column_stride) override;
-    void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override;
-
-    /* Set output tensor and stride. */
-    void set_output(void *outptr) override;
-    void set_output(void *outptr, int column_stride) override;
-    void set_output(void *outptr, int row_stride, int column_stride) override;
-    void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override;
-
-    /** Get the number of output rows/columns.
-     *
-     * @param[in] dim_size Number of elements in the dimension (rows/columns)
-     * @param[in] same_padding True if the padding is SAME, otherwise false.
-     */
-    static int get_output_size(
-      int dim_size, unsigned int padding_before, unsigned int padding_after
-    );
-
-    int output_size(
-      int dim_size, unsigned int padding_before, unsigned int padding_after
-    ) const override;
-
-    /* Determine how much memory is required to store the packed weights and
-     * biases.
-     */
-    size_t get_packed_params_size(void) const override;
-
-    /* Set the buffer for the packed weights and biases, and perform the
-     * packing.
-     */
-    void set_packed_params_buffer(void *buffer) override;
-
-    void pack_params(const void *weights, const void *biases=nullptr) const override;
-
-    void pack_params(
-      void *buffer,
-      const void *weights,
-      const void *biases=nullptr
-    ) const override;
-
-    void pack_params(
-      void *buffer,
-      const void *weights,
-      unsigned int weight_row_stride,
-      unsigned int weight_col_stride,
-      const void *biases=nullptr
-    ) const override;
-
-    /** Query the amount of working space required.
-     * @param[in] The largest number of threads which will be used to execute
-     *            the kernel.
-     */
-    size_t get_working_space_size(unsigned int n_threads=1) const override;
-
-    /** Set the working space buffer.
-     */
-    void set_working_space(void *buffer) override;
-
-    /** Get the window of work to be performed by an instance of the operator.
-     */
-    unsigned int get_window(void) const override;
-
-    /** Perform a portion of the work associated with the operator.
-     *
-     * Will perform the window of work described by $[start, stop)$.
-     *
-     * @param[in] start Start of the window of work to perform.
-     * @param[in] stop End of the work to perform.
-     * @param[in] ID of the thread performing the work.
-     */
-    void run(
-      unsigned int start,
-      unsigned int stop,
-      unsigned int threadid=0
-    ) override;
-
-  protected:
-    /** Get the value to use to pad the tensor.
-     */
-    TIn _input_padding_value(void) const;
-
-    /** Implementation of the parameter packing.
-     */
-    void _pack_params(
-      void *buffer,
-      const void *weights,
-      unsigned int weight_row_stride,
-      unsigned int weight_col_stride,
-      const void *biases=nullptr
-    ) const;
-
-    /** Process a tile-row of the tensors.
-     */
-    void process_tile_row(
-      unsigned int threadid,
-      int n_channels,
-      const void* packed_params,
-      const InputType* inptr,
-      OutputType* outptr,
-      int row_pad_in_top,
-      int row_pad_in_left,
-      int row_pad_in_bottom,
-      int row_pad_out_bottom,
-      int n_tiles,
-      int n_input_cols,
-      int n_output_cols
-    );
-
-    /** Process a single tile of the tensor.
-     *
-     * This method will apply input/output padding (if required) and call the
-     * depthwise tile implementation.
-     */
-    void process_tile(
-      unsigned int threadid,
-      int n_channels,
-      const void* packed_params,
-      const InputType* inptr,
-      OutputType* outptr,
-      int pad_in_top,
-      int pad_in_left,
-      int pad_in_bottom,
-      int pad_in_right,
-      int pad_out_bottom,
-      int pad_out_right
-    );
-
-    /** Perform depthwise convolution on a single tile.
-     */
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const InputType* inptr,
-      unsigned int in_row_stride,
-      unsigned int in_col_stride,
-      OutputType* outptr,
-      unsigned int out_row_stride,
-      unsigned int out_col_stride
-    );
-
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const InputType* inptrs[inner_tile_rows][inner_tile_cols],
-      OutputType* outptrs[output_tile_rows][output_tile_cols]
-    );
-
-    int n_channels(void) const;
-
-  private:
-    // Member variables of instances of a convolution engine.
-    const InputType* _input;
-    OutputType* _output;
-    void* _packed_parameters;
-    void* _working_space;  // Per-thread working space
-    const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
-              _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
-    const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right;
-    const nck::ActivationFunction _activation;
-
-    // Stride information for a convolution instance
-    int _input_col_stride, _input_row_stride, _input_batch_stride;
-    int _output_col_stride, _output_row_stride, _output_batch_stride;
-
-    // Methods for getting access to working space
-    size_t _get_input_working_space_size(void) const;
-    size_t _get_output_working_space_size(void) const;
-
-    void *_get_input_working_space(unsigned int threadid) const;
-    void *_get_output_working_space(unsigned int threadid) const;
-};
-
-
-template <
-  unsigned int OutputTileRows, unsigned int OutputTileCols,
-  unsigned int KernelRows, unsigned int KernelCols,
-  unsigned int StrideRows, unsigned int StrideCols,
-  typename TIn, typename TBias, typename TOut
->
-class DepthwiseConvolution : public DepthwiseConvolutionBase<
-  OutputTileRows, OutputTileCols,
-  KernelRows, KernelCols,
-  StrideRows, StrideCols,
-  TIn, TBias, TOut,
-  DepthwiseConvolution<
-    OutputTileRows, OutputTileCols,
-    KernelRows, KernelCols,
-    StrideRows, StrideCols,
-    TIn, TBias, TOut
-  >
->
-{
-  using Base = DepthwiseConvolutionBase<
-    OutputTileRows, OutputTileCols,
-    KernelRows, KernelCols,
-    StrideRows, StrideCols,
-    TIn, TBias, TOut,
-    DepthwiseConvolution<
-      OutputTileRows, OutputTileCols,
-      KernelRows, KernelCols,
-      StrideRows, StrideCols,
-      TIn, TBias, TOut
-  > >;
-  friend Base;
-  using InputType = typename Base::InputType;
-  using OutputType = typename Base::OutputType;
-
-  public:
-    using Base::DepthwiseConvolutionBase;
-
-  protected:
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const TIn* inptr,
-      unsigned int in_row_stride,
-      unsigned int in_col_stride,
-      TOut* outptr,
-      unsigned int out_row_stride,
-      unsigned int out_col_stride
-    );
-
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
-      OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols]
-    );
-};
-
-
-template <
-  unsigned int OutputTileRows, unsigned int OutputTileCols,
-  unsigned int KernelRows, unsigned int KernelCols,
-  unsigned int StrideRows, unsigned int StrideCols
->
-class DepthwiseConvolution<
-  OutputTileRows, OutputTileCols,
-  KernelRows, KernelCols,
-  StrideRows, StrideCols,
-  float, float, float
-> : public DepthwiseConvolutionBase<
-  OutputTileRows, OutputTileCols,
-  KernelRows, KernelCols,
-  StrideRows, StrideCols,
-  float, float, float,
-  DepthwiseConvolution<
-    OutputTileRows, OutputTileCols,
-    KernelRows, KernelCols,
-    StrideRows, StrideCols,
-    float, float, float
-  >
->
-{
-  using Base = DepthwiseConvolutionBase<
-    OutputTileRows, OutputTileCols,
-    KernelRows, KernelCols,
-    StrideRows, StrideCols,
-    float, float, float,
-    DepthwiseConvolution<
-      OutputTileRows, OutputTileCols,
-      KernelRows, KernelCols,
-      StrideRows, StrideCols,
-      float, float, float
-  > >;
-  friend Base;
-  using InputType = typename Base::InputType;
-  using OutputType = typename Base::OutputType;
-
-  public:
-    DepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      nck::ActivationFunction activation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-    DepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int n_output_rows, int n_output_cols,
-      nck::ActivationFunction activation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-  protected:
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const float* inptr,
-      unsigned int in_row_stride,
-      unsigned int in_col_stride,
-      float* outptr,
-      unsigned int out_row_stride,
-      unsigned int out_col_stride
-    );
-
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
-      float* outptrs[Base::output_tile_rows][Base::output_tile_cols]
-    );
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-template <
-  unsigned int OutputTileRows, unsigned int OutputTileCols,
-  unsigned int KernelRows, unsigned int KernelCols,
-  unsigned int StrideRows, unsigned int StrideCols
->
-class DepthwiseConvolution<
-  OutputTileRows, OutputTileCols,
-  KernelRows, KernelCols,
-  StrideRows, StrideCols,
-  float16_t, float16_t, float16_t
-> : public DepthwiseConvolutionBase<
-  OutputTileRows, OutputTileCols,
-  KernelRows, KernelCols,
-  StrideRows, StrideCols,
-  float16_t, float16_t, float16_t,
-  DepthwiseConvolution<
-    OutputTileRows, OutputTileCols,
-    KernelRows, KernelCols,
-    StrideRows, StrideCols,
-    float16_t, float16_t, float16_t
-  >
->
-{
-  using Base = DepthwiseConvolutionBase<
-    OutputTileRows, OutputTileCols,
-    KernelRows, KernelCols,
-    StrideRows, StrideCols,
-    float16_t, float16_t, float16_t,
-    DepthwiseConvolution<
-      OutputTileRows, OutputTileCols,
-      KernelRows, KernelCols,
-      StrideRows, StrideCols,
-      float16_t, float16_t, float16_t
-  > >;
-  friend Base;
-  using InputType = typename Base::InputType;
-  using OutputType = typename Base::OutputType;
-
-  public:
-    DepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      nck::ActivationFunction activation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-    DepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int n_output_rows, int n_output_cols,
-      nck::ActivationFunction activation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-  protected:
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const float16_t* inptr,
-      unsigned int in_row_stride,
-      unsigned int in_col_stride,
-      float16_t* outptr,
-      unsigned int out_row_stride,
-      unsigned int out_col_stride
-    );
-
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
-      float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
-    );
-};
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-}  // namespace depthwise
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp
deleted file mode 100644
index 1bae815613..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <deque>
-#include <functional>
-#include <memory>
-
-#include "depthwise.hpp"
-
-namespace depthwise
-{
-
-template <
-  unsigned int OutputTileRows, unsigned int OutputTileCols,
-  unsigned int KernelRows, unsigned int KernelCols,
-  unsigned int StrideRows, unsigned int StrideCols,
-  typename TIn, typename TBias, typename TOut
->
-class DilatedDepthwiseConvolution : public IDepthwiseConvolution
-{
-  public:
-    /** Create a new dilated depthwise convolution engine.
-     */
-    DilatedDepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int dilation_factor,
-      nck::ActivationFunction activation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-    /** Create a new dilated depthwise convolution engine.
-     */
-    DilatedDepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int dilation_factor, int n_output_rows, int n_output_cols,
-      nck::ActivationFunction activation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-    // Cannot copy or move a DilatedDepthwiseConvolution.
-    DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete;
-    DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete;
-
-    /* Set input tensor and stride. */
-    void set_input(const void *inptr) override;
-    void set_input(const void *inptr, int column_stride) override;
-    void set_input(const void *inptr, int row_stride, int column_stride) override;
-    void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override;
-
-    /* Set output tensor and stride. */
-    void set_output(void *outptr) override;
-    void set_output(void *outptr, int column_stride) override;
-    void set_output(void *outptr, int row_stride, int column_stride) override;
-    void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override;
-
-    static int get_output_size(
-      int dim_size,
-      unsigned int padding_before,
-      unsigned int padding_after,
-      int dilation_factor
-    );
-
-    int output_size(
-      int dim_size, unsigned int padding_before, unsigned int padding_after
-    ) const override;
-
-    /* Weights and biases are re-ordered to improve memory access patterns. Use
-     * these methods to determine the size of the re-pack buffer and to set the
-     * address (and implicitly reorder the weights and biases into) the buffer.
-     */
-    size_t get_packed_params_size(void) const override;
-    void set_packed_params_buffer(void *) override;
-
-    void pack_params(const void *weights, const void *biases=nullptr) const override;
-    void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const override;
-    void pack_params(
-      void *buffer,
-      const void* weights,
-      unsigned int weight_row_stride,
-      unsigned int weight_col_stride,
-      const void *biases=nullptr
-    ) const override;
-
-    /* Working space is used to pad tensors on the fly. Before running any
-     * inference check the amount of space required, allocate and provide a
-     * pointer to the convolution engine.
-     */
-    size_t get_working_space_size(unsigned int nthreads=1) const override;
-    void set_working_space(void *) override;
-
-    unsigned int get_window(void) const override;
-    void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override;
-
-  protected:
-    /** Protected constructor which also accepts a function to construct a new
-     * subconvolution
-     */
-    DilatedDepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int dilation_factor, int n_output_rows, int n_output_cols,
-      nck::ActivationFunction activation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right,
-      std::function<IDepthwiseConvolution *(int, int, int, int, int, int, nck::ActivationFunction, unsigned int, unsigned int, unsigned int, unsigned int)> subconvfn
-    );
-
-    const int _dilation_factor;
-    const int _n_input_rows, _n_input_cols, _n_channels;
-    const int _padding_top, _padding_left;
-    const int _n_output_rows, _n_output_cols;
-
-    /* Dilated depthwise convolution is performed through repeated calls to
-     * non-dilated convolutions. If the dilation factor is $n$, then we perform
-     * $(n + 1)^2$ depthwise convolutions.
-     */
-    using BaseDepthwise = DepthwiseConvolution<
-      OutputTileRows, OutputTileCols,
-      KernelRows, KernelCols,
-      StrideRows, StrideCols,
-      TIn, TBias, TOut
-    >;
-    std::deque<std::deque<std::unique_ptr<IDepthwiseConvolution>>> _convs;
-};
-
-}  // namespace depthwise
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp
deleted file mode 100644
index 4343f6ad45..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include "depthwise.hpp"
-#include "qasymm8.hpp"
-#include "qsymm8.hpp"
-#pragma once
-
-using namespace neon_convolution_kernels;
-using namespace qasymm8;
-
-inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32x4_t& b)
-{
-  return vqrdmulhq_s32(a, b);
-}
-
-inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32_t& b)
-{
-  return vqrdmulhq_n_s32(a, b);
-}
-
-inline int32_t saturating_doubling_high_mul(const int32_t& a, const int32_t& b)
-{
-  return vget_lane_s32(vqrdmulh_n_s32(vdup_n_s32(a), b), 0);
-}
-
-inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int32x4_t shift)
-{
-  const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31);
-  const int32x4_t fixed = vqaddq_s32(x, fixup);
-  return vrshlq_s32(fixed, shift);
-}
-
-inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent)
-{
-  const int32x4_t shift = vdupq_n_s32(-exponent);
-  const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31);
-  const int32x4_t fixed = vqaddq_s32(x, fixup);
-  return vrshlq_s32(fixed, shift);
-}
-
-inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent)
-{
-  const int32x2_t shift = vdup_n_s32(-exponent);
-  const int32x2_t fixup = vshr_n_s32(vand_s32(x, shift), 31);
-  const int32x2_t fixed = vqadd_s32(x, fixup);
-  return vrshl_s32(fixed, shift);
-}
-
-inline int32_t rounding_divide_by_exp2(const int32_t& x, const int exponent)
-{
-  const int32x2_t xs = vdup_n_s32(x);
-  return vget_lane_s32(rounding_divide_by_exp2(xs, exponent), 0);
-}
-
-namespace depthwise
-{
-
-namespace nck = neon_convolution_kernels;
-
-template <
-  unsigned int OutputTileRows, unsigned int OutputTileCols,
-  unsigned int KernelRows, unsigned int KernelCols,
-  unsigned int StrideRows, unsigned int StrideCols
->
-class QAsymm8DepthwiseConvolution : public DepthwiseConvolutionBase<
-  OutputTileRows, OutputTileCols,
-  KernelRows, KernelCols,
-  StrideRows, StrideCols,
-  uint8_t, int32_t, uint8_t,
-  QAsymm8DepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
->
-{
-  using Base = DepthwiseConvolutionBase<
-    OutputTileRows, OutputTileCols,
-    KernelRows, KernelCols,
-    StrideRows, StrideCols,
-    uint8_t, int32_t, uint8_t,
-    QAsymm8DepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
-  >;
-  friend Base;
-  using InputType = typename Base::InputType;
-  using OutputType = typename Base::OutputType;
-
-  public:
-    QAsymm8DepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      nck::ActivationFunction activation,
-      const qasymm8::QAsymm8Params& weight_quantisation,
-      const qasymm8::QAsymm8Params& input_quantisation,
-      const qasymm8::QAsymm8Params& output_quantisation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-    QAsymm8DepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int n_output_rows, int n_output_cols,
-      nck::ActivationFunction activation,
-      const qasymm8::QAsymm8Params& weight_quantisation,
-      const qasymm8::QAsymm8Params& input_quantisation,
-      const qasymm8::QAsymm8Params& output_quantisation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-    QAsymm8DepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      nck::ActivationFunction activation,
-      const qasymm8::QAsymm8Params& weight_quantisation,
-      const qasymm8::QAsymm8Params& input_quantisation,
-      const qasymm8::QAsymm8Params& output_quantisation,
-      const qasymm8::QAsymm8RescaleParams& rescale_parameters,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-    QAsymm8DepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int n_output_rows, int n_output_cols,
-      nck::ActivationFunction activation,
-      const qasymm8::QAsymm8Params& weight_quantisation,
-      const qasymm8::QAsymm8Params& input_quantisation,
-      const qasymm8::QAsymm8Params& output_quantisation,
-      const qasymm8::QAsymm8RescaleParams& rescale_parameters,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-  protected:
-    uint8_t _input_padding_value(void) const;
-
-    void _pack_params(
-      void *buffer,
-      const void *weights,
-      unsigned int weight_row_stride,
-      unsigned int weight_col_stride,
-      const void *biases=nullptr
-    ) const;
-
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const uint8_t* inptr,
-      unsigned int in_row_stride,
-      unsigned int in_col_stride,
-      uint8_t* outptr,
-      unsigned int out_row_stride,
-      unsigned int out_col_stride
-    );
-
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
-      uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
-    );
-
-  private:
-    // Quantization parameters
-    const qasymm8::QAsymm8Params _weights_quant, _inputs_quant, _output_quant;
-    const qasymm8::QAsymm8RescaleParams rescale_parameters;
-};
-
-template <
-  unsigned int OutputTileRows, unsigned int OutputTileCols,
-  unsigned int KernelRows, unsigned int KernelCols,
-  unsigned int StrideRows, unsigned int StrideCols
->
-class QSymm8HybridPerChannelDepthwiseConvolution : public DepthwiseConvolutionBase<
-  OutputTileRows, OutputTileCols,
-  KernelRows, KernelCols,
-  StrideRows, StrideCols,
-  uint8_t, int32_t, uint8_t,
-  QSymm8HybridPerChannelDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
->
-{
-  using Base = DepthwiseConvolutionBase<
-    OutputTileRows, OutputTileCols,
-    KernelRows, KernelCols,
-    StrideRows, StrideCols,
-    uint8_t, int32_t, uint8_t,
-    QSymm8HybridPerChannelDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols>
-  >;
-  friend Base;
-  using InputType = typename Base::InputType;
-  using OutputType = typename Base::OutputType;
-
-  public:
-  QSymm8HybridPerChannelDepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      nck::ActivationFunction activation,
-      const qsymm8::QSymm8PerChannelParams& weight_quantisation,
-      const qasymm8::QAsymm8Params& input_quantisation,
-      const qasymm8::QAsymm8Params& output_quantisation,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-  QSymm8HybridPerChannelDepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      nck::ActivationFunction activation,
-      const qsymm8::QSymm8PerChannelParams& weight_quantisation,
-      const qasymm8::QAsymm8Params& input_quantisation,
-      const qasymm8::QAsymm8Params& output_quantisation,
-      const qsymm8::QSymm8PerChannelRescaleParams& rescale_parameters,
-      unsigned int padding_top,
-      unsigned int padding_left,
-      unsigned int padding_bottom,
-      unsigned int padding_right
-    );
-
-  size_t get_packed_params_size(void) const override
-  {
-      return this->n_channels() * (sizeof(int8_t)*KernelRows*KernelCols + 3*sizeof(int32_t));
-
-  }
-
-  protected:
-    uint8_t _input_padding_value(void) const;
-
-    void _pack_params(
-      void *buffer,
-      const void *weights,
-      unsigned int weight_row_stride,
-      unsigned int weight_col_stride,
-      const void *biases=nullptr
-    ) const;
-
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const uint8_t* inptr,
-      unsigned int in_row_stride,
-      unsigned int in_col_stride,
-      uint8_t* outptr,
-      unsigned int out_row_stride,
-      unsigned int out_col_stride
-    );
-
-    template <nck::ActivationFunction Activation>
-    void execute_tile(
-      int n_channels,
-      const void* packed_params,
-      const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
-      uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
-    );
-
-  private:
-    // Quantization parameters
-    const qsymm8::QSymm8PerChannelParams _weights_quant;
-    const qasymm8::QAsymm8Params _input_quant, _output_quant;
-    const qsymm8::QSymm8PerChannelRescaleParams _rescale_parameters;
-};
-
-}  // namespace depthwise
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp
deleted file mode 100644
index a11b0981c9..0000000000
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include "depthwise_dilated.hpp"
-#include "depthwise_quantized.hpp"
-
-namespace depthwise {
-
-template <unsigned int OutputTileRows, unsigned int OutputTileCols,
-          unsigned int KernelRows, unsigned int KernelCols,
-          unsigned int StrideRows, unsigned int StrideCols>
-class QAsymm8DilatedDepthwiseConvolution
-    : public DilatedDepthwiseConvolution<
-          OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows,
-          StrideCols, uint8_t, int32_t, uint8_t> {
-public:
-  /** Create a new dilated depthwise convolution engine.
-   */
-  QAsymm8DilatedDepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int dilation_factor, nck::ActivationFunction activation,
-      const qasymm8::QAsymm8Params &weight_quantisation,
-      const qasymm8::QAsymm8Params &input_quantisation,
-      const qasymm8::QAsymm8Params &output_quantisation,
-      unsigned int padding_top, unsigned int padding_left,
-      unsigned int padding_bottom, unsigned int padding_right);
-
-  /** Create a new dilated depthwise convolution engine.
-   */
-  QAsymm8DilatedDepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int dilation_factor, int n_output_rows, int n_output_cols,
-      nck::ActivationFunction activation,
-      const qasymm8::QAsymm8Params &weight_quantisation,
-      const qasymm8::QAsymm8Params &input_quantisation,
-      const qasymm8::QAsymm8Params &output_quantisation,
-      unsigned int padding_top, unsigned int padding_left,
-      unsigned int padding_bottom, unsigned int padding_right);
-
-  /** Create a new dilated depthwise convolution engine.
-   */
-  QAsymm8DilatedDepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int dilation_factor, nck::ActivationFunction activation,
-      const qasymm8::QAsymm8Params &weight_quantisation,
-      const qasymm8::QAsymm8Params &input_quantisation,
-      const qasymm8::QAsymm8Params &output_quantisation,
-      const qasymm8::QAsymm8RescaleParams &rescale_parameters,
-      unsigned int padding_top, unsigned int padding_left,
-      unsigned int padding_bottom, unsigned int padding_right);
-
-  /** Create a new dilated depthwise convolution engine.
-   */
-  QAsymm8DilatedDepthwiseConvolution(
-      int n_batches, int n_input_rows, int n_input_cols, int n_channels,
-      int dilation_factor, int n_output_rows, int n_output_cols,
-      nck::ActivationFunction activation,
-      const qasymm8::QAsymm8Params &weight_quantisation,
-      const qasymm8::QAsymm8Params &input_quantisation,
-      const qasymm8::QAsymm8Params &output_quantisation,
-      const qasymm8::QAsymm8RescaleParams& rescale_parameters,
-      unsigned int padding_top, unsigned int padding_left,
-      unsigned int padding_bottom, unsigned int padding_right);
-};
-
-}  // namespace depthwise
diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h
index f604f55924..6654ccf00a 100644
--- a/arm_compute/core/SubTensorInfo.h
+++ b/arm_compute/core/SubTensorInfo.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,7 +31,6 @@
 #include "arm_compute/core/Strides.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Validate.h"
 
 #include <cstddef>
 #include <memory>
diff --git a/arm_compute/core/utils/helpers/bit_ops.h b/arm_compute/core/utils/helpers/bit_ops.h
deleted file mode 100644
index eee360c9e3..0000000000
--- a/arm_compute/core/utils/helpers/bit_ops.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H
-#define ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H
-
-#include "arm_compute/core/utils/misc/Requires.h"
-
-#include <type_traits>
-
-namespace arm_compute
-{
-namespace helpers
-{
-namespace bit_ops
-{
-/** Checks if the idx-th bit is set in an integral type
- *
- * @param[in] v   Integral input
- * @param[in] idx Index of the bit to check
- *
- * @return True if the idx-th bit is set else false
- */
-template <typename T, REQUIRES_TA(std::is_integral<T>::value)>
-bool is_bit_set(T v, unsigned int idx)
-{
-    return (v & 1 << idx) != 0;
-}
-} // namespace bit_ops
-} // namespace helpers
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H */
diff --git a/arm_compute/core/utils/helpers/fft.h b/arm_compute/core/utils/helpers/fft.h
deleted file mode 100644
index 7d111b764b..0000000000
--- a/arm_compute/core/utils/helpers/fft.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_HELPERS_FFT_H
-#define ARM_COMPUTE_UTILS_HELPERS_FFT_H
-
-#include <set>
-#include <vector>
-
-namespace arm_compute
-{
-namespace helpers
-{
-namespace fft
-{
-/** Decompose a given 1D input size using the provided supported factors.
- *
- * @param[in] N                 Input size to be decomposed.
- * @param[in] supported_factors Supported factors that can be used for decomposition.
- *
- * @return A vector with the stages of the decomposition. Will be empty if decomposition failed.
- */
-std::vector<unsigned int> decompose_stages(unsigned int N, const std::set<unsigned int> &supported_factors);
-/** Calculate digit reverse index vector given fft size and the decomposed stages
- *
- * @param N          Input size to calculate digit reverse for
- * @param fft_stages A vector with the FFT decomposed stages
- *
- * @return A vector with the digit reverse indices. Will be empty if it failed.
- */
-std::vector<unsigned int> digit_reverse_indices(unsigned int N, const std::vector<unsigned int> &fft_stages);
-} // namespace fft
-} // namespace helpers
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_HELPERS_FFT_H */
diff --git a/arm_compute/core/utils/helpers/float_ops.h b/arm_compute/core/utils/helpers/float_ops.h
deleted file mode 100644
index 1a08fc76b4..0000000000
--- a/arm_compute/core/utils/helpers/float_ops.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H
-#define ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H
-
-namespace arm_compute
-{
-namespace helpers
-{
-namespace float_ops
-{
-union RawFloat
-{
-    /** Constructor
-     *
-     * @param[in] val Floating-point value
-     */
-    explicit RawFloat(float val)
-        : f32(val)
-    {
-    }
-    /** Extract sign of floating point number
-     *
-     * @return Sign of floating point number
-     */
-    int32_t sign() const
-    {
-        return i32 >> 31;
-    }
-    /** Extract exponent of floating point number
-     *
-     * @return Exponent of floating point number
-     */
-    int32_t exponent() const
-    {
-        return (i32 >> 23) & 0xFF;
-    }
-    /** Extract mantissa of floating point number
-     *
-     * @return Mantissa of floating point number
-     */
-    int32_t mantissa() const
-    {
-        return i32 & 0x007FFFFF;
-    }
-
-    int32_t i32;
-    float   f32;
-};
-
-/** Checks if two floating point numbers are equal given an allowed number of ULPs
- *
- * @param[in] a                First number to compare
- * @param[in] b                Second number to compare
- * @param[in] max_allowed_ulps (Optional) Number of allowed ULPs
- *
- * @return True if number is close else false
- */
-inline bool is_equal_ulps(float a, float b, int max_allowed_ulps = 0)
-{
-    RawFloat ra(a);
-    RawFloat rb(b);
-
-    // Check ULP distance
-    const int ulps = std::abs(ra.i32 - rb.i32);
-    return ulps <= max_allowed_ulps;
-}
-
-/** Checks if the input floating point number is 1.0f checking if the difference is within a range defined with epsilon
- *
- * @param[in] a       Input floating point number
- * @param[in] epsilon (Optional) Epsilon used to define the error bounds
- *
- * @return True if number is close to 1.0f
- */
-inline bool is_one(float a, float epsilon = 0.00001f)
-{
-    return std::abs(1.0f - a) <= epsilon;
-}
-
-/** Checks if the input floating point number is 0.0f checking if the difference is within a range defined with epsilon
- *
- * @param[in] a       Input floating point number
- * @param[in] epsilon (Optional) Epsilon used to define the error bounds
- *
- * @return True if number is close to 0.0f
- */
-inline bool is_zero(float a, float epsilon = 0.00001f)
-{
-    return std::abs(0.0f - a) <= epsilon;
-}
-} // namespace float_ops
-} // namespace helpers
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H */
diff --git a/arm_compute/core/utils/helpers/tensor_info.h b/arm_compute/core/utils/helpers/tensor_info.h
deleted file mode 100644
index 443234064a..0000000000
--- a/arm_compute/core/utils/helpers/tensor_info.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H
-#define ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H
-
-#include "arm_compute/core/ITensorInfo.h"
-
-namespace arm_compute
-{
-namespace helpers
-{
-namespace tensor_info
-{
-/** Checks if the quantization info of given tensors are different
- *
- * @param tensor_info_1 Tensor info of the first tensor
- * @param tensor_info_2 Tensor info of the second tensor
- * @param tensor_infos  Tensor infos of the rest tensors
- *
- * @return True if tensors have mismatching quantization info else false.
- */
-template <typename... Ts>
-inline bool tensors_have_different_quantization_info(const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
-{
-    const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info();
-
-    const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } };
-    return std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
-    {
-        return tensor_info->quantization_info() != first_quantization_info;
-    });
-}
-} // namespace tensor_info
-} // namespace helpers
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H */
diff --git a/arm_compute/core/utils/math/SafeOps.h b/arm_compute/core/utils/math/SafeOps.h
index 4f81cf4b44..c222c65e84 100644
--- a/arm_compute/core/utils/math/SafeOps.h
+++ b/arm_compute/core/utils/math/SafeOps.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,7 +25,9 @@
 #define ARM_COMPUTE_UTILS_MATH_SAFE_OPS
 
 #include "arm_compute/core/Error.h"
-#include "arm_compute/core/utils/misc/Requires.h"
+#include "support/Requires.h"
+
+#include <limits>
 
 namespace arm_compute
 {
diff --git a/arm_compute/core/utils/misc/CRTP.h b/arm_compute/core/utils/misc/CRTP.h
deleted file mode 100644
index d295500bef..0000000000
--- a/arm_compute/core/utils/misc/CRTP.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MISC_CRTP_H
-#define ARM_COMPUTE_MISC_CRTP_H
-
-namespace arm_compute
-{
-namespace misc
-{
-/** Curiously recurring template pattern Interface */
-template <typename T, template <typename> class Type>
-struct CRTP
-{
-public:
-    /** Exact type */
-    using ExactType = T;
-
-protected:
-    const T &impl() const
-    {
-        return static_cast<const T &>(*this);
-    }
-    T &impl()
-    {
-        return static_cast<T &>(*this);
-    }
-
-private:
-    CRTP() = default;
-    friend Type<T>;
-};
-} // namespace misc
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_CRTP_H */
diff --git a/arm_compute/core/utils/misc/Cast.h b/arm_compute/core/utils/misc/Cast.h
deleted file mode 100644
index 57c7e49942..0000000000
--- a/arm_compute/core/utils/misc/Cast.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MISC_CAST_H
-#define ARM_COMPUTE_MISC_CAST_H
-
-#include "arm_compute/core/Error.h"
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace cast
-{
-/** Polymorphic cast between two types
- *
- * @warning Will throw an exception if cast cannot take place
- *
- * @tparam Target Target to cast type
- * @tparam Source Source from cast type
- *
- * @param[in] v Value to cast
- *
- * @return The casted value
- */
-template <typename Target, typename Source>
-inline Target polymorphic_cast(Source *v)
-{
-    if(dynamic_cast<Target>(v) == nullptr)
-    {
-        ARM_COMPUTE_THROW(std::bad_cast());
-    }
-    return static_cast<Target>(v);
-}
-
-/** Polymorphic down cast between two types
- *
- * @warning Will assert if cannot take place
- *
- * @tparam Target Target to cast type
- * @tparam Source Source from cast type
- *
- * @param[in] v Value to cast
- *
- * @return The casted value
- */
-template <typename Target, typename Source>
-inline Target polymorphic_downcast(Source *v)
-{
-    ARM_COMPUTE_ERROR_ON(dynamic_cast<Target>(v) != static_cast<Target>(v));
-    return static_cast<Target>(v);
-}
-
-/** Polymorphic cast between two unique pointer types
- *
- * @warning Will throw an exception if cast cannot take place
- *
- * @tparam Target  Target to cast type
- * @tparam Source  Source from cast type
- * @tparam Deleter Deleter function type
- *
- * @param[in] v Value to cast
- *
- * @return The casted value
- */
-template <typename Target, typename Source, typename Deleter>
-std::unique_ptr<Target, Deleter> polymorphic_cast_unique_ptr(std::unique_ptr<Source, Deleter> &&v)
-{
-    if(dynamic_cast<Target *>(v.get()) == nullptr)
-    {
-        ARM_COMPUTE_THROW(std::bad_cast());
-    }
-    auto r = static_cast<Target *>(v.release());
-    return std::unique_ptr<Target, Deleter>(r, std::move(v.get_deleter()));
-}
-
-/** Polymorphic down cast between two unique pointer types
- *
- * @warning Will assert if cannot take place
- *
- * @tparam Target  Target to cast type
- * @tparam Source  Source from cast type
- * @tparam Deleter Deleter function type
- *
- * @param[in] v Value to cast
- *
- * @return The casted value
- */
-template <typename Target, typename Source, typename Deleter>
-std::unique_ptr<Target, Deleter> polymorphic_downcast_unique_ptr(std::unique_ptr<Source, Deleter> &&v)
-{
-    ARM_COMPUTE_ERROR_ON(dynamic_cast<Target *>(v.get()) != static_cast<Target *>(v.get()));
-    auto r = static_cast<Target *>(v.release());
-    return std::unique_ptr<Target, Deleter>(r, std::move(v.get_deleter()));
-}
-} // namespace cast
-} // namespace utils
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_CAST_H */
diff --git a/arm_compute/core/utils/misc/ICloneable.h b/arm_compute/core/utils/misc/ICloneable.h
deleted file mode 100644
index cbb0b3c149..0000000000
--- a/arm_compute/core/utils/misc/ICloneable.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MISC_ICLONEABLE_H
-#define ARM_COMPUTE_MISC_ICLONEABLE_H
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace misc
-{
-/** Cloneable interface
- *
- * @tparam T Type of the object handed back by clone()
- */
-template <typename T>
-class ICloneable
-{
-public:
-    /** Default virtual destructor */
-    virtual ~ICloneable() = default;
-    /** Provide a clone of the current object
-     *
-     * @return Owning pointer (std::unique_ptr) to the clone, typed as T
-     */
-    virtual std::unique_ptr<T> clone() const = 0;
-};
-} // namespace misc
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_ICLONEABLE_H */
diff --git a/arm_compute/core/utils/misc/Iterable.h b/arm_compute/core/utils/misc/Iterable.h
deleted file mode 100644
index 34232088e8..0000000000
--- a/arm_compute/core/utils/misc/Iterable.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MISC_ITERABLE_H
-#define ARM_COMPUTE_MISC_ITERABLE_H
-
-#include <iterator>
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace iterable
-{
-/** View exposing an object in reverse iteration order
- *
- * Only a reference to the wrapped object is stored, so that object has to outlive the view.
- *
- * @tparam T Type to create a reverse range on. It has to provide rbegin()/rend() together with
- *           the reverse_iterator and const_reverse_iterator member types.
- */
-template <typename T>
-class reverse_iterable
-{
-public:
-    /** Constructor
-     *
-     * @param[in] it Object to iterate on in reverse order
-     */
-    explicit reverse_iterable(T &it)
-        : _container(it)
-    {
-    }
-
-    /** Start of the reversed sequence.
-     *
-     * @return rbegin() of the wrapped object.
-     */
-    typename T::reverse_iterator begin()
-    {
-        return _container.rbegin();
-    }
-
-    /** End of the reversed sequence.
-     *
-     * @return rend() of the wrapped object.
-     */
-    typename T::reverse_iterator end()
-    {
-        return _container.rend();
-    }
-
-    /** Start of the reversed sequence as a const iterator.
-     *
-     * @return rbegin() of the wrapped object, converted to its const_reverse_iterator type.
-     */
-    typename T::const_reverse_iterator cbegin()
-    {
-        return _container.rbegin();
-    }
-
-    /** End of the reversed sequence as a const iterator.
-     *
-     * @return rend() of the wrapped object, converted to its const_reverse_iterator type.
-     */
-    typename T::const_reverse_iterator cend()
-    {
-        return _container.rend();
-    }
-
-private:
-    T &_container; /**< Wrapped object, held by reference */
-};
-
-/** Wrap an object into a reverse_iterable, e.g. to walk it backwards in a range-based for loop
- *
- * @tparam T Type to create a reverse iterable on
- *
- * @param[in] val Iterable input; it is referenced by the result, not copied
- *
- * @return Reverse iterable referring to @p val
- */
-template <typename T>
-reverse_iterable<T> reverse_iterate(T &val)
-{
-    reverse_iterable<T> reversed(val);
-    return reversed;
-}
-} // namespace iterable
-} // namespace utils
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_ITERABLE_H */
diff --git a/arm_compute/core/utils/misc/Random.h b/arm_compute/core/utils/misc/Random.h
deleted file mode 100644
index 6832c495e3..0000000000
--- a/arm_compute/core/utils/misc/Random.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_MISC_RANDOM_H
-#define ARM_COMPUTE_MISC_RANDOM_H
-
-#include "arm_compute/core/Error.h"
-
-#include <random>
-#include <type_traits>
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace random
-{
-/** Uniform distribution within a given number of sub-ranges
- *
- * The interval between the lowest and highest value is cut into the sub-ranges that remain once
- * the excluded ranges are removed. Every draw first selects one of those sub-ranges with equal
- * probability (independently of its width) and then samples uniformly inside it.
- *
- * @tparam T Distribution primitive type
- */
-template <typename T>
-class RangedUniformDistribution
-{
-public:
-    /** Underlying distribution: std::uniform_int_distribution<T> for integral types,
-     *  std::uniform_real_distribution<float> for every other type */
-    using DT = typename std::conditional<std::is_integral<T>::value,
-          std::uniform_int_distribution<T>,
-          std::uniform_real_distribution<float>>::type;
-    using result_type = T;
-    using range_pair  = std::pair<result_type, result_type>;
-
-public:
-    /** Constructor
-     *
-     * @param[in] low            lowest value in the range (inclusive)
-     * @param[in] high           highest value in the range (inclusive for uniform_int_distribution, exclusive for uniform_real_distribution)
-     * @param[in] exclude_ranges Ranges to exclude from the generator, expected in ascending, non-overlapping order.
-     *                           A range starting exactly at the current lower bound is accepted and simply contributes no sub-range.
-     */
-    RangedUniformDistribution(result_type low, result_type high, const std::vector<range_pair> &exclude_ranges)
-        : _distributions(), _selector()
-    {
-        // Step used to move just outside of an excluded range; identical for every range
-        const result_type epsilon = std::is_integral<result_type>::value ? 1 : static_cast<result_type>(std::numeric_limits<float>::epsilon());
-
-        result_type clow = low;
-        for(const auto &erange : exclude_ranges)
-        {
-            ARM_COMPUTE_ERROR_ON(clow > erange.first || clow >= erange.second);
-
-            // Nothing is left in front of an excluded range that starts at the current lower bound.
-            // Building DT(clow, erange.first - epsilon) there would break the distribution's a <= b precondition.
-            if(clow < erange.first)
-            {
-                _distributions.emplace_back(DT(clow, erange.first - epsilon));
-            }
-            clow = erange.second + epsilon;
-        }
-        ARM_COMPUTE_ERROR_ON(clow > high);
-        _distributions.emplace_back(DT(clow, high));
-        _selector = std::uniform_int_distribution<uint32_t>(0, _distributions.size() - 1);
-    }
-    /** Generate random number
-     *
-     * @tparam URNG Random number generator object type
-     *
-     * @param[in] g A uniform random number generator object, used as the source of randomness.
-     *
-     * @return A new random number.
-     */
-    template <class URNG>
-    result_type operator()(URNG &g)
-    {
-        // Pick the sub-range first, then sample inside it
-        unsigned int rand_select = _selector(g);
-        return _distributions[rand_select](g);
-    }
-
-private:
-    std::vector<DT>                         _distributions; /**< One distribution per remaining sub-range */
-    std::uniform_int_distribution<uint32_t> _selector;      /**< Chooses the index of the sub-range to sample from */
-};
-} // namespace random
-} // namespace utils
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_RANDOM_H */
diff --git a/arm_compute/core/utils/misc/Requires.h b/arm_compute/core/utils/misc/Requires.h
deleted file mode 100644
index ba91039596..0000000000
--- a/arm_compute/core/utils/misc/Requires.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_REQUIRES_H
-#define ARM_COMPUTE_UTILS_REQUIRES_H
-
-namespace arm_compute
-{
-namespace utils
-{
-// NOTE(review): `requires` is a reserved keyword from C++20 onwards, so this namespace name only compiles with earlier language standards
-namespace requires
-{
-// *INDENT-OFF*
-// clang-format off
-namespace detail
-{
-/** Empty enumeration used as the type produced by the std::enable_if inside REQUIRES_TA */
-enum class enabler
-{
-};
-} // namespace detail
-
-// NOTE(review): both macros use std::enable_if from <type_traits>, which this header does not include itself
-/** Requirements as template
- *
- * Expands to a complete template header: a bool parameter defaulted to the given condition,
- * followed by an unnamed non-type parameter whose type only exists when that condition is true.
- */
-#define REQUIRES_T(...) template <bool Cond = (__VA_ARGS__), typename std::enable_if<Cond, int>::type = 0>
-/** Requirements as template argument
- *
- * Expands to an unnamed, defaulted type template parameter meant to be placed inside an existing
- * template parameter list; its default type only exists when the given condition is true.
- */
-#define REQUIRES_TA(...) typename = typename std::enable_if<(__VA_ARGS__), arm_compute::utils::requires::detail::enabler>::type
-// clang-format on
-// *INDENT-ON*
-} // namespace requires
-} // namespace utils
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_UTILS_REQUIRES_H */
diff --git a/arm_compute/core/utils/misc/Rounding.h b/arm_compute/core/utils/misc/Rounding.h
deleted file mode 100644
index 1ed4e64886..0000000000
--- a/arm_compute/core/utils/misc/Rounding.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_ROUNDING_H
-#define ARM_COMPUTE_UTILS_ROUNDING_H
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/utils/misc/Requires.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "support/ToolchainSupport.h"
-
-#include <cmath>
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace rounding
-{
-/** Rounding mode selecting how a floating-point value is turned into an integral one */
-enum class RoundingMode
-{
-    TO_ZERO,             /**< Round towards zero */
-    AWAY_FROM_ZERO,      /**< Round away from zero */
-    HALF_TO_ZERO,        /**< Round to nearest, half-way cases towards zero */
-    HALF_AWAY_FROM_ZERO, /**< Round to nearest, half-way cases away from zero */
-    HALF_UP,             /**< Round to nearest, half-way cases towards positive infinity */
-    HALF_DOWN,           /**< Round to nearest, half-way cases towards negative infinity */
-    HALF_EVEN            /**< Round to nearest, half-way cases towards the nearest even value */
-};
-
-/** Round a floating-point value towards zero
- *
- * The magnitude is rounded down and the sign of negative inputs is applied afterwards.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_to_zero(T value)
-{
-    const T magnitude = std::floor(std::fabs(value));
-    if(value < 0.f)
-    {
-        return -magnitude;
-    }
-    return magnitude;
-}
-
-/** Round a floating-point value away from zero
- *
- * The magnitude is rounded up and the sign of negative inputs is applied afterwards.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_away_from_zero(T value)
-{
-    const T magnitude = std::ceil(std::fabs(value));
-    if(value < 0.f)
-    {
-        return -magnitude;
-    }
-    return magnitude;
-}
-
-/** Round a floating-point value to the nearest integer, half-way cases going towards zero
- *
- * Works on the magnitude (ceil(|value| - 0.5)) and applies the sign of negative inputs afterwards.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_to_zero(T value)
-{
-    const T magnitude = T(std::ceil(std::fabs(value) - 0.5f));
-    if(value < 0.f)
-    {
-        return -magnitude;
-    }
-    return magnitude;
-}
-
-/** Round a floating-point value to the nearest integer, half-way cases going away from zero
- *
- * Works on the magnitude (floor(|value| + 0.5)) and applies the sign of negative inputs afterwards.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_away_from_zero(T value)
-{
-    const T magnitude = T(std::floor(std::fabs(value) + 0.5f));
-    if(value < 0.f)
-    {
-        return -magnitude;
-    }
-    return magnitude;
-}
-
-/** Round a floating-point value to the nearest integer, half-way cases going to positive infinity
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value, computed as floor(value + 0.5).
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_up(T value)
-{
-    const T rounded = std::floor(value + 0.5f);
-    return rounded;
-}
-
-/** Round a floating-point value to the nearest integer, half-way cases going to negative infinity
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value floating-point value to be rounded.
- *
- * @return Floating-point value of rounded @p value, computed as ceil(value - 0.5).
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_down(T value)
-{
-    const T rounded = std::ceil(value - 0.5f);
-    return rounded;
-}
-
-/** Round a floating-point value to the nearest integer, half-way cases going to the nearest even integer
- *
- * The rounding is carried out on the magnitude and the sign of @p value is copied onto the result.
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value   floating-point value to be rounded.
- * @param[in] epsilon tolerance used both to detect a half-way value and to test the integral part for evenness.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round_half_even(T value, T epsilon = std::numeric_limits<T>::epsilon())
-{
-    T magnitude = std::abs(value);
-    T whole     = 0;
-    std::modf(magnitude, &whole);
-
-    // Anything that is not half-way between two integers uses the usual round to closest
-    const bool is_halfway = std::abs(magnitude - (whole + 0.5f)) < epsilon;
-    if(!is_halfway)
-    {
-        return support::cpp11::copysign(support::cpp11::round(magnitude), value);
-    }
-
-    // Half-way: keep the integral part when it is even ...
-    const bool whole_is_even = std::fmod(whole, 2.f) < epsilon;
-    if(whole_is_even)
-    {
-        return support::cpp11::copysign(whole, value);
-    }
-    // ... otherwise move up to the next integer, which is the even one
-    return support::cpp11::copysign(std::ceil(whole + 0.5f), value);
-}
-
-/** Round a floating-point value with the requested rounding mode
- *
- * Forwards to the rounding helper matching @p rounding_mode; RoundingMode::HALF_EVEN uses the
- * default epsilon of round_half_even().
- *
- * @tparam T Parameter type. Should be of floating point type.
- *
- * @param[in] value         floating-point value to be rounded.
- * @param[in] rounding_mode Rounding mode to use.
- *
- * @return Floating-point value of rounded @p value.
- */
-template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
-inline T round(T value, RoundingMode rounding_mode)
-{
-    if(rounding_mode == RoundingMode::TO_ZERO)
-    {
-        return round_to_zero(value);
-    }
-    if(rounding_mode == RoundingMode::AWAY_FROM_ZERO)
-    {
-        return round_away_from_zero(value);
-    }
-    if(rounding_mode == RoundingMode::HALF_TO_ZERO)
-    {
-        return round_half_to_zero(value);
-    }
-    if(rounding_mode == RoundingMode::HALF_AWAY_FROM_ZERO)
-    {
-        return round_half_away_from_zero(value);
-    }
-    if(rounding_mode == RoundingMode::HALF_UP)
-    {
-        return round_half_up(value);
-    }
-    if(rounding_mode == RoundingMode::HALF_DOWN)
-    {
-        return round_half_down(value);
-    }
-    if(rounding_mode == RoundingMode::HALF_EVEN)
-    {
-        return round_half_even(value);
-    }
-    // Value outside of the enumerators handled above
-    ARM_COMPUTE_ERROR("Unsupported rounding mode!");
-}
-} // namespace rounding
-} // namespace utils
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_UTILS_ROUNDING_H */
diff --git a/arm_compute/core/utils/misc/SaturateCast.h b/arm_compute/core/utils/misc/SaturateCast.h
deleted file mode 100644
index cbced83f89..0000000000
--- a/arm_compute/core/utils/misc/SaturateCast.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H
-#define ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H
-
-#include "arm_compute/core/utils/misc/Rounding.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "arm_compute/core/utils/misc/Utility.h"
-
-namespace arm_compute
-{
-namespace utils
-{
-namespace cast
-{
-// *INDENT-OFF*
-// clang-format off
-// same type
-// Selected when T and U are identical: there is nothing to saturate, the value is handed back as is
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_same<T, U>::value, int >::type = 0 >
-T saturate_cast(U v)
-{
-    return v;
-}
-
-// signed -> signed widening/same_width
-// T is at least as wide as U, so every value of U fits and a plain conversion is enough
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 std::is_integral<U>::value &&
-                                 std::is_signed<U>::value &&
-                                 std::is_signed<T>::value &&
-                                 !std::is_same<T, U>::value &&
-                                 sizeof(T) >= sizeof(U),
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const T converted = static_cast<T>(v);
-    return converted;
-}
-// signed -> signed narrowing
-// The value is clamped to the range of T while still held as U, then converted
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 std::is_integral<U>::value &&
-                                 std::is_signed<U>::value &&
-                                 std::is_signed<T>::value &&
-                                 !std::is_same<T, U>::value &&
-                                 sizeof(T) < sizeof(U),
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const U lower = std::numeric_limits<T>::lowest();
-    const U upper = std::numeric_limits<T>::max();
-    return static_cast<T>(utility::clamp<U>(v, lower, upper));
-}
-
-// unsigned -> signed widening
-// T is strictly wider than U, so every value of U fits and a plain conversion is enough
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 std::is_integral<U>::value &&
-                                 std::is_unsigned<U>::value &&
-                                 std::is_signed<T>::value &&
-                                 !std::is_same<T, U>::value &&
-                                 (sizeof(T) > sizeof(U)),
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const T converted = static_cast<T>(v);
-    return converted;
-}
-// unsigned -> signed narrowing
-// An unsigned input can only exceed the upper limit of T, so only that bound is applied
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 std::is_integral<U>::value &&
-                                 std::is_unsigned<U>::value &&
-                                 std::is_signed<T>::value &&
-                                 !std::is_same<T, U>::value &&
-                                 sizeof(T) < sizeof(U),
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const U upper = static_cast<U>(std::numeric_limits<T>::max());
-    return static_cast<T>(std::min<U>(v, upper));
-}
-// unsigned -> signed same_width
-// An unsigned input can only exceed the upper limit of T, so only that bound is applied
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 std::is_integral<U>::value &&
-                                 std::is_unsigned<U>::value &&
-                                 std::is_signed<T>::value &&
-                                 !std::is_same<T, U>::value &&
-                                 sizeof(T) == sizeof(U),
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const U upper = static_cast<U>(std::numeric_limits<T>::max());
-    return static_cast<T>(std::min<U>(v, upper));
-}
-
-// signed -> unsigned widening/same width
-// Negative inputs saturate to zero; the upper end of U always fits into T
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 std::is_integral<U>::value &&
-                                 std::is_signed<U>::value &&
-                                 std::is_unsigned<T>::value &&
-                                 !std::is_same<T, U>::value &&
-                                 sizeof(T) >= sizeof(U),
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const U zero = 0;
-    return static_cast<T>(std::max<U>(zero, v));
-}
-
-// signed -> unsigned narrowing
-// The value is clamped to [0, max of T] while still held as U, then converted
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 std::is_integral<U>::value &&
-                                 std::is_signed<U>::value &&
-                                 std::is_unsigned<T>::value &&
-                                 !std::is_same<T, U>::value &&
-                                 sizeof(T) < sizeof(U),
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const U lower = 0;
-    const U upper = std::numeric_limits<T>::max();
-    return static_cast<T>(utility::clamp<U>(v, lower, upper));
-}
-
-// unsigned -> unsigned widening/same width
-// T is at least as wide as U, so every value of U fits and a plain conversion is enough
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 std::is_integral<U>::value &&
-                                 std::is_unsigned<T>::value &&
-                                 std::is_unsigned<U>::value &&
-                                 !std::is_same<T, U>::value &&
-                                 sizeof(T) >= sizeof(U),
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const T converted = static_cast<T>(v);
-    return converted;
-}
-
-// unsigned -> unsigned narrowing
-// The value is clamped to the range of T while still held as U, then converted
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 std::is_integral<U>::value &&
-                                 std::is_unsigned<T>::value &&
-                                 std::is_unsigned<U>::value &&
-                                 !std::is_same<T, U>::value &&
-                                 sizeof(T) < sizeof(U),
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const U lower = std::numeric_limits<T>::lowest();
-    const U upper = std::numeric_limits<T>::max();
-    return static_cast<T>(utility::clamp<U>(v, lower, upper));
-}
-
-// float -> int
-// The input is rounded half away from zero and then saturated against the limits of T in the
-// floating-point domain. Going through an int32_t intermediate, as done previously, is undefined
-// for inputs outside of the int32_t range and clamps wider/unsigned 32-bit targets to that range.
-// NaN has no integral counterpart and is mapped to zero.
-template<typename T,
-         typename U,
-         typename std::enable_if<std::is_integral<T>::value &&
-                                 traits::is_floating_point<U>::value,
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const double rounded = static_cast<double>(utils::rounding::round_half_away_from_zero(v));
-
-    // NaN is the only value comparing unequal to itself
-    if(rounded != rounded)
-    {
-        return static_cast<T>(0);
-    }
-    // Limits are compared as doubles; '>=' and '<=' also cover limits that get rounded by the
-    // conversion to double (e.g. the maximum of a 64-bit type), keeping the final cast in range
-    if(rounded >= static_cast<double>(std::numeric_limits<T>::max()))
-    {
-        return std::numeric_limits<T>::max();
-    }
-    if(rounded <= static_cast<double>(std::numeric_limits<T>::lowest()))
-    {
-        return std::numeric_limits<T>::lowest();
-    }
-    return static_cast<T>(rounded);
-}
-
-// int -> float
-// Plain conversion to the floating-point target; no clamping is applied
-template<typename T,
-         typename U,
-         typename std::enable_if<traits::is_floating_point<T>::value &&
-                                 std::is_integral<U>::value,
-                  int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const T converted = static_cast<T>(v);
-    return converted;
-}
-
-// float -> float
-// Plain conversion between floating-point types; no clamping is applied
-template<typename T,
-        typename U,
-        typename std::enable_if<traits::is_floating_point<T>::value &&
-                                traits::is_floating_point<U>::value,
-                int >::type = 0 >
-inline T saturate_cast(U v)
-{
-    const T converted = static_cast<T>(v);
-    return converted;
-}
-// clang-format on
-// *INDENT-ON*
-} // namespace cast
-} // namespace utils
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H */
-- 
cgit v1.2.1