14 files changed, 0 insertions, 1294 deletions
diff --git a/src/backends/aclCommon/ArmComputeTensorHandle.hpp b/src/backends/aclCommon/ArmComputeTensorHandle.hpp
deleted file mode 100644
index ae7df2ec95..0000000000
--- a/src/backends/aclCommon/ArmComputeTensorHandle.hpp
+++ /dev/null
@@ -1,25 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <armnn/backends/ITensorHandle.hpp>
-
-#include <arm_compute/runtime/IMemoryGroup.h>
-#include <arm_compute/runtime/Tensor.h>
-
-namespace armnn
-{
-
-class IAclTensorHandle : public ITensorHandle
-{
-public:
-    virtual arm_compute::ITensor& GetTensor() = 0;
-    virtual arm_compute::ITensor const& GetTensor() const = 0;
-    virtual arm_compute::DataType GetDataType() const = 0;
-    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) = 0;
-};
-
-} //namespace armnn
-\ No newline at end of file
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
deleted file mode 100644
index 49fef5bf17..0000000000
--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+++ /dev/null
@@ -1,247 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#include <aclCommon/ArmComputeTensorUtils.hpp>
-#include <aclCommon/ArmComputeUtils.hpp>
-
-#include "armnn/Exceptions.hpp"
-#include <armnn/Descriptors.hpp>
-
-namespace armnn
-{
-namespace armcomputetensorutils
-{
-
-arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType, bool multiScales)
-{
-    switch(dataType)
-    {
-        case armnn::DataType::Boolean:
-            return arm_compute::DataType::U8;
-        case armnn::DataType::Float16:
-            return arm_compute::DataType::F16;
-        case armnn::DataType::Float32:
-            return arm_compute::DataType::F32;
-        case armnn::DataType::QAsymmS8:
-            return arm_compute::DataType::QASYMM8_SIGNED;
-        case armnn::DataType::QAsymmU8:
-            return arm_compute::DataType::QASYMM8;
-        case armnn::DataType::QSymmS16:
-            return arm_compute::DataType::QSYMM16;
-        case armnn::DataType::QSymmS8:
-        {
-            return multiScales ? arm_compute::DataType::QSYMM8_PER_CHANNEL : arm_compute::DataType::QSYMM8;
-        }
-        ARMNN_NO_DEPRECATE_WARN_BEGIN
-        case armnn::DataType::QuantizedSymm8PerAxis:
-            return arm_compute::DataType::QSYMM8_PER_CHANNEL;
-        ARMNN_NO_DEPRECATE_WARN_END
-        case armnn::DataType::Signed32:
-            return arm_compute::DataType::S32;
-        default:
-            BOOST_ASSERT_MSG(false, "Unknown data type");
-            return arm_compute::DataType::UNKNOWN;
-    }
-}
-
-arm_compute::Coordinates BuildArmComputeReductionCoordinates(size_t inputDimensions,
-                                                             unsigned int originalInputRank,
-                                                             const std::vector<unsigned int>& armnnAxes)
-{
-    arm_compute::Coordinates outAclCoords;
-
-    if (armnnAxes.empty())
-    {
-        // If no reduction axes were provided, then the input must be reduced along all dimensions.
-        // Since Compute Library does not accept an empty vector as the reduction dimensions, we then
-        // manually create a vector including all the input dimensions (in reversed order) as:
-        //
-        // { inputDimensions - 1, inputDimensions - 2, ..., 1, 0 }
-        //
-        outAclCoords.set_num_dimensions(inputDimensions);
-        std::generate(outAclCoords.begin(), outAclCoords.end(), [d = inputDimensions - 1] () mutable { return d--; });
-    }
-    else
-    {
-        // Create a vector of reduction dimensions (in reversed order) with the given reduction axes.
-        //
-        // Adjust the given reduction axes according to the original rank of the input tensor (before ACL applied any
-        // dimension correction).
-        // For example, if the input tensor originally had 4 dimensions, and one of the reduction axes was 2, then the
-        // new value for that reduction axis should be 1.
-        //
-        // Example:
-        // ArmNN input shape = { 1, 1, 3, 2 } -> ACL input shape = { 2, 3 }
-        // ArmNN reduction axis = { 2 }       -> ACL reduction axis = { 1 }
-        // ArmNN reduction axis = { 3 }       -> ACL reduction axis = { 0 }
-        //
-        // The transformation: ACL reduction axis index = original rank - ArmNN reduction axis index - 1
-        //
-        outAclCoords.set_num_dimensions(armnnAxes.size());
-        std::transform(armnnAxes.begin(), armnnAxes.end(),
-                       outAclCoords.begin(),
-                       [originalInputRank](unsigned int i){ return originalInputRank - i - 1; });
-    }
-
-    return outAclCoords;
-}
-
-arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape)
-{
-    arm_compute::TensorShape shape;
-
-    // armnn tensors are (batch, channels, height, width).
-    // arm_compute tensors are (width, height, channels, batch).
-    for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); i++)
-    {
-        // Note that our dimensions are stored in the opposite order to ACL's.
-        shape.set(tensorShape.GetNumDimensions() - i - 1, tensorShape[i], false);
-
-        // TensorShape::set() flattens leading ones, so that batch size 1 cannot happen.
-        // arm_compute tensors expect this.
-    }
-
-    // prevent arm_compute issue where tensor is flattened to nothing
-    if (shape.num_dimensions() == 0)
-    {
-        shape.set_num_dimensions(1);
-    }
-
-    return shape;
-}
-
-// Utility function used to build a TensorInfo object, that can be used to initialise
-// ARM Compute Tensor and CLTensor allocators.
-arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo)
-{
-    bool multiScales = tensorInfo.HasMultipleQuantizationScales();
-    const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape());
-    const arm_compute::DataType aclDataType       = GetArmComputeDataType(tensorInfo.GetDataType(), multiScales);
-
-    const arm_compute::QuantizationInfo aclQuantizationInfo = multiScales ?
-        arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScales()) :
-        arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScale(), tensorInfo.GetQuantizationOffset());
-
-    return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
-}
-
-arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
-                                                  armnn::DataLayout dataLayout)
-{
-    arm_compute::TensorInfo aclTensorInfo = BuildArmComputeTensorInfo(tensorInfo);
-    aclTensorInfo.set_data_layout(ConvertDataLayout(dataLayout));
-
-    return aclTensorInfo;
-}
-
-arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout)
-{
-    switch(dataLayout)
-    {
-        case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC;
-
-        case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW;
-
-        default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" +
-                                                std::to_string(static_cast<int>(dataLayout)) + "]");
-    }
-}
-
-arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor,
-                                                              bool fpMixedPrecision)
-{
-    using arm_compute::PoolingType;
-    using arm_compute::DimensionRoundingType;
-    using arm_compute::PadStrideInfo;
-    using arm_compute::PoolingLayerInfo;
-    using arm_compute::Size2D;
-    using arm_compute::DataLayout;
-
-    // Resolve ARM Compute layer parameters.
-    const PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
-
-    const DataLayout dataLayout = ConvertDataLayout(descriptor.m_DataLayout);
-
-    bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0);
-    //use specific constructor if global pooling
-    if(isGlobalPooling)
-    {
-        return arm_compute::PoolingLayerInfo(poolingType, dataLayout);
-    }
-
-    const DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
-                                                                                    descriptor.m_OutputShapeRounding);
-    const PadStrideInfo padStrideInfo(descriptor.m_StrideX,
-                                      descriptor.m_StrideY,
-                                      descriptor.m_PadLeft,
-                                      descriptor.m_PadRight,
-                                      descriptor.m_PadTop,
-                                      descriptor.m_PadBottom,
-                                      rounding);
-
-    const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);
-
-    const Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight);
-
-    return arm_compute::PoolingLayerInfo(poolingType, poolSize, dataLayout, padStrideInfo, excludePadding,
-                                         fpMixedPrecision);
-}
-
-arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor)
-{
-    const arm_compute::NormType normType =
-        ConvertNormalizationAlgorithmChannelToAclNormType(descriptor.m_NormChannelType);
-    return arm_compute::NormalizationLayerInfo(normType,
-                                               descriptor.m_NormSize,
-                                               descriptor.m_Alpha,
-                                               descriptor.m_Beta,
-                                               descriptor.m_K,
-                                               false);
-}
-
-arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& perm)
-{
-    arm_compute::PermutationVector aclPerm;
-
-    unsigned int start = 0;
-    while ((start < perm.GetSize()) && (start == perm[start]))
-    {
-        ++start;
-    }
-
-    for (unsigned int i = start; i < perm.GetSize(); ++i)
-    {
-        aclPerm.set(i - start, perm[i] - start);
-    }
-
-    return aclPerm;
-}
-
-arm_compute::Size2D BuildArmComputeSize2D(const unsigned int width, const unsigned int height)
-{
-    return arm_compute::Size2D(width, height);
-}
-
-arm_compute::PixelValue GetPixelValue(arm_compute::ITensor& input, float pixelValue)
-{
-    switch (input.info()->data_type())
-    {
-        case arm_compute::DataType::F16:
-            return arm_compute::PixelValue(static_cast<Half>(pixelValue));
-        case arm_compute::DataType::F32:
-            return arm_compute::PixelValue(pixelValue);
-        case arm_compute::DataType::QASYMM8:
-            return arm_compute::PixelValue(static_cast<uint8_t>(pixelValue));
-        case arm_compute::DataType::QSYMM16:
-            return arm_compute::PixelValue(static_cast<int16_t>(pixelValue));
-        case arm_compute::DataType::QSYMM8_PER_CHANNEL:
-            return arm_compute::PixelValue(static_cast<int8_t>(pixelValue));
-        default:
-            throw InvalidArgumentException("Unsupported DataType: [" +
-                                           std::to_string(static_cast<int>(input.info()->data_type())) + "]");
-    }
-}
-
-} // namespace armcomputetensorutils
-} // namespace armnn
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
deleted file mode 100644
index b4ff0f72ff..0000000000
--- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp
+++ /dev/null
@@ -1,247 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include <armnn/Tensor.hpp>
-#include <armnn/DescriptorsFwd.hpp>
-
-#include <arm_compute/core/ITensor.h>
-#include <arm_compute/core/TensorInfo.h>
-#include <arm_compute/core/Types.h>
-#include <arm_compute/core/Size2D.h>
-
-#include <Half.hpp>
-
-#include <boost/cast.hpp>
-
-namespace armnn
-{
-class ITensorHandle;
-
-namespace armcomputetensorutils
-{
-
-/// Utility function to map an armnn::DataType to corresponding arm_compute::DataType.
-arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType, bool multiScales);
-
-/// Utility function used to set up an arm_compute::Coordinates from a vector of ArmNN Axes for reduction functions
-arm_compute::Coordinates BuildArmComputeReductionCoordinates(size_t inputDimensions,
-                                                             unsigned int originalInputRank,
-                                                             const std::vector<unsigned int>& armnnAxes);
-
-/// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape.
-arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape);
-
-/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given
-/// armnn::ITensorInfo.
-arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo);
-
-/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given
-/// armnn::ITensorInfo.
-/// armnn::DataLayout.
-arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
-                                                  armnn::DataLayout dataLayout);
-
-/// Utility function used to convert armnn::DataLayout to arm_compute::DataLayout
-/// armnn::DataLayout.
-arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout);
-
-/// Utility function used to setup an arm_compute::PoolingLayerInfo object from given
-/// armnn::Pooling2dDescriptor
-/// bool fpMixedPrecision
-arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor,
-                                                              bool fpMixedPrecision = false);
-
-/// Utility function to setup an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor.
-arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc);
-
-/// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector.
-arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector);
-
-/// Utility function used to setup an arm_compute::Size2D object from width and height values.
-arm_compute::Size2D BuildArmComputeSize2D(const unsigned int width, const unsigned int height);
-
-/// Gets the appropriate PixelValue for the input DataType
-arm_compute::PixelValue GetPixelValue(arm_compute::ITensor& input, float pixelValue);
-
-/// Utility function used to setup an arm_compute::PadStrideInfo object from an armnn layer descriptor.
-template <typename Descriptor>
-arm_compute::PadStrideInfo BuildArmComputePadStrideInfo(const Descriptor &descriptor)
-{
-    return arm_compute::PadStrideInfo(descriptor.m_StrideX,
-                                      descriptor.m_StrideY,
-                                      descriptor.m_PadLeft,
-                                      descriptor.m_PadRight,
-                                      descriptor.m_PadTop,
-                                      descriptor.m_PadBottom,
-                                      arm_compute::DimensionRoundingType::FLOOR);
-}
-
-/// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor.
-template <typename Tensor>
-void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo)
-{
-    tensor.allocator()->init(BuildArmComputeTensorInfo(tensorInfo));
-}
-
-/// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor.
-template <typename Tensor>
-void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo, DataLayout dataLayout)
-{
-    tensor.allocator()->init(BuildArmComputeTensorInfo(tensorInfo, dataLayout));
-}
-
-template <typename Tensor>
-void InitialiseArmComputeTensorEmpty(Tensor& tensor)
-{
-    tensor.allocator()->allocate();
-}
-
-/// Utility function to free unused tensors after a workload is configured and prepared
-template <typename Tensor>
-void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor)
-{
-    if (tensor && !tensor->is_used())
-    {
-        tensor.reset(nullptr);
-    }
-}
-
-// Helper function to obtain byte offset into tensor data
-inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info,
-                              uint32_t depthIndex,
-                              uint32_t batchIndex,
-                              uint32_t channelIndex,
-                              uint32_t y,
-                              uint32_t x)
-{
-    arm_compute::Coordinates coords;
-    coords.set(4, static_cast<int>(depthIndex));
-    coords.set(3, static_cast<int>(batchIndex));
-    coords.set(2, static_cast<int>(channelIndex));
-    coords.set(1, static_cast<int>(y));
-    coords.set(0, static_cast<int>(x));
-    return boost::numeric_cast<size_t>(info.offset_element_in_bytes(coords));
-}
-
-// Helper function to obtain element offset into data buffer representing tensor data (assuming no strides).
-inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info,
-                                    uint32_t depthIndex,
-                                    uint32_t batchIndex,
-                                    uint32_t channelIndex,
-                                    uint32_t y,
-                                    uint32_t x)
-{
-    const arm_compute::TensorShape& shape = info.tensor_shape();
-    uint32_t width = static_cast<uint32_t>(shape[0]);
-    uint32_t height = static_cast<uint32_t>(shape[1]);
-    uint32_t numChannels = static_cast<uint32_t>(shape[2]);
-    uint32_t numBatches = static_cast<uint32_t>(shape[3]);
-    return (((depthIndex * numBatches + batchIndex) * numChannels + channelIndex) * height + y) * width + x;
-}
-
-template <typename T>
-void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData)
-{
-    // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
-    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyArmComputeITensorData");
-    {
-        const arm_compute::ITensorInfo& info = *srcTensor.info();
-        const arm_compute::TensorShape& shape = info.tensor_shape();
-        const uint8_t* const bufferPtr = srcTensor.buffer();
-        uint32_t width = static_cast<uint32_t>(shape[0]);
-        uint32_t height = static_cast<uint32_t>(shape[1]);
-        uint32_t numChannels = static_cast<uint32_t>(shape[2]);
-        uint32_t numBatches = static_cast<uint32_t>(shape[3]);
-        uint32_t depth = static_cast<uint32_t>(shape[4]);
-
-        for (unsigned int depthIndex = 0; depthIndex < depth; ++depthIndex)
-        {
-            for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
-            {
-                for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
-                {
-                    for (unsigned int y = 0; y < height; ++y)
-                    {
-                        // Copies one row from arm_compute tensor buffer to linear memory buffer.
-                        // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
-                        memcpy(
-                         dstData + GetLinearBufferOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
-                         bufferPtr + GetTensorOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
-                         width * sizeof(T));
-                    }
-                }
-            }
-        }
-    }
-}
-
-template <typename T>
-void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor)
-{
-    // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
-    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyArmComputeITensorData");
-    {
-        const arm_compute::ITensorInfo& info = *dstTensor.info();
-        const arm_compute::TensorShape& shape = info.tensor_shape();
-        uint8_t* const bufferPtr = dstTensor.buffer();
-        uint32_t width = static_cast<uint32_t>(shape[0]);
-        uint32_t height = static_cast<uint32_t>(shape[1]);
-        uint32_t numChannels = static_cast<uint32_t>(shape[2]);
-        uint32_t numBatches = static_cast<uint32_t>(shape[3]);
-        uint32_t depth = static_cast<uint32_t>(shape[4]);
-
-        for (unsigned int depthIndex = 0; depthIndex < depth; ++depthIndex)
-        {
-            for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
-            {
-                for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
-                {
-                    for (unsigned int y = 0; y < height; ++y)
-                    {
-                        // Copies one row from linear memory buffer to arm_compute tensor buffer.
-                        // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
-                        memcpy(
-                         bufferPtr + GetTensorOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
-                         srcData + GetLinearBufferOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
-                         width * sizeof(T));
-                    }
-                }
-            }
-        }
-    }
-}
-
-/// Construct a TensorShape object from an ArmCompute object based on arm_compute::Dimensions.
-/// \tparam ArmComputeType Any type that implements the Dimensions interface
-/// \tparam T Shape value type
-/// \param shapelike An ArmCompute object that implements the Dimensions interface
-/// \param initial A default value to initialise the shape with
-/// \return A TensorShape object filled from the Acl shapelike object.
-template<typename ArmComputeType, typename T>
-TensorShape GetTensorShape(const ArmComputeType& shapelike, T initial)
-{
-    std::vector<unsigned int> s(MaxNumOfTensorDimensions, initial);
-    for (unsigned int i=0; i < shapelike.num_dimensions(); ++i)
-    {
-        s[(shapelike.num_dimensions()-1)-i] = boost::numeric_cast<unsigned int>(shapelike[i]);
-    }
-    return TensorShape(boost::numeric_cast<unsigned int>(shapelike.num_dimensions()), s.data());
-};
-
-/// Get the strides from an ACL strides object
-inline TensorShape GetStrides(const arm_compute::Strides& strides)
-{
-    return GetTensorShape(strides, 0U);
-}
-
-/// Get the shape from an ACL shape object
-inline TensorShape GetShape(const arm_compute::TensorShape& shape)
-{
-    return GetTensorShape(shape, 1U);
-}
-
-} // namespace armcomputetensorutils
-} // namespace armnn
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp
deleted file mode 100644
index 4d690901c6..0000000000
--- a/src/backends/aclCommon/ArmComputeUtils.hpp
+++ /dev/null
@@ -1,173 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include <armnn/Descriptors.hpp>
-#include <armnn/Tensor.hpp>
-
-#include <arm_compute/core/Types.h>
-
-#include <boost/assert.hpp>
-
-namespace armnn
-{
-
-inline arm_compute::NormalizationLayerInfo
-CreateAclNormalizationLayerInfoForL2Normalization(const armnn::TensorInfo& tensorInfo,
-                                                  armnn::DataLayout dataLayout)
-{
-    unsigned int depthDimension = dataLayout == armnn::DataLayout::NCHW ? 1 : 3;
-    const unsigned int depth = tensorInfo.GetShape()[depthDimension];
-
-    // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. This version of
-    // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose
-    // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully chosing the normalization
-    // parameters.
-    //
-    // Please refer to both the reference implementation of the normalization layer and the implementation of
-    // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below.
-
-    // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd.
-    // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in
-    // ACL's normalization_layer_cross_map() CL function.
-    const uint32_t normSize = depth * 2u + 1u;
-
-    // See ACL's NormalizationLayerInfo::scale_coeff() definition.
-    // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead.
-    const float alpha = 1.0f;
-
-    // Don't offset the reduction.
-    const float kappa = 0.0f;
-
-    // pow(reduction, -0.5) = 1 / sqrt(reduction)
-    const float beta = 0.5f;
-
-    return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false);
-}
-
-inline arm_compute::ActivationLayerInfo::ActivationFunction
-ConvertActivationFunctionToAclActivationFunction(ActivationFunction armnnFunction)
-{
-    using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction;
-
-    switch (armnnFunction)
-    {
-        case ActivationFunction::Linear:        return AclActivationFunction::LINEAR;
-        // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function.
-        case ActivationFunction::Sigmoid:       return AclActivationFunction::LOGISTIC;
-        case ActivationFunction::ReLu:          return AclActivationFunction::RELU;
-        case ActivationFunction::BoundedReLu:   return AclActivationFunction::LU_BOUNDED_RELU;
-        case ActivationFunction::SoftReLu:      return AclActivationFunction::SOFT_RELU;
-        case ActivationFunction::LeakyReLu:     return AclActivationFunction::LEAKY_RELU;
-        case ActivationFunction::Abs:           return AclActivationFunction::ABS;
-        case ActivationFunction::Sqrt:          return AclActivationFunction::SQRT;
-        case ActivationFunction::Square:        return AclActivationFunction::SQUARE;
-        case ActivationFunction::TanH:          return AclActivationFunction::TANH;
-        default:                                throw InvalidArgumentException("Unsupported activation function");
-    }
-}
-
-inline arm_compute::ActivationLayerInfo
-ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor& actDesc)
-{
-    return arm_compute::ActivationLayerInfo(ConvertActivationFunctionToAclActivationFunction(actDesc.m_Function),
-        actDesc.m_A, actDesc.m_B);
-}
-
-inline arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType(PoolingAlgorithm poolingAlgorithm)
-{
-    using arm_compute::PoolingType;
-
-    switch (poolingAlgorithm)
-    {
-        case PoolingAlgorithm::Max:             return PoolingType::MAX;
-        case PoolingAlgorithm::Average:         return PoolingType::AVG;
-        case PoolingAlgorithm::L2:              return PoolingType::L2;
-        default:                                throw InvalidArgumentException("Unsupported pooling algorithm");
-    }
-}
-
-inline arm_compute::DimensionRoundingType ConvertOutputShapeRoundingToAclDimensionRoundingType(OutputShapeRounding
-                                                                                                              rounding)
-{
-    using arm_compute::DimensionRoundingType;
-
-    switch (rounding)
-    {
-        case OutputShapeRounding::Ceiling:  return DimensionRoundingType::CEIL;
-        case OutputShapeRounding::Floor:    return DimensionRoundingType::FLOOR;
-        default:                            throw InvalidArgumentException("Unsupported Output Shape Rounding type");
-    }
-}
-
-inline arm_compute::NormType
-ConvertNormalizationAlgorithmChannelToAclNormType(NormalizationAlgorithmChannel channelType)
-{
-    using arm_compute::NormType;
-    switch (channelType)
-    {
-        case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP;
-        case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D;
-        default:    throw InvalidArgumentException("Unsupported normalization algorithm channel type");
-    }
-}
-
-inline arm_compute::FullyConnectedLayerInfo
-ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor& fullyConnectedDesc)
-{
-    arm_compute::FullyConnectedLayerInfo fc_info;
-    fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
-    return fc_info;
-}
-
-inline arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy(ResizeMethod resizeMethod)
-{
-    switch (resizeMethod)
-    {
-        case ResizeMethod::Bilinear:
-            return arm_compute::InterpolationPolicy::BILINEAR;
-        case ResizeMethod::NearestNeighbor:
-            return arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR;
-        default:
-            throw InvalidArgumentException("Unsupported resize method");
-    }
-}
-
-inline unsigned int ComputeSoftmaxAclAxis(const SoftmaxDescriptor& softmaxDesc, const armnn::TensorInfo& tensor)
-{
-    // Detect the Android default value of -1 and return the ACL default value of 1.
-    if (softmaxDesc.m_Axis == -1)
-    {
-        return 1;
-    }
-
-   unsigned int dim = tensor.GetNumDimensions();
-
-    BOOST_ASSERT(dim != 0);
-
-    // Currently ArmNN support axis 1.
-    return dim - 1;
-}
-
-inline std::set<unsigned int> ComputeSplitAxis(const armnn::SplitterDescriptor& desc, const TensorShape& input)
-{
-    unsigned int numSplit = desc.GetNumViews();
-    unsigned int numDimensions = desc.GetNumDimensions();
-    std::set<unsigned int> splitAxis;
-
-    for (unsigned int i = 0; i < numSplit; ++i)
-    {
-        for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
-        {
-            if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
-            {
-                splitAxis.insert(dimIdx);
-            }
-        }
-    }
-    return splitAxis;
-}
-
-} // namespace armnn
diff --git a/src/backends/aclCommon/BaseMemoryManager.cpp b/src/backends/aclCommon/BaseMemoryManager.cpp
deleted file mode 100644
index 844fbcd4ca..0000000000
--- a/src/backends/aclCommon/BaseMemoryManager.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#include "BaseMemoryManager.hpp"
-
-#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
-#include "arm_compute/runtime/BlobLifetimeManager.h"
-#include "arm_compute/runtime/PoolManager.h"
-#include "arm_compute/runtime/OffsetLifetimeManager.h"
-#endif
-
-#include <boost/polymorphic_cast.hpp>
-
-namespace armnn
-{
-
-#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
-BaseMemoryManager::BaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc,
-                                     MemoryAffinity memoryAffinity)
-{
-    BOOST_ASSERT(alloc);
-    m_Allocator = std::move(alloc);
-
-    m_IntraLayerMemoryMgr = CreateArmComputeMemoryManager(memoryAffinity);
-    m_InterLayerMemoryMgr = CreateArmComputeMemoryManager(memoryAffinity);
-}
-
-std::shared_ptr<arm_compute::MemoryManagerOnDemand>
-BaseMemoryManager::CreateArmComputeMemoryManager(MemoryAffinity memoryAffinity)
-{
-    std::shared_ptr<arm_compute::ILifetimeManager> lifetimeManager = nullptr;
-
-    if (memoryAffinity == MemoryAffinity::Buffer)
-    {
-        lifetimeManager = std::make_shared<arm_compute::BlobLifetimeManager>();
-    }
-    else
-    {
-        lifetimeManager = std::make_shared<arm_compute::OffsetLifetimeManager>();
-    }
-
-    auto poolManager   = std::make_shared<arm_compute::PoolManager>();
-    auto memoryManager = std::make_shared<arm_compute::MemoryManagerOnDemand>(lifetimeManager, poolManager);
-
-    return memoryManager;
-}
-
-void BaseMemoryManager::Acquire()
-{
-    static const size_t s_NumPools = 1;
-
-    // Allocate memory pools for intra-layer memory manager
-    BOOST_ASSERT(m_IntraLayerMemoryMgr);
-    m_IntraLayerMemoryMgr->populate(*m_Allocator, s_NumPools);
-
-    // Allocate memory pools for inter-layer memory manager
-    BOOST_ASSERT(m_InterLayerMemoryMgr);
-    m_InterLayerMemoryMgr->populate(*m_Allocator, s_NumPools);
-
-    // Acquire inter-layer memory group. NOTE: This has to come after allocating the pools
-    BOOST_ASSERT(m_InterLayerMemoryGroup);
-    m_InterLayerMemoryGroup->acquire();
-}
-
-void BaseMemoryManager::Release()
-{
-    // Release inter-layer memory group. NOTE: This has to come before releasing the pools
-    BOOST_ASSERT(m_InterLayerMemoryGroup);
-    m_InterLayerMemoryGroup->release();
-
-    // Release memory pools managed by intra-layer memory manager
-    BOOST_ASSERT(m_IntraLayerMemoryMgr);
-    m_IntraLayerMemoryMgr->clear();
-
-    // Release memory pools managed by inter-layer memory manager
-    BOOST_ASSERT(m_InterLayerMemoryMgr);
-    m_InterLayerMemoryMgr->clear();
-}
-#else
-void BaseMemoryManager::Acquire()
-{
-    // No-op if neither NEON nor CL enabled
-}
-
-void BaseMemoryManager::Release()
-{
-    // No-op if neither NEON nor CL enabled
-}
-#endif
-
-#if defined(ARMCOMPUTENEON_ENABLED)
-std::shared_ptr<arm_compute::IMemoryGroup>
-NeonMemoryManager::CreateMemoryGroup(const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
-{
-    return std::make_shared<arm_compute::MemoryGroup>(memoryManager);
-}
-#endif
-
-#if defined(ARMCOMPUTECL_ENABLED)
-std::shared_ptr<arm_compute::IMemoryGroup>
-ClMemoryManager::CreateMemoryGroup(const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
-{
-    return std::make_shared<arm_compute::MemoryGroup>(memoryManager);
-}
-#endif
-
-}
diff --git a/src/backends/aclCommon/BaseMemoryManager.hpp b/src/backends/aclCommon/BaseMemoryManager.hpp
deleted file mode 100644
index 9d2dbf7838..0000000000
--- a/src/backends/aclCommon/BaseMemoryManager.hpp
+++ /dev/null
@@ -1,96 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include <armnn/backends/IMemoryManager.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
-
-#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
-#include <arm_compute/runtime/MemoryGroup.h>
-#endif
-
-#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
-#include <arm_compute/runtime/IAllocator.h>
-#include <arm_compute/runtime/IMemoryGroup.h>
-#include <arm_compute/runtime/MemoryManagerOnDemand.h>
-#endif
-
-namespace armnn
-{
-
-class BaseMemoryManager : public IMemoryManager
-{
-public:
-    enum class MemoryAffinity
-    {
-        Buffer,
-        Offset
-    };
-
-    BaseMemoryManager() { }
-    virtual ~BaseMemoryManager() { }
-
-    void Acquire() override;
-    void Release() override;
-
-#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
-    BaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity);
-
-    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& GetIntraLayerManager() { return m_IntraLayerMemoryMgr; }
-    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& GetInterLayerManager() { return m_InterLayerMemoryMgr; }
-    std::shared_ptr<arm_compute::IMemoryGroup>& GetInterLayerMemoryGroup()      { return m_InterLayerMemoryGroup; }
-
-protected:
-    std::unique_ptr<arm_compute::IAllocator>            m_Allocator;
-    std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_IntraLayerMemoryMgr;
-    std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_InterLayerMemoryMgr;
-    std::shared_ptr<arm_compute::IMemoryGroup>          m_InterLayerMemoryGroup;
-
-    std::shared_ptr<arm_compute::MemoryManagerOnDemand> CreateArmComputeMemoryManager(MemoryAffinity memoryAffinity);
-
-    virtual std::shared_ptr<arm_compute::IMemoryGroup>
-    CreateMemoryGroup(const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) = 0;
-#endif
-};
-
-#if defined(ARMCOMPUTENEON_ENABLED)
-class NeonMemoryManager : public BaseMemoryManager
-{
-public:
-    NeonMemoryManager() {}
-    virtual ~NeonMemoryManager() {}
-
-    NeonMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity)
-    : BaseMemoryManager(std::move(alloc), memoryAffinity)
-    {
-        m_InterLayerMemoryGroup = CreateMemoryGroup(m_InterLayerMemoryMgr);
-    }
-
-protected:
-    std::shared_ptr<arm_compute::IMemoryGroup>
-    CreateMemoryGroup(const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) override;
-};
-#endif
-
-#if defined(ARMCOMPUTECL_ENABLED)
-class ClMemoryManager : public BaseMemoryManager
-{
-public:
-    ClMemoryManager() {}
-    virtual ~ClMemoryManager() {}
-
-    ClMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc)
-    : BaseMemoryManager(std::move(alloc), MemoryAffinity::Buffer)
-    {
-        m_InterLayerMemoryGroup = CreateMemoryGroup(m_InterLayerMemoryMgr);
-    }
-
-protected:
-    std::shared_ptr<arm_compute::IMemoryGroup>
-    CreateMemoryGroup(const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) override;
-};
-#endif
-
-} //namespace armnn
diff --git a/src/backends/aclCommon/CMakeLists.txt b/src/backends/aclCommon/CMakeLists.txt
deleted file mode 100644
index d4ff541985..0000000000
--- a/src/backends/aclCommon/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-#
-# Copyright © 2017 Arm Ltd. All rights reserved.
-# SPDX-License-Identifier: MIT
-#
-
-list(APPEND armnnAclCommon_sources
-    ArmComputeTensorHandle.hpp
-    ArmComputeTensorUtils.hpp
-    ArmComputeTensorUtils.cpp
-    ArmComputeUtils.hpp
-    BaseMemoryManager.cpp
-    BaseMemoryManager.hpp
-)
-
-if(BUILD_UNIT_TESTS)
-    add_subdirectory(test)
-endif()
-
-add_library(armnnAclCommon OBJECT ${armnnAclCommon_sources})
-target_include_directories(armnnAclCommon PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn)
-target_include_directories(armnnAclCommon PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils)
-target_include_directories(armnnAclCommon PRIVATE ${PROJECT_SOURCE_DIR}/src/backends)
-target_include_directories(armnnAclCommon PRIVATE ${PROJECT_SOURCE_DIR}/src/profiling)
diff --git a/src/backends/aclCommon/common.cmake b/src/backends/aclCommon/common.cmake
deleted file mode 100644
index 89be236a7f..0000000000
--- a/src/backends/aclCommon/common.cmake
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-# Copyright © 2017 Arm Ltd. All rights reserved.
-# SPDX-License-Identifier: MIT
-#
-
-if(ARMCOMPUTENEON OR ARMCOMPUTECL)
-    add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/aclCommon)
-    list(APPEND armnnLibraries armnnAclCommon)
-    list(APPEND armnnUnitTestLibraries armnnAclCommonUnitTests)
-endif()
diff --git a/src/backends/aclCommon/common.mk b/src/backends/aclCommon/common.mk
deleted file mode 100644
index 0ba966af14..0000000000
--- a/src/backends/aclCommon/common.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#
-# Copyright © 2017 ARM Ltd. All rights reserved.
-# SPDX-License-Identifier: MIT
-#
-
-# COMMON_SOURCES contains the list of files to be included
-# in the Android build and it is picked up by the Android.mk
-# file in the root of ArmNN
-
-COMMON_SOURCES := \
-    ArmComputeTensorUtils.cpp \
-    BaseMemoryManager.cpp
-
-# COMMON_TEST_SOURCES contains the list of files to be included
-# in the Android unit test build (armnn-tests) and it is picked
-# up by the Android.mk file in the root of ArmNN
-
-COMMON_TEST_SOURCES := \
-    test/ArmComputeTensorUtilsTests.cpp \
-    test/MemCopyTests.cpp
diff --git a/src/backends/aclCommon/test/ArmComputeTensorUtilsTests.cpp b/src/backends/aclCommon/test/ArmComputeTensorUtilsTests.cpp
deleted file mode 100644
index 4ab748806c..0000000000
--- a/src/backends/aclCommon/test/ArmComputeTensorUtilsTests.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//
-// Copyright © 2019 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include <aclCommon/ArmComputeTensorUtils.hpp>
-
-#include <boost/test/unit_test.hpp>
-
-using namespace armnn::armcomputetensorutils;
-
-BOOST_AUTO_TEST_SUITE(ArmComputeTensorUtils)
-
-BOOST_AUTO_TEST_CASE(BuildArmComputeTensorInfoTest)
-{
-
-    const armnn::TensorShape tensorShape = { 1, 2, 3, 4 };
-    const armnn::DataType dataType = armnn::DataType::QAsymmU8;
-
-    const std::vector<float> quantScales = { 1.5f, 2.5f, 3.5f, 4.5f };
-    const float quantScale = quantScales[0];
-    const int32_t quantOffset = 128;
-
-    // Tensor info with per-tensor quantization
-    const armnn::TensorInfo tensorInfo0(tensorShape, dataType, quantScale, quantOffset);
-    const arm_compute::TensorInfo aclTensorInfo0 = BuildArmComputeTensorInfo(tensorInfo0);
-
-    const arm_compute::TensorShape& aclTensorShape = aclTensorInfo0.tensor_shape();
-    BOOST_CHECK(aclTensorShape.num_dimensions() == tensorShape.GetNumDimensions());
-    for(unsigned int i = 0u; i < tensorShape.GetNumDimensions(); ++i)
-    {
-        // NOTE: arm_compute tensor dimensions are stored in the opposite order
-        BOOST_CHECK(aclTensorShape[i] == tensorShape[tensorShape.GetNumDimensions() - i - 1]);
-    }
-
-    BOOST_CHECK(aclTensorInfo0.data_type() == arm_compute::DataType::QASYMM8);
-    BOOST_CHECK(aclTensorInfo0.quantization_info().scale()[0] == quantScale);
-
-    // Tensor info with per-axis quantization
-    const armnn::TensorInfo tensorInfo1(tensorShape, dataType, quantScales, 0);
-    const arm_compute::TensorInfo aclTensorInfo1 = BuildArmComputeTensorInfo(tensorInfo1);
-
-    BOOST_CHECK(aclTensorInfo1.quantization_info().scale() == quantScales);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/aclCommon/test/CMakeLists.txt b/src/backends/aclCommon/test/CMakeLists.txt
deleted file mode 100644
index 087e56669b..0000000000
--- a/src/backends/aclCommon/test/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Copyright © 2017 Arm Ltd. All rights reserved.
-# SPDX-License-Identifier: MIT
-#
-
-list(APPEND armnnAclCommonUnitTests_sources
-    ArmComputeTensorUtilsTests.cpp
-    CreateWorkloadClNeon.hpp
-    MemCopyTests.cpp
-    MemCopyTestImpl.hpp
-)
-
-add_library(armnnAclCommonUnitTests OBJECT ${armnnAclCommonUnitTests_sources})
-target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn)
-target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils)
-target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/backends)
-target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/profiling)
diff --git a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
deleted file mode 100644
index 83cec2a746..0000000000
--- a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
+++ /dev/null
@@ -1,111 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include <test/CreateWorkload.hpp>
-
-#include <backendsCommon/MemCopyWorkload.hpp>
-#include <reference/RefWorkloadFactory.hpp>
-#include <reference/RefTensorHandle.hpp>
-
-#if defined(ARMCOMPUTECL_ENABLED)
-#include <cl/ClTensorHandle.hpp>
-#endif
-
-#if defined(ARMCOMPUTENEON_ENABLED)
-#include <neon/NeonTensorHandle.hpp>
-#endif
-
-using namespace armnn;
-
-namespace
-{
-
-using namespace std;
-
-template<typename IComputeTensorHandle>
-boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandle*               tensorHandle,
-                                                             std::initializer_list<unsigned int> expectedDimensions)
-{
-    arm_compute::ITensorInfo* info = tensorHandle->GetTensor().info();
-
-    auto infoNumDims = info->num_dimensions();
-    auto numExpectedDims = expectedDimensions.size();
-    if (infoNumDims != numExpectedDims)
-    {
-        boost::test_tools::predicate_result res(false);
-        res.message() << "Different number of dimensions [" << info->num_dimensions()
-                      << "!=" << expectedDimensions.size() << "]";
-        return res;
-    }
-
-    size_t i = info->num_dimensions() - 1;
-
-    for (unsigned int expectedDimension : expectedDimensions)
-    {
-        if (info->dimension(i) != expectedDimension)
-        {
-            boost::test_tools::predicate_result res(false);
-            res.message() << "For dimension " << i <<
-                             " expected size " << expectedDimension <<
-                             " got " << info->dimension(i);
-            return res;
-        }
-
-        i--;
-    }
-
-    return true;
-}
-
-template<typename IComputeTensorHandle>
-void CreateMemCopyWorkloads(IWorkloadFactory& factory)
-{
-    TensorHandleFactoryRegistry registry;
-    Graph graph;
-    RefWorkloadFactory refFactory;
-
-    // Creates the layers we're testing.
-    Layer* const layer1 = graph.AddLayer<MemCopyLayer>("layer1");
-    Layer* const layer2 = graph.AddLayer<MemCopyLayer>("layer2");
-
-    // Creates extra layers.
-    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
-    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
-
-    // Connects up.
-    TensorInfo tensorInfo({2, 3}, DataType::Float32);
-    Connect(input, layer1, tensorInfo);
-    Connect(layer1, layer2, tensorInfo);
-    Connect(layer2, output, tensorInfo);
-
-    input->CreateTensorHandles(registry, refFactory);
-    layer1->CreateTensorHandles(registry, factory);
-    layer2->CreateTensorHandles(registry, refFactory);
-    output->CreateTensorHandles(registry, refFactory);
-
-    // make the workloads and check them
-    auto workload1 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer1, factory);
-    auto workload2 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer2, refFactory);
-
-    MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData();
-    BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1);
-    BOOST_TEST(queueDescriptor1.m_Outputs.size() == 1);
-    auto inputHandle1  = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor1.m_Inputs[0]);
-    auto outputHandle1 = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor1.m_Outputs[0]);
-    BOOST_TEST((inputHandle1->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
-    BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(outputHandle1, {2, 3}));
-
-
-    MemCopyQueueDescriptor queueDescriptor2 = workload2->GetData();
-    BOOST_TEST(queueDescriptor2.m_Inputs.size() == 1);
-    BOOST_TEST(queueDescriptor2.m_Outputs.size() == 1);
-    auto inputHandle2  = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor2.m_Inputs[0]);
-    auto outputHandle2 = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor2.m_Outputs[0]);
-    BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(inputHandle2, {2, 3}));
-    BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
-}
-
-} //namespace
diff --git a/src/backends/aclCommon/test/MemCopyTestImpl.hpp b/src/backends/aclCommon/test/MemCopyTestImpl.hpp
deleted file mode 100644
index 570c6027f3..0000000000
--- a/src/backends/aclCommon/test/MemCopyTestImpl.hpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include <ResolveType.hpp>
-
-#include <armnn/backends/IBackendInternal.hpp>
-
-#include <backendsCommon/test/LayerTests.hpp>
-#include <backendsCommon/test/TensorCopyUtils.hpp>
-#include <backendsCommon/test/WorkloadFactoryHelper.hpp>
-#include <backendsCommon/test/WorkloadTestUtils.hpp>
-
-#include <test/TensorHelpers.hpp>
-
-#include <boost/multi_array.hpp>
-
-namespace
-{
-
-template<armnn::DataType dataType, typename T = armnn::ResolveType<dataType>>
-LayerTestResult<T, 4> MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory,
-                                  armnn::IWorkloadFactory& dstWorkloadFactory,
-                                  bool withSubtensors)
-{
-    const std::array<unsigned int, 4> shapeData = { { 1u, 1u, 6u, 5u } };
-    const armnn::TensorShape tensorShape(4, shapeData.data());
-    const armnn::TensorInfo tensorInfo(tensorShape, dataType);
-    boost::multi_array<T, 4> inputData = MakeTensor<T, 4>(tensorInfo, std::vector<T>(
-        {
-             1,  2,  3,  4,  5,
-             6,  7,  8,  9, 10,
-            11, 12, 13, 14, 15,
-            16, 17, 18, 19, 20,
-            21, 22, 23, 24, 25,
-            26, 27, 28, 29, 30,
-        })
-    );
-
-    LayerTestResult<T, 4> ret(tensorInfo);
-    ret.outputExpected = inputData;
-
-    boost::multi_array<T, 4> outputData(shapeData);
-
-    auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo);
-    auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo);
-
-    AllocateAndCopyDataToITensorHandle(inputTensorHandle.get(), inputData.data());
-    outputTensorHandle->Allocate();
-
-    armnn::MemCopyQueueDescriptor memCopyQueueDesc;
-    armnn::WorkloadInfo workloadInfo;
-
-    const unsigned int origin[4] = {};
-
-    auto workloadInput = (withSubtensors && srcWorkloadFactory.SupportsSubTensors())
-                         ? srcWorkloadFactory.CreateSubTensorHandle(*inputTensorHandle, tensorShape, origin)
-                         : std::move(inputTensorHandle);
-    auto workloadOutput = (withSubtensors && dstWorkloadFactory.SupportsSubTensors())
-                          ? dstWorkloadFactory.CreateSubTensorHandle(*outputTensorHandle, tensorShape, origin)
-                          : std::move(outputTensorHandle);
-
-    AddInputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadInput.get());
-    AddOutputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadOutput.get());
-
-    dstWorkloadFactory.CreateMemCopy(memCopyQueueDesc, workloadInfo)->Execute();
-
-    CopyDataFromITensorHandle(outputData.data(), workloadOutput.get());
-    ret.output = outputData;
-
-    return ret;
-}
-
-template<typename SrcWorkloadFactory,
-         typename DstWorkloadFactory,
-         armnn::DataType dataType,
-         typename T = armnn::ResolveType<dataType>>
-LayerTestResult<T, 4> MemCopyTest(bool withSubtensors)
-{
-    armnn::IBackendInternal::IMemoryManagerSharedPtr srcMemoryManager =
-        WorkloadFactoryHelper<SrcWorkloadFactory>::GetMemoryManager();
-
-    armnn::IBackendInternal::IMemoryManagerSharedPtr dstMemoryManager =
-        WorkloadFactoryHelper<DstWorkloadFactory>::GetMemoryManager();
-
-    SrcWorkloadFactory srcWorkloadFactory = WorkloadFactoryHelper<SrcWorkloadFactory>::GetFactory(srcMemoryManager);
-    DstWorkloadFactory dstWorkloadFactory = WorkloadFactoryHelper<DstWorkloadFactory>::GetFactory(dstMemoryManager);
-
-    return MemCopyTest<dataType>(srcWorkloadFactory, dstWorkloadFactory, withSubtensors);
-}
-
-} // anonymous namespace
diff --git a/src/backends/aclCommon/test/MemCopyTests.cpp b/src/backends/aclCommon/test/MemCopyTests.cpp
deleted file mode 100644
index 3e26364354..0000000000
--- a/src/backends/aclCommon/test/MemCopyTests.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include <aclCommon/ArmComputeTensorUtils.hpp>
-#include <aclCommon/test/MemCopyTestImpl.hpp>
-
-#if defined(ARMCOMPUTECL_ENABLED) && defined(ARMCOMPUTENEON_ENABLED)
-#include <cl/ClWorkloadFactory.hpp>
-#include <cl/test/ClContextControlFixture.hpp>
-#include <cl/test/ClWorkloadFactoryHelper.hpp>
-
-#include <neon/NeonWorkloadFactory.hpp>
-#include <neon/test/NeonWorkloadFactoryHelper.hpp>
-#endif
-
-#include <boost/test/unit_test.hpp>
-
-BOOST_AUTO_TEST_SUITE(MemCopyCommon)
-
-BOOST_AUTO_TEST_CASE(AclTypeConversions)
-{
-    arm_compute::Strides strides(1, 2, 3, 4);
-    armnn::TensorShape convertedStrides = armnn::armcomputetensorutils::GetStrides(strides);
-
-    BOOST_TEST(convertedStrides[0] == 4);
-    BOOST_TEST(convertedStrides[1] == 3);
-    BOOST_TEST(convertedStrides[2] == 2);
-    BOOST_TEST(convertedStrides[3] == 1);
-
-    arm_compute::TensorShape shape(5, 6, 7, 8);
-    armnn::TensorShape convertedshape = armnn::armcomputetensorutils::GetShape(shape);
-
-    BOOST_TEST(convertedshape[0] == 8);
-    BOOST_TEST(convertedshape[1] == 7);
-    BOOST_TEST(convertedshape[2] == 6);
-    BOOST_TEST(convertedshape[3] == 5);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-
-#if defined(ARMCOMPUTECL_ENABLED) && defined(ARMCOMPUTENEON_ENABLED)
-
-BOOST_FIXTURE_TEST_SUITE(MemCopyClNeon, ClContextControlFixture)
-
-BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu)
-{
-    LayerTestResult<float, 4> result =
-        MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(false);
-    BOOST_TEST(CompareTensors(result.output, result.outputExpected));
-}
-
-BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon)
-{
-    LayerTestResult<float, 4> result =
-        MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(false);
-    BOOST_TEST(CompareTensors(result.output, result.outputExpected));
-}
-
-BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors)
-{
-    LayerTestResult<float, 4> result =
-        MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(true);
-    BOOST_TEST(CompareTensors(result.output, result.outputExpected));
-}
-
-BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors)
-{
-    LayerTestResult<float, 4> result =
-        MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(true);
-    BOOST_TEST(CompareTensors(result.output, result.outputExpected));
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-
-#endif