From 4fcda0101ec3d110c1d6d7bee5c83416b645528a Mon Sep 17 00:00:00 2001 From: telsoa01 Date: Fri, 9 Mar 2018 14:13:49 +0000 Subject: Release 18.02 Change-Id: Id3c11dc5ee94ef664374a988fcc6901e9a232fa6 --- src/armnn/backends/MemCopyWorkload.cpp | 256 +++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 src/armnn/backends/MemCopyWorkload.cpp (limited to 'src/armnn/backends/MemCopyWorkload.cpp') diff --git a/src/armnn/backends/MemCopyWorkload.cpp b/src/armnn/backends/MemCopyWorkload.cpp new file mode 100644 index 0000000000..09ffd9a08a --- /dev/null +++ b/src/armnn/backends/MemCopyWorkload.cpp @@ -0,0 +1,256 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "MemCopyWorkload.hpp" +#include "backends/CpuTensorHandle.hpp" + +#if ARMCOMPUTECL_ENABLED +#include "backends/ClTensorHandle.hpp" +#endif + +#if ARMCOMPUTENEON_ENABLED +#include "backends/NeonTensorHandle.hpp" +#endif + +#include +#include + +namespace armnn +{ + +namespace +{ + +template +void GatherTensorHandlePairs(const MemCopyQueueDescriptor& descriptor, + std::vector>& tensorHandlePairs) +{ + const unsigned int numInputs = boost::numeric_cast(descriptor.m_Inputs.size()); + tensorHandlePairs.reserve(numInputs); + + for (unsigned int i = 0; i < numInputs; ++i) + { + SrcTensorHandleType* const srcTensorHandle = boost::polymorphic_downcast( + descriptor.m_Inputs[i]); + DstTensorHandleType* const dstTensorHandle = boost::polymorphic_downcast( + descriptor.m_Outputs[i]); + + tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle); + } +} + +void CopyFromCpuToCpu(const ConstCpuTensorHandle& srcHandle, CpuTensorHandle& dstHandle) +{ + const unsigned int numBytes = srcHandle.GetTensorInfo().GetNumBytes(); + const void* const input = srcHandle.GetConstTensor(); + void* const output = dstHandle.GetTensor(); + std::memcpy(output, input, numBytes); +} + +#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED + +#include "backends/ArmComputeTensorUtils.hpp" + +template +void CopyFromCpuToAclBackend(const ConstCpuTensorHandle& srcHandle, arm_compute::ITensor& dstAclTensor) +{ + using T = ResolveType; + armnn::armcomputetensorutils::CopyArmComputeITensorData(srcHandle.GetConstTensor(), dstAclTensor); +} + +template +void CopyFromAclBackendToCpu(const arm_compute::ITensor& srcAclTensor, CpuTensorHandle& dstHandle) +{ + using T = ResolveType; + armnn::armcomputetensorutils::CopyArmComputeITensorData(srcAclTensor, dstHandle.GetTensor()); +} + +#endif // ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED + +} + +template +CopyFromCpuToCpuWorkload::CopyFromCpuToCpuWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template +void CopyFromCpuToCpuWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "CopyFromCpuToCpuWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + CopyFromCpuToCpu(*pair.first, *pair.second); + } +} + +template class CopyFromCpuToCpuWorkload; +template class CopyFromCpuToCpuWorkload; + +#if ARMCOMPUTECL_ENABLED + +template +CopyFromCpuToClWorkload::CopyFromCpuToClWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template +void CopyFromCpuToClWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromCpuToClWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + IClTensorHandle& handle = *pair.second; + + handle.Map(true); + CopyFromCpuToAclBackend(*pair.first, handle.GetTensor()); + handle.UnMap(); + } +} + +template class CopyFromCpuToClWorkload; +template class CopyFromCpuToClWorkload; + + +template +CopyFromClToCpuWorkload::CopyFromClToCpuWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template +void CopyFromClToCpuWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromClToCpuWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + IClTensorHandle& handle = *pair.first; + + handle.Map(true); + CopyFromAclBackendToCpu(handle.GetTensor(), *pair.second); + handle.UnMap(); + } +} + +template class CopyFromClToCpuWorkload; +template class CopyFromClToCpuWorkload; + +#endif // ARMCOMPUTECL_ENABLED + +#if ARMCOMPUTENEON_ENABLED + +template +CopyFromCpuToNeonWorkload::CopyFromCpuToNeonWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template +void CopyFromCpuToNeonWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "CopyFromCpuToNeonWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + CopyFromCpuToAclBackend(*pair.first, pair.second->GetTensor()); + } +} + +template class CopyFromCpuToNeonWorkload; +template class CopyFromCpuToNeonWorkload; + +template +CopyFromNeonToCpuWorkload::CopyFromNeonToCpuWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template +void CopyFromNeonToCpuWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "CopyFromNeonToCpuWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + CopyFromAclBackendToCpu(pair.first->GetTensor(), *pair.second); + } +} + +template class CopyFromNeonToCpuWorkload; +template class CopyFromNeonToCpuWorkload; + +#endif // ARMCOMPUTENEON_ENABLED + +#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +template +CopyFromNeonToClWorkload::CopyFromNeonToClWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template +void CopyFromNeonToClWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromNeonToClWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + IClTensorHandle& handle = *pair.second; + + handle.Map(true); + handle.GetTensor().copy_from(pair.first->GetTensor()); + handle.UnMap(); + } +} + +template class CopyFromNeonToClWorkload; +template class CopyFromNeonToClWorkload; + +template +CopyFromClToNeonWorkload::CopyFromClToNeonWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template +void CopyFromClToNeonWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromClToNeonWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + IClTensorHandle& handle = *pair.first; + + handle.Map(true); + pair.second->GetTensor().copy_from(handle.GetTensor()); + handle.UnMap(); + } +} + +template class CopyFromClToNeonWorkload; +template class CopyFromClToNeonWorkload; + +#endif // ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +} -- cgit v1.2.1