// // Copyright © 2017 Arm Ltd. All rights reserved. // See LICENSE file in the project root for full license information. // #include "MemCopyWorkload.hpp" #include "backends/CpuTensorHandle.hpp" #if ARMCOMPUTECL_ENABLED #include "backends/ClTensorHandle.hpp" #endif #if ARMCOMPUTENEON_ENABLED #include "backends/NeonTensorHandle.hpp" #endif #include #include namespace armnn { namespace { template void GatherTensorHandlePairs(const MemCopyQueueDescriptor& descriptor, std::vector>& tensorHandlePairs) { const unsigned int numInputs = boost::numeric_cast(descriptor.m_Inputs.size()); tensorHandlePairs.reserve(numInputs); for (unsigned int i = 0; i < numInputs; ++i) { SrcTensorHandleType* const srcTensorHandle = boost::polymorphic_downcast( descriptor.m_Inputs[i]); DstTensorHandleType* const dstTensorHandle = boost::polymorphic_downcast( descriptor.m_Outputs[i]); tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle); } } void CopyFromCpuToCpu(const ConstCpuTensorHandle& srcHandle, CpuTensorHandle& dstHandle) { const unsigned int numBytes = srcHandle.GetTensorInfo().GetNumBytes(); const void* const input = srcHandle.GetConstTensor(); void* const output = dstHandle.GetTensor(); std::memcpy(output, input, numBytes); } #if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED #include "backends/ArmComputeTensorUtils.hpp" template void CopyFromCpuToAclBackend(const ConstCpuTensorHandle& srcHandle, arm_compute::ITensor& dstAclTensor) { using T = ResolveType; armnn::armcomputetensorutils::CopyArmComputeITensorData(srcHandle.GetConstTensor(), dstAclTensor); } template void CopyFromAclBackendToCpu(const arm_compute::ITensor& srcAclTensor, CpuTensorHandle& dstHandle) { using T = ResolveType; armnn::armcomputetensorutils::CopyArmComputeITensorData(srcAclTensor, dstHandle.GetTensor()); } #endif // ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED } template CopyFromCpuToCpuWorkload::CopyFromCpuToCpuWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info) : TypedWorkload(descriptor, info) { GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); } template void CopyFromCpuToCpuWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "CopyFromCpuToCpuWorkload_Execute"); for (const auto& pair : m_TensorHandlePairs) { CopyFromCpuToCpu(*pair.first, *pair.second); } } template class CopyFromCpuToCpuWorkload; template class CopyFromCpuToCpuWorkload; #if ARMCOMPUTECL_ENABLED template CopyFromCpuToClWorkload::CopyFromCpuToClWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info) : TypedWorkload(descriptor, info) { GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); } template void CopyFromCpuToClWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromCpuToClWorkload_Execute"); for (const auto& pair : m_TensorHandlePairs) { IClTensorHandle& handle = *pair.second; handle.Map(true); CopyFromCpuToAclBackend(*pair.first, handle.GetTensor()); handle.UnMap(); } } template class CopyFromCpuToClWorkload; template class CopyFromCpuToClWorkload; template CopyFromClToCpuWorkload::CopyFromClToCpuWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info) : TypedWorkload(descriptor, info) { GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); } template void CopyFromClToCpuWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromClToCpuWorkload_Execute"); for (const auto& pair : m_TensorHandlePairs) { IClTensorHandle& handle = *pair.first; handle.Map(true); CopyFromAclBackendToCpu(handle.GetTensor(), *pair.second); handle.UnMap(); } } template class CopyFromClToCpuWorkload; template class CopyFromClToCpuWorkload; #endif // ARMCOMPUTECL_ENABLED #if ARMCOMPUTENEON_ENABLED template CopyFromCpuToNeonWorkload::CopyFromCpuToNeonWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info) : TypedWorkload(descriptor, info) { GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); } template void CopyFromCpuToNeonWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "CopyFromCpuToNeonWorkload_Execute"); for (const auto& pair : m_TensorHandlePairs) { CopyFromCpuToAclBackend(*pair.first, pair.second->GetTensor()); } } template class CopyFromCpuToNeonWorkload; template class CopyFromCpuToNeonWorkload; template CopyFromNeonToCpuWorkload::CopyFromNeonToCpuWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info) : TypedWorkload(descriptor, info) { GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); } template void CopyFromNeonToCpuWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "CopyFromNeonToCpuWorkload_Execute"); for (const auto& pair : m_TensorHandlePairs) { CopyFromAclBackendToCpu(pair.first->GetTensor(), *pair.second); } } template class CopyFromNeonToCpuWorkload; template class CopyFromNeonToCpuWorkload; #endif // ARMCOMPUTENEON_ENABLED #if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED template CopyFromNeonToClWorkload::CopyFromNeonToClWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info) : TypedWorkload(descriptor, info) { GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); } template void CopyFromNeonToClWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromNeonToClWorkload_Execute"); for (const auto& pair : m_TensorHandlePairs) { IClTensorHandle& handle = *pair.second; handle.Map(true); handle.GetTensor().copy_from(pair.first->GetTensor()); handle.UnMap(); } } template class CopyFromNeonToClWorkload; template class CopyFromNeonToClWorkload; template CopyFromClToNeonWorkload::CopyFromClToNeonWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info) : TypedWorkload(descriptor, info) { GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); } template void CopyFromClToNeonWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromClToNeonWorkload_Execute"); for (const auto& pair : m_TensorHandlePairs) { IClTensorHandle& handle = *pair.first; handle.Map(true); pair.second->GetTensor().copy_from(handle.GetTensor()); handle.UnMap(); } } template class CopyFromClToNeonWorkload; template class CopyFromClToNeonWorkload; #endif // ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED }