diff options
Diffstat (limited to 'src/armnn/backends/MemCopyWorkload.cpp')
-rw-r--r-- | src/armnn/backends/MemCopyWorkload.cpp | 223 |
1 files changed, 14 insertions, 209 deletions
diff --git a/src/armnn/backends/MemCopyWorkload.cpp b/src/armnn/backends/MemCopyWorkload.cpp index 09ffd9a08a..27e60f93b7 100644 --- a/src/armnn/backends/MemCopyWorkload.cpp +++ b/src/armnn/backends/MemCopyWorkload.cpp @@ -4,14 +4,7 @@ // #include "MemCopyWorkload.hpp" #include "backends/CpuTensorHandle.hpp" - -#if ARMCOMPUTECL_ENABLED -#include "backends/ClTensorHandle.hpp" -#endif - -#if ARMCOMPUTENEON_ENABLED -#include "backends/NeonTensorHandle.hpp" -#endif +#include "TypeUtils.hpp" #include <cstring> #include <boost/cast.hpp> @@ -26,7 +19,7 @@ template <typename SrcTensorHandleType, typename DstTensorHandleType> void GatherTensorHandlePairs(const MemCopyQueueDescriptor& descriptor, std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs) { - const unsigned int numInputs = boost::numeric_cast<unsigned int>(descriptor.m_Inputs.size()); + const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size()); tensorHandlePairs.reserve(numInputs); for (unsigned int i = 0; i < numInputs; ++i) @@ -40,217 +33,29 @@ void GatherTensorHandlePairs(const MemCopyQueueDescriptor& descriptor, } } -void CopyFromCpuToCpu(const ConstCpuTensorHandle& srcHandle, CpuTensorHandle& dstHandle) -{ - const unsigned int numBytes = srcHandle.GetTensorInfo().GetNumBytes(); - const void* const input = srcHandle.GetConstTensor<void>(); - void* const output = dstHandle.GetTensor<void>(); - std::memcpy(output, input, numBytes); -} - -#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED - -#include "backends/ArmComputeTensorUtils.hpp" - -template <armnn::DataType DataType> -void CopyFromCpuToAclBackend(const ConstCpuTensorHandle& srcHandle, arm_compute::ITensor& dstAclTensor) -{ - using T = ResolveType<DataType>; - armnn::armcomputetensorutils::CopyArmComputeITensorData(srcHandle.GetConstTensor<T>(), dstAclTensor); -} - -template <armnn::DataType DataType> -void CopyFromAclBackendToCpu(const arm_compute::ITensor& srcAclTensor, CpuTensorHandle& dstHandle) -{ - using T = ResolveType<DataType>; - armnn::armcomputetensorutils::CopyArmComputeITensorData(srcAclTensor, dstHandle.GetTensor<T>()); -} - -#endif // ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED - -} - -template <armnn::DataType DataType> -CopyFromCpuToCpuWorkload<DataType>::CopyFromCpuToCpuWorkload(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) -{ - GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); -} - -template <armnn::DataType DataType> -void CopyFromCpuToCpuWorkload<DataType>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "CopyFromCpuToCpuWorkload_Execute"); - - for (const auto& pair : m_TensorHandlePairs) - { - CopyFromCpuToCpu(*pair.first, *pair.second); - } -} - -template class CopyFromCpuToCpuWorkload<DataType::Float32>; -template class CopyFromCpuToCpuWorkload<DataType::QuantisedAsymm8>; - -#if ARMCOMPUTECL_ENABLED - -template <armnn::DataType DataType> -CopyFromCpuToClWorkload<DataType>::CopyFromCpuToClWorkload(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) -{ - GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); -} - -template <armnn::DataType DataType> -void CopyFromCpuToClWorkload<DataType>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromCpuToClWorkload_Execute"); - - for (const auto& pair : m_TensorHandlePairs) - { - IClTensorHandle& handle = *pair.second; - - handle.Map(true); - CopyFromCpuToAclBackend<DataType>(*pair.first, handle.GetTensor()); - handle.UnMap(); - } -} - -template class CopyFromCpuToClWorkload<DataType::Float32>; -template class CopyFromCpuToClWorkload<DataType::QuantisedAsymm8>; - - -template <armnn::DataType DataType> -CopyFromClToCpuWorkload<DataType>::CopyFromClToCpuWorkload(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) -{ - GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); -} - -template <armnn::DataType DataType> -void CopyFromClToCpuWorkload<DataType>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromClToCpuWorkload_Execute"); - - for (const auto& pair : m_TensorHandlePairs) - { - IClTensorHandle& handle = *pair.first; - - handle.Map(true); - CopyFromAclBackendToCpu<DataType>(handle.GetTensor(), *pair.second); - handle.UnMap(); - } -} - -template class CopyFromClToCpuWorkload<DataType::Float32>; -template class CopyFromClToCpuWorkload<DataType::QuantisedAsymm8>; - -#endif // ARMCOMPUTECL_ENABLED +} //namespace -#if ARMCOMPUTENEON_ENABLED -template <armnn::DataType DataType> -CopyFromCpuToNeonWorkload<DataType>::CopyFromCpuToNeonWorkload(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) +CopyMemGenericWorkload::CopyMemGenericWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : BaseWorkload<MemCopyQueueDescriptor>(descriptor, info) { GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); } -template <armnn::DataType DataType> -void CopyFromCpuToNeonWorkload<DataType>::Execute() const +void CopyMemGenericWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "CopyFromCpuToNeonWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyMemGeneric_Execute"); - for (const auto& pair : m_TensorHandlePairs) - { - CopyFromCpuToAclBackend<DataType>(*pair.first, pair.second->GetTensor()); - } -} - -template class CopyFromCpuToNeonWorkload<DataType::Float32>; -template class CopyFromCpuToNeonWorkload<DataType::QuantisedAsymm8>; - -template <armnn::DataType DataType> -CopyFromNeonToCpuWorkload<DataType>::CopyFromNeonToCpuWorkload(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) -{ - GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); -} - -template <armnn::DataType DataType> -void CopyFromNeonToCpuWorkload<DataType>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "CopyFromNeonToCpuWorkload_Execute"); + auto copyFunc = [](void* dst, const void* src, size_t size) + { + memcpy(dst, src, size); + }; for (const auto& pair : m_TensorHandlePairs) { - CopyFromAclBackendToCpu<DataType>(pair.first->GetTensor(), *pair.second); + CopyTensorContentsGeneric(pair.first, pair.second, copyFunc); } } -template class CopyFromNeonToCpuWorkload<DataType::Float32>; -template class CopyFromNeonToCpuWorkload<DataType::QuantisedAsymm8>; - -#endif // ARMCOMPUTENEON_ENABLED - -#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED - -template <armnn::DataType DataType> -CopyFromNeonToClWorkload<DataType>::CopyFromNeonToClWorkload(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) -{ - GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); -} - -template <armnn::DataType DataType> -void CopyFromNeonToClWorkload<DataType>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromNeonToClWorkload_Execute"); - - for (const auto& pair : m_TensorHandlePairs) - { - IClTensorHandle& handle = *pair.second; - - handle.Map(true); - handle.GetTensor().copy_from(pair.first->GetTensor()); - handle.UnMap(); - } -} - -template class CopyFromNeonToClWorkload<DataType::Float32>; -template class CopyFromNeonToClWorkload<DataType::QuantisedAsymm8>; - -template <armnn::DataType DataType> -CopyFromClToNeonWorkload<DataType>::CopyFromClToNeonWorkload(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) -{ - GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); -} - -template <armnn::DataType DataType> -void CopyFromClToNeonWorkload<DataType>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromClToNeonWorkload_Execute"); - - for (const auto& pair : m_TensorHandlePairs) - { - IClTensorHandle& handle = *pair.first; - - handle.Map(true); - pair.second->GetTensor().copy_from(handle.GetTensor()); - handle.UnMap(); - } -} - -template class CopyFromClToNeonWorkload<DataType::Float32>; -template class CopyFromClToNeonWorkload<DataType::QuantisedAsymm8>; - -#endif // ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED - -} +} //namespace armnn |