aboutsummaryrefslogtreecommitdiff
path: root/src/armnn/backends/ClWorkloadFactory.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/armnn/backends/ClWorkloadFactory.cpp')
-rw-r--r--src/armnn/backends/ClWorkloadFactory.cpp110
1 files changed, 83 insertions, 27 deletions
diff --git a/src/armnn/backends/ClWorkloadFactory.cpp b/src/armnn/backends/ClWorkloadFactory.cpp
index 916ca46aae..354440c7bc 100644
--- a/src/armnn/backends/ClWorkloadFactory.cpp
+++ b/src/armnn/backends/ClWorkloadFactory.cpp
@@ -15,9 +15,13 @@
#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>
#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include "ClWorkloads.hpp"
+
#include "backends/MemCopyWorkload.hpp"
#include "backends/ClTensorHandle.hpp"
-#include "ClWorkloads.hpp"
+
+#include "memory/IPoolManager.hpp"
#endif
#include "MakeWorkloadHelper.hpp"
@@ -29,7 +33,9 @@
namespace armnn
{
-bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported)
+bool ClWorkloadFactory::IsLayerSupported(const Layer& layer,
+ boost::optional<DataType> dataType,
+ std::string& outReasonIfUnsupported)
{
return IWorkloadFactory::IsLayerSupported(Compute::GpuAcc, layer, dataType, outReasonIfUnsupported);
}
@@ -43,7 +49,10 @@ ClWorkloadFactory::ClWorkloadFactory()
std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
{
- return std::make_unique<ClTensorHandle>(tensorInfo);
+ std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
+ tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup());
+
+ return tensorHandle;
}
std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
@@ -58,24 +67,25 @@ std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorH
coords.set_num_dimensions(subTensorShape.GetNumDimensions());
for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
{
- // arm compute indexes tensor coords in reverse order
+ // Arm compute indexes tensor coords in reverse order.
unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex]));
}
- return std::make_unique<ClSubTensorHandle>(static_cast<ClTensorHandle&>(parent).GetTensor(), shape, coords);
+ return std::make_unique<ClSubTensorHandle>(
+ boost::polymorphic_downcast<IClTensorHandle*>(&parent), shape, coords);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<CopyFromCpuToClFloat32Workload, CopyFromCpuToClUint8Workload>(descriptor, info);
+ return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<CopyFromClToCpuFloat32Workload, CopyFromClToCpuUint8Workload>(descriptor, info);
+ return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
@@ -87,7 +97,8 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQ
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSoftmaxFloat32Workload, ClSoftmaxUint8Workload>(descriptor, info, m_MemoryManager.Get());
+ return MakeWorkload<ClSoftmaxFloat32Workload, ClSoftmaxUint8Workload>(descriptor, info,
+ m_MemoryManager.GetIntraLayerManager());
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
@@ -105,13 +116,14 @@ std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMerger(const MergerQu
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateFullyConnected(
const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
- return MakeWorkload<ClFullyConnectedFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
+ return MakeWorkload<ClFullyConnectedFloat32Workload, NullWorkload>(descriptor, info,
+ m_MemoryManager.GetIntraLayerManager());
}
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClPermuteFloat32Workload, ClPermuteUint8Workload>(descriptor, info);
+ return MakeWorkload<ClPermuteFloatWorkload, ClPermuteUint8Workload>(descriptor, info);
}
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
@@ -124,7 +136,7 @@ std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateConvolution2d(const C
const WorkloadInfo& info) const
{
return MakeWorkload<ClConvolution2dFloat32Workload, ClConvolution2dUint8Workload>(descriptor, info,
- m_MemoryManager.Get());
+ m_MemoryManager.GetIntraLayerManager());
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d(
@@ -142,7 +154,7 @@ std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateNormalization(const N
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClAdditionFloat32Workload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClAdditionFloat32Workload, ClAdditionUint8Workload>(descriptor, info);
}
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMultiplication(
@@ -165,21 +177,7 @@ std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopy
throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
}
- // Create a workload that will copy tensor data from the inputs, which can have a number of different formats,
- // to CL tensors.
- switch (descriptor.m_Inputs[0]->GetType())
- {
- case ITensorHandle::Cpu:
- return MakeWorkload<CopyFromCpuToClFloat32Workload, CopyFromCpuToClUint8Workload>(descriptor, info);
-#if ARMCOMPUTENEON_ENABLED
- case ITensorHandle::Neon:
- {
- return MakeWorkload<CopyFromNeonToClFloat32Workload, CopyFromNeonToClUint8Workload>(descriptor, info);
- }
-#endif
- default:
- throw InvalidArgumentException("ClWorkloadFactory: Destination type not supported for MemCopy Workload.");
- }
+ return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
}
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateResizeBilinear(
@@ -220,11 +218,41 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescri
return MakeWorkload<ClFloorFloat32Workload, NullWorkload>(descriptor, info);
}
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return MakeWorkload<ClLstmFloat32Workload, NullWorkload>(descriptor, info);
+}
+
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32(
+ const ConvertFp16ToFp32QueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return std::make_unique<ClConvertFp16ToFp32Workload>(descriptor, info);
+}
+
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
+ const ConvertFp32ToFp16QueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return std::make_unique<ClConvertFp32ToFp16Workload>(descriptor, info);
+}
+
void ClWorkloadFactory::Finalize()
{
m_MemoryManager.Finalize();
}
+void ClWorkloadFactory::Release()
+{
+ m_MemoryManager.Release();
+}
+
+void ClWorkloadFactory::Acquire()
+{
+ m_MemoryManager.Acquire();
+}
+
#else // #if ARMCOMPUTECL_ENABLED
ClWorkloadFactory::ClWorkloadFactory()
@@ -375,10 +403,38 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescri
return nullptr;
}
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return nullptr;
+}
+
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32(
+ const ConvertFp16ToFp32QueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return nullptr;
+}
+
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
+ const ConvertFp32ToFp16QueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return nullptr;
+}
+
void ClWorkloadFactory::Finalize()
{
}
+void ClWorkloadFactory::Release()
+{
+}
+
+void ClWorkloadFactory::Acquire()
+{
+}
+
#endif // #if ARMCOMPUTECL_ENABLED
} // namespace armnn