author    Sadik Armagan <sadik.armagan@arm.com>  2020-09-14 15:44:18 +0100
committer Sadik Armagan <sadik.armagan@arm.com>  2020-09-14 14:42:59 +0000
commit    04a729708f986b1a69c1efc42d5cf18271cfae1e
tree      d39be642c3b5bfe9f9520ae69889b20dade10406
parent    80c6b146046252af153af27025a28fb59d33c5e6
download  armnn-04a729708f986b1a69c1efc42d5cf18271cfae1e.tar.gz
IVGCVSW-5157 'Pipe ModelOption through Network::LoadNetwork() to Workload factory'
* Pass ModelOptions to WorkloadFactory
* Updated the signatures of the CL and NEON Convolution2d workloads to add a FastMathEnabled parameter

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I536178be8e4dd4083489e69febadaf0feeba46d2
-rw-r--r--include/armnn/backends/IBackendInternal.hpp8
-rw-r--r--src/armnn/LoadedNetwork.cpp11
-rw-r--r--src/armnn/test/CreateWorkload.hpp11
-rw-r--r--src/backends/backendsCommon/IBackendInternal.cpp22
-rw-r--r--src/backends/backendsCommon/WorkloadFactory.cpp13
-rw-r--r--src/backends/backendsCommon/WorkloadFactory.hpp6
-rw-r--r--src/backends/cl/ClBackend.cpp19
-rw-r--r--src/backends/cl/ClBackend.hpp6
-rw-r--r--src/backends/cl/ClWorkloadFactory.cpp34
-rw-r--r--src/backends/cl/ClWorkloadFactory.hpp11
-rw-r--r--src/backends/cl/test/ClCreateWorkloadTests.cpp31
-rw-r--r--src/backends/cl/test/ClWorkloadFactoryHelper.hpp7
-rw-r--r--src/backends/cl/workloads/ClConvolution2dWorkload.cpp24
-rw-r--r--src/backends/cl/workloads/ClConvolution2dWorkload.hpp10
-rw-r--r--src/backends/neon/NeonBackend.cpp20
-rw-r--r--src/backends/neon/NeonBackend.hpp6
-rw-r--r--src/backends/neon/NeonLayerSupport.cpp2
-rw-r--r--src/backends/neon/NeonWorkloadFactory.cpp35
-rw-r--r--src/backends/neon/NeonWorkloadFactory.hpp10
-rw-r--r--src/backends/neon/test/NeonCreateWorkloadTests.cpp29
-rw-r--r--src/backends/neon/test/NeonWorkloadFactoryHelper.hpp8
-rw-r--r--src/backends/neon/workloads/NeonConvolution2dWorkload.cpp25
-rw-r--r--src/backends/neon/workloads/NeonConvolution2dWorkload.hpp10
-rw-r--r--src/backends/reference/RefWorkloadFactory.cpp8
-rw-r--r--src/backends/reference/RefWorkloadFactory.hpp5
25 files changed, 344 insertions, 27 deletions
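
Taken together, this change lets ModelOptions supplied at optimize time reach each backend's workload factory at load time. A minimal usage sketch under stated assumptions: 'network' is a previously built armnn::INetworkPtr, and the m_ModelOptions member on OptimizerOptions is an assumption about this development stream, not shown in this patch.

    // Route a "FastMathEnabled" model option to the GpuAcc backend.
    // Assumes <armnn/ArmNN.hpp> and an already-constructed INetworkPtr 'network'.
    armnn::IRuntime::CreationOptions runtimeOptions;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(runtimeOptions);

    armnn::BackendOptions gpuAcc("GpuAcc", {{ "FastMathEnabled", true }});
    armnn::OptimizerOptions optimizerOptions;
    optimizerOptions.m_ModelOptions.push_back(gpuAcc); // assumed field, see lead-in

    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
        *network, { armnn::Compute::GpuAcc }, runtime->GetDeviceSpec(), optimizerOptions);

    // LoadNetwork() now pipes the ModelOptions through to CreateWorkloadFactory(),
    // so the CL Convolution2d workload can be configured with fast math enabled.
    armnn::NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optNet));
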
diff --git a/include/armnn/backends/IBackendInternal.hpp b/include/armnn/backends/IBackendInternal.hpp
index ee9cb49562..4815529d6d 100644
--- a/include/armnn/backends/IBackendInternal.hpp
+++ b/include/armnn/backends/IBackendInternal.hpp
@@ -118,6 +118,14 @@ public:
virtual IWorkloadFactoryPtr CreateWorkloadFactory(
class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const;
+ virtual IWorkloadFactoryPtr CreateWorkloadFactory(
+ const IMemoryManagerSharedPtr& memoryManager,
+ const ModelOptions& modelOptions) const;
+
+ virtual IWorkloadFactoryPtr CreateWorkloadFactory(
+ class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions) const;
+
/// Create the runtime context of the backend
///
/// Implementations may return a default-constructed IBackendContextPtr if
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 4a293b92d9..7b64a88470 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -146,14 +146,16 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
if (backend->SupportsTensorAllocatorAPI())
{
- auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry);
+ auto workloadFactory = backend->CreateWorkloadFactory(
+ m_TensorHandleFactoryRegistry, m_OptimizedNetwork->GetModelOptions());
m_WorkloadFactories.emplace(
std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
}
else
{
IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
- auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);
+ auto workloadFactory = backend->CreateWorkloadFactory(
+ memoryManager, m_OptimizedNetwork->GetModelOptions());
m_WorkloadFactories.emplace(
std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
@@ -361,7 +363,10 @@ const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) co
ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
std::string reasonIfUnsupported;
- ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
+ ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
+ {},
+ reasonIfUnsupported,
+ m_OptimizedNetwork->GetModelOptions()),
"Factory does not support layer");
IgnoreUnused(reasonIfUnsupported);
return *workloadFactory;
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index fe73550bbc..b13b9b538e 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -32,14 +32,16 @@ using namespace std;
// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type.
template<typename Workload>
-std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, const IWorkloadFactory& factory)
+std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer,
+ const IWorkloadFactory& factory,
+ const ModelOptions& modelOptions = {})
{
std::unique_ptr<IWorkload> workload = layer.CreateWorkload(factory);
BOOST_TEST(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
"Cannot convert to derived class");
std::string reasonIfUnsupported;
layer.SetBackendId(factory.GetBackendId());
- BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported));
+ BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported, modelOptions));
return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release()));
}
@@ -220,7 +222,8 @@ std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWorkload
template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph,
- DataLayout dataLayout = DataLayout::NCHW)
+ DataLayout dataLayout = DataLayout::NCHW,
+ const ModelOptions& modelOptions = {})
{
// Creates the layer we're testing.
Convolution2dDescriptor layerDesc;
@@ -255,7 +258,7 @@ std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IW
CreateTensorHandles(graph, factory);
// Makes the workload and checks it.
- auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory);
+ auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);
Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2);
diff --git a/src/backends/backendsCommon/IBackendInternal.cpp b/src/backends/backendsCommon/IBackendInternal.cpp
index 1cca61efa5..08060331e7 100644
--- a/src/backends/backendsCommon/IBackendInternal.cpp
+++ b/src/backends/backendsCommon/IBackendInternal.cpp
@@ -39,6 +39,28 @@ IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
return IWorkloadFactoryPtr{};
}
+IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
+ const IMemoryManagerSharedPtr& memoryManager,
+ const ModelOptions& modelOptions) const
+{
+ if(modelOptions.empty())
+ {
+ return CreateWorkloadFactory(memoryManager);
+ }
+ return IWorkloadFactoryPtr{};
+}
+
+IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
+ class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions) const
+{
+ if(modelOptions.empty())
+ {
+ return CreateWorkloadFactory(tensorHandleFactoryRegistry);
+ }
+ return IWorkloadFactoryPtr{};
+}
+
IBackendInternal::IBackendContextPtr IBackendInternal::CreateBackendContext(const IRuntime::CreationOptions&) const
{
return IBackendContextPtr{};
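
The default implementations above are deliberately conservative: a backend that has not been updated returns a null factory pointer whenever a non-empty ModelOptions arrives, rather than silently dropping the options. A backend with no options of its own could forward explicitly instead; a minimal sketch with a hypothetical MyBackend (not part of this change):

    // Hypothetical backend that consciously ignores ModelOptions: forwarding to
    // the pre-existing overload keeps a non-empty option list from producing a
    // null workload factory.
    IBackendInternal::IWorkloadFactoryPtr MyBackend::CreateWorkloadFactory(
        const IMemoryManagerSharedPtr& memoryManager,
        const ModelOptions& /*modelOptions*/) const
    {
        return CreateWorkloadFactory(memoryManager);
    }
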
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 0bafda257c..54a4157fe3 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -1243,6 +1243,19 @@ bool IWorkloadFactory::IsLayerSupported(const IConnectableLayer& connectableLaye
modelOptions);
}
+bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
+ const IConnectableLayer& connectableLayer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions)
+{
+ return IsLayerConfigurationSupported(backendId,
+ connectableLayer,
+ dataType,
+ outReasonIfUnsupported,
+ modelOptions);
+}
+
// Default Implementations
std::unique_ptr<IWorkload> IWorkloadFactory::CreateAbs(const AbsQueueDescriptor& /*descriptor*/,
const WorkloadInfo& /*info*/) const
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index 68f9da650e..5096c3ba51 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -39,6 +39,12 @@ public:
std::string& outReasonIfUnsupported,
const ModelOptions& modelOptions);
+ static bool IsLayerSupported(const BackendId& backendId,
+ const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions);
+
virtual bool SupportsSubTensors() const = 0;
ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 49636d9b08..6254b0a32a 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -46,6 +46,13 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
}
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+ const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
+{
+ return std::make_unique<ClWorkloadFactory>(
+ PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
TensorHandleFactoryRegistry& registry) const
{
auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
@@ -57,6 +64,18 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+ TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
+{
+ auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+
+ registry.RegisterMemoryManager(memoryManager);
+ registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
+
+ return std::make_unique<ClWorkloadFactory>(
+ PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
{
return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()};
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index 108124cac9..af5534e0d0 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -26,6 +26,12 @@ public:
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
TensorHandleFactoryRegistry& registry) const override;
+ IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager,
+ const ModelOptions& modelOptions) const override;
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions) const override;
+
std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override;
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 58e17df5b8..4acfa570f2 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -4,6 +4,7 @@
//
#include "ClWorkloadFactory.hpp"
#include "ClBackendId.hpp"
+#include "ClBackendModelContext.hpp"
#include <Layer.hpp>
@@ -42,6 +43,14 @@ bool ClWorkloadFactory::IsLayerSupported(const Layer& layer,
return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}
+bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions)
+{
+ return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
const BackendId& ClWorkloadFactory::GetBackendId() const
{
return s_Id;
@@ -78,7 +87,13 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptor
}
ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
- : m_MemoryManager(memoryManager)
+ : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
+{
+}
+
+ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
+ const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
+ : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
}
@@ -205,7 +220,22 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+ bool isFastMathEnabled = false;
+ if (m_ModelContextPtr)
+ {
+ if (m_ModelContextPtr.get() != nullptr)
+ {
+ auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
+ if (modelOptions)
+ {
+ isFastMathEnabled = modelOptions->IsFastMathEnabled();
+ }
+ }
+ }
+ return MakeWorkload<ClConvolution2dWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ isFastMathEnabled);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
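
The fast-math flag consumed above comes from ClBackendModelContext (included at the top of this file), which is built from the "FastMathEnabled" backend option. A simplified sketch of equivalent parsing through the public armnn::BackendOptions accessors (illustrative only; the real ClBackendModelContext may differ in detail):

    // Extract "FastMathEnabled" for one backend from a ModelOptions list.
    // ModelOptions is std::vector<armnn::BackendOptions>; see <armnn/BackendOptions.hpp>.
    bool GetFastMathEnabled(const armnn::ModelOptions& modelOptions,
                            const armnn::BackendId& backendId)
    {
        bool fastMathEnabled = false;
        for (const auto& backendOptions : modelOptions)
        {
            if (backendOptions.GetBackendId() != backendId)
            {
                continue;
            }
            for (size_t i = 0; i < backendOptions.GetOptionCount(); ++i)
            {
                const auto& option = backendOptions.GetOption(i);
                if (option.GetName() == "FastMathEnabled" && option.GetValue().IsBool())
                {
                    fastMathEnabled = option.GetValue().AsBool();
                }
            }
        }
        return fastMathEnabled;
    }
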
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index 80cd7c51f1..fad5dd04fa 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -7,6 +7,8 @@
#include <armnn/IRuntime.hpp>
#include <armnn/Optional.hpp>
+#include <armnn/backends/IBackendInternal.hpp>
+
#include <backendsCommon/WorkloadFactoryBase.hpp>
#include <aclCommon/BaseMemoryManager.hpp>
@@ -19,12 +21,20 @@ class ClWorkloadFactory : public WorkloadFactoryBase
public:
ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager);
+ ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
+ const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr);
+
const BackendId& GetBackendId() const override;
static bool IsLayerSupported(const Layer& layer,
Optional<DataType> dataType,
std::string& outReasonIfUnsupported);
+ static bool IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions);
+
bool SupportsSubTensors() const override { return true; }
ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
@@ -242,6 +252,7 @@ private:
Args&&... args);
mutable std::shared_ptr<ClMemoryManager> m_MemoryManager;
+ const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
};
} // namespace armnn
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 1dd0abeadd..fc5ccfe487 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -6,6 +6,8 @@
#include "ClContextControlFixture.hpp"
#include "ClWorkloadFactoryHelper.hpp"
+#include <armnn/utility/Assert.hpp>
+#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
@@ -304,6 +306,35 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
+{
+ Graph graph;
+
+ using ModelOptions = std::vector<BackendOptions>;
+ ModelOptions modelOptions = {};
+ BackendOptions gpuAcc("GpuAcc",
+ {
+ { "FastMathEnabled", true }
+ });
+ modelOptions.push_back(gpuAcc);
+
+ ClWorkloadFactory factory =
+ ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
+
+ auto workload =
+ CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
+ graph,
+ DataLayout::NCHW,
+ modelOptions);
+
+ ARMNN_ASSERT(workload != nullptr);
+ auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
+ IgnoreUnused(conv2dWorkload);
+ ARMNN_ASSERT(conv2dWorkload != nullptr);
+ // fast_math is enabled but this configuration does not qualify for WINOGRAD, so GEMM is expected
+ ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM);
+}
+
template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
{
diff --git a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
index 6e3c6fc05a..f7f1629b27 100644
--- a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
+++ b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
@@ -27,9 +27,12 @@ struct WorkloadFactoryHelper<armnn::ClWorkloadFactory>
}
static armnn::ClWorkloadFactory GetFactory(
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ModelOptions& modelOptions = {})
{
- return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager));
+ armnn::ClBackend backend;
+ return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager),
+ backend.CreateBackendSpecificModelContext(modelOptions));
}
static armnn::ClTensorHandleFactory GetTensorHandleFactory(
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index 42c9903dc4..7b52f2784f 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -59,7 +59,9 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
}
ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const bool isFastMathEnabled)
: BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
, m_ConvolutionLayer(memoryManager)
{
@@ -95,7 +97,20 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
&output,
padStrideInfo,
arm_compute::WeightsInfo(),
- aclDilationInfo);
+ aclDilationInfo,
+ arm_compute::ActivationLayerInfo(),
+ isFastMathEnabled);
+
+ m_ConvolutionMethod =
+ m_ConvolutionLayer.get_convolution_method(input.info(),
+ m_KernelTensor->info(),
+ output.info(),
+ padStrideInfo,
+ arm_compute::WeightsInfo(),
+ arm_compute::ActivationLayerInfo(),
+ arm_compute::CLScheduler::get().target(),
+ aclDilationInfo,
+ isFastMathEnabled);
InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
@@ -116,6 +131,11 @@ void ClConvolution2dWorkload::Execute() const
RunClFunction(m_ConvolutionLayer, CHECK_LOCATION());
}
+arm_compute::ConvolutionMethod ClConvolution2dWorkload::GetConvolutionMethod() const
+{
+ return m_ConvolutionMethod;
+}
+
void ClConvolution2dWorkload::FreeUnusedTensors()
{
FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index 8b0afada36..f769422a0a 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -28,16 +28,22 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
class ClConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
{
public:
- ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const bool isFastMathEnabled = false);
void Execute() const override;
+ arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
private:
mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
+ arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
void FreeUnusedTensors();
};
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 31e08ceaf5..d300960052 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -48,6 +48,13 @@ IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
}
IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
+ const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
+{
+ return std::make_unique<NeonWorkloadFactory>(
+ PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
+IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
{
auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
@@ -60,6 +67,19 @@ IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}
+IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
+ TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
+{
+ auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
+ BaseMemoryManager::MemoryAffinity::Offset);
+
+ tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
+ tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
+
+ return std::make_unique<NeonWorkloadFactory>(
+ PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
{
return IBackendContextPtr{};
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index 6458eccb6b..42c6666930 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -26,6 +26,12 @@ public:
IWorkloadFactoryPtr CreateWorkloadFactory(
class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override;
+ IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager,
+ const ModelOptions& modelOptions) const override;
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions) const override;
+
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(
const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 853a518b45..0084dbd03f 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -329,7 +329,7 @@ bool NeonLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
{
if (m_ModelContextPtr.get() != nullptr)
{
- auto modelOptions = armnn::PolymorphicDowncast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+ auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
if (modelOptions)
{
isFastMathEnabled = modelOptions->IsFastMathEnabled();
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 40010fe329..928989b1e4 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -4,6 +4,7 @@
//
#include "NeonBackendId.hpp"
+#include "NeonBackendModelContext.hpp"
#include "NeonTensorHandle.hpp"
#include "NeonWorkloadFactory.hpp"
@@ -36,13 +37,27 @@ bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer,
return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}
+bool NeonWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions)
+{
+ return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
const BackendId& NeonWorkloadFactory::GetBackendId() const
{
return s_Id;
}
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
- : m_MemoryManager(memoryManager)
+ : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
+{
+}
+
+NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
+ const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
+ : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
}
@@ -184,8 +199,22 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
- return std::make_unique<NeonConvolution2dWorkload>(descriptor, info,
- m_MemoryManager->GetIntraLayerManager());
+ bool isFastMathEnabled = false;
+ if (m_ModelContextPtr)
+ {
+ if (m_ModelContextPtr.get() != nullptr)
+ {
+ auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+ if (modelOptions)
+ {
+ isFastMathEnabled = modelOptions->IsFastMathEnabled();
+ }
+ }
+ }
+ return std::make_unique<NeonConvolution2dWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ isFastMathEnabled);
}
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 09ae839e27..6a514e2812 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -5,6 +5,7 @@
#pragma once
#include <armnn/Optional.hpp>
+#include <armnn/backends/IBackendInternal.hpp>
#include <backendsCommon/WorkloadFactoryBase.hpp>
#include <aclCommon/BaseMemoryManager.hpp>
@@ -19,12 +20,20 @@ class NeonWorkloadFactory : public WorkloadFactoryBase
public:
NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager);
+ NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
+ const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr);
+
const BackendId& GetBackendId() const override;
static bool IsLayerSupported(const Layer& layer,
Optional<DataType> dataType,
std::string& outReasonIfUnsupported);
+ static bool IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions);
+
bool SupportsSubTensors() const override { return true; }
ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
@@ -238,6 +247,7 @@ public:
private:
mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
+ const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
};
} // namespace armnn
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 37d026f107..99ff9ae8b8 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -6,6 +6,8 @@
#include "NeonWorkloadFactoryHelper.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/Assert.hpp>
+#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
@@ -276,6 +278,33 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
+{
+ Graph graph;
+ using ModelOptions = std::vector<BackendOptions>;
+ ModelOptions modelOptions = {};
+ BackendOptions cpuAcc("CpuAcc",
+ {
+ { "FastMathEnabled", true }
+ });
+ modelOptions.push_back(cpuAcc);
+ NeonWorkloadFactory factory =
+ NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
+
+ auto workload =
+ CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, armnn::DataType::Float32>(factory,
+ graph,
+ DataLayout::NCHW,
+ modelOptions);
+
+ ARMNN_ASSERT(workload != nullptr);
+ auto conv2dWorkload = PolymorphicDowncast<NeonConvolution2dWorkload*>(workload.get());
+ IgnoreUnused(conv2dWorkload);
+ ARMNN_ASSERT(conv2dWorkload != nullptr);
+ // fast_math is enabled but this configuration does not qualify for WINOGRAD, so GEMM is expected
+ ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM);
+}
+
template <typename armnn::DataType DataType>
static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
{
diff --git a/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
index 87150115f8..89052d634e 100644
--- a/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
+++ b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
@@ -27,10 +27,12 @@ struct WorkloadFactoryHelper<armnn::NeonWorkloadFactory>
}
static armnn::NeonWorkloadFactory GetFactory(
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ModelOptions& modelOptions = {})
{
- return armnn::NeonWorkloadFactory(
- armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager));
+ armnn::NeonBackend backend;
+ return armnn::NeonWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager),
+ backend.CreateBackendSpecificModelContext(modelOptions));
}
static armnn::NeonTensorHandleFactory GetTensorHandleFactory(
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index 83f761158a..d35b9685be 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -59,8 +59,10 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
}
NeonConvolution2dWorkload::NeonConvolution2dWorkload(
- const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const bool isFastMathEnabled)
: BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
{
using arm_compute::NEDirectConvolutionLayer;
@@ -97,7 +99,19 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
&output,
padStrideInfo,
arm_compute::WeightsInfo(),
- aclDilationInfo);
+ aclDilationInfo,
+ arm_compute::ActivationLayerInfo(),
+ isFastMathEnabled);
+
+ m_ConvolutionMethod =
+ convolutionLayer->get_convolution_method(input.info(),
+ m_KernelTensor->info(),
+ output.info(),
+ padStrideInfo,
+ arm_compute::WeightsInfo(),
+ aclDilationInfo,
+ arm_compute::ActivationLayerInfo(),
+ isFastMathEnabled);
m_ConvolutionLayer.reset(convolutionLayer.release());
@@ -120,6 +134,11 @@ void NeonConvolution2dWorkload::Execute() const
m_ConvolutionLayer->run();
}
+arm_compute::ConvolutionMethod NeonConvolution2dWorkload::GetConvolutionMethod() const
+{
+ return m_ConvolutionMethod;
+}
+
void NeonConvolution2dWorkload::FreeUnusedTensors()
{
FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 54e08a2042..860d78ba7e 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -28,17 +28,23 @@ class NeonConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescript
public:
using BaseWorkload<Convolution2dQueueDescriptor>::m_Data;
- NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const bool isFastMathEnabled = false);
void Execute() const override;
+ arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
private:
std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer;
std::unique_ptr<arm_compute::Tensor> m_KernelTensor;
std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
+ arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
void FreeUnusedTensors();
};
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 4ab1701391..e7e57b15d1 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -103,6 +103,14 @@ bool RefWorkloadFactory::IsLayerSupported(const Layer& layer,
return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}
+bool RefWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions)
+{
+ return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
const bool isMemoryManaged) const
{
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 3a09588363..5f22c9eac2 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -41,6 +41,11 @@ public:
Optional<DataType> dataType,
std::string& outReasonIfUnsupported);
+ static bool IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions);
+
bool SupportsSubTensors() const override { return false; }
ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
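
The reference backend gains the same ModelOptions-aware IsLayerSupported overload as CL and NEON, and simply forwards the options to the common implementation. A hedged usage sketch ('layer' stands for any IConnectableLayer and 'modelOptions' is built as in the tests above):

    // Query layer support with model options through the new static overload.
    std::string reasonIfUnsupported;
    bool supported = armnn::RefWorkloadFactory::IsLayerSupported(
        layer,                    // const armnn::IConnectableLayer&
        armnn::DataType::Float32, // converts implicitly to Optional<DataType>
        reasonIfUnsupported,
        modelOptions);            // armnn::ModelOptions
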