aboutsummaryrefslogtreecommitdiff
path: root/src/backends
diff options
context:
space:
mode:
authorNarumol Prangnawarat <narumol.prangnawarat@arm.com>2021-05-07 17:52:36 +0100
committerNarumol Prangnawarat <narumol.prangnawarat@arm.com>2021-05-08 20:15:32 +0100
commite5f0b2409c2e557a5a78e2f4659d203154289b23 (patch)
tree0e32680ed15ed5157c78d5deeabda2c0ceeeb4a3 /src/backends
parentae12306486efc55293a40048618abe5e8b19151b (diff)
downloadarmnn-e5f0b2409c2e557a5a78e2f4659d203154289b23.tar.gz
IVGCVSW-5818 Enable import on GPU
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com> Change-Id: I4e4eb107aa2bfa09625840d738001f33152e6792
Diffstat (limited to 'src/backends')
-rw-r--r--src/backends/backendsCommon/IBackendInternal.cpp18
-rw-r--r--src/backends/cl/ClBackend.cpp41
-rw-r--r--src/backends/cl/ClBackend.hpp13
-rw-r--r--src/backends/cl/ClImportTensorHandleFactory.cpp20
-rw-r--r--src/backends/cl/ClImportTensorHandleFactory.hpp6
-rw-r--r--src/backends/cl/backend.mk1
-rw-r--r--src/backends/cl/test/ClFallbackTests.cpp91
-rw-r--r--src/backends/cl/test/ClImportTensorHandleTests.cpp112
-rw-r--r--src/backends/neon/test/NeonFallbackTests.cpp73
9 files changed, 324 insertions, 51 deletions
diff --git a/src/backends/backendsCommon/IBackendInternal.cpp b/src/backends/backendsCommon/IBackendInternal.cpp
index b08dff84ed..31706eb1e7 100644
--- a/src/backends/backendsCommon/IBackendInternal.cpp
+++ b/src/backends/backendsCommon/IBackendInternal.cpp
@@ -76,6 +76,17 @@ IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
return CreateWorkloadFactory(tensorHandleFactoryRegistry);
}
+IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
+ class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions,
+ MemorySourceFlags inputFlags,
+ MemorySourceFlags outputFlags) const
+{
+ IgnoreUnused(inputFlags);
+ IgnoreUnused(outputFlags);
+ return CreateWorkloadFactory(tensorHandleFactoryRegistry, modelOptions);
+}
+
IBackendInternal::IBackendContextPtr IBackendInternal::CreateBackendContext(const IRuntime::CreationOptions&) const
{
return IBackendContextPtr{};
@@ -147,6 +158,13 @@ bool IBackendInternal::SupportsTensorAllocatorAPI() const
return !GetHandleFactoryPreferences().empty();
}
+void IBackendInternal::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry,
+ MemorySourceFlags /*inputFlags*/,
+ MemorySourceFlags /*outputFlags*/)
+{
+ return RegisterTensorHandleFactories(registry);
+}
+
ITensorHandleFactory::FactoryId IBackendInternal::GetBackwardCompatibleFavoriteHandleFactory()
{
auto favorites = GetHandleFactoryPreferences();
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index f97cb4bba8..35770d9219 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -4,12 +4,13 @@
//
#include "ClBackend.hpp"
+#include "ClBackendContext.hpp"
#include "ClBackendId.hpp"
#include "ClBackendModelContext.hpp"
-#include "ClWorkloadFactory.hpp"
-#include "ClBackendContext.hpp"
+#include "ClImportTensorHandleFactory.hpp"
#include "ClLayerSupport.hpp"
#include "ClTensorHandleFactory.hpp"
+#include "ClWorkloadFactory.hpp"
#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>
@@ -71,6 +72,8 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
registry.RegisterMemoryManager(memoryManager);
registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
+ registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
+ static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
return std::make_unique<ClWorkloadFactory>(
PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
@@ -83,6 +86,24 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
registry.RegisterMemoryManager(memoryManager);
registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
+ registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
+ static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
+
+ return std::make_unique<ClWorkloadFactory>(
+ PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+ TensorHandleFactoryRegistry& registry,
+ const ModelOptions& modelOptions,
+ MemorySourceFlags inputFlags,
+ MemorySourceFlags outputFlags) const
+{
+ auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+
+ registry.RegisterMemoryManager(memoryManager);
+ registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
+ registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
return std::make_unique<ClWorkloadFactory>(
PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
@@ -90,7 +111,8 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
{
- return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()};
+ return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic(),
+ ClImportTensorHandleFactory::GetIdStatic()};
}
void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
@@ -99,6 +121,19 @@ void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& regis
registry.RegisterMemoryManager(mgr);
registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
+ registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
+ static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
+}
+
+void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
+ MemorySourceFlags inputFlags,
+ MemorySourceFlags outputFlags)
+{
+ auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+
+ registry.RegisterMemoryManager(mgr);
+ registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
+ registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
}
IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index f9a5745eb3..252d87edea 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -30,16 +30,25 @@ public:
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
TensorHandleFactoryRegistry& registry) const override;
- IWorkloadFactoryPtr CreateWorkloadFactory( const IMemoryManagerSharedPtr& memoryManager,
- const ModelOptions& modelOptions) const override;
+ IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager,
+ const ModelOptions& modelOptions) const override;
IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
const ModelOptions& modelOptions) const override;
+ IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions,
+ MemorySourceFlags inputFlags,
+ MemorySourceFlags outputFlags) const override;
+
std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override;
+ void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
+ MemorySourceFlags inputFlags,
+ MemorySourceFlags outputFlags) override;
+
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(
const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
diff --git a/src/backends/cl/ClImportTensorHandleFactory.cpp b/src/backends/cl/ClImportTensorHandleFactory.cpp
index 594e05423e..26d5f9c47a 100644
--- a/src/backends/cl/ClImportTensorHandleFactory.cpp
+++ b/src/backends/cl/ClImportTensorHandleFactory.cpp
@@ -106,6 +106,11 @@ bool ClImportTensorHandleFactory::SupportsSubTensors() const
return true;
}
+bool ClImportTensorHandleFactory::SupportsMapUnmap() const
+{
+ return false;
+}
+
MemorySourceFlags ClImportTensorHandleFactory::GetExportFlags() const
{
return m_ExportFlags;
@@ -116,4 +121,19 @@ MemorySourceFlags ClImportTensorHandleFactory::GetImportFlags() const
return m_ImportFlags;
}
+std::vector<Capability> ClImportTensorHandleFactory::GetCapabilities(const IConnectableLayer* layer,
+ const IConnectableLayer* connectedLayer,
+ CapabilityClass capabilityClass)
+{
+ IgnoreUnused(layer);
+ IgnoreUnused(connectedLayer);
+ std::vector<Capability> capabilities;
+ if (capabilityClass == CapabilityClass::FallbackImportDisabled)
+ {
+ Capability paddingCapability(CapabilityClass::FallbackImportDisabled, true);
+ capabilities.push_back(paddingCapability);
+ }
+ return capabilities;
+}
+
} // namespace armnn \ No newline at end of file
diff --git a/src/backends/cl/ClImportTensorHandleFactory.hpp b/src/backends/cl/ClImportTensorHandleFactory.hpp
index ee2f84efda..7e22949647 100644
--- a/src/backends/cl/ClImportTensorHandleFactory.hpp
+++ b/src/backends/cl/ClImportTensorHandleFactory.hpp
@@ -58,10 +58,16 @@ public:
bool SupportsSubTensors() const override;
+ bool SupportsMapUnmap() const override;
+
MemorySourceFlags GetExportFlags() const override;
MemorySourceFlags GetImportFlags() const override;
+ std::vector<Capability> GetCapabilities(const IConnectableLayer* layer,
+ const IConnectableLayer* connectedLayer,
+ CapabilityClass capabilityClass) override;
+
private:
MemorySourceFlags m_ImportFlags;
MemorySourceFlags m_ExportFlags;
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 976f614cff..e6c289cf39 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -20,6 +20,7 @@ BACKEND_SOURCES := \
ClContextControl.cpp \
ClContextDeserializer.cpp \
ClContextSerializer.cpp \
+ ClImportTensorHandleFactory.cpp \
ClLayerSupport.cpp \
ClRegistryInitializer.cpp \
ClTensorHandleFactory.cpp \
diff --git a/src/backends/cl/test/ClFallbackTests.cpp b/src/backends/cl/test/ClFallbackTests.cpp
index eec3afe447..183b8caa2e 100644
--- a/src/backends/cl/test/ClFallbackTests.cpp
+++ b/src/backends/cl/test/ClFallbackTests.cpp
@@ -11,7 +11,7 @@
BOOST_AUTO_TEST_SUITE(ClFallback)
-BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled())
+BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon)
{
using namespace armnn;
@@ -34,7 +34,7 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled
add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
input0->GetOutputSlot(0).SetTensorInfo(info);
input1->GetOutputSlot(0).SetTensorInfo(info);
@@ -82,30 +82,49 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled
runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Creates structures for input & output
- std::vector<float> inputData0
+ std::vector<float> inputValue0
{
- 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
+ 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
};
- std::vector<float> inputData1
+ std::vector<float> inputValue1
{
- 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
+ 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
};
std::vector<float> inputData2
{
- 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
+ 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
};
- std::vector<float> outputData(12);
+ std::vector<float> outputData(16);
std::vector<float> expectedOutput
{
- 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
+ 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
};
+ // Prepare aligned data
+ unsigned int numElements = info.GetNumElements();
+ size_t totalBytes = numElements * sizeof(float);
+ const size_t alignment = 64;
+ size_t space = totalBytes + alignment + alignment;
+ auto inputData0 = std::make_unique<uint8_t[]>(space);
+ void* alignedInputPtr0 = inputData0.get();
+ BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
+
+ auto* intputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
+ std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0);
+
+ auto inputData1 = std::make_unique<uint8_t[]>(space);
+ void* alignedInputPtr1 = inputData1.get();
+ BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
+
+ auto* intputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
+ std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1);
+
InputTensors inputTensors
{
- { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
- { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
+ { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
+ { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
{ 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
};
OutputTensors outputTensors
@@ -134,6 +153,8 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled
// Check output is as expected
BOOST_TEST(outputData == expectedOutput);
+
+ runtime->UnloadNetwork(netId);
}
BOOST_AUTO_TEST_CASE(ClImportDisabledFallbackToNeon)
@@ -258,7 +279,7 @@ BOOST_AUTO_TEST_CASE(ClImportDisabledFallbackToNeon)
BOOST_TEST(outputData == expectedOutput);
}
-BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test::disabled())
+BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon)
{
using namespace armnn;
@@ -269,6 +290,10 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test::
INetworkPtr net(INetwork::Create());
Pooling2dDescriptor desc;
+ desc.m_PoolWidth = 2;
+ desc.m_PoolHeight = 2;
+ desc.m_StrideX = 2;
+ desc.m_StrideY = 2;
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
@@ -285,8 +310,8 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test::
sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
- TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+ TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);
input0->GetOutputSlot(0).SetTensorInfo(info);
input1->GetOutputSlot(0).SetTensorInfo(info);
@@ -340,27 +365,45 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test::
runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Creates structures for input & output
- std::vector<float> inputData0
+ std::vector<float> inputValue0
{
- 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
+ 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
};
- std::vector<float> inputData1
+ std::vector<float> inputValue1
{
- 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
+ 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
};
std::vector<float> inputData2
{
- 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
+ 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
};
- std::vector<float> outputData(2);
+ std::vector<float> outputData(4);
- std::vector<float> expectedOutput{ 11.0f, -1.0f };
+ std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
+
+ unsigned int numElements = info.GetNumElements();
+ size_t totalBytes = numElements * sizeof(float);
+ const size_t alignment = 64;
+ size_t space = totalBytes + alignment + alignment;
+ auto inputData0 = std::make_unique<uint8_t[]>(space);
+ void* alignedInputPtr0 = inputData0.get();
+ BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
+
+ auto* intputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
+ std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0);
+
+ auto inputData1 = std::make_unique<uint8_t[]>(space);
+ void* alignedInputPtr1 = inputData1.get();
+ BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
+
+ auto* intputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
+ std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1);
InputTensors inputTensors
{
- { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
- { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
+ { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
+ { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
{ 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
};
OutputTensors outputTensors
@@ -393,6 +436,8 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test::
// Check output is as expected
BOOST_TEST(outputData == expectedOutput);
+
+ runtime->UnloadNetwork(netId);
}
BOOST_AUTO_TEST_CASE(ClImportDisableFallbackSubgraphToNeon)
diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp
index bfb74af801..85ff35f0af 100644
--- a/src/backends/cl/test/ClImportTensorHandleTests.cpp
+++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp
@@ -11,6 +11,9 @@
#include <boost/test/unit_test.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/INetwork.hpp>
+
using namespace armnn;
BOOST_AUTO_TEST_SUITE(ClImportTensorHandleTests)
@@ -38,7 +41,7 @@ BOOST_FIXTURE_TEST_CASE(ClMallocImport, ClContextControlFixture)
const size_t totalBytes = tensor.info()->total_size();
const size_t alignment =
arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
- size_t space = totalBytes + alignment;
+ size_t space = totalBytes + alignment + alignment;
auto testData = std::make_unique<uint8_t[]>(space);
void* alignedPtr = testData.get();
BOOST_CHECK(std::align(alignment, totalBytes, alignedPtr, space));
@@ -57,7 +60,7 @@ BOOST_FIXTURE_TEST_CASE(ClMallocImport, ClContextControlFixture)
// Validate result by checking that the output has no negative values
for(unsigned int i = 0; i < numElements; ++i)
{
- BOOST_ASSERT(typedPtr[i] >= 0);
+ BOOST_TEST(typedPtr[i] >= 0);
}
}
@@ -78,7 +81,7 @@ BOOST_FIXTURE_TEST_CASE(ClIncorrectMemorySourceImport, ClContextControlFixture)
const size_t totalBytes = tensor.info()->total_size();
const size_t alignment =
arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
- size_t space = totalBytes + alignment;
+ size_t space = totalBytes + alignment + alignment;
auto testData = std::make_unique<uint8_t[]>(space);
void* alignedPtr = testData.get();
BOOST_CHECK(std::align(alignment, totalBytes, alignedPtr, space));
@@ -108,4 +111,105 @@ BOOST_FIXTURE_TEST_CASE(ClInvalidMemorySourceImport, ClContextControlFixture)
BOOST_CHECK_THROW(handle->Import(inputData.data(), invalidMemSource), MemoryImportException);
}
-BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file
+BOOST_FIXTURE_TEST_CASE(ClImportEndToEnd, ClContextControlFixture)
+{
+ // Create runtime in which test will run
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0, "Input");
+
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::ReLu;
+ IConnectableLayer* activation = net->AddActivationLayer(descriptor, "Activation");
+
+ IConnectableLayer* output = net->AddOutputLayer(0, "Output");
+
+ input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
+ activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo tensorInfo = TensorInfo({ 1, 24, 16, 3 }, DataType::Float32);
+ unsigned int numElements = tensorInfo.GetNumElements();
+ size_t totalBytes = numElements * sizeof(float);
+
+ input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ activation->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+
+ // Optimize the network
+ OptimizerOptions optOptions;
+ optOptions.m_ImportEnabled = true;
+ std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+ BOOST_CHECK(optNet);
+
+ // Loads it into the runtime.
+ NetworkId netId;
+ std::string ignoredErrorMessage;
+ // Enable Importing
+ INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
+ runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+ // Creates structures for input & output
+ const size_t alignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ size_t space = totalBytes + alignment + alignment;
+ auto inputData = std::make_unique<uint8_t[]>(space);
+ void* alignedInputPtr = inputData.get();
+ BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
+
+ // Input with negative values
+ auto* intputPtr = reinterpret_cast<float*>(alignedInputPtr);
+ std::fill_n(intputPtr, numElements, -5.0f);
+
+ auto outputData = std::make_unique<uint8_t[]>(space);
+ void* alignedOutputPtr = outputData.get();
+ BOOST_CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
+
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr)},
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
+ };
+
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+
+ // Do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // Retrieve the Profiler.Print() output to get the workload execution
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+ std::stringstream ss;
+ profilerManager.GetProfiler()->Print(ss);;
+ std::string dump = ss.str();
+
+ // Contains ActivationWorkload
+ std::size_t found = dump.find("ActivationWorkload");
+ BOOST_TEST(found != std::string::npos);
+
+ // Contains SyncMemGeneric
+ found = dump.find("SyncMemGeneric");
+ BOOST_TEST(found != std::string::npos);
+
+ // Does not contain CopyMemGeneric
+ found = dump.find("CopyMemGeneric");
+ BOOST_TEST(found == std::string::npos);
+
+ // Check output is as expected
+ // Validate result by checking that the output has no negative values
+ auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
+ BOOST_TEST(outputResult);
+ for(unsigned int i = 0; i < numElements; ++i)
+ {
+ BOOST_TEST(outputResult[i] >= 0);
+ }
+
+ runtime->UnloadNetwork(netId);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/neon/test/NeonFallbackTests.cpp b/src/backends/neon/test/NeonFallbackTests.cpp
index 8dc592db5d..383a5f654c 100644
--- a/src/backends/neon/test/NeonFallbackTests.cpp
+++ b/src/backends/neon/test/NeonFallbackTests.cpp
@@ -16,7 +16,7 @@ BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc)
{
using namespace armnn;
- // Create a mock backend object
+ // Create a mock backend objectN
MockImportBackendInitialiser initialiser; // Register the Mock Backend
auto backendObjPtr = CreateBackendObject(MockImportBackendId());
BOOST_TEST((backendObjPtr != nullptr));
@@ -677,7 +677,7 @@ BOOST_AUTO_TEST_CASE(FallbackDisableImportFromCpuAcc)
}
#if defined(ARMCOMPUTECL_ENABLED)
-BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl, * boost::unit_test::disabled())
+BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl)
{
using namespace armnn;
@@ -700,7 +700,7 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl, * boost::unit_test::disabled
add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
input0->GetOutputSlot(0).SetTensorInfo(info);
input1->GetOutputSlot(0).SetTensorInfo(info);
@@ -752,29 +752,43 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl, * boost::unit_test::disabled
// Creates structures for input & output
std::vector<float> inputData0
{
- 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
+ 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
};
std::vector<float> inputData1
{
- 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
+ 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
};
std::vector<float> inputData2
{
- 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
+ 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
};
- std::vector<float> outputData(12);
+ std::vector<float> outputData(16);
std::vector<float> expectedOutput
{
- 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
+ 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
};
+ // Creates structures for input & output
+ unsigned int numElements = info.GetNumElements();
+ size_t totalBytes = numElements * sizeof(float);
+
+ // Prepare aligned data
+ const size_t alignment = 64;
+ size_t space = totalBytes + alignment + alignment;
+ auto inputData = std::make_unique<uint8_t[]>(space);
+ void* alignedInputPtr = inputData.get();
+ BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
+
+ auto* intputPtr = reinterpret_cast<float*>(alignedInputPtr);
+ std::copy(inputData2.begin(), inputData2.end(), intputPtr);
+
InputTensors inputTensors
{
{ 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
{ 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
- { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
+ { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), alignedInputPtr) }
};
OutputTensors outputTensors
{
@@ -801,7 +815,11 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl, * boost::unit_test::disabled
BOOST_TEST(found != std::string::npos);
// Check output is as expected
- BOOST_TEST(outputData == expectedOutput);
+ for(unsigned int i = 0; i < numElements; ++i)
+ {
+ BOOST_TEST(outputData[i] == expectedOutput[i]);
+ }
+ runtime->UnloadNetwork(netId);
}
BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl)
@@ -926,7 +944,7 @@ BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl)
BOOST_TEST(outputData == expectedOutput);
}
-BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test::disabled())
+BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl)
{
using namespace armnn;
@@ -937,6 +955,10 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test::
INetworkPtr net(INetwork::Create());
Pooling2dDescriptor desc;
+ desc.m_PoolWidth = 2;
+ desc.m_PoolHeight = 2;
+ desc.m_StrideX = 2;
+ desc.m_StrideY = 2;
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
@@ -953,8 +975,8 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test::
sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
- TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+ TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);
input0->GetOutputSlot(0).SetTensorInfo(info);
input1->GetOutputSlot(0).SetTensorInfo(info);
@@ -1012,26 +1034,38 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test::
// Creates structures for input & output
std::vector<float> inputData0
{
- 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
+ 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
};
std::vector<float> inputData1
{
- 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
+ 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
};
std::vector<float> inputData2
{
- 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
+ 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
};
- std::vector<float> outputData(2);
+ std::vector<float> outputData(4);
- std::vector<float> expectedOutput{ 11.0f, -1.0f };
+ std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
+
+ // Prepare aligned data
+ unsigned int numElements = info.GetNumElements();
+ size_t totalBytes = numElements * sizeof(float);
+ const size_t alignment = 64;
+ size_t space = totalBytes + alignment + alignment;
+ auto inputData = std::make_unique<uint8_t[]>(space);
+ void* alignedInputPtr = inputData.get();
+ BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
+
+ auto* intputPtr = reinterpret_cast<float*>(alignedInputPtr);
+ std::copy(inputData2.begin(), inputData2.end(), intputPtr);
InputTensors inputTensors
{
{ 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
{ 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
- { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
+ { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), alignedInputPtr) }
};
OutputTensors outputTensors
{
@@ -1067,6 +1101,7 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test::
// Check output is as expected
BOOST_TEST(outputData == expectedOutput);
+ runtime->UnloadNetwork(netId);
}
BOOST_AUTO_TEST_CASE(NeonImportDisableFallbackSubgraphToCl)