aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFerran Balaguer <ferran.balaguer@arm.com>2019-09-19 11:49:25 +0100
committerDavid Monahan <david.monahan@arm.com>2019-09-27 07:57:32 +0000
commit83239f995e7b86062450794b85bfe4c4c387fda0 (patch)
treec351f4dd4ea3bf567d1304cf0aa319a01799d796
parentbd9e2c546d83fa654d8e764ef755b1ded0cd1ff8 (diff)
downloadarmnn-83239f995e7b86062450794b85bfe4c4c387fda0.tar.gz
IVGCVSW-3689 Support Import of Output Tensors for the Neon Backend
Change-Id: I6323c5f68248b54b3ed3b4cb92f1e8bf9c279b8d Signed-off-by: Ferran Balaguer <ferran.balaguer@arm.com>
-rw-r--r--src/armnn/LoadedNetwork.cpp52
-rw-r--r--src/backends/backendsCommon/test/EndToEndTestImpl.hpp387
-rw-r--r--src/backends/neon/test/NeonEndToEndTests.cpp114
-rw-r--r--src/backends/reference/test/RefEndToEndTests.cpp22
4 files changed, 462 insertions, 113 deletions
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 7ee4e612e0..5e3e3f24fe 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -132,7 +132,18 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
}
default:
{
- layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
+ // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
+ // If Export is enabled disable memory management so we can export, otherwise we do a copy
+ if((layer->GetNumOutputSlots() == 1) &&
+ (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
+ (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
+ {
+ layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
+ }
+ else
+ {
+ layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
+ }
}
}
}
@@ -409,17 +420,24 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
info.m_OutputTensorInfos.push_back(outputTensorInfo);
MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
- if (CheckFlag(importFlags, MemorySource::Malloc) && m_IsImportEnabled) // Try import the input tensor
+ if (m_IsImportEnabled) // Try import the input tensor
{
- // This assumes a CPU Tensor handle
- void* mem = tensorHandle->Map(false);
- if (outputTensorHandle->Import(mem, MemorySource::Malloc))
+ if(CheckFlag(importFlags, MemorySource::Malloc) )
{
+ // This assumes a CPU Tensor handle
+ void* mem = tensorHandle->Map(false);
+ if (outputTensorHandle->Import(mem, MemorySource::Malloc))
+ {
+ tensorHandle->Unmap();
+ return; // No need for a workload since the import has been done.
+ }
tensorHandle->Unmap();
- return; // No need for a workload since the import has been done.
+ throw MemoryImportException("EnqueueInput: Memory Import failed");
+ }
+ else
+ {
+ throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
}
- tensorHandle->Unmap();
- throw MemoryImportException("EnqueueInput: Memory Import failed");
}
else
{
@@ -464,10 +482,10 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
// b) The tensor has zero padding
// c) There is only one connection to the OutputSlot and it is to an OutputLayer.
// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
- if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input
- && m_IsExportEnabled)
+ // e) m_IsExportEnabled must be set to true
+ if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
{
- if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)
+ if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
{
MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
if (CheckFlag(importFlags, MemorySource::Malloc))
@@ -485,19 +503,25 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
BOOST_ASSERT_MSG(syncWorkload, "No sync workload created");
m_OutputQueue.push_back(move(syncWorkload));
-
- return; //No need to add the output workload below
}
else
{
throw MemoryExportException("EnqueueOutput: Memory Export failed");
}
}
+ else
+ {
+ throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export");
+ }
+ }
+ else
+ {
+ throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
}
}
else
{
- // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy.
+ // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
info.m_InputTensorInfos.push_back(inputTensorInfo);
diff --git a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
index 3bdd48bcfa..1577e1323c 100644
--- a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
@@ -111,6 +111,21 @@ bool CompareBoolean(T a, T b)
return (a == 0 && b == 0) ||(a != 0 && b != 0);
};
+// Utility function to find the number of instances of a substring within a string.
+int SubStringCounter(std::string& string, std::string&& substring)
+{
+ std::size_t found = 0;
+ int count = 0;
+ // Look for the substring starting from where we last found the substring
+ while((found = string.find(substring, found)) != std::string::npos)
+ {
+ count++;
+ // Offset by substring length to avoid finding the same substring twice
+ found += substring.length();
+ }
+ return count;
+}
+
template<DataType ArmnnIType, DataType ArmnnOType,
typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void EndToEndLayerTestImpl(INetworkPtr network,
@@ -237,7 +252,7 @@ inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
}
-inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
+inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
{
using namespace armnn;
@@ -296,8 +311,16 @@ inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
};
- // Do the inference and expect it to fail with a ImportMemoryException
- BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
+ // Do the inference and expect it to fail with a MemoryExportException
+ if (backends[0] == Compute::CpuAcc)
+ {
+ // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
+ BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
+ }
+ else
+ {
+ BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
+ }
}
inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
@@ -372,9 +395,365 @@ inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
// Contains SyncMemGeneric
found = dump.find("SyncMemGeneric");
BOOST_TEST(found != std::string::npos);
- // No contains CopyMemGeneric
+ // Does not contain CopyMemGeneric
found = dump.find("CopyMemGeneric");
BOOST_TEST(found == std::string::npos);
}
+inline void ImportOnlyWorkload(std::vector<BackendId> backends)
+{
+ using namespace armnn;
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+ pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+ pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ BOOST_TEST_CHECKPOINT("Load Network");
+ // Load it into the runtime. It should pass.
+ NetworkId netId;
+ std::string ignoredErrorMessage;
+ INetworkProperties networkProperties(true, false);
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
+ == Status::Success);
+
+ BOOST_TEST_CHECKPOINT("Generate Data");
+ // Creates structures for input & output
+ std::vector<float> inputData
+ {
+ 1.0f, 2.0f, 3.0f, 4.0f
+ };
+
+ std::vector<float> outputData(4);
+
+ std::vector<float> expectedOutput
+ {
+ 1.0f, 4.0f, 9.0f, 16.0f
+ };
+
+ BOOST_TEST_CHECKPOINT("Create Network");
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ BOOST_TEST_CHECKPOINT("Get Profiler");
+
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+
+ BOOST_TEST_CHECKPOINT("Run Inference");
+ // Do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ BOOST_TEST_CHECKPOINT("Print Profiler");
+ // Retrieve the Profiler.Print() output to get the workload execution
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+ std::stringstream ss;
+ profilerManager.GetProfiler()->Print(ss);;
+ std::string dump = ss.str();
+
+ // Check there are no SyncMemGeneric workloads as we didn't export
+ BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ BOOST_TEST(count == 0);
+
+ // Should only be 1 CopyMemGeneric for the output as we imported
+ BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ BOOST_TEST(count == 1);
+
+ // Check the output is correct
+ BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
+}
+
+inline void ExportOnlyWorkload(std::vector<BackendId> backends)
+{
+ using namespace armnn;
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+ pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+ pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ BOOST_TEST_CHECKPOINT("Load Network");
+ // Load it into the runtime. It should pass.
+ NetworkId netId;
+ std::string ignoredErrorMessage;
+ INetworkProperties networkProperties(false, true);
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
+ == Status::Success);
+
+ BOOST_TEST_CHECKPOINT("Generate Data");
+ // Creates structures for input & output
+ std::vector<float> inputData
+ {
+ 1.0f, 2.0f, 3.0f, 4.0f
+ };
+
+ std::vector<float> outputData(4);
+
+ std::vector<float> expectedOutput
+ {
+ 1.0f, 4.0f, 9.0f, 16.0f
+ };
+
+ BOOST_TEST_CHECKPOINT("Create Network");
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ BOOST_TEST_CHECKPOINT("Get Profiler");
+
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+
+ BOOST_TEST_CHECKPOINT("Run Inference");
+ // Do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ BOOST_TEST_CHECKPOINT("Print Profiler");
+ // Retrieve the Profiler.Print() output to get the workload execution
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+ std::stringstream ss;
+ profilerManager.GetProfiler()->Print(ss);;
+ std::string dump = ss.str();
+
+ // Check there is a SyncMemGeneric workload as we exported
+ BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ BOOST_TEST(count == 1);
+
+ // Should be 1 CopyMemGeneric for the output as we did not import
+ BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ BOOST_TEST(count == 1);
+
+ // Check the output is correct
+ BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
+}
+
+inline void ImportAndExportWorkload(std::vector<BackendId> backends)
+{
+ using namespace armnn;
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+ pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+ pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ BOOST_TEST_CHECKPOINT("Load Network");
+ // Load it into the runtime. It should pass.
+ NetworkId netId;
+ std::string ignoredErrorMessage;
+ INetworkProperties networkProperties(true, true);
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
+ == Status::Success);
+
+ BOOST_TEST_CHECKPOINT("Generate Data");
+ // Creates structures for input & output
+ std::vector<float> inputData
+ {
+ 1.0f, 2.0f, 3.0f, 4.0f
+ };
+
+ std::vector<float> outputData(4);
+
+ std::vector<float> expectedOutput
+ {
+ 1.0f, 4.0f, 9.0f, 16.0f
+ };
+
+ BOOST_TEST_CHECKPOINT("Create Network");
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ BOOST_TEST_CHECKPOINT("Get Profiler");
+
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+
+ BOOST_TEST_CHECKPOINT("Run Inference");
+ // Do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ BOOST_TEST_CHECKPOINT("Print Profiler");
+ // Retrieve the Profiler.Print() output to get the workload execution
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+ std::stringstream ss;
+ profilerManager.GetProfiler()->Print(ss);;
+ std::string dump = ss.str();
+
+ // Check there is a SyncMemGeneric workload as we exported
+ BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ BOOST_TEST(count == 1);
+
+ // Shouldn't be any CopyMemGeneric workloads
+ BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ BOOST_TEST(count == 0);
+
+ // Check the output is correct
+ BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
+}
+
+inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
+{
+ using namespace armnn;
+
+ // Create runtime in which test will run
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* activation = net->AddActivationLayer(descriptor);
+
+ IConnectableLayer* output0 = net->AddOutputLayer(0);
+ IConnectableLayer* output1 = net->AddOutputLayer(1);
+
+ input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
+ activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
+ activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+ activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+
+ // Optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ // Loads it into the runtime.
+ NetworkId netId;
+ std::string ignoredErrorMessage;
+ // Enable Importing
+ INetworkProperties networkProperties(true, true);
+ runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+ // Creates structures for input & output
+ std::vector<float> inputData
+ {
+ 1.0f, 2.0f, 3.0f, 4.0f
+ };
+
+ std::vector<float> outputData0(4);
+ std::vector<float> outputData1(4);
+
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
+ {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
+ };
+
+ // The result of the inference is not important, just the fact that there
+ // should not be CopyMemGeneric workloads.
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+
+ // Do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // Retrieve the Profiler.Print() output to get the workload execution
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+ std::stringstream ss;
+ profilerManager.GetProfiler()->Print(ss);
+ std::string dump = ss.str();
+
+ std::size_t found = std::string::npos;
+
+ if (backends[0] == Compute::CpuRef)
+ {
+ found = dump.find("RefActivationWorkload");
+ }
+ else if (backends[0] == Compute::CpuAcc)
+ {
+ found = dump.find("NeonActivationWorkload");
+ }
+ else if (backends[0] == Compute::GpuAcc)
+ {
+ found = dump.find("ClActivationWorkload");
+ }
+
+ BOOST_TEST(found != std::string::npos);
+ // Does not contain SyncMemGeneric
+ found = dump.find("SyncMemGeneric");
+ BOOST_TEST(found == std::string::npos);
+ // Contains CopyMemGeneric
+ found = dump.find("CopyMemGeneric");
+ BOOST_TEST(found != std::string::npos);
+}
+
} // anonymous namespace
diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp
index a09b95ed0e..3296918117 100644
--- a/src/backends/neon/test/NeonEndToEndTests.cpp
+++ b/src/backends/neon/test/NeonEndToEndTests.cpp
@@ -317,108 +317,34 @@ BOOST_AUTO_TEST_CASE(NeonImportNonAlignedInputPointerTest)
ImportNonAlignedInputPointerTest(defaultBackends);
}
-// Utility function to find the number of instances of a substring within a string.
-int SubStringCounter(std::string& string, std::string&& substring)
+BOOST_AUTO_TEST_CASE(NeonExportNonAlignedOutputPointerTest)
{
- std::size_t found = 0;
- int count = 0;
- // Look for the substring starting from where we last found the substring
- while((found = string.find(substring, found)) != std::string::npos)
- {
- count++;
- // Offset by substring length to avoid finding the same substring twice
- found += substring.length();
- }
- return count;
+ ExportNonAlignedOutputPointerTest(defaultBackends);
}
-BOOST_AUTO_TEST_CASE(NeonImportOnlyWorkload)
+BOOST_AUTO_TEST_CASE(NeonImportAlignedPointerTest)
{
- using namespace armnn;
-
- IRuntime::CreationOptions options;
- IRuntimePtr runtime(IRuntime::Create(options));
-
- // Builds up the structure of the network.
- INetworkPtr net(INetwork::Create());
-
- IConnectableLayer* input = net->AddInputLayer(0);
-
- ActivationDescriptor descriptor;
- descriptor.m_Function = ActivationFunction::Square;
- IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
-
- IConnectableLayer* output = net->AddOutputLayer(0);
+ ImportAlignedPointerTest(defaultBackends);
+}
- input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
- pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+BOOST_AUTO_TEST_CASE(NeonImportOnlyWorkload)
+{
+ ImportOnlyWorkload(defaultBackends);
+}
- input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
- pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+BOOST_AUTO_TEST_CASE(NeonExportOnlyWorkload)
+{
+ ExportOnlyWorkload(defaultBackends);
+}
- // optimize the network
- std::vector<BackendId> backends = {Compute::CpuAcc};
- IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+BOOST_AUTO_TEST_CASE(NeonImportAndExportWorkload)
+{
+ ImportAndExportWorkload(defaultBackends);
+}
- BOOST_TEST_CHECKPOINT("Load Network");
- // Load it into the runtime. It should pass.
- NetworkId netId;
- std::string ignoredErrorMessage;
- INetworkProperties networkProperties(true, false);
- BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
- == Status::Success);
-
- BOOST_TEST_CHECKPOINT("Generate Data");
- // Creates structures for input & output
- std::vector<float> inputData
- {
- 1.0f, 2.0f, 3.0f, 4.0f
- };
-
- std::vector<float> outputData(4);
-
- std::vector<float> expectedOutput
- {
- 1.0f, 4.0f, 9.0f, 16.0f
- };
-
- BOOST_TEST_CHECKPOINT("Create Network");
- InputTensors inputTensors
- {
- {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
- };
- OutputTensors outputTensors
- {
- {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
- };
-
- BOOST_TEST_CHECKPOINT("Get Profiler");
-
- runtime->GetProfiler(netId)->EnableProfiling(true);
-
- BOOST_TEST_CHECKPOINT("Run Inference");
- // Do the inference
- runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
-
- BOOST_TEST_CHECKPOINT("Print Profiler");
- // Retrieve the Profiler.Print() output to get the workload execution
- ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
- std::stringstream ss;
- profilerManager.GetProfiler()->Print(ss);;
- std::string dump = ss.str();
-
- // Check there are no SyncMemGeneric workloads as we didn't export
- BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
- int count = SubStringCounter(dump, "SyncMemGeneric");
- BOOST_TEST(count == 0);
-
- // Should only be 1 CopyMemGeneric for the output as we imported
- BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
- count = SubStringCounter(dump, "CopyMemGeneric");
- BOOST_TEST(count == 1);
-
- // Check the output is correct
- BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
+BOOST_AUTO_TEST_CASE(NeonExportOutputWithSeveralOutputSlotConnectionsTest)
+{
+ ExportOutputWithSeveralOutputSlotConnectionsTest(defaultBackends);
}
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 52454a2a26..b670015f77 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -978,7 +978,7 @@ BOOST_AUTO_TEST_CASE(RefImportNonAlignedPointerTest)
BOOST_AUTO_TEST_CASE(RefExportNonAlignedPointerTest)
{
- ImportNonAlignedOutputPointerTest(defaultBackends);
+ ExportNonAlignedOutputPointerTest(defaultBackends);
}
BOOST_AUTO_TEST_CASE(RefImportAlignedPointerTest)
@@ -986,6 +986,26 @@ BOOST_AUTO_TEST_CASE(RefImportAlignedPointerTest)
ImportAlignedPointerTest(defaultBackends);
}
+BOOST_AUTO_TEST_CASE(RefImportOnlyWorkload)
+{
+ ImportOnlyWorkload(defaultBackends);
+}
+
+BOOST_AUTO_TEST_CASE(RefExportOnlyWorkload)
+{
+ ExportOnlyWorkload(defaultBackends);
+}
+
+BOOST_AUTO_TEST_CASE(RefImportAndExportWorkload)
+{
+ ImportAndExportWorkload(defaultBackends);
+}
+
+BOOST_AUTO_TEST_CASE(RefExportOutputWithSeveralOutputSlotConnectionsTest)
+{
+ ExportOutputWithSeveralOutputSlotConnectionsTest(defaultBackends);
+}
+
#endif
BOOST_AUTO_TEST_SUITE_END()