author    David Monahan <David.Monahan@arm.com>    2022-02-03 17:04:59 +0000
committer David Monahan <david.monahan@arm.com>    2022-02-07 14:17:30 +0000
commit    16829713a5d2096c8f0dc989fbb0964a9280777a (patch)
tree      cba7f551b113cf8b9889ae0bf2d5329d19f64f5b
parent    49428b7e8432e3f8c85e99e5aaccfd1b5b2d7842 (diff)
IVGCVSW-6698 EndToEnd tests on ref to ensure allocated data can be reused
Signed-off-by: David Monahan <David.Monahan@arm.com>
Change-Id: I2cda579d18be765fbc2783d9fd80ff8e5372a8a8
-rw-r--r--  src/backends/backendsCommon/test/EndToEndTestImpl.hpp  368
-rw-r--r--  src/backends/reference/RefTensorHandle.cpp               3
-rw-r--r--  src/backends/reference/test/RefEndToEndTests.cpp         10
3 files changed, 370 insertions, 11 deletions
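
For orientation before the diff: the flow the new tests exercise is Arm NN's force-import path. Input and output buffers are registered up front with ImportInputs()/ImportOutputs() and the returned IDs are passed to EnqueueWorkload(); aligned buffers are then used in place, while misaligned ones fall back to being copied. A condensed sketch of that flow, assembled only from the calls that appear in the tests below (the function name and the umbrella include are illustrative and not part of the patch):

// Sketch only: condenses the pattern used by the new tests below.
#include <armnn/ArmNN.hpp>   // umbrella header, assumed for a standalone example
#include <string>
#include <utility>
#include <vector>

void RunWithForcedImport(const std::vector<armnn::BackendId>& backends)   // hypothetical helper
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Single Square activation: output[i] = input[i] * input[i]
    INetworkPtr net(INetwork::Create());
    IConnectableLayer* input = net->AddInputLayer(0);
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);
    IConnectableLayer* output = net->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    NetworkId netId;
    std::string errorMessage;
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);

    std::vector<float> inputData { 1.0f, 2.0f, 3.0f, 4.0f };
    std::vector<float> outputData(4);
    InputTensors inputTensors   { { 0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data()) } };
    OutputTensors outputTensors { { 0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) } };

    // Register the buffers for import; if either buffer is misaligned the runtime falls back to copying.
    std::vector<ImportedInputId>  importedInputIds  = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
    std::vector<ImportedOutputId> importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);

    runtime->UnloadNetwork(netId);   // outputData now holds { 1, 4, 9, 16 }
}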
diff --git a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
index 0fa34aed63..59acb23d95 100644
--- a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
@@ -380,7 +380,7 @@ inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
// Retrieve the Profiler.Print() output to get the workload execution
ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
std::stringstream ss;
- profilerManager.GetProfiler()->Print(ss);;
+ profilerManager.GetProfiler()->Print(ss);
std::string dump = ss.str();
// Contains ActivationWorkload
@@ -472,7 +472,7 @@ inline void ImportOnlyWorkload(std::vector<BackendId> backends)
// Retrieve the Profiler.Print() output to get the workload execution
ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
std::stringstream ss;
- profilerManager.GetProfiler()->Print(ss);;
+ profilerManager.GetProfiler()->Print(ss);
std::string dump = ss.str();
// Check there are no SyncMemGeneric workloads as we didn't export
@@ -560,7 +560,7 @@ inline void ExportOnlyWorkload(std::vector<BackendId> backends)
// Retrieve the Profiler.Print() output to get the workload execution
ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
std::stringstream ss;
- profilerManager.GetProfiler()->Print(ss);;
+ profilerManager.GetProfiler()->Print(ss);
std::string dump = ss.str();
// Check there is a SyncMemGeneric workload as we exported
@@ -649,7 +649,7 @@ inline void ImportAndExportWorkload(std::vector<BackendId> backends)
// Retrieve the Profiler.Print() output to get the workload execution
ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
std::stringstream ss;
- profilerManager.GetProfiler()->Print(ss);;
+ profilerManager.GetProfiler()->Print(ss);
std::string dump = ss.str();
// Check there is a SyncMemGeneric workload as we exported
@@ -877,7 +877,7 @@ inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> bac
// Retrieve the Profiler.Print() output to get the workload execution
ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
std::stringstream ss;
- profilerManager.GetProfiler()->Print(ss);;
+ profilerManager.GetProfiler()->Print(ss);
std::string dump = ss.str();
if (backends[0] == Compute::CpuAcc)
@@ -987,7 +987,7 @@ inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<Backen
// Retrieve the Profiler.Print() output to get the workload execution
ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
std::stringstream ss;
- profilerManager.GetProfiler()->Print(ss);;
+ profilerManager.GetProfiler()->Print(ss);
std::string dump = ss.str();
// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
@@ -1103,7 +1103,7 @@ inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<Backe
// Retrieve the Profiler.Print() output to get the workload execution
ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
std::stringstream ss;
- profilerManager.GetProfiler()->Print(ss);;
+ profilerManager.GetProfiler()->Print(ss);
std::string dump = ss.str();
// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
@@ -1221,7 +1221,7 @@ inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vect
// Retrieve the Profiler.Print() output to get the workload execution
ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
std::stringstream ss;
- profilerManager.GetProfiler()->Print(ss);;
+ profilerManager.GetProfiler()->Print(ss);
std::string dump = ss.str();
// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
@@ -1247,4 +1247,356 @@ inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vect
std::free(outputMemPtr);
}
+inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
+{
+ /**
+ * This test is similar to the Import tests above: we create a network with a square function, pass in a vector
+ * of 4 floats, square them, and validate the output. We then check the profiling logs to see whether input/output
+ * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
+ * In this test we create aligned buffers, import them into the network, and validate the output and the number of
+ * SyncMemGeneric/CopyMemGeneric workloads. Then we run the same network again with misaligned buffers to make sure
+ * it falls back to copying correctly.
+ */
+ using namespace armnn;
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
+ activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
+ activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+ INFO("Load Network");
+ // Load it into the runtime. It should pass.
+ NetworkId netId;
+ std::string ignoredErrorMessage;
+ INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
+ CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
+ == Status::Success);
+ INFO("Generate Data");
+
+ // Creates structures for input & output
+ std::vector<float> inputData
+ {
+ 1.0f, 2.0f, 3.0f, 4.0f
+ };
+ std::vector<float> outputData(4);
+ std::vector<float> expectedOutput
+ {
+ 1.0f, 4.0f, 9.0f, 16.0f
+ };
+
+ // Check our input and output pointers are actually aligned
+ uintptr_t alignment = GetDataTypeSize(DataType::Float32);
+ CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
+ CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
+
+ INFO("Create Inference");
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+ std::vector<ImportedInputId> importedInputIds =
+ runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
+ std::vector<ImportedOutputId> importedOutputIds =
+ runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
+ // Do the inference and force the import as the memory is aligned.
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
+
+ // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+ std::stringstream ss;
+ profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
+ std::string dump = ss.str();
+
+ if (backends[0] == Compute::CpuAcc)
+ {
+ // Reconfigure has not been implemented for CpuAcc, so it will always copy; this check will break whenever
+ // reconfigure is implemented
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ CHECK(count == 0);
+ // Should be 2 CopyMemGeneric workloads; we only check that at least one is present
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ CHECK(count >= 1);
+ }
+ else
+ {
+ // Check there is at least 1 SyncMemGeneric workload as we exported
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ CHECK(count >= 1);
+ // Shouldn't be any CopyMemGeneric workloads
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ CHECK(count == 0);
+ }
+ // Check the output is correct
+ CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
+
+ // This code looks a little funky, but the idea is to create a buffer of floats offset by the size of a char;
+ // this guarantees that the resultant buffer is misaligned and thus should always be copied.
+ auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
+ float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
+
+ // Check if our pointer is truly misaligned
+ CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
+ auto inputBuffer = reinterpret_cast<float*>(misalignedInputPtr);
+ for (int i = 0; i < 4; i++)
+ {
+ inputBuffer[i] = 2.0f + static_cast<float>(i);
+ }
+
+ auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
+ float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
+
+ // Check if our pointer is truly misaligned
+ CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
+
+ std::vector<float> expectedMisalignedOutput
+ {
+ 4.0f, 9.0f, 16.0f, 25.0f
+ };
+
+ INFO("Create Second Inference");
+ InputTensors inputTensorsMisaligned
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
+ };
+ OutputTensors outputTensorsMisaligned
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
+ };
+ importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
+ importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
+
+ // Run the inference with import forced; the memory is misaligned, so it should fall back to copying.
+ runtime->EnqueueWorkload(netId,
+ inputTensorsMisaligned,
+ outputTensorsMisaligned,
+ importedInputIds,
+ importedOutputIds);
+
+ // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
+ // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
+ profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
+ dump = ss.str();
+
+ // GpuAcc is a different case from CpuRef and CpuAcc: it doesn't use the buffer directly but instead maps it to a
+ // new set of addresses within GPU memory. These will almost always be auto-aligned, so we don't need to check
+ // for imports/copies, only that the output is correct.
+ if (backends[0] != Compute::GpuAcc)
+ {
+ // The SyncMemGeneric will still be in the profiling log from the first inference
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ CHECK(count >= 1);
+ // We should now see CopyMemGeneric workloads as we copied all buffers
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ CHECK(count >= 1);
+ }
+ // Check the output is correct
+ unsigned int index = 0;
+ for (auto outputValue : expectedMisalignedOutput)
+ {
+ CHECK(outputValue == reinterpret_cast<float*>(misalignedOutputPtr)[index]);
+ ++index;
+ }
+ // Clean up to avoid interfering with other tests
+ runtime->UnloadNetwork(netId);
+ std::free(inputMemPtr);
+ std::free(outputMemPtr);
+}
+
+
+inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
+{
+ /**
+ * This test is similar to the Import tests above: we create a network with a square function, pass in a vector
+ * of 4 floats, square them, and validate the output. We then check the profiling logs to see whether input/output
+ * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
+ * In this test we create misaligned buffers, copy them into the network, and validate the output and the number of
+ * SyncMemGeneric/CopyMemGeneric workloads. Then we run the same network again with aligned buffers to make sure
+ * it switches to importing correctly.
+ */
+ using namespace armnn;
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
+ activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
+ activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+ INFO("Load Network");
+ // Load it into the runtime. It should pass.
+ NetworkId netId;
+ std::string ignoredErrorMessage;
+ INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
+ CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
+ == Status::Success);
+ INFO("Generate Data");
+
+ // This code looks a little funky, but the idea is to create a buffer of floats offset by the size of a char;
+ // this guarantees that the resultant buffer is misaligned and thus should always be copied.
+ auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
+ float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
+
+ // Check if our pointer is truly misaligned
+ uintptr_t alignment = GetDataTypeSize(DataType::Float32);
+ CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
+ auto inputBuffer = reinterpret_cast<float*>(misalignedInputPtr);
+ for (int i = 0; i < 4; i++)
+ {
+ inputBuffer[i] = 2.0f + static_cast<float>(i);
+ }
+
+ auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
+ float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
+
+ // Check if our pointer is truly misaligned
+ CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
+
+ std::vector<float> expectedMisalignedOutput
+ {
+ 4.0f, 9.0f, 16.0f, 25.0f
+ };
+
+ INFO("Create First Inference");
+ InputTensors inputTensorsMisaligned
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
+ };
+ OutputTensors outputTensorsMisaligned
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
+ };
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+ std::vector<ImportedInputId> importedInputIds =
+ runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
+ std::vector<ImportedOutputId> importedOutputIds =
+ runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
+
+ // Run the inference with import forced; the memory is misaligned, so it should fall back to copying.
+ runtime->EnqueueWorkload(netId,
+ inputTensorsMisaligned,
+ outputTensorsMisaligned,
+ importedInputIds,
+ importedOutputIds);
+
+ // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+ std::stringstream ss;
+ profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
+ std::string dump = ss.str();
+
+ // GpuAcc is a different case from CpuRef and CpuAcc: it doesn't use the buffer directly but instead maps it to a
+ // new set of addresses within GPU memory. These will almost always be auto-aligned, so we don't need to check
+ // for imports/copies, only that the output is correct.
+ if (backends[0] != Compute::GpuAcc)
+ {
+ // We can only copy so there should be no SyncMemGeneric
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ CHECK(count == 0);
+ // Should only be CopyMemGeneric workloads as we copied all buffers
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ CHECK(count >= 1);
+ }
+ // Check the output is correct
+ unsigned int index = 0;
+ for (auto outputValue : expectedMisalignedOutput)
+ {
+ CHECK(outputValue == reinterpret_cast<float*>(misalignedOutputPtr)[index]);
+ ++index;
+ }
+ std::free(inputMemPtr);
+ std::free(outputMemPtr);
+
+ // Creates structures for input & output
+ std::vector<float> inputData
+ {
+ 1.0f, 2.0f, 3.0f, 4.0f
+ };
+ std::vector<float> outputData(4);
+ std::vector<float> expectedOutput
+ {
+ 1.0f, 4.0f, 9.0f, 16.0f
+ };
+
+ // Check our input and output pointers are actually aligned
+ CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
+ CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
+
+ INFO("Create Second Inference");
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
+ importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
+ // Do the inference and force the import as the memory is aligned.
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
+
+ // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
+ // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
+ profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
+ dump = ss.str();
+
+ if (backends[0] == Compute::CpuAcc)
+ {
+ // Reconfigure has not been implemented for CpuAcc, so it will always copy; this check will break whenever
+ // reconfigure is implemented
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ CHECK(count == 0);
+ // Should be 2 CopyMemGeneric workloads; we only check that at least one is present
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ CHECK(count >= 1);
+ }
+ else
+ {
+ // Repeated inferences make an exact count difficult to verify, so we just check that a SyncMemGeneric
+ // workload now appears where previously there was none
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ CHECK(count >= 1);
+ // Should still be some CopyMemGeneric Workloads from the last inference
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ CHECK(count >= 1);
+ }
+ // Check the output is correct
+ CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
+ // Clean up to avoid interfering with other tests
+ runtime->UnloadNetwork(netId);
+}
+
} // anonymous namespace
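
Two small devices recur in the new tests above and are worth calling out: counting workload names in the profiler dump to tell imports (SyncMemGeneric) from copies (CopyMemGeneric), and deliberately misaligning a buffer by accessing a malloc'd block through a pointer offset by one char, so it can never satisfy float alignment and must be copied. A self-contained sketch of both follows; CountOccurrences is an illustrative stand-in for the SubStringCounter helper already defined elsewhere in EndToEndTestImpl.hpp, not its actual implementation.

// Sketch only; CountOccurrences stands in for SubStringCounter, whose real
// definition lives elsewhere in EndToEndTestImpl.hpp.
#include <cstddef>
#include <cstdlib>
#include <string>

// Count non-overlapping occurrences of `needle` in `haystack`,
// e.g. CountOccurrences(profilerDump, "CopyMemGeneric").
int CountOccurrences(const std::string& haystack, const std::string& needle)
{
    int count = 0;
    for (std::size_t pos = haystack.find(needle);
         pos != std::string::npos;
         pos = haystack.find(needle, pos + needle.size()))
    {
        ++count;
    }
    return count;
}

// Allocate numFloats floats plus one byte and hand back a pointer offset by one byte.
// malloc's result is suitably aligned for float, so base + 1 never is; a force-import of
// this pointer must therefore fall back to copying. As in the tests, accessing floats
// through it relies on the platform tolerating unaligned access. Free basePtr, not the
// returned pointer.
float* MakeMisalignedFloatBuffer(std::size_t numFloats, void*& basePtr)
{
    basePtr = std::malloc(numFloats * sizeof(float) + sizeof(char));
    return reinterpret_cast<float*>(reinterpret_cast<char*>(basePtr) + 1);
}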
diff --git a/src/backends/reference/RefTensorHandle.cpp b/src/backends/reference/RefTensorHandle.cpp
index 0be9708cff..a8d6a66ebb 100644
--- a/src/backends/reference/RefTensorHandle.cpp
+++ b/src/backends/reference/RefTensorHandle.cpp
@@ -170,11 +170,8 @@ bool RefTensorHandle::CanBeImported(void *memory, MemorySource source)
{
return false;
}
-
return true;
-
}
-
}
return false;
}
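
The hunk above only removes stray blank lines inside RefTensorHandle::CanBeImported(void*, MemorySource); the substance of the check is not visible here. The property the new tests depend on is that a pointer is only importable when its address is aligned to the tensor's element size, the same modulo test the tests apply to their own buffers. A sketch of that gating condition (an assumption about its shape, not the actual RefTensorHandle implementation):

// Assumed sketch of the alignment gate the tests rely on; not the real
// RefTensorHandle::CanBeImported body, which this hunk does not show.
#include <cstddef>
#include <cstdint>

bool PointerSatisfiesImportAlignment(const void* memory, std::size_t requiredAlignment)
{
    // Importable only if the address is a multiple of the element size
    // (e.g. 4 for Float32); otherwise the runtime copies instead.
    return (reinterpret_cast<std::uintptr_t>(memory) % requiredAlignment) == 0;
}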
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 2828b6eff9..e1c2e2f2a7 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -1398,6 +1398,16 @@ TEST_CASE("RefForceImportWithMisalignedInputAndOutputBuffersEndToEndTest")
ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(defaultBackends);
}
+TEST_CASE("RefForceImportRepeatedInferencesEndToEndTest")
+{
+ ForceImportRepeatedInferencesEndToEndTest(defaultBackends);
+}
+
+TEST_CASE("RefForceImportRepeatedInferencesInvertedEndToEndTest")
+{
+ ForceImportRepeatedInferencesInvertedEndToEndTest(defaultBackends);
+}
+
#if !defined(__ANDROID__)
// Only run these tests on non Android platforms
TEST_CASE("RefImportNonAlignedPointerTest")