aboutsummaryrefslogtreecommitdiff
path: root/samples
diff options
context:
space:
mode:
authorJan Eilers <jan.eilers@arm.com>2021-09-02 13:12:11 +0100
committerJan Eilers <jan.eilers@arm.com>2021-09-08 12:14:33 +0100
commite38c418ebc434d6c2a5618388b0bd05963308047 (patch)
tree6516938a5a4e5d99426908442662fb565dcc6a09 /samples
parent6e330413f91e1fa13d22e3f3b0eaf31a04be6dcc (diff)
downloadarmnn-e38c418ebc434d6c2a5618388b0bd05963308047.tar.gz
Add sample app for asynchronous execution
Signed-off-by: Jan Eilers <jan.eilers@arm.com> Change-Id: I6d903c721d71a28bc02e4e98aaa813fb9159b678
Diffstat (limited to 'samples')
-rw-r--r--samples/AsyncExecutionSample.cpp145
-rw-r--r--samples/CMakeLists.txt3
-rw-r--r--samples/examples.dox8
3 files changed, 156 insertions, 0 deletions
diff --git a/samples/AsyncExecutionSample.cpp b/samples/AsyncExecutionSample.cpp
new file mode 100644
index 0000000000..6d2fe243dd
--- /dev/null
+++ b/samples/AsyncExecutionSample.cpp
@@ -0,0 +1,145 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include <armnn/INetwork.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/Utils.hpp>
+#include <armnn/Descriptors.hpp>
+
+#include <iostream>
+#include <thread>
+
+/// A simple example of using the ArmNN SDK API to run a network multiple times with different inputs in an asynchronous
+/// manner.
+///
+/// Background info: The usual runtime->EnqueueWorkload, which is used to trigger the execution of a network, is not
+/// thread safe. Each workload has memory assigned to it which would be overwritten by each thread.
+/// Before we added support for this you had to load a network multiple times to execute it at the
+/// same time. Every time a network is loaded, it takes up memory on your device. Making the
+/// execution thread safe helps to reduce the memory footprint for concurrent executions significantly.
+/// This example shows you how to execute a model concurrently (multiple threads) while still only
+/// loading it once.
+///
+/// As in most of our simple samples, this example asks the user for a single input number for each execution of
+/// the network.
+/// The network consists of a single fully connected layer with a single neuron. The neuron's weight is set to 1.0f
+/// to produce an output number that is the same as the input.
+int main()
+{
+ using namespace armnn;
+
+ // The first part of this code is very similar to SimpleSample.cpp; check that sample out for comparison.
+ // The interesting part starts once the graph has been loaded into the runtime.
+
+ std::vector<float> inputs; // NOTE(review): not referenced again in this function; looks removable
+ float number1;
+ std::cout << "Please enter a number for the first iteration: " << std::endl;
+ std::cin >> number1; // NOTE(review): the stream state is not checked after extraction
+ float number2;
+ std::cout << "Please enter a number for the second iteration: " << std::endl;
+ std::cin >> number2; // NOTE(review): as for number1, a failed extraction goes undetected
+
+ // Turn on logging to standard output
+ // This is useful in this sample so that users can learn more about what is going on
+ ConfigureLogging(true, false, LogSeverity::Warning);
+
+ // Construct ArmNN network
+ NetworkId networkIdentifier; // presumably assigned by LoadNetwork below; it is used for all later runtime calls
+ INetworkPtr myNetwork = INetwork::Create();
+
+ float weightsData[] = {1.0f}; // Identity: a weight of 1.0f makes the output equal to the input
+ TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32);
+ weightsInfo.SetConstant();
+ ConstTensor weights(weightsInfo, weightsData);
+
+ // Constant layer that holds the weights data for the FullyConnected layer
+ IConnectableLayer* const constantWeightsLayer = myNetwork->AddConstantLayer(weights, "const weights");
+
+ FullyConnectedDescriptor fullyConnectedDesc;
+ IConnectableLayer* const fullyConnectedLayer = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
+ "fully connected");
+ IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
+ IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
+
+ InputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0)); // user input feeds slot 0
+ constantWeightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1)); // weights feed slot 1
+ fullyConnectedLayer->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
+
+ // Create ArmNN runtime
+ IRuntime::CreationOptions options; // default options
+ IRuntimePtr run = IRuntime::Create(options);
+
+ // Set the tensor info on the output slots of the input, fully connected and constant layers.
+ TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
+ InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
+
+ TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
+ fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+ constantWeightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+ // Optimise the ArmNN network for the reference backend (Compute::CpuRef)
+ IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {Compute::CpuRef}, run->GetDeviceSpec());
+ if (!optNet)
+ {
+ // This shouldn't happen for this simple sample, with reference backend.
+ // But in general usage Optimize could fail if the hardware at runtime cannot
+ // support the model that has been provided.
+ std::cerr << "Error: Failed to optimise the input network." << std::endl;
+ return 1;
+ }
+
+ // Load graph into runtime.
+ std::string errmsg; // To hold an eventual error message if loading the network fails
+ // Add network properties to enable async execution. The MemorySource::Undefined arguments indicate
+ // that neither inputs nor outputs will be imported. Importing will be covered in another example.
+ armnn::INetworkProperties networkProperties(true, MemorySource::Undefined, MemorySource::Undefined);
+ run->LoadNetwork(networkIdentifier, // NOTE(review): the result of this call is ignored and errmsg is never read
+ std::move(optNet),
+ errmsg,
+ networkProperties);
+
+ // Create the input and output buffers: one single-element vector of float for each execution.
+ std::vector<std::vector<float>> inputData{{number1}, {number2}};
+ std::vector<std::vector<float>> outputData;
+ outputData.resize(2, std::vector<float>(1));
+
+
+ std::vector<InputTensors> inputTensors // one entry per execution, each wrapping the matching inputData buffer
+ {
+ {{0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputData[0].data())}},
+ {{0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputData[1].data())}}
+ };
+ std::vector<OutputTensors> outputTensors // one entry per execution, each wrapping the matching outputData buffer
+ {
+ {{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[0].data())}},
+ {{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[1].data())}}
+ };
+
+ // Lambda used as the thread function. Each call creates its own working memory handle and runs the network.
+ auto execute = [&](unsigned int executionIndex) // captures by reference; every thread is joined further down
+ {
+ auto memHandle = run->CreateWorkingMemHandle(networkIdentifier);
+ run->Execute(*memHandle, inputTensors[executionIndex], outputTensors[executionIndex]);
+ };
+
+ // Prepare one thread per input and let each execute the network with a different input
+ std::vector<std::thread> threads;
+ for (unsigned int i = 0; i < inputTensors.size(); ++i)
+ {
+ threads.emplace_back(std::thread(execute, i));
+ }
+
+ // Wait for the threads to finish before the output buffers are read
+ for (std::thread& t : threads)
+ {
+ if(t.joinable())
+ {
+ t.join();
+ }
+ }
+
+ std::cout << "Your numbers were " << outputData[0][0] << " and " << outputData[1][0] << std::endl;
+ return 0;
+
+}
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index 7be6a69369..7af8b7265a 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -1,6 +1,9 @@
if(BUILD_SAMPLE_APP AND ARMNNREF)
add_executable(SimpleSample SimpleSample.cpp)
target_link_libraries(SimpleSample armnn ${CMAKE_THREAD_LIBS_INIT})
+
+ add_executable(AsyncExecutionSample AsyncExecutionSample.cpp)
+ target_link_libraries(AsyncExecutionSample armnn ${CMAKE_THREAD_LIBS_INIT})
endif()
if(BUILD_SAMPLE_APP AND SAMPLE_DYNAMIC_BACKEND)
diff --git a/samples/examples.dox b/samples/examples.dox
index e0b0ea345e..4a41e30a48 100644
--- a/samples/examples.dox
+++ b/samples/examples.dox
@@ -38,4 +38,12 @@ memory for the inputs, outputs and inter layer memory.
@example CustomMemoryAllocatorSample.cpp
**/
+/**
+Yet another variant of the SimpleSample application. In this little sample app you will be shown how to run a
+network multiple times asynchronously.
+
+@note This is currently an experimental interface
+@example AsyncExecutionSample.cpp
+**/
+
}