From 7bfd38a721360183f3392f9ab35db18a0dd7fef8 Mon Sep 17 00:00:00 2001 From: Nikhil Raj Date: Fri, 19 Aug 2022 15:23:36 +0100 Subject: Update Doxygen for 22.08 Release Signed-off-by: Nikhil Raj Change-Id: I4789fe868e0492839be1482e5cee3642ed90d756 --- 22.08/_async_execution_sample_8cpp-example.xhtml | 114 +++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 22.08/_async_execution_sample_8cpp-example.xhtml (limited to '22.08/_async_execution_sample_8cpp-example.xhtml') diff --git a/22.08/_async_execution_sample_8cpp-example.xhtml b/22.08/_async_execution_sample_8cpp-example.xhtml new file mode 100644 index 0000000000..11fa5a7af1 --- /dev/null +++ b/22.08/_async_execution_sample_8cpp-example.xhtml @@ -0,0 +1,114 @@ + + + + + + + + + + + + + +ArmNN: AsyncExecutionSample.cpp + + + + + + + + + + + + + + + + +
+
+ + + + ArmNN + + + +
+
+  22.08 +
+
+
+ + + + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
AsyncExecutionSample.cpp
+
+
+

Yet another variant of the SimpleSample application. In this little sample app you will be shown how to run a network multiple times asynchronously.

+
Note
This is currently an experimental interface
+
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <armnn/Descriptors.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>
#include <armnn/Utils.hpp>

#include <iostream>
#include <thread>
/// A simple example of using the ArmNN SDK API to run a network multiple times with different inputs in an asynchronous
/// manner.
///
/// Background info: The usual runtime->EnqueueWorkload, which is used to trigger the execution of a network, is not
/// thread safe. Each workload has memory assigned to it which would be overwritten by each thread.
/// Before we added support for this you had to load a network multiple times to execute it at the
/// same time. Every time a network is loaded, it takes up memory on your device. Making the
/// execution thread safe helps to reduce the memory footprint for concurrent executions significantly.
/// This example shows you how to execute a model concurrently (multiple threads) while still only
/// loading it once.
///
/// As in most of our simple samples, the network in this example will ask the user for a single input number for each
/// execution of the network.
/// The network consists of a single fully connected layer with a single neuron. The neuron's weight is set to 1.0f
/// so that the output number is the same as the input.
int main()
{
using namespace armnn;
// The first part of this code is very similar to the SimpleSample.cpp you should check it out for comparison
// The interesting part starts when the graph is loaded into the runtime
std::vector<float> inputs;
float number1;
std::cout << "Please enter a number for the first iteration: " << std::endl;
std::cin >> number1;
float number2;
std::cout << "Please enter a number for the second iteration: " << std::endl;
std::cin >> number2;
// Turn on logging to standard output
// This is useful in this sample so that users can learn more about what is going on
// Construct ArmNN network
NetworkId networkIdentifier;
float weightsData[] = {1.0f}; // Identity
TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
weightsInfo.SetConstant();
ConstTensor weights(weightsInfo, weightsData);
// Constant layer that now holds weights data for FullyConnected
IConnectableLayer* const constantWeightsLayer = myNetwork->AddConstantLayer(weights, "const weights");
FullyConnectedDescriptor fullyConnectedDesc;
IConnectableLayer* const fullyConnectedLayer = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
"fully connected");
IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
InputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
constantWeightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
fullyConnectedLayer->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
// Create ArmNN runtime
IRuntime::CreationOptions options; // default options
IRuntimePtr run = IRuntime::Create(options);
//Set the tensors in the network.
TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
constantWeightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
// Optimise ArmNN network
IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {Compute::CpuRef}, run->GetDeviceSpec());
if (!optNet)
{
// This shouldn't happen for this simple sample, with reference backend.
// But in general usage Optimize could fail if the hardware at runtime cannot
// support the model that has been provided.
std::cerr << "Error: Failed to optimise the input network." << std::endl;
return 1;
}
// Load graph into runtime.
std::string errmsg; // To hold an eventual error message if loading the network fails
// Add network properties to enable async execution. The MemorySource::Undefined variables indicate
// that neither inputs nor outputs will be imported. Importing will be covered in another example.
run->LoadNetwork(networkIdentifier,
std::move(optNet),
errmsg,
networkProperties);
// Creates structures for inputs and outputs. A vector of float for each execution.
std::vector<std::vector<float>> inputData{{number1}, {number2}};
std::vector<std::vector<float>> outputData;
outputData.resize(2, std::vector<float>(1));
inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
inputTensorInfo.SetConstant(true);
std::vector<InputTensors> inputTensors
{
{{0, armnn::ConstTensor(inputTensorInfo, inputData[0].data())}},
{{0, armnn::ConstTensor(inputTensorInfo, inputData[1].data())}}
};
std::vector<OutputTensors> outputTensors
{
{{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[0].data())}},
{{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[1].data())}}
};
// Lambda function to execute the network. We use it as thread function.
auto execute = [&](unsigned int executionIndex)
{
auto memHandle = run->CreateWorkingMemHandle(networkIdentifier);
run->Execute(*memHandle, inputTensors[executionIndex], outputTensors[executionIndex]);
};
// Prepare some threads and let each execute the network with a different input
std::vector<std::thread> threads;
for (unsigned int i = 0; i < inputTensors.size(); ++i)
{
threads.emplace_back(std::thread(execute, i));
}
// Wait for the threads to finish
for (std::thread& t : threads)
{
if(t.joinable())
{
t.join();
}
}
std::cout << "Your numbers were " << outputData[0][0] << " and " << outputData[1][0] << std::endl;
return 0;
}
+
+ + + + -- cgit v1.2.1