ArmNN
 23.02
CustomMemoryAllocatorSample.cpp

This example is essentially a copy of the SimpleSample example, but it uses a CustomAllocator to allocate memory for the inputs, outputs and inter-layer memory.

Note
This is currently an experimental interface
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <armnn/ArmNN.hpp>
#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLScheduler.h>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
/** Sample implementation of ICustomAllocator for use with the ClBackend.
* Note: any memory allocated must be host addressable with write access
* in order for ArmNN to be able to properly use it. */
class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
{
public:
SampleClBackendCustomAllocator() = default;
void* allocate(size_t size, size_t alignment) override
{
// If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
if (alignment == 0)
{
alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
}
size_t space = size + alignment + alignment;
auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
{
throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
}
return allocatedMemPtr;
}
void free(void* ptr) override
{
std::free(ptr);
}
{
}
};
// A simple example application to show the usage of a custom memory allocator. In this sample, the user's single
// input number is multiplied by 1.0f using a fully connected layer with a single neuron to produce an output
// number that is the same as the input. All memory required to execute this mini network is allocated with
// the provided custom allocator.
//
// Using a Custom Allocator is required for use with Protected Mode and Protected Memory.
// This example is provided using only unprotected malloc as Protected Memory is platform
// and implementation specific.
//
// Note: This example is similar to the SimpleSample application that can also be found in armnn/samples.
// The differences are in the use of a custom allocator, the backend is GpuAcc, and the inputs/outputs
// are being imported instead of copied. (Import must be enabled when using a Custom Allocator)
// You might find this useful for comparison.
int main()
{
using namespace armnn;
float number;
std::cout << "Please enter a number: " << std::endl;
std::cin >> number;
// Turn on logging to standard output
// This is useful in this sample so that users can learn more about what is going on
// Construct ArmNN network
NetworkId networkIdentifier;
FullyConnectedDescriptor fullyConnectedDesc;
float weightsData[] = {1.0f}; // Identity
TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
weightsInfo.SetConstant(true);
ConstTensor weights(weightsInfo, weightsData);
IConnectableLayer* inputLayer = network->AddInputLayer(0);
IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
IConnectableLayer* fullyConnectedLayer =
network->AddFullyConnectedLayer(fullyConnectedDesc, "fully connected");
IConnectableLayer* outputLayer = network->AddOutputLayer(0);
inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
weightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
fullyConnectedLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
// Create ArmNN runtime:
//
// This is the interesting bit when executing a model with a custom allocator.
// You can have different allocators for different backends. To support this
// the runtime creation option has a map that takes a BackendId and the corresponding
// allocator that should be used for that backend.
// Only GpuAcc supports a Custom Allocator for now
//
// Note: This is not covered in this example but if you want to run a model on
// protected memory a custom allocator needs to be provided that supports
// protected memory allocations and the MemorySource of that allocator is
// set to MemorySource::DmaBufProtected
auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
IRuntimePtr runtime = IRuntime::Create(options);
//Set the tensors in the network.
TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
unsigned int numElements = inputTensorInfo.GetNumElements();
size_t totalBytes = numElements * sizeof(float);
TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
// Optimise ArmNN network
OptimizerOptions optOptions;
optOptions.m_ImportEnabled = true;
Optimize(*network, {"GpuAcc"}, runtime->GetDeviceSpec(), optOptions);
if (!optNet)
{
// This shouldn't happen for this simple sample, with GpuAcc backend.
// But in general usage Optimize could fail if the backend at runtime cannot
// support the model that has been provided.
std::cerr << "Error: Failed to optimise the input network." << std::endl;
return 1;
}
// Load graph into runtime
std::string ignoredErrorMessage;
runtime->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
// Creates structures for input & output
const size_t alignment =
arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
// Input with negative values
auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
std::fill_n(inputPtr, numElements, number);
void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
std::fill_n(outputPtr, numElements, -10.0f);
inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, 0);
inputTensorInfo.SetConstant(true);
InputTensors inputTensors
{
{0, ConstTensor(inputTensorInfo, alignedInputPtr)},
};
OutputTensors outputTensors
{
{0, Tensor(runtime->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
};
// Execute network
runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
// Tell the CLBackend to sync memory so we can read the output.
arm_compute::CLScheduler::get().sync();
auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
std::cout << "Your number was " << outputResult[0] << std::endl;
runtime->UnloadNetwork(networkIdentifier);
return 0;
}
armnn::ICustomAllocator::allocate
virtual void * allocate(size_t size, size_t alignment)=0
Interface to be implemented by the child class to allocate bytes.
armnn::IOptimizedNetworkPtr
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:253
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::IConnectableLayer
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:68
armnn::FullyConnectedDescriptor
A FullyConnectedDescriptor for the FullyConnectedLayer.
Definition: Descriptors.hpp:475
armnn::MemorySource::Malloc
@ Malloc
ArmNN.hpp
armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
armnn::ConfigureLogging
void ConfigureLogging(bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity)
Configures the logging behaviour of the ARMNN library.
Definition: Utils.cpp:18
armnn::IRuntime::CreationOptions
Definition: IRuntime.hpp:85
armnn::IConnectableLayer::GetOutputSlot
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
armnn::LogSeverity::Info
@ Info
armnn::IOutputSlot::Connect
virtual int Connect(IInputSlot &destination)=0
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::IConnectableLayer::GetInputSlot
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
armnn::INetworkProperties
Definition: IRuntime.hpp:43
armnn::TensorShape
Definition: Tensor.hpp:20
armnn::InputTensors
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
armnn::ICustomAllocator
Custom Allocator interface.
Definition: ICustomAllocator.hpp:16
armnn::DataType::Float32
@ Float32
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:230
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::ICustomAllocator::GetMemorySourceType
virtual armnn::MemorySource GetMemorySourceType()=0
Used to specify what type of memory is being allocated by this allocator.
armnn::NetworkId
int NetworkId
Definition: IRuntime.hpp:35
armnn::INetwork::Create
static INetworkPtr Create(const NetworkOptions &networkOptions={})
Definition: Network.cpp:452
armnn::IOutputSlot::SetTensorInfo
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
ICustomAllocator.hpp
armnn::OutputTensors
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
armnn::INetworkPtr
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:252
armnn::IRuntime::CreationOptions::m_CustomAllocatorMap
std::map< BackendId, std::shared_ptr< ICustomAllocator > > m_CustomAllocatorMap
A map to define a custom memory allocator for specific backend Ids.
Definition: IRuntime.hpp:122
armnn::IRuntime::Create
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:52
main
int main(int argc, char *argv[])
Definition: ArmnnConverter.cpp:327
armnn::Tensor
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:319
armnn::Optimize
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > & > messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1773
armnn::OptimizerOptions
ArmNN performs an optimization on each model/network before it gets loaded for execution.
Definition: INetwork.hpp:137
armnn::IRuntimePtr
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:41
armnn::ICustomAllocator::free
virtual void free(void *ptr)=0
Interface to be implemented by the child class to free the allocated bytes.
armnn::OptimizerOptions::m_ImportEnabled
bool m_ImportEnabled
Enable Import.
Definition: INetwork.hpp:235