plain/22.05/_cl_custom_allocator_tests_8cpp_source.xhtml

 //
 // Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //

 #include <armnn/backends/ICustomAllocator.hpp>
 #include <armnn/Descriptors.hpp>
 #include <armnn/Exceptions.hpp>
 #include <armnn/INetwork.hpp>
 #include <armnn/IRuntime.hpp>
 #include <armnn/Utils.hpp>
 #include <armnn/BackendRegistry.hpp>

 #include <cl/ClBackend.hpp>
 #if defined(ARMCOMPUTENEON_ENABLED)
 #include <neon/NeonBackend.hpp>
 #endif
 #include <doctest/doctest.h>
 #include <armnn/utility/IgnoreUnused.hpp>
 // Contains the OpenCL interfaces for mapping memory in the GPU page tables.
 // Requires the OpenCL backend (GpuAcc) to be included.
 #include <arm_compute/core/CL/CLKernelLibrary.h>
 #include <CL/cl_ext.h>
 #include <arm_compute/runtime/CL/CLScheduler.h>

 /** Sample implementation of ICustomAllocator for use with the ClBackend.
  *
  *  Memory comes from std::malloc and is handed out at an aligned address. The pointer originally
  *  returned by std::malloc is stored in the bytes immediately preceding the aligned address so that
  *  free() can recover it and release the block that was actually allocated.
  *
  *  Note: any memory allocated must be host accessible with write access to allow for weights and biases
  *  to be passed in. Read access is not required. */
 class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
 {
 public:
     SampleClBackendCustomAllocator() = default;

     /** Allocates at least @p size bytes at an address aligned to @p alignment.
      *
      *  @param size      Number of bytes requested.
      *  @param alignment Requested alignment in bytes; 0 selects CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
      *                   Must be a power of two (a precondition of std::align).
      *  @return The aligned pointer. It must be released through free() of this class.
      *  @throws armnn::Exception if the underlying allocation or the alignment step fails.
      */
     void* allocate(size_t size, size_t alignment) override
     {
         // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
         if (alignment == 0)
         {
             alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
         }
         // The slot in front of the returned address stores a void*, so the returned address has to be
         // at least pointer-aligned. For power-of-two alignments this still honours the requested value.
         if (alignment < alignof(void*))
         {
             alignment = alignof(void*);
         }

         // Window handed to std::align: the payload plus room for the alignment adjustment.
         size_t space = size + alignment + alignment;

         // NOTE(review): the backing block is sizeof(size_t) times larger than 'space'. The factor is kept
         // from the original implementation; whether the importing backend relies on the extra slack is
         // not visible here - confirm before reducing it.
         void* const rawPtr = std::malloc(space * sizeof(size_t) + sizeof(void*));
         if (rawPtr == nullptr)
         {
             throw armnn::Exception("SampleClBackendCustomAllocator::Allocation failed");
         }

         // Start the search one pointer past the raw block so the bookkeeping slot always fits.
         void* alignedPtr = static_cast<char*>(rawPtr) + sizeof(void*);
         if (std::align(alignment, size, alignedPtr, space) == nullptr)
         {
             std::free(rawPtr);
             throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
         }

         // Remember the real allocation so free() does not hand an adjusted address to std::free.
         static_cast<void**>(alignedPtr)[-1] = rawPtr;
         return alignedPtr;
     }

     /** Releases memory previously returned by allocate().
      *
      *  @param ptr Pointer obtained from allocate() on this class, or nullptr (ignored).
      */
     void free(void* ptr) override
     {
         if (ptr == nullptr)
         {
             return;
         }
         // The raw std::malloc pointer was stored directly in front of the aligned address.
         std::free(static_cast<void**>(ptr)[-1]);
     }

     /** Reports that this allocator hands out malloc-backed host memory. */
     armnn::MemorySource GetMemorySourceType() override
     {
         return armnn::MemorySource::Malloc;
     }
 };

 /** Builds the small network shared by the tests in this file:
  *  an input layer and a constant "Weights" layer feeding a FullyConnected layer, followed by an output layer.
  *
  *  @param inputTensorInfo Tensor info assigned to the output slot of the input layer.
  *  @return The constructed (not yet optimized) network.
  */
 armnn::INetworkPtr CreateTestNetwork(armnn::TensorInfo& inputTensorInfo)
 {
     using namespace armnn;

     FullyConnectedDescriptor fcDescriptor;

     // A 1x1 constant weight tensor holding the single value 1.0f (identity).
     float identityWeight[] = {1.0f};
     TensorInfo weightsTensorInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
     weightsTensorInfo.SetConstant(true);
     ConstTensor weightsTensor(weightsTensorInfo, identityWeight);

     // Add the four layers.
     INetworkPtr net = INetwork::Create();
     IConnectableLayer* const input       = net->AddInputLayer(0);
     IConnectableLayer* const weightsNode = net->AddConstantLayer(weightsTensor, "Weights");
     IConnectableLayer* const fcNode      = net->AddFullyConnectedLayer(fcDescriptor, "fully connected");
     IConnectableLayer* const output      = net->AddOutputLayer(0);

     IOutputSlot& inputSlot   = input->GetOutputSlot(0);
     IOutputSlot& weightsSlot = weightsNode->GetOutputSlot(0);
     IOutputSlot& fcSlot      = fcNode->GetOutputSlot(0);

     // Wire the graph: data into FC input 0, weights into FC input 1, FC result to the output layer.
     inputSlot.Connect(fcNode->GetInputSlot(0));
     weightsSlot.Connect(fcNode->GetInputSlot(1));
     fcSlot.Connect(output->GetInputSlot(0));

     // Describe the tensor produced by each output slot.
     weightsSlot.SetTensorInfo(weightsTensorInfo);
     inputSlot.SetTensorInfo(inputTensorInfo);

     TensorInfo fcOutputInfo(TensorShape({1, 1}), DataType::Float32);
     fcSlot.SetTensorInfo(fcOutputInfo);

     return net;
 }

 TEST_SUITE("ClCustomAllocatorTests")
 {

 // This is a copy of the SimpleSample app modified to use a custom
 // allocator for the clbackend. It creates a FullyConnected network with a single layer
 // taking a single number as an input, runs it on GpuAcc with imported (malloc) input/output
 // buffers obtained from the custom allocator, and checks the output equals the input.
 TEST_CASE("ClCustomAllocatorTest")
 {
     using namespace armnn;

     float number = 3;

     // Construct ArmNN network
     armnn::NetworkId networkIdentifier;

     TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);

     INetworkPtr myNetwork = CreateTestNetwork(inputTensorInfo);

     // Create ArmNN runtime
     IRuntime::CreationOptions options; // default options
     auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
     options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
     IRuntimePtr run = IRuntime::Create(options);

     // Optimise ArmNN network
     OptimizerOptions optOptions;
     optOptions.m_ImportEnabled = true;
     armnn::IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {"GpuAcc"}, run->GetDeviceSpec(), optOptions);
     CHECK(optNet);

     // Load graph into runtime
     std::string ignoredErrorMessage;
     INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
     CHECK(run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties)
           == Status::Success);

     // Creates structures for input & output
     unsigned int numElements = inputTensorInfo.GetNumElements();
     size_t totalBytes = numElements * sizeof(float);

     const size_t alignment =
             arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();

     void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);

     // Fill the input buffer with the test value
     auto* inputPtr = static_cast<float*>(alignedInputPtr);
     std::fill_n(inputPtr, numElements, number);

     // Pre-fill the output with a sentinel so a result that was never written cannot pass the check
     void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
     auto* outputPtr = static_cast<float*>(alignedOutputPtr);
     std::fill_n(outputPtr, numElements, -10.0f);

     armnn::TensorInfo inputTensorInfo2 = run->GetInputTensorInfo(networkIdentifier, 0);
     inputTensorInfo2.SetConstant(true);
     armnn::InputTensors inputTensors
     {
         {0, armnn::ConstTensor(inputTensorInfo2, alignedInputPtr)},
     };
     armnn::OutputTensors outputTensors
     {
         {0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
     };

     // Execute network
     CHECK(run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors) == Status::Success);

     // Tell the CLBackend to sync memory so we can read the output.
     arm_compute::CLScheduler::get().sync();

     // Unload exactly once, and only after the queued work has been synchronised.
     run->UnloadNetwork(networkIdentifier);
     CHECK(outputPtr[0] == number);

     // Remove the allocator registered for GpuAcc so it does not leak into other tests.
     auto& backendRegistry = armnn::BackendRegistryInstance();
     backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
 }

 // Only run this test if NEON is enabled
 #if defined(ARMCOMPUTENEON_ENABLED)

 // Registers the sample allocator for CpuAcc and expects Optimize() to reject the network
 // with an InvalidArgumentException.
 TEST_CASE("ClCustomAllocatorCpuAccNegativeTest")
 {
     using namespace armnn;

     // Create ArmNN runtime
     IRuntime::CreationOptions options; // default options
     auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
     options.m_CustomAllocatorMap = {{"CpuAcc", std::move(customAllocator)}};
     IRuntimePtr run = IRuntime::Create(options);
     TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
     INetworkPtr myNetwork = CreateTestNetwork(inputTensorInfo);

     // Optimise ArmNN network
     OptimizerOptions optOptions;
     optOptions.m_ImportEnabled = true;
     std::vector<std::string> errMessages;

     CHECK_THROWS_AS_MESSAGE(Optimize(*myNetwork, {"CpuAcc"}, run->GetDeviceSpec(), optOptions, errMessages),
                             armnn::InvalidArgumentException,
                             "Expected an exception as GetAvailablePreferredBackends() should be empty in Optimize().");

     // Remove the allocator registered for CpuAcc so it does not leak into other tests.
     auto& backendRegistry = armnn::BackendRegistryInstance();
     backendRegistry.DeregisterAllocator(NeonBackend::GetIdStatic());
 }

 #endif

 // Maps GpuAcc to a null allocator and expects runtime creation to throw.
 TEST_CASE("ClCustomAllocatorGpuAccNullptrTest")
 {
     using namespace armnn;

     // Create ArmNN runtime
     IRuntime::CreationOptions options; // default options
     // Deliberately register a null allocator for GpuAcc.
     options.m_CustomAllocatorMap = {{"GpuAcc", nullptr}};

     CHECK_THROWS_AS_MESSAGE(IRuntimePtr run = IRuntime::Create(options),
                             armnn::Exception,
                             "Expected exception in RuntimeImpl::RuntimeImpl() as allocator was nullptr.");
 }

 } // test suite ClCustomAllocatorTests
armnn::ICustomAllocator
Custom Allocator interface.
Definition: ICustomAllocator.hpp:16

IgnoreUnused.hpp

armnn::IConnectableLayer
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:66

NeonBackend.hpp

Utils.hpp

armnn::ICustomAllocator::free
virtual void free(void *ptr)=0
Interface to be implemented by the child class to free the allocated bytes.

armnn::TensorInfo
Definition: Tensor.hpp:152

armnn::IRuntimePtr
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:33

IRuntime.hpp

armnn::BackendRegistryInstance
BackendRegistry & BackendRegistryInstance()
Definition: BackendRegistry.cpp:15

armnn::InputTensors
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392

armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6

armnn::TensorShape
Definition: Tensor.hpp:20

ClBackend.hpp

TEST_SUITE
TEST_SUITE("ClCustomAllocatorTests")
Definition: ClCustomAllocatorTests.cpp:95

BackendRegistry.hpp

armnn::IOutputSlot::SetTensorInfo
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0

armnn::INetworkProperties
Definition: IRuntime.hpp:35

armnn::Tensor
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:319

armnn::Optimize
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1847

armnn::FullyConnectedDescriptor
A FullyConnectedDescriptor for the FullyConnectedLayer.
Definition: Descriptors.hpp:468

armnn::NetworkId
int NetworkId
Definition: IRuntime.hpp:27

armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327

armnn::OutputTensors
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393

armnn::ICustomAllocator::GetMemorySourceType
virtual armnn::MemorySource GetMemorySourceType()=0
Used to specify what type of memory is being allocated by this allocator.

armnn::IOptimizedNetworkPtr
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:242

armnn::IRuntime::CreationOptions::m_CustomAllocatorMap
std::map< BackendId, std::shared_ptr< ICustomAllocator > > m_CustomAllocatorMap
A map to define a custom memory allocator for specific backend Ids.
Definition: IRuntime.hpp:114

armnn::MemorySource::Malloc

armnn::OptimizerOptions
ArmNN performs an optimization on each model/network before it gets loaded for execution.
Definition: INetwork.hpp:137

armnn::InvalidArgumentException
Definition: Exceptions.hpp:80

INetwork.hpp

armnn::OptimizerOptions::m_ImportEnabled
bool m_ImportEnabled
Definition: INetwork.hpp:230

armnn::IRuntime::CreationOptions
Definition: IRuntime.hpp:77

ICustomAllocator.hpp

armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46

Descriptors.hpp

armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:230

Exceptions.hpp

armnn::IConnectableLayer::GetInputSlot
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.

armnn::TensorInfo::SetConstant
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:514

armnn::IConnectableLayer::GetOutputSlot
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.

CreateTestNetwork
armnn::INetworkPtr CreateTestNetwork(armnn::TensorInfo &inputTensorInfo)
Definition: ClCustomAllocatorTests.cpp:62

armnn::INetworkPtr
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:241

armnn::IOutputSlot::Connect
virtual int Connect(IInputSlot &destination)=0

armnn::INetwork::Create
static INetworkPtr Create(NetworkOptions networkOptions={})
Definition: Network.cpp:476

armnn::ICustomAllocator::allocate
virtual void * allocate(size_t size, size_t alignment)=0
Interface to be implemented by the child class to allocate bytes.

armnn::TensorInfo::GetNumElements
unsigned int GetNumElements() const
Definition: Tensor.hpp:196