aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Eilers <jan.eilers@arm.com>2021-07-22 13:17:04 +0100
committerDavid Monahan <david.monahan@arm.com>2021-08-10 16:35:33 +0100
commitc1c872f12797ef6fe52c4589113e7efc353e56eb (patch)
tree911320c5306f9d2273ee76201806bfb12cbe4cd3
parentf487486c843a38fced90229923433d09f99fc2e5 (diff)
downloadarmnn-c1c872f12797ef6fe52c4589113e7efc353e56eb.tar.gz
Adds CustomAllocator interface and Sample App
* Updates the runtime options with a CustomAllocatorMap which allows to define a CustomAllocator for specific backends * Change IBackendInternal interface to use a shared pointer to a custom allocator * Update ClBackend.hpp/cpp to use the CustomAllocator * Adds an example application and unit test which uses a CustomAllocator for GpuAcc * Refactor of the interface to use MemorySource instead of the user Mapping cl_mem directly * Modify the BackendRegistry to also hold a registry of CustomAllocators * BackendRegistry Deregister will also deregister any allocators associated with that backend id * set_global_allocator within the BaseMemoryManager so that it always matches the currently used allocator Signed-off-by: Jan Eilers <jan.eilers@arm.com> Change-Id: I156d819686021865f4375e6cb7a5c5dec8fee9e8 Signed-off-by: David Monahan <david.monahan@arm.com>
-rw-r--r--include/armnn/BackendRegistry.hpp5
-rw-r--r--include/armnn/IRuntime.hpp20
-rw-r--r--include/armnn/backends/IBackendInternal.hpp5
-rw-r--r--include/armnn/backends/ICustomAllocator.hpp18
-rw-r--r--samples/CMakeLists.txt5
-rw-r--r--samples/CustomMemoryAllocatorSample.cpp175
-rw-r--r--src/armnn/BackendRegistry.cpp21
-rw-r--r--src/armnn/Runtime.cpp64
-rw-r--r--src/armnn/test/OptimizerTests.cpp133
-rw-r--r--src/backends/aclCommon/BaseMemoryManager.cpp2
-rw-r--r--src/backends/aclCommon/BaseMemoryManager.hpp8
-rw-r--r--src/backends/backendsCommon/test/CompatibilityTests.cpp4
-rw-r--r--src/backends/cl/ClBackend.cpp62
-rw-r--r--src/backends/cl/ClBackend.hpp128
-rw-r--r--src/backends/cl/ClImportTensorHandle.hpp10
-rw-r--r--src/backends/cl/ClRegistryInitializer.cpp8
-rw-r--r--src/backends/cl/test/CMakeLists.txt1
-rw-r--r--src/backends/cl/test/ClCustomAllocatorTests.cpp160
-rw-r--r--src/backends/cl/test/ClImportTensorHandleTests.cpp2
19 files changed, 658 insertions, 173 deletions
diff --git a/include/armnn/BackendRegistry.hpp b/include/armnn/BackendRegistry.hpp
index fe6451cde0..c13aa9f8b6 100644
--- a/include/armnn/BackendRegistry.hpp
+++ b/include/armnn/BackendRegistry.hpp
@@ -7,6 +7,7 @@
#include <armnn/Types.hpp>
#include <armnn/BackendId.hpp>
#include <armnn/Optional.hpp>
+#include <armnn/backends/ICustomAllocator.hpp>
#include <memory>
#include <unordered_map>
@@ -35,6 +36,8 @@ public:
BackendIdSet GetBackendIds() const;
std::string GetBackendIdsAsString() const;
void SetProfilingService(armnn::Optional<profiling::ProfilingService&> profilingService);
+ void RegisterAllocator(const BackendId& id, std::shared_ptr<ICustomAllocator> alloc);
+ std::unordered_map<BackendId, std::shared_ptr<ICustomAllocator>> GetAllocators();
BackendRegistry() {}
virtual ~BackendRegistry() {}
@@ -50,6 +53,7 @@ public:
};
void Deregister(const BackendId& id);
+ void DeregisterAllocator(const BackendId &id);
protected:
using FactoryStorage = std::unordered_map<BackendId, FactoryFunction>;
@@ -63,6 +67,7 @@ private:
FactoryStorage m_Factories;
armnn::Optional<profiling::ProfilingService&> m_ProfilingService;
+ std::unordered_map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomMemoryAllocatorMap;
};
BackendRegistry& BackendRegistryInstance();
diff --git a/include/armnn/IRuntime.hpp b/include/armnn/IRuntime.hpp
index 8c269dee49..97a9c2889e 100644
--- a/include/armnn/IRuntime.hpp
+++ b/include/armnn/IRuntime.hpp
@@ -16,6 +16,7 @@
#include <armnn/backends/ICustomAllocator.hpp>
#include <memory>
+#include <map>
namespace armnn
{
@@ -103,8 +104,8 @@ public:
: m_GpuAccTunedParameters(nullptr)
, m_EnableGpuProfiling(false)
, m_DynamicBackendsPath("")
- , m_CustomAllocator(nullptr)
, m_ProtectedMode(false)
+ , m_CustomAllocatorMap()
{}
/// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
@@ -118,17 +119,22 @@ public:
/// Only a single path is allowed for the override
std::string m_DynamicBackendsPath;
- /// A Custom Allocator used for allocation of working memory in the backends.
- /// Set this for when you need to allocate Protected Working Memory, required for ProtectedMode
- /// Only supported for GpuAcc
- ICustomAllocator* m_CustomAllocator;
-
/// Setting this flag will allow the user to create the Runtime in protected mode.
/// It will run all the inferences on protected memory and will make sure that
/// INetworkProperties::m_ImportEnabled set to true with MemorySource::DmaBufProtected option
- /// This will use Protected Memory Allocator associated with the backend
+ /// This requires that the backend supports Protected Memory and has an allocator capable of
+ /// allocating Protected Memory associated with it.
bool m_ProtectedMode;
+ /// @brief A map to define a custom memory allocator for specific backend Ids.
+ ///
+ /// @details A Custom Allocator is used for allocation of working memory in the backends.
+ /// Set this if you need to take control of how memory is allocated on a backend. Required for
+ /// Protected Mode in order to correctly allocate Protected Memory
+ ///
+ /// @note Only supported for GpuAcc
+ std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap;
+
struct ExternalProfilingOptions
{
ExternalProfilingOptions()
diff --git a/include/armnn/backends/IBackendInternal.hpp b/include/armnn/backends/IBackendInternal.hpp
index 3b4ef95703..626746465f 100644
--- a/include/armnn/backends/IBackendInternal.hpp
+++ b/include/armnn/backends/IBackendInternal.hpp
@@ -199,10 +199,13 @@ public:
/// Signals the backend to use a custom memory allocator provided by the user
///
+ /// \param allocator - a pointer to the provided ICustomAllocator to use with this backend
/// \param errMsg - Optional string variable to return error messages
/// \return - Returns true if switching to custom allocator was successful
- virtual bool UseCustomMemoryAllocator(armnn::Optional<std::string&> errMsg)
+ virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
+ armnn::Optional<std::string&> errMsg)
{
+ IgnoreUnused(allocator);
if (errMsg)
{
std::stringstream message;
diff --git a/include/armnn/backends/ICustomAllocator.hpp b/include/armnn/backends/ICustomAllocator.hpp
index 1d4df0cb86..92cbcc2641 100644
--- a/include/armnn/backends/ICustomAllocator.hpp
+++ b/include/armnn/backends/ICustomAllocator.hpp
@@ -7,6 +7,7 @@
#include <cstddef>
#include <memory>
+#include <armnn/MemorySources.hpp>
namespace armnn
{
@@ -23,13 +24,20 @@ public:
* @param[in] alignment Alignment that the returned pointer should comply with
*
* @return A pointer to the allocated memory
+ * The returned pointer must be host write accessible
*/
- virtual void *allocate(size_t size, size_t alignment) = 0;
- /** Interface to be implemented by the child class to free the allocated tensor */
- virtual void free(void *ptr) = 0;
+ virtual void* allocate(size_t size, size_t alignment) = 0;
- // Utility Function to define the Custom Memory Allocators capabilities
- virtual bool SupportsProtectedMemory() = 0;
+ /** Interface to be implemented by the child class to free the allocated bytes */
+ virtual void free(void* ptr) = 0;
+
+ // Used to specify what type of memory is being allocated by this allocator.
+ // Supported types are:
+ // MemorySource::Malloc
+ // Unsupported types are:
+ // MemorySource::DmaBuf
+ // MemorySource::DmaBufProtected
+ virtual armnn::MemorySource GetMemorySourceType() = 0;
};
} // namespace armnn \ No newline at end of file
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index ff45eecbe0..7be6a69369 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -8,3 +8,8 @@ if(BUILD_SAMPLE_APP AND SAMPLE_DYNAMIC_BACKEND)
target_link_libraries(DynamicSample armnn ${CMAKE_THREAD_LIBS_INIT})
endif()
+if(BUILD_SAMPLE_APP AND ARMCOMPUTECL)
+ add_executable(CustomMemoryAllocatorSample CustomMemoryAllocatorSample.cpp)
+ target_link_libraries(CustomMemoryAllocatorSample armnn ${CMAKE_THREAD_LIBS_INIT})
+endif()
+
diff --git a/samples/CustomMemoryAllocatorSample.cpp b/samples/CustomMemoryAllocatorSample.cpp
new file mode 100644
index 0000000000..51b3c81079
--- /dev/null
+++ b/samples/CustomMemoryAllocatorSample.cpp
@@ -0,0 +1,175 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/ArmNN.hpp>
+#include <armnn/backends/ICustomAllocator.hpp>
+
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include <iostream>
+
+/** Sample implementation of ICustomAllocator for use with the ClBackend.
+ * Note: any memory allocated must be host addressable with write access
+ * in order for ArmNN to be able to properly use it. */
+class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
+{
+public:
+ SampleClBackendCustomAllocator() = default;
+
+ void* allocate(size_t size, size_t alignment)
+ {
+ // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
+ if (alignment == 0)
+ {
+ alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ }
+ size_t space = size + alignment + alignment;
+ auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
+
+ if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
+ {
+ throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
+ }
+ return allocatedMemPtr;
+ }
+
+ void free(void* ptr)
+ {
+ std::free(ptr);
+ }
+
+ armnn::MemorySource GetMemorySourceType()
+ {
+ return armnn::MemorySource::Malloc;
+ }
+};
+
+
+// A simple example application to show the usage of a custom memory allocator. In this sample, the users single
+// input number is multiplied by 1.0f using a fully connected layer with a single neuron to produce an output
+// number that is the same as the input. All memory required to execute this mini network is allocated with
+// the provided custom allocator.
+//
+// Using a Custom Allocator is required for use with Protected Mode and Protected Memory.
+// This example is provided using only unprotected malloc as Protected Memory is platform
+// and implementation specific.
+//
+// Note: This example is similar to the SimpleSample application that can also be found in armnn/samples.
+// The differences are in the use of a custom allocator, the backend is GpuAcc, and the inputs/outputs
+// are being imported instead of copied. (Import must be enabled when using a Custom Allocator)
+// You might find this useful for comparison.
+int main()
+{
+ using namespace armnn;
+
+ float number;
+ std::cout << "Please enter a number: " << std::endl;
+ std::cin >> number;
+
+ // Turn on logging to standard output
+ // This is useful in this sample so that users can learn more about what is going on
+ armnn::ConfigureLogging(true, false, LogSeverity::Info);
+
+ // Construct ArmNN network
+ armnn::NetworkId networkIdentifier;
+ INetworkPtr myNetwork = INetwork::Create();
+ armnn::FullyConnectedDescriptor fullyConnectedDesc;
+ float weightsData[] = {1.0f}; // Identity
+ TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32);
+ weightsInfo.SetConstant(true);
+ armnn::ConstTensor weights(weightsInfo, weightsData);
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
+ IConnectableLayer *fullyConnected = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
+ weights,
+ EmptyOptional(),
+ "fully connected");
+ ARMNN_NO_DEPRECATE_WARN_END
+ IConnectableLayer *InputLayer = myNetwork->AddInputLayer(0);
+ IConnectableLayer *OutputLayer = myNetwork->AddOutputLayer(0);
+ InputLayer->GetOutputSlot(0).Connect(fullyConnected->GetInputSlot(0));
+ fullyConnected->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
+
+ // Create ArmNN runtime:
+ //
+ // This is the interesting bit when executing a model with a custom allocator.
+ // You can have different allocators for different backends. To support this
+ // the runtime creation option has a map that takes a BackendId and the corresponding
+ // allocator that should be used for that backend.
+ // Only GpuAcc supports a Custom Allocator for now
+ //
+ // Note: This is not covered in this example but if you want to run a model on
+ // protected memory a custom allocator needs to be provided that supports
+ // protected memory allocations and the MemorySource of that allocator is
+ // set to MemorySource::DmaBufProtected
+ IRuntime::CreationOptions options;
+ auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
+ options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
+ IRuntimePtr runtime = IRuntime::Create(options);
+
+ //Set the tensors in the network.
+ TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
+ InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
+
+ unsigned int numElements = inputTensorInfo.GetNumElements();
+ size_t totalBytes = numElements * sizeof(float);
+
+ TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
+ fullyConnected->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+ // Optimise ArmNN network
+ OptimizerOptions optOptions;
+ optOptions.m_ImportEnabled = true;
+ armnn::IOptimizedNetworkPtr optNet =
+ Optimize(*myNetwork, {"GpuAcc"}, runtime->GetDeviceSpec(), optOptions);
+ if (!optNet)
+ {
+ // This shouldn't happen for this simple sample, with GpuAcc backend.
+ // But in general usage Optimize could fail if the backend at runtime cannot
+ // support the model that has been provided.
+ std::cerr << "Error: Failed to optimise the input network." << std::endl;
+ return 1;
+ }
+
+ // Load graph into runtime
+ std::string ignoredErrorMessage;
+ INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
+ runtime->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+ // Creates structures for input & output
+ const size_t alignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+
+ void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
+
+ // Input with negative values
+ auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
+ std::fill_n(inputPtr, numElements, number);
+
+ void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
+ auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
+ std::fill_n(outputPtr, numElements, -10.0f);
+
+
+ armnn::InputTensors inputTensors
+ {
+ {0, armnn::ConstTensor(runtime->GetInputTensorInfo(networkIdentifier, 0), alignedInputPtr)},
+ };
+ armnn::OutputTensors outputTensors
+ {
+ {0, armnn::Tensor(runtime->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
+ };
+
+ // Execute network
+ runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+
+ // Tell the CLBackend to sync memory so we can read the output.
+ arm_compute::CLScheduler::get().sync();
+ auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
+ std::cout << "Your number was " << outputResult[0] << std::endl;
+ runtime->UnloadNetwork(networkIdentifier);
+ return 0;
+
+}
diff --git a/src/armnn/BackendRegistry.cpp b/src/armnn/BackendRegistry.cpp
index ff63c8236a..80daed9896 100644
--- a/src/armnn/BackendRegistry.cpp
+++ b/src/armnn/BackendRegistry.cpp
@@ -39,6 +39,7 @@ void BackendRegistry::Register(const BackendId& id, BackendRegistry::FactoryFunc
void BackendRegistry::Deregister(const BackendId& id)
{
m_Factories.erase(id);
+ DeregisterAllocator(id);
if (m_ProfilingService.has_value() && m_ProfilingService.value().IsProfilingEnabled())
{
@@ -106,5 +107,25 @@ void BackendRegistry::SetProfilingService(armnn::Optional<profiling::ProfilingSe
m_ProfilingService = profilingService;
}
+void BackendRegistry::RegisterAllocator(const BackendId& id, std::shared_ptr<ICustomAllocator> alloc)
+{
+ if (m_CustomMemoryAllocatorMap.find(id) != m_CustomMemoryAllocatorMap.end())
+ {
+ throw InvalidArgumentException(
+ std::string(id) + " already has an allocator associated with it",
+ CHECK_LOCATION());
+ }
+ m_CustomMemoryAllocatorMap[id] = alloc;
+}
+
+void BackendRegistry::DeregisterAllocator(const BackendId& id)
+{
+ m_CustomMemoryAllocatorMap.erase(id);
+}
+
+std::unordered_map<BackendId, std::shared_ptr<ICustomAllocator>> BackendRegistry::GetAllocators()
+{
+ return m_CustomMemoryAllocatorMap;
+}
} // namespace armnn
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index c2b748653d..9fe58287c3 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -130,7 +130,8 @@ Status RuntimeImpl::LoadNetwork(NetworkId& networkIdOut,
IOptimizedNetworkPtr inNetwork,
std::string& errorMessage)
{
- INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
+ INetworkProperties networkProperties(
+ false, MemorySource::Undefined, MemorySource::Undefined);
return LoadNetwork(networkIdOut, std::move(inNetwork), errorMessage, networkProperties);
}
@@ -267,7 +268,8 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
if ( options.m_ProfilingOptions.m_TimelineEnabled && !options.m_ProfilingOptions.m_EnableProfiling )
{
- throw RuntimeException("It is not possible to enable timeline reporting without profiling being enabled");
+ throw RuntimeException(
+ "It is not possible to enable timeline reporting without profiling being enabled");
}
// Load any available/compatible dynamic backend before the runtime
@@ -283,6 +285,8 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
auto backend = factoryFun();
ARMNN_ASSERT(backend.get() != nullptr);
+ auto customAllocatorMapIterator = options.m_CustomAllocatorMap.find(id);
+
// If the runtime is created in protected mode only add backends that support this mode
if (options.m_ProtectedMode)
{
@@ -298,17 +302,61 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
<< " is not registered as does not support protected content allocation \n";
continue;
}
- std::string err;
- if (!backend->UseCustomMemoryAllocator(err))
+ // The user is responsible to provide a custom memory allocator which allows to allocate
+ // protected memory
+ if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end())
{
- ARMNN_LOG(error) << "The backend "
+ std::string err;
+ if (customAllocatorMapIterator->second->GetMemorySourceType()
+ == armnn::MemorySource::DmaBufProtected)
+ {
+ if (!backend->UseCustomMemoryAllocator(customAllocatorMapIterator->second, err))
+ {
+ ARMNN_LOG(error) << "The backend "
+ << id
+ << " reported an error when entering protected mode. Backend won't be"
+ << " used. ErrorMsg: " << err;
+ continue;
+ }
+ // No errors so register the Custom Allocator with the BackendRegistry
+ BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+ }
+ else
+ {
+ ARMNN_LOG(error) << "The CustomAllocator provided with the runtime options doesn't support "
+ "protected memory. Protected mode can't be activated. The backend "
<< id
- << " reported an error when entering protected mode. Backend won't be used."
- << " ErrorMsg: " << err;
+ << " is not going to be used. MemorySource must be MemorySource::DmaBufProtected";
+ continue;
+ }
+ }
+ else
+ {
+ ARMNN_LOG(error) << "Protected mode can't be activated for backend: "
+ << id
+ << " no custom allocator was provided to the runtime options.";
continue;
}
}
-
+ else
+ {
+ // If a custom memory allocator is provided make the backend use that instead of the default
+ if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end())
+ {
+ std::string err;
+ if (!backend->UseCustomMemoryAllocator(customAllocatorMapIterator->second, err))
+ {
+ ARMNN_LOG(error) << "The backend "
+ << id
+ << " reported an error when trying to use the provided custom allocator."
+ " Backend won't be used."
+ << " ErrorMsg: " << err;
+ continue;
+ }
+ // No errors so register the Custom Allocator with the BackendRegistry
+ BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+ }
+ }
auto context = backend->CreateBackendContext(options);
// backends are allowed to return nullptrs if they
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index 19bd58193a..38aef671d2 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -220,9 +220,10 @@ public:
return m_BackendCapabilities;
};
- virtual bool UseCustomMemoryAllocator(armnn::Optional<std::string&> errMsg) override
+ virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
+ armnn::Optional<std::string&> errMsg) override
{
- IgnoreUnused(errMsg);
+ IgnoreUnused(errMsg, allocator);
m_CustomAllocator = true;
return m_CustomAllocator;
}
@@ -925,131 +926,3 @@ TEST_CASE("OptimizeForExclusiveConnectionsWithoutFuseTest")
&IsLayerOfType<armnn::OutputLayer>));
}
} // Optimizer TestSuite
-
-TEST_SUITE("Runtime")
-{
-// This test really belongs into RuntimeTests.cpp but it requires all sort of MockBackends which are
-// already defined here
-TEST_CASE("RuntimeProtectedModeOption")
-{
- using namespace armnn;
-
- struct MockPolicy
- {
- static const BackendId& GetIdStatic()
- {
- static BackendId id = "MockBackend";
- return id;
- }
- };
-
- struct ProtectedPolicy
- {
- static const BackendId& GetIdStatic()
- {
- static BackendId id = "MockBackendProtectedContent";
- return id;
- }
- };
-
- struct SillyPolicy
- {
- static const BackendId& GetIdStatic()
- {
- static BackendId id = "SillyMockBackend";
- return id;
- }
- };
-
- BackendCapabilities mockBackendCapabilities("MockBackend",
- {
- {"ProtectedContentAllocation", false}
- });
- BackendCapabilities mockProtectedBackendCapabilities("MockBackendProtectedContent",
- {
- {"ProtectedContentAllocation", true}
- });
-
- auto& backendRegistry = BackendRegistryInstance();
-
- // clean up from previous test runs
- std::vector<BackendId> mockBackends = {"MockBackend", "MockBackendProtectedContent", "SillyMockBackend"};
- for (auto& backend : mockBackends)
- {
- backendRegistry.Deregister(backend);
- }
-
- // Create a bunch of MockBackends with different capabilities
- // 1. Doesn't support protected mode even though it knows about this capability
- backendRegistry.Register("MockBackend", [mockBackendCapabilities]()
- {
- return std::make_unique<MockBackend<MockPolicy>>(mockBackendCapabilities);
- });
- // 2. Supports protected mode and has it implemented correctly
- backendRegistry.Register("MockBackendProtectedContent", [mockProtectedBackendCapabilities]()
- {
- return std::make_unique<MockBackend<ProtectedPolicy>>(mockProtectedBackendCapabilities);
- });
- // 3. Claims to support protected mode but doesn't have the UseCustomMemoryAllocator function implemented
- backendRegistry.Register("SillyMockBackend", [mockProtectedBackendCapabilities]()
- {
- return std::make_unique<NoProtectedModeMockBackend<SillyPolicy>>(mockProtectedBackendCapabilities);
- });
-
- // Creates a runtime that is not in protected mode
- {
- IRuntime::CreationOptions creationOptions;
- creationOptions.m_ProtectedMode = false;
-
- IRuntimePtr run = IRuntime::Create(creationOptions);
-
- const armnn::BackendIdSet supportedDevices = run->GetDeviceSpec().GetSupportedBackends();
- // Both MockBackends that are registered should show up in the runtimes supported backends list
- for (auto& backend : mockBackends)
- {
- CHECK(std::find(supportedDevices.cbegin(), supportedDevices.cend(), backend) != supportedDevices.cend());
- }
- }
-
- // If the runtime is in protected mode only backends that support protected content should be added
- {
- IRuntime::CreationOptions creationOptions;
- creationOptions.m_ProtectedMode = true;
-
- IRuntimePtr run = IRuntime::Create(creationOptions);
-
- const armnn::BackendIdSet supportedDevices = run->GetDeviceSpec().GetSupportedBackends();
- // Only the MockBackends that claims support for protected content should show up in the
- // runtimes supported backends list
- CHECK(std::find(supportedDevices.cbegin(),
- supportedDevices.cend(),
- "MockBackendProtectedContent") != supportedDevices.cend());
- CHECK(std::find(supportedDevices.cbegin(),
- supportedDevices.cend(),
- "MockBackend") == supportedDevices.cend());
- CHECK(std::find(supportedDevices.cbegin(),
- supportedDevices.cend(),
- "SillyMockBackend") == supportedDevices.cend());
- }
-
- // If the runtime is in protected mode only backends that support protected content should be added
- {
- IRuntime::CreationOptions creationOptions;
- creationOptions.m_ProtectedMode = true;
-
- IRuntimePtr run = IRuntime::Create(creationOptions);
-
- const armnn::BackendIdSet supportedDevices = run->GetDeviceSpec().GetSupportedBackends();
- // Only the MockBackend that claims support for protected content should show up in the
- // runtimes supported backends list
- CHECK(std::find(supportedDevices.cbegin(),
- supportedDevices.cend(),
- "MockBackendProtectedContent") != supportedDevices.cend());
-
- CHECK(std::find(supportedDevices.cbegin(),
- supportedDevices.cend(),
- "MockBackend") == supportedDevices.cend());
- }
-
-}
-}
diff --git a/src/backends/aclCommon/BaseMemoryManager.cpp b/src/backends/aclCommon/BaseMemoryManager.cpp
index 45e0480a84..c60a4a04ae 100644
--- a/src/backends/aclCommon/BaseMemoryManager.cpp
+++ b/src/backends/aclCommon/BaseMemoryManager.cpp
@@ -15,7 +15,7 @@ namespace armnn
{
#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
-BaseMemoryManager::BaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc,
+BaseMemoryManager::BaseMemoryManager(std::shared_ptr<arm_compute::IAllocator> alloc,
MemoryAffinity memoryAffinity)
{
ARMNN_ASSERT(alloc);
diff --git a/src/backends/aclCommon/BaseMemoryManager.hpp b/src/backends/aclCommon/BaseMemoryManager.hpp
index e80abf0edd..e3ffd188a1 100644
--- a/src/backends/aclCommon/BaseMemoryManager.hpp
+++ b/src/backends/aclCommon/BaseMemoryManager.hpp
@@ -15,6 +15,7 @@
#include <arm_compute/runtime/IAllocator.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+#include <arm_compute/runtime/CL/CLTensorAllocator.h>
#endif
namespace armnn
@@ -36,14 +37,14 @@ public:
void Release() override;
#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
- BaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity);
+ BaseMemoryManager(std::shared_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity);
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& GetIntraLayerManager() { return m_IntraLayerMemoryMgr; }
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& GetInterLayerManager() { return m_InterLayerMemoryMgr; }
std::shared_ptr<arm_compute::IMemoryGroup>& GetInterLayerMemoryGroup() { return m_InterLayerMemoryGroup; }
protected:
- std::unique_ptr<arm_compute::IAllocator> m_Allocator;
+ std::shared_ptr<arm_compute::IAllocator> m_Allocator;
std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_IntraLayerMemoryMgr;
std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_InterLayerMemoryMgr;
std::shared_ptr<arm_compute::IMemoryGroup> m_InterLayerMemoryGroup;
@@ -81,9 +82,10 @@ public:
ClMemoryManager() {}
virtual ~ClMemoryManager() {}
- ClMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc)
+ ClMemoryManager(std::shared_ptr<arm_compute::IAllocator> alloc)
: BaseMemoryManager(std::move(alloc), MemoryAffinity::Buffer)
{
+ arm_compute::CLTensorAllocator::set_global_allocator(alloc.get());
m_InterLayerMemoryGroup = CreateMemoryGroup(m_InterLayerMemoryMgr);
}
diff --git a/src/backends/backendsCommon/test/CompatibilityTests.cpp b/src/backends/backendsCommon/test/CompatibilityTests.cpp
index 12cb5e9956..34baad9d0c 100644
--- a/src/backends/backendsCommon/test/CompatibilityTests.cpp
+++ b/src/backends/backendsCommon/test/CompatibilityTests.cpp
@@ -3,8 +3,12 @@
// SPDX-License-Identifier: MIT
//
+#if defined(ARMCOMPUTECL_ENABLED)
#include <cl/ClBackend.hpp>
+#endif
+#if defined(ARMCOMPUTENEON_ENABLED)
#include <neon/NeonBackend.hpp>
+#endif
#include <reference/RefBackend.hpp>
#include <armnn/BackendHelper.hpp>
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index f1e52c1998..b85232e75c 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -49,6 +49,10 @@ const BackendId& ClBackend::GetIdStatic()
IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
{
+ if (m_UsingCustomAllocator)
+ {
+ return std::make_unique<ClMemoryManager>(m_CustomAllocator);
+ }
return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
}
@@ -69,7 +73,15 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
TensorHandleFactoryRegistry& registry) const
{
- auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
registry.RegisterMemoryManager(memoryManager);
registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
@@ -83,7 +95,15 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
{
- auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
registry.RegisterMemoryManager(memoryManager);
registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
@@ -100,7 +120,15 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
MemorySourceFlags inputFlags,
MemorySourceFlags outputFlags) const
{
- auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
registry.RegisterMemoryManager(memoryManager);
registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
@@ -118,10 +146,18 @@ std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferen
void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
{
- auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
- registry.RegisterMemoryManager(mgr);
- registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
+ registry.RegisterMemoryManager(memoryManager);
+ registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
}
@@ -130,10 +166,18 @@ void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& regis
MemorySourceFlags inputFlags,
MemorySourceFlags outputFlags)
{
- auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
- registry.RegisterMemoryManager(mgr);
- registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
+ registry.RegisterMemoryManager(memoryManager);
+ registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
}
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index c742c0b204..c63bd25c56 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -6,6 +6,15 @@
#include <armnn/backends/IBackendInternal.hpp>
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/CL/CLBufferAllocator.h>
+
+#include <aclCommon/BaseMemoryManager.hpp>
+#include <arm_compute/runtime/CL/CLMemoryRegion.h>
+
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <CL/cl_ext.h>
+
namespace armnn
{
@@ -20,7 +29,12 @@ const BackendCapabilities gpuAccCapabilities("GpuAcc",
class ClBackend : public IBackendInternal
{
public:
- ClBackend() : m_EnableCustomAllocator(false) {};
+ ClBackend() : m_CustomAllocator(nullptr) {};
+ ClBackend(std::shared_ptr<ICustomAllocator> allocator)
+ {
+ std::string err;
+ UseCustomMemoryAllocator(allocator, err);
+ }
~ClBackend() = default;
static const BackendId& GetIdStatic();
@@ -72,17 +86,119 @@ public:
return gpuAccCapabilities;
};
- virtual bool UseCustomMemoryAllocator(armnn::Optional<std::string&> errMsg) override
+ virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
+ armnn::Optional<std::string&> errMsg) override
{
IgnoreUnused(errMsg);
+ ARMNN_LOG(info) << "Using Custom Allocator for ClBackend";
// Set flag to signal the backend to use a custom memory allocator
- m_EnableCustomAllocator = true;
-
- return m_EnableCustomAllocator;
+ m_CustomAllocator = std::make_shared<ClBackendCustomAllocatorWrapper>(std::move(allocator));
+ m_UsingCustomAllocator = true;
+ return m_UsingCustomAllocator;
}
- bool m_EnableCustomAllocator;
+ // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this
+ class ClBackendCustomAllocatorWrapper : public arm_compute::IAllocator
+ {
+ public:
+ ClBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc)
+ {}
+ // Inherited methods overridden:
+ void* allocate(size_t size, size_t alignment) override
+ {
+ auto alloc = m_CustomAllocator->allocate(size, alignment);
+ return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType());
+ }
+ void free(void* ptr) override
+ {
+ auto hostMemPtr = m_AllocatedBufferMappings[ptr];
+ clReleaseMemObject(static_cast<cl_mem>(ptr));
+ m_CustomAllocator->free(hostMemPtr);
+ }
+ std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override
+ {
+ auto hostMemPtr = m_CustomAllocator->allocate(size, alignment);
+ cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType());
+
+ return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer), hostMemPtr);
+ }
+ private:
+ cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source)
+ {
+ // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
+ auto cachelineAlignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment);
+
+ if (source == MemorySource::Malloc)
+ {
+ const cl_import_properties_arm importProperties[] =
+ {
+ CL_IMPORT_TYPE_ARM,
+ CL_IMPORT_TYPE_HOST_ARM,
+ 0
+ };
+ cl_int error = CL_SUCCESS;
+ cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
+ CL_MEM_READ_WRITE,
+ importProperties,
+ memory,
+ roundedSize,
+ &error);
+ if (error == CL_SUCCESS)
+ {
+ m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
+ return buffer;
+ }
+ throw armnn::Exception(
+ "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error));
+ }
+ throw armnn::Exception(
+ "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator");
+ }
+ std::shared_ptr<ICustomAllocator> m_CustomAllocator;
+ std::map<void*, void*> m_AllocatedBufferMappings;
+ };
+
+ class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion
+ {
+ public:
+ // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access
+ ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr)
+ : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>())
+ {
+ _mem = buffer;
+ m_HostMemPtr = hostMemPtr;
+ }
+
+ // Inherited methods overridden :
+ void* ptr() override
+ {
+ return nullptr;
+ }
+
+ void* map(cl::CommandQueue &q, bool blocking) override
+ {
+ armnn::IgnoreUnused(q, blocking);
+ if (m_HostMemPtr == nullptr)
+ {
+ throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr");
+ }
+ _mapping = m_HostMemPtr;
+ return _mapping;
+ }
+
+ void unmap(cl::CommandQueue &q) override
+ {
+ armnn::IgnoreUnused(q);
+ _mapping = nullptr;
+ }
+ void* m_HostMemPtr = nullptr;
+ };
+
+ std::shared_ptr<ClBackendCustomAllocatorWrapper> m_CustomAllocator;
+ bool m_UsingCustomAllocator = false;
};
} // namespace armnn
diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp
index 3fca7cb127..69cd4a6d81 100644
--- a/src/backends/cl/ClImportTensorHandle.hpp
+++ b/src/backends/cl/ClImportTensorHandle.hpp
@@ -140,10 +140,16 @@ public:
private:
bool ClImport(const cl_import_properties_arm* importProperties, void* memory)
{
- const size_t totalBytes = m_Tensor.info()->total_size();
+ size_t totalBytes = m_Tensor.info()->total_size();
+
+ // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
+ auto cachelineAlignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
+
cl_int error = CL_SUCCESS;
cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
- CL_MEM_READ_WRITE, importProperties, memory, totalBytes, &error);
+ CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
if (error != CL_SUCCESS)
{
throw MemoryImportException("ClImportTensorHandle::Invalid imported memory" + std::to_string(error));
diff --git a/src/backends/cl/ClRegistryInitializer.cpp b/src/backends/cl/ClRegistryInitializer.cpp
index 8decd6f689..aadc14bd68 100644
--- a/src/backends/cl/ClRegistryInitializer.cpp
+++ b/src/backends/cl/ClRegistryInitializer.cpp
@@ -18,6 +18,14 @@ static BackendRegistry::StaticRegistryInitializer g_RegisterHelper
ClBackend::GetIdStatic(),
[]()
{
+ // Check if we have a CustomMemoryAllocator associated with the backend
+ // and if so register it with the backend.
+ auto customAllocators = BackendRegistryInstance().GetAllocators();
+ auto allocatorIterator = customAllocators.find(ClBackend::GetIdStatic());
+ if (allocatorIterator != customAllocators.end())
+ {
+ return IBackendInternalUniquePtr(new ClBackend(allocatorIterator->second));
+ }
return IBackendInternalUniquePtr(new ClBackend);
}
};
diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt
index 6662a1e659..41cbe24c15 100644
--- a/src/backends/cl/test/CMakeLists.txt
+++ b/src/backends/cl/test/CMakeLists.txt
@@ -6,6 +6,7 @@
list(APPEND armnnClBackendUnitTests_sources
ClContextControlFixture.hpp
ClContextSerializerTests.cpp
+ ClCustomAllocatorTests.cpp
ClCreateWorkloadTests.cpp
ClEndToEndTests.cpp
ClImportTensorHandleFactoryTests.cpp
diff --git a/src/backends/cl/test/ClCustomAllocatorTests.cpp b/src/backends/cl/test/ClCustomAllocatorTests.cpp
new file mode 100644
index 0000000000..4d1a0e1cfb
--- /dev/null
+++ b/src/backends/cl/test/ClCustomAllocatorTests.cpp
@@ -0,0 +1,160 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/backends/ICustomAllocator.hpp>
+#include <armnn/Descriptors.hpp>
+#include <armnn/Exceptions.hpp>
+#include <armnn/INetwork.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/Utils.hpp>
+#include <armnn/BackendRegistry.hpp>
+#include <cl/ClBackend.hpp>
+
+#include <doctest/doctest.h>
+
+// Contains the OpenCl interfaces for mapping memory in the Gpu Page Tables
+// Requires the OpenCl backend to be included (GpuAcc)
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <CL/cl_ext.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+
+/** Sample implementation of ICustomAllocator for use with the ClBackend.
+ * Note: any memory allocated must be host accessible with write access to allow for weights and biases
+ * to be passed in. Read access is not required. */
+class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
+{
+public:
+ SampleClBackendCustomAllocator() = default;
+
+ void* allocate(size_t size, size_t alignment)
+ {
+ // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
+ if (alignment == 0)
+ {
+ alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ }
+ size_t space = size + alignment + alignment;
+ auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
+
+ if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
+ {
+ throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
+ }
+ return allocatedMemPtr;
+ }
+
+ /** Interface to be implemented by the child class to free the allocated tensor */
+ void free(void* ptr)
+ {
+ std::free(ptr);
+ }
+
+ armnn::MemorySource GetMemorySourceType()
+ {
+ return armnn::MemorySource::Malloc;
+ }
+};
+
+TEST_SUITE("ClCustomAllocatorTests")
+{
+
+// This is a copy of the SimpleSample app modified to use a custom
+// allocator for the clbackend. It creates a FullyConnected network with a single layer
+// taking a single number as an input
+TEST_CASE("ClCustomAllocatorTest")
+{
+ using namespace armnn;
+
+ float number = 3;
+
+ // Construct ArmNN network
+ armnn::NetworkId networkIdentifier;
+ INetworkPtr myNetwork = INetwork::Create();
+
+ armnn::FullyConnectedDescriptor fullyConnectedDesc;
+ float weightsData[] = {1.0f}; // Identity
+ TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32);
+ weightsInfo.SetConstant(true);
+ armnn::ConstTensor weights(weightsInfo, weightsData);
+
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
+ IConnectableLayer* fullyConnected = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
+ weights,
+ EmptyOptional(),
+ "fully connected");
+ ARMNN_NO_DEPRECATE_WARN_END
+ IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
+ IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
+ InputLayer->GetOutputSlot(0).Connect(fullyConnected->GetInputSlot(0));
+ fullyConnected->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
+
+
+ // Create ArmNN runtime
+ IRuntime::CreationOptions options; // default options
+ auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
+ options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
+ IRuntimePtr run = IRuntime::Create(options);
+
+ // Set the tensors in the network.
+ TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
+ InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
+
+ TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
+ fullyConnected->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+ // Optimise ArmNN network
+ OptimizerOptions optOptions;
+ optOptions.m_ImportEnabled = true;
+ armnn::IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {"GpuAcc"}, run->GetDeviceSpec(), optOptions);
+ CHECK(optNet);
+
+ // Load graph into runtime
+ std::string ignoredErrorMessage;
+ INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
+ run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+ // Creates structures for input & output
+ unsigned int numElements = inputTensorInfo.GetNumElements();
+ size_t totalBytes = numElements * sizeof(float);
+
+ const size_t alignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+
+ void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
+
+ // Fill the input tensor with the test value
+ auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
+ std::fill_n(inputPtr, numElements, number);
+
+ void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
+ auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
+ std::fill_n(outputPtr, numElements, -10.0f);
+
+ armnn::InputTensors inputTensors
+ {
+ {0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), alignedInputPtr)},
+ };
+ armnn::OutputTensors outputTensors
+ {
+ {0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
+ };
+
+ // Execute network
+ run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+ run->UnloadNetwork(networkIdentifier);
+
+
+ // Tell the CLBackend to sync memory so we can read the output.
+ arm_compute::CLScheduler::get().sync();
+ auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
+
+ run->UnloadNetwork(networkIdentifier);
+ CHECK(outputResult[0] == number);
+ auto& backendRegistry = armnn::BackendRegistryInstance();
+ backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
+}
+
+} // test suite ClCustomAllocatorTests \ No newline at end of file
diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp
index 931729a736..6b1d3521d5 100644
--- a/src/backends/cl/test/ClImportTensorHandleTests.cpp
+++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp
@@ -61,7 +61,7 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "ClMallocImport")
// Validate result by checking that the output has no negative values
for(unsigned int i = 0; i < numElements; ++i)
{
- CHECK(typedPtr[i] >= 0);
+ CHECK(typedPtr[i] == 0);
}
}