author     Sadik Armagan <sadik.armagan@arm.com>  2021-10-06 16:41:44 +0100
committer  mike.kelly <mike.kelly@arm.com>        2021-11-04 15:16:10 +0000
commit     0a2dfabd76a45c58d0a14567f0503369c4e6fbf3 (patch)
tree       035340e9f663d599f83992846e1772b161640654
parent     1b46d132a3330692fcf9a603b21363a28f46ef03 (diff)
download   android-nn-driver-0a2dfabd76a45c58d0a14567f0503369c4e6fbf3.tar.gz
IVGCVSW-5636 'Implement NNAPI caching functions'
* Cached serialized ArmNN model.

!armnn:6384

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Signed-off-by: Kevin May <kevin.may@arm.com>
Change-Id: I78120a7f8ea892a28c0ff25f1b54e67a4f912574
 1.2/ArmnnDriver.hpp        |  40
 1.2/ArmnnDriverImpl.cpp    | 390
 1.2/ArmnnDriverImpl.hpp    |  30
 1.3/ArmnnDriver.hpp        |  86
 1.3/ArmnnDriverImpl.cpp    | 391
 1.3/ArmnnDriverImpl.hpp    |  29
 Android.mk                 |   2
 ArmnnDriverImpl.cpp        |   6
 ArmnnPreparedModel_1_2.cpp |  61
 ArmnnPreparedModel_1_2.hpp |  11
 ArmnnPreparedModel_1_3.cpp |  77
 ArmnnPreparedModel_1_3.hpp |  12
 CacheDataHandler.cpp       |  63 (new)
 CacheDataHandler.hpp       |  68 (new)
 Utils.cpp                  |  56
 Utils.hpp                  |   5
 16 files changed, 1201 insertions(+), 126 deletions(-)
diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp
index a350d3f4..c855b527 100644
--- a/1.2/ArmnnDriver.hpp
+++ b/1.2/ArmnnDriver.hpp
@@ -19,6 +19,8 @@
#include "../1.0/ArmnnDriverImpl.hpp"
#include "../1.0/HalPolicy.hpp"
+#include <armnn/BackendHelper.hpp>
+
#include <log/log.h>
namespace armnn_driver
@@ -135,20 +137,26 @@ public:
}
Return<V1_0::ErrorStatus> prepareModelFromCache(
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& callback)
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_2::ArmnnDriver::prepareModelFromCache()");
- callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ return ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+ m_Options,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
+ cb);
}
- Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& cb)
+ Return<V1_0::ErrorStatus> prepareModel_1_2(
+ const V1_2::Model& model, V1_1::ExecutionPreference preference,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()");
@@ -165,6 +173,9 @@ public:
m_ClTunedParameters,
m_Options,
model,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
cb,
model.relaxComputationFloat32toFloat16
&& m_Options.GetFp16Enabled());
@@ -198,9 +209,12 @@ public:
Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
{
ALOGV("hal_1_2::ArmnnDriver::getSupportedExtensions()");
-
- // Set both numbers to be 0 for cache not supported.
- cb(V1_0::ErrorStatus::NONE, 0, 0);
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : m_Options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul);
return Void();
}
};
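The hunk above switches getNumberOfCacheFilesNeeded() from advertising "no caching" (0, 0) to reporting one model cache file per backend that requests one, plus a single data cache file for the serialized Arm NN model. A minimal standalone sketch of that accounting, assuming armnn::GetNumberOfCacheFiles() from armnn/BackendHelper.hpp; CountCacheFiles itself is a hypothetical helper, not part of the commit:

```cpp
#include <armnn/BackendHelper.hpp>
#include <armnn/BackendId.hpp>

#include <utility>
#include <vector>

// Returns {model cache file count, data cache file count}. The data cache
// always holds exactly one file: the serialized Arm NN model.
std::pair<unsigned int, unsigned int>
CountCacheFiles(const std::vector<armnn::BackendId>& backends)
{
    unsigned int numberOfCachedModelFiles = 0;
    for (const auto& backend : backends)
    {
        // Per-backend count, e.g. 1 for GpuAcc (its compiled kernel blob), 0 otherwise.
        numberOfCachedModelFiles += armnn::GetNumberOfCacheFiles(backend);
    }
    return { numberOfCachedModelFiles, 1u };
}
```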
diff --git a/1.2/ArmnnDriverImpl.cpp b/1.2/ArmnnDriverImpl.cpp
index 01b3ab51..b3bc5cd1 100644
--- a/1.2/ArmnnDriverImpl.cpp
+++ b/1.2/ArmnnDriverImpl.cpp
@@ -8,7 +8,10 @@
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"
+#include <armnnDeserializer/IDeserializer.hpp>
+
#include <log/log.h>
+#include <sys/stat.h>
namespace
{
@@ -90,6 +93,9 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
const DriverOptions& options,
const V1_2::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
const android::sp<V1_2::IPreparedModelCallback>& cb,
bool float32ToFloat16)
{
@@ -127,8 +133,13 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
// Serialize the network graph to a .armnn file if an output directory
// has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
+ bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
auto serializedNetworkFileName =
- SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ serializeToFile);
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
@@ -136,12 +147,41 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+ int cachedFd = -1;
+ bool saveCachedNetwork = options.SaveCachedNetwork();
+
+ unsigned int numberOfCachedModelFiles = 0;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // and the modelCacheHandle vector should be in the same order as the backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ numberOfCachedModelFiles += numberOfCacheFiles;
+ if (modelCacheHandle[index]->numFds == 1)
+ {
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ cachedFd = modelCacheHandle[index]->data[0];
+ saveCachedNetwork = true;
+ }
+ }
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
armnn::BackendOptions gpuAcc("GpuAcc",
{
{ "FastMathEnabled", options.IsFastMathEnabled() },
- { "SaveCachedNetwork", options.SaveCachedNetwork() },
+ { "SaveCachedNetwork", saveCachedNetwork },
{ "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
- { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", cachedFd }
});
armnn::BackendOptions cpuAcc("CpuAcc",
@@ -192,12 +232,16 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
std::string msg;
armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
MemorySource::Undefined,
- MemorySource::Undefined);
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ auto numInputs = getMainModel(model).inputIndexes.size();
+ auto numOutputs = getMainModel(model).outputIndexes.size();
try
{
if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
{
- return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
}
}
catch (std::exception& e)
@@ -227,28 +271,344 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
- if (!preparedModel->ExecuteWithDummyInputs())
+ // Only run this if the GpuAcc backend has been added to options
+ if (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end())
{
- return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+ {
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ }
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ {
+ // Now that we've done one inference the CL kernel parameters will have been tuned,
+ // so save the updated file.
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+ options.GetClTunedParametersFile().c_str(), error.what());
+ }
+ }
+ }
+
+ size_t hashValue = 0;
+ // Cache the model
+ if (dataCacheHandle.size() > 0)
+ {
+ // Cache the Arm NN model; the data cache should contain exactly one handle
+ if (dataCacheHandle.size() != 1)
+ {
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_2(): Invalid Access Mode.");
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
+ hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
}
- if (clTunedParameters &&
- options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ if (modelCacheHandle.size() > 0)
{
- // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
- try
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
{
- clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
}
- catch (std::exception& error)
+ for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
{
- ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
- options.GetClTunedParametersFile().c_str(), error.what());
+ if (modelCacheHandle[i]->numFds == 1)
+ {
+ int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDONLY)
+ {
+ struct stat statBuffer;
+ if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector <uint8_t> modelData(modelDataSize);
+ pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+ }
+ }
+ }
+ }
}
}
+ if (hashValue != 0)
+ {
+ CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+ }
NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+}
+
+Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
+ bool float32ToFloat16)
+{
+ ALOGV("ArmnnDriverImpl::prepareModelFromCache()");
+
+ if (cb.get() == nullptr)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid callback passed to prepareModel");
+ return V1_0::ErrorStatus::INVALID_ARGUMENT;
+ }
+ if (!runtime)
+ {
+ return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
+ }
+
+ if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid token passed!", cb);
+ return V1_0::ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ // dataCacheHandle size should always be 1
+ // (a single serialized Arm NN model)
+ if (dataCacheHandle.size() != 1)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Check that the number of cached model files matches the expected value
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid model cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, numFds != 1.");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+ if (dataSize == 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int offset = 0;
+ {
+ struct stat statBuffer;
+ if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
+ {
+ unsigned long bufferSize = statBuffer.st_size;
+ if (bufferSize <= 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+ if (bufferSize > dataSize)
+ {
+ offset = bufferSize - dataSize;
+ }
+ }
+ }
+ std::vector<uint8_t> dataCacheData(dataSize);
+ pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
+ auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+ int gpuAccCachedFd = -1;
+ bool saveCachedNetwork = false;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // and the modelCacheHandle vector should be in the same order as the backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ if (modelCacheHandle[index]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the model cache, numFds != 1.");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE,
+ "Cannot read from the model cache, numFds != 1.", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+ auto cachedFd = modelCacheHandle[index]->data[0];
+
+ int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDWR)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ struct stat statBuffer;
+ if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(cachedFd, modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+ // For GpuAcc numberOfCacheFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ gpuAccCachedFd = cachedFd;
+ }
+ }
+ }
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
+ if (!CacheDataHandlerInstance().Validate(token, hashValue))
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: ValidateHash() failed!");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ValidateHash Failed!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Deserialize the network.
+ auto network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptions OptOptions;
+ OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"SaveCachedNetwork", saveCachedNetwork},
+ {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
+ {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()},
+ {"CachedFileDescriptor", gpuAccCachedFd}
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"NumberOfThreads", options.GetNumberOfThreads()}
+ });
+ OptOptions.m_ModelOptions.push_back(gpuAcc);
+ OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*network.get(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from optimize.";
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
+ {
+ message << "\n" << msg;
+ }
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ try
+ {
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+ {
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
+ new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
+ netId,
+ runtime.get(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ true));
+
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
return V1_0::ErrorStatus::NONE;
}
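One subtlety in prepareModelFromCache() above: the size registered for the token can be smaller than the data cache file on disk, so the serialized model is read from the tail of the file (offset = bufferSize - dataSize). A hedged sketch of that read using the same POSIX calls the diff uses; ReadDataCache is a hypothetical helper:

```cpp
#include <sys/stat.h>
#include <unistd.h>

#include <cstdint>
#include <vector>

// Reads the last 'expectedSize' bytes of the cache fd into 'out'.
// Returns false if the file is empty or the read comes up short.
bool ReadDataCache(int fd, size_t expectedSize, std::vector<uint8_t>& out)
{
    struct stat statBuffer {};
    if (fstat(fd, &statBuffer) != 0 || statBuffer.st_size <= 0)
    {
        return false; // nothing usable to deserialize
    }
    size_t fileSize = static_cast<size_t>(statBuffer.st_size);
    off_t offset = fileSize > expectedSize ? static_cast<off_t>(fileSize - expectedSize)
                                           : 0;
    out.resize(expectedSize);
    // pread() does not move the fd offset, mirroring the driver's usage.
    return pread(fd, out.data(), out.size(), offset) == static_cast<ssize_t>(out.size());
}
```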
diff --git a/1.2/ArmnnDriverImpl.hpp b/1.2/ArmnnDriverImpl.hpp
index eeb491b6..70f46cba 100644
--- a/1.2/ArmnnDriverImpl.hpp
+++ b/1.2/ArmnnDriverImpl.hpp
@@ -7,10 +7,13 @@
#include <HalInterfaces.h>
+#include "../CacheDataHandler.hpp"
#include "../DriverOptions.hpp"
#include <armnn/ArmNN.hpp>
+#include <NeuralNetworks.h>
+
#ifdef ARMNN_ANDROID_R
using namespace android::nn::hal;
#endif
@@ -30,12 +33,27 @@ namespace hal_1_2
class ArmnnDriverImpl
{
public:
- static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2(const armnn::IRuntimePtr& runtime,
- const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
- const DriverOptions& options,
- const V1_2::Model& model,
- const android::sp<V1_2::IPreparedModelCallback>& cb,
- bool float32ToFloat16 = false);
+ using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+ static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const V1_2::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
+ bool float32ToFloat16 = false);
+
+ static Return<V1_0::ErrorStatus> prepareModelFromCache(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
+ bool float32ToFloat16 = false);
static Return<void> getCapabilities_1_2(const armnn::IRuntimePtr& runtime,
V1_2::IDevice::getCapabilities_1_2_cb cb);
diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp
index fd4aa74c..6d2e0b7a 100644
--- a/1.3/ArmnnDriver.hpp
+++ b/1.3/ArmnnDriver.hpp
@@ -21,6 +21,8 @@
#include "../1.0/ArmnnDriverImpl.hpp"
#include "../1.0/HalPolicy.hpp"
+#include <armnn/BackendHelper.hpp>
+
#include <log/log.h>
namespace armnn_driver
@@ -31,6 +33,7 @@ namespace hal_1_3
class ArmnnDriver : public ArmnnDevice, public V1_3::IDevice
{
public:
+ using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
ArmnnDriver(DriverOptions options)
: ArmnnDevice(std::move(options))
@@ -39,9 +42,7 @@ public:
}
~ArmnnDriver() {}
- using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
-public:
Return<void> getCapabilities(V1_0::IDevice::getCapabilities_cb cb) override
{
ALOGV("hal_1_3::ArmnnDriver::getCapabilities()");
@@ -131,10 +132,13 @@ public:
cb);
}
- Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& cb)
+ Return<V1_0::ErrorStatus> prepareModel_1_2(
+ const V1_2::Model& model,
+ V1_1::ExecutionPreference preference,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_2()");
@@ -151,6 +155,9 @@ public:
m_ClTunedParameters,
m_Options,
model,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
cb,
model.relaxComputationFloat32toFloat16
&& m_Options.GetFp16Enabled());
@@ -174,14 +181,15 @@ public:
cb);
}
- Return<V1_3::ErrorStatus> prepareModel_1_3(const V1_3::Model& model,
- V1_1::ExecutionPreference preference,
- V1_3::Priority priority,
- const V1_3::OptionalTimePoint&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const android::sp<V1_3::IPreparedModelCallback>& cb)
+ Return<V1_3::ErrorStatus> prepareModel_1_3(
+ const V1_3::Model& model,
+ V1_1::ExecutionPreference preference,
+ V1_3::Priority priority,
+ const V1_3::OptionalTimePoint&,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCache,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCache,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_3()");
@@ -199,11 +207,13 @@ public:
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
-
return ArmnnDriverImpl::prepareArmnnModel_1_3(m_Runtime,
m_ClTunedParameters,
m_Options,
model,
+ modelCache,
+ dataCache,
+ token,
cb,
model.relaxComputationFloat32toFloat16
&& m_Options.GetFp16Enabled(),
@@ -219,10 +229,13 @@ public:
Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
{
- ALOGV("hal_1_3::ArmnnDriver::getSupportedExtensions()");
-
- // Set both numbers to be 0 for cache not supported.
- cb(V1_0::ErrorStatus::NONE, 0, 0);
+ ALOGV("hal_1_3::ArmnnDriver::getNumberOfCacheFilesNeeded()");
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : m_Options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul);
return Void();
}
@@ -250,26 +263,35 @@ public:
}
Return<V1_0::ErrorStatus> prepareModelFromCache(
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& callback)
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
- callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ return hal_1_2::ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+ m_Options,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
+ cb);
}
Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
const V1_3::OptionalTimePoint&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const android::sp<V1_3::IPreparedModelCallback>& callback)
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb)
{
- ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
- callback->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
- return V1_3::ErrorStatus::GENERAL_FAILURE;
+ ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache_1_3()");
+
+ return ArmnnDriverImpl::prepareModelFromCache_1_3(m_Runtime,
+ m_Options,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
+ cb);
}
Return<void> allocate(const V1_3::BufferDesc& /*desc*/,
diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp
index 3ecd2f82..e1d65f92 100644
--- a/1.3/ArmnnDriverImpl.cpp
+++ b/1.3/ArmnnDriverImpl.cpp
@@ -8,8 +8,12 @@
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"
+#include <armnnDeserializer/IDeserializer.hpp>
+
#include <log/log.h>
+#include <sys/stat.h>
+
namespace
{
const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
@@ -100,6 +104,9 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
const DriverOptions& options,
const V1_3::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
const android::sp<V1_3::IPreparedModelCallback>& cb,
bool float32ToFloat16,
V1_3::Priority priority)
@@ -138,8 +145,13 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
// Serialize the network graph to a .armnn file if an output directory
// has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
+ bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
auto serializedNetworkFileName =
- SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ serializeToFile);
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
@@ -147,12 +159,42 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+ int cachedFd = -1;
+ bool saveCachedNetwork = options.SaveCachedNetwork();
+
+ unsigned int numberOfCachedModelFiles = 0;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // and the modelCacheHandle vector should be in the same order as the backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ numberOfCachedModelFiles += numberOfCacheFiles;
+ if (modelCacheHandle[index]->numFds == 1)
+ {
+ // For GpuAcc numberOfCacheFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ cachedFd = modelCacheHandle[index]->data[0];
+ saveCachedNetwork = true;
+ }
+ }
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
armnn::BackendOptions gpuAcc("GpuAcc",
{
{ "FastMathEnabled", options.IsFastMathEnabled() },
- { "SaveCachedNetwork", options.SaveCachedNetwork() },
+ { "SaveCachedNetwork", saveCachedNetwork },
{ "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
- { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", cachedFd }
});
armnn::BackendOptions cpuAcc("CpuAcc",
@@ -203,7 +245,11 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
std::string msg;
armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
MemorySource::Undefined,
- MemorySource::Undefined);
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ auto numInputs = getMainModel(model).inputIndexes.size();
+ auto numOutputs = getMainModel(model).outputIndexes.size();
try
{
if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
@@ -239,28 +285,345 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
- if (!preparedModel->ExecuteWithDummyInputs())
+ // Only run this if the GpuAcc backend has been added to options
+ if (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end())
{
- return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+ {
+ return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ }
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ {
+ // Now that we've done one inference the CL kernel parameters will have been tuned,
+ // so save the updated file.
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+ options.GetClTunedParametersFile().c_str(), error.what());
+ }
+ }
+ }
+ size_t hashValue = 0;
+ // Cache the model
+ if (dataCacheHandle.size() > 0)
+ {
+ // Cache the Arm NN model
+ if (dataCacheHandle.size() != 1)
+ {
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Invalid Access Mode.");
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
+ hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
}
- if (clTunedParameters &&
- options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ // Cache the model data
+ if (modelCacheHandle.size() > 0)
{
- // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
- try
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
{
- clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
}
- catch (std::exception& error)
+
+ for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
{
- ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
- options.GetClTunedParametersFile().c_str(), error.what());
+ if (modelCacheHandle[i]->numFds == 1)
+ {
+ int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDONLY)
+ {
+ struct stat statBuffer;
+ if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+ }
+ }
+ }
+ }
}
}
+ if (hashValue != 0)
+ {
+ CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+ }
NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+}
+
+Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache_1_3(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb)
+{
+ ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()");
+ if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (cb.get() == nullptr)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3");
+ return V1_3::ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ if (!runtime)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable");
+ return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
+ }
+
+ // dataCacheHandle size should always be 1
+ // (a single serialized Arm NN model)
+ if (dataCacheHandle.size() != 1)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Check that the number of cached model files matches the expected value
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1.");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+ if (dataSize == 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int offset = 0;
+ {
+ struct stat statBuffer;
+ if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
+ {
+ unsigned long bufferSize = statBuffer.st_size;
+ if (bufferSize <= 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+ if (bufferSize > dataSize)
+ {
+ offset = bufferSize - dataSize;
+ }
+ }
+ }
+ std::vector<uint8_t> dataCacheData(dataSize);
+ pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
+ auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+ int gpuAccCachedFd = -1;
+ bool saveCachedNetwork = false;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // and the modelCacheHandle vector should be in the same order as the backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ if (modelCacheHandle[index]->numFds != 1)
+ {
+ ALOGW(
+ "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1.");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+ auto cachedFd = modelCacheHandle[index]->data[0];
+
+ int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDWR)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ struct stat statBuffer;
+ if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(cachedFd, modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+ // For GpuAcc numberOfCacheFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ gpuAccCachedFd = cachedFd;
+ }
+ }
+ }
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
+ if (!CacheDataHandlerInstance().Validate(token, hashValue))
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Deserialize the network.
+ auto network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptions OptOptions;
+ OptOptions.m_ReduceFp32ToFp16 = options.GetFp16Enabled();
+ OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"SaveCachedNetwork", saveCachedNetwork},
+ {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
+ {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()},
+ {"CachedFileDescriptor", gpuAccCachedFd}
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"NumberOfThreads", options.GetNumberOfThreads()}
+ });
+ OptOptions.m_ModelOptions.push_back(gpuAcc);
+ OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*network.get(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from optimize.";
+ FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
+ {
+ message << "\n" << msg;
+ }
+ FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ try
+ {
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+ {
+ return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb);
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+ FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
+ new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(netId,
+ runtime.get(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ V1_3::Priority::MEDIUM,
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ true));
+
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
return V1_3::ErrorStatus::NONE;
}
diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp
index 3c094fe5..a482edac 100644
--- a/1.3/ArmnnDriverImpl.hpp
+++ b/1.3/ArmnnDriverImpl.hpp
@@ -7,6 +7,7 @@
#include <HalInterfaces.h>
+#include "../CacheDataHandler.hpp"
#include "../DriverOptions.hpp"
#include <armnn/ArmNN.hpp>
@@ -31,13 +32,27 @@ namespace hal_1_3
class ArmnnDriverImpl
{
public:
- static Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(const armnn::IRuntimePtr& runtime,
- const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
- const DriverOptions& options,
- const V1_3::Model& model,
- const android::sp<V1_3::IPreparedModelCallback>& cb,
- bool float32ToFloat16 = false,
- V1_3::Priority priority = V1_3::Priority::MEDIUM);
+ using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+ static Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const V1_3::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb,
+ bool float32ToFloat16 = false,
+ V1_3::Priority priority = V1_3::Priority::MEDIUM);
+
+ static Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb);
static Return<void> getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
V1_3::IDevice::getCapabilities_1_3_cb cb);
diff --git a/Android.mk b/Android.mk
index 1d61930c..d0f4493a 100644
--- a/Android.mk
+++ b/Android.mk
@@ -469,6 +469,7 @@ LOCAL_SRC_FILES := \
ArmnnDriverImpl.cpp \
ArmnnPreparedModel.cpp \
ArmnnPreparedModel_1_2.cpp \
+ CacheDataHandler.cpp \
ConversionUtils.cpp \
DriverOptions.cpp \
ModelToINetworkConverter.cpp \
@@ -599,6 +600,7 @@ LOCAL_SRC_FILES := \
ArmnnPreparedModel.cpp \
ArmnnPreparedModel_1_2.cpp \
ArmnnPreparedModel_1_3.cpp \
+ CacheDataHandler.cpp \
ConversionUtils.cpp \
DriverOptions.cpp \
ModelToINetworkConverter.cpp \
diff --git a/ArmnnDriverImpl.cpp b/ArmnnDriverImpl.cpp
index 78ef12fe..0b3b9191 100644
--- a/ArmnnDriverImpl.cpp
+++ b/ArmnnDriverImpl.cpp
@@ -102,8 +102,12 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
// Serialize the network graph to a .armnn file if an output directory
// has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
auto serializedNetworkFileName =
- SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ false);
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index 2e378801..7cc75473 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -159,6 +159,47 @@ ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId netw
, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
, m_GpuProfilingEnabled(gpuProfilingEnabled)
, m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_PreparedFromCache(false)
+{
+ // Enable profiling if required.
+ m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
+}
+
+template<typename HalVersion>
+ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool preparedFromCache)
+ : m_NetworkId(networkId)
+ , m_Runtime(runtime)
+ , m_RequestCount(0)
+ , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+ , m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_PreparedFromCache(preparedFromCache)
{
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
@@ -384,7 +425,10 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const V1_0
V1_2::MeasureTiming measureTiming,
executeSynchronously_cb cb)
{
- ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (cb == nullptr)
@@ -400,7 +444,7 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const V1_0
driverStart = Now();
}
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
ALOGE("ArmnnPreparedModel_1_2::executeSynchronously invalid request model");
cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming);
@@ -530,11 +574,11 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
}
template<typename HalVersion>
-bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs()
+bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs)
{
std::vector<std::vector<char>> storage;
armnn::InputTensors inputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numInputs; i++)
{
const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
storage.emplace_back(inputTensorInfo.GetNumBytes());
@@ -544,7 +588,7 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs()
}
armnn::OutputTensors outputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numOutputs; i++)
{
const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
storage.emplace_back(outputTensorInfo.GetNumBytes());
@@ -576,10 +620,13 @@ Return <V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const V1_
ctx.driverStart = Now();
}
- ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
callback(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
return V1_0::ErrorStatus::INVALID_ARGUMENT;
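ExecuteWithDummyInputs() now takes explicit input/output counts because a model prepared from the cache carries no V1_2::Model to query. A sketch of the call site this implies (mirroring the normal compile path in the hunks above; not compilable standalone):

```cpp
// Normal compile path: counts come from the HAL model.
auto numInputs  = getMainModel(model).inputIndexes.size();
auto numOutputs = getMainModel(model).outputIndexes.size();
if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
{
    // Warm-up failed: CL kernels could not be compiled/tuned.
}
// A cache-restored model skips this warm-up entirely in this commit; if it
// were needed there, the counts would have to come from the loaded Arm NN
// network, since m_Model is absent.
```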
diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp
index c64c891e..255fc187 100644
--- a/ArmnnPreparedModel_1_2.hpp
+++ b/ArmnnPreparedModel_1_2.hpp
@@ -49,6 +49,14 @@ public:
const bool asyncModelExecutionEnabled = false,
const unsigned int numberOfThreads = 1);
+ ArmnnPreparedModel_1_2(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool preparedFromCache = false);
+
virtual ~ArmnnPreparedModel_1_2();
virtual Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
@@ -76,7 +84,7 @@ public:
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
- bool ExecuteWithDummyInputs();
+ bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs);
private:
@@ -156,6 +164,7 @@ private:
static std::unique_ptr<armnn::Threadpool> m_Threadpool;
std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
const bool m_AsyncModelExecutionEnabled;
+ const bool m_PreparedFromCache;
};
}
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index a5032360..e963d4e4 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -181,6 +181,49 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
, m_GpuProfilingEnabled(gpuProfilingEnabled)
, m_ModelPriority(priority)
, m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_PreparedFromCache(false)
+{
+ // Enable profiling if required.
+ m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
+}
+
+template<typename HalVersion>
+ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ V1_3::Priority priority,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool preparedFromCache)
+ : m_NetworkId(networkId)
+ , m_Runtime(runtime)
+ , m_RequestCount(0)
+ , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+ , m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_ModelPriority(priority)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_PreparedFromCache(preparedFromCache)
{
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
@@ -343,7 +386,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
ALOGW("ArmnnPreparedModel_1_3::executeFenced parameter loopTimeoutDuration is set but not supported.");
}
- if (!android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false))
{
ALOGV("ArmnnPreparedModel_1_3::executeFenced outputs must be specified for fenced execution ");
cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
@@ -357,7 +400,10 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
ctx.driverStart = Now();
}
- ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (!m_RequestInputsAndOutputsDumpDir.empty())
@@ -587,7 +633,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::ExecuteSynchronously(const V1_3
cbCtx.ctx.driverStart = Now();
}
- if (!android::nn::validateRequest(convertToV1_3(request), m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(convertToV1_3(request), m_Model))
{
ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model");
cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT,
@@ -597,7 +643,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::ExecuteSynchronously(const V1_3
return Void();
}
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model");
cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT,
@@ -634,7 +680,10 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeSynchronously(const V1_0
V1_2::MeasureTiming measureTiming,
executeSynchronously_cb cb)
{
- ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (cb == nullptr)
@@ -667,7 +716,10 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeSynchronously_1_3(
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
executeSynchronously_1_3_cb cb)
{
- ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (cb == nullptr)
@@ -854,11 +906,11 @@ void ArmnnPreparedModel_1_3<HalVersion>::ScheduleGraphForExecution(
}
template<typename HalVersion>
-bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs()
+bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs)
{
std::vector<std::vector<char>> storage;
armnn::InputTensors inputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numInputs; i++)
{
const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
storage.emplace_back(inputTensorInfo.GetNumBytes());
@@ -868,7 +920,7 @@ bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs()
}
armnn::OutputTensors outputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numOutputs; i++)
{
const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
storage.emplace_back(outputTensorInfo.GetNumBytes());
@@ -902,10 +954,13 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::Execute(const V1_
ctx.driverStart = Now();
}
- ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
callback(V1_3::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute");
return V1_3::ErrorStatus::INVALID_ARGUMENT;
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index a245cc4c..cd5fc0ed 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -57,6 +57,15 @@ public:
const bool asyncModelExecutionEnabled = false,
const unsigned int numberOfThreads = 1);
+ ArmnnPreparedModel_1_3(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ V1_3::Priority priority = V1_3::Priority::MEDIUM,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool preparedFromCache = false);
+
virtual ~ArmnnPreparedModel_1_3();
Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
@@ -108,7 +117,7 @@ public:
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
- bool ExecuteWithDummyInputs();
+ bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs);
V1_3::Priority GetModelPriority();
@@ -192,6 +201,7 @@ private:
static std::unique_ptr<armnn::Threadpool> m_Threadpool;
std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
const bool m_AsyncModelExecutionEnabled;
+ const bool m_PreparedFromCache;
};
}
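The extra constructor overload is what sets m_PreparedFromCache; the original constructor keeps its signature, so existing call sites are untouched. A construction sketch for the cache path (the DriverOptions getter names here are assumptions):

    // Hypothetical sketch -- the option getter names are assumptions.
    auto preparedModel = std::make_unique<ArmnnPreparedModel_1_3<HalPolicy>>(
        netId,
        runtime.get(),
        options.GetRequestInputsAndOutputsDumpDir(),
        options.IsGpuProfilingEnabled(),
        V1_3::Priority::MEDIUM,
        options.isAsyncModelExecutionEnabled(),
        options.getNoOfArmnnThreads(),
        true /* preparedFromCache */);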
diff --git a/CacheDataHandler.cpp b/CacheDataHandler.cpp
new file mode 100644
index 00000000..36881629
--- /dev/null
+++ b/CacheDataHandler.cpp
@@ -0,0 +1,63 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CacheDataHandler.hpp"
+
+#include <log/log.h>
+
+namespace armnn_driver
+{
+
+CacheDataHandler& CacheDataHandlerInstance()
+{
+ static CacheDataHandler instance;
+ return instance;
+}
+
+void CacheDataHandler::Register(const HidlToken token, const size_t hashValue, const size_t cacheSize)
+{
+ if (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end())
+ {
+ ALOGV("CacheHandler::Register() Token has been already registered.");
+ return;
+ }
+ CacheHandle cacheHandle(token, cacheSize);
+ m_CacheDataMap.insert({hashValue, cacheHandle});
+}
+
+bool CacheDataHandler::Validate(const HidlToken token, const size_t hashValue) const
+{
+ return (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end()
+ && m_CacheDataMap.at(hashValue).GetToken() == token);
+}
+
+size_t CacheDataHandler::Hash(std::vector<uint8_t>& cacheData)
+{
+ std::size_t hash = cacheData.size();
+ for (auto& i : cacheData)
+ {
+ hash ^= std::hash<unsigned int>{}(i);
+ }
+ return hash;
+}
+
+size_t CacheDataHandler::GetCacheSize(HidlToken token)
+{
+ for (auto i = m_CacheDataMap.begin(); i != m_CacheDataMap.end(); ++i)
+ {
+ if (i->second.GetToken() == token)
+ {
+ return i->second.GetCacheSize();
+ }
+ }
+ return 0;
+}
+
+void CacheDataHandler::Clear()
+{
+ m_CacheDataMap.clear();
+}
+
+} // armnn_driver
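One caveat on Hash(): XOR-folding a per-byte std::hash is order-insensitive, so any permutation of the same bytes (and any byte value occurring an even number of times) collapses to the same value; the token comparison inside Validate() is what actually disambiguates entries. If a stronger digest were ever wanted, an order-sensitive FNV-1a over the same bytes would be a small drop-in. The sketch below is an alternative, not the committed code:

    #include <cstdint>
    #include <vector>

    // Alternative sketch, not the committed code: FNV-1a is order
    // sensitive, so reordered or repeated bytes produce different hashes.
    uint64_t HashFnv1a(const std::vector<uint8_t>& data)
    {
        uint64_t hash = 14695981039346656037ull; // 64-bit FNV offset basis
        for (uint8_t byte : data)
        {
            hash ^= byte;
            hash *= 1099511628211ull;            // 64-bit FNV prime
        }
        return hash;
    }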
diff --git a/CacheDataHandler.hpp b/CacheDataHandler.hpp
new file mode 100644
index 00000000..cea73d20
--- /dev/null
+++ b/CacheDataHandler.hpp
@@ -0,0 +1,68 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <HalInterfaces.h>
+
+#include <vector>
+#include <unordered_map>
+
+#include <NeuralNetworks.h>
+
+namespace armnn_driver
+{
+
+using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+class CacheHandle
+{
+public:
+ CacheHandle(const HidlToken token, const size_t cacheSize)
+ : m_HidlToken(token), m_CacheSize(cacheSize) {}
+
+    ~CacheHandle() {}
+
+ HidlToken GetToken() const
+ {
+ return m_HidlToken;
+ }
+
+ size_t GetCacheSize() const
+ {
+ return m_CacheSize;
+ }
+
+private:
+ const HidlToken m_HidlToken;
+ const size_t m_CacheSize;
+};
+
+class CacheDataHandler
+{
+public:
+ CacheDataHandler() {}
+ ~CacheDataHandler() {}
+
+ void Register(const HidlToken token, const size_t hashValue, const size_t cacheSize);
+
+ bool Validate(const HidlToken token, const size_t hashValue) const;
+
+ size_t Hash(std::vector<uint8_t>& cacheData);
+
+ size_t GetCacheSize(HidlToken token);
+
+ void Clear();
+
+private:
+ CacheDataHandler(const CacheDataHandler&) = delete;
+ CacheDataHandler& operator=(const CacheDataHandler&) = delete;
+
+ std::unordered_map<size_t, CacheHandle> m_CacheDataMap;
+};
+
+CacheDataHandler& CacheDataHandlerInstance();
+
+} // armnn_driver
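The header makes the shape of the mechanism clear: a process-wide singleton mapping the hash of a serialized blob to its NNAPI cache token and size. A usage sketch of the intended round trip, where dataCacheData and token are assumed to be in scope:

    // Hypothetical flow sketch -- dataCacheData and token are assumptions.
    auto& handler = armnn_driver::CacheDataHandlerInstance();

    // prepareModel_1_x: after serializing the network into dataCacheData.
    const size_t hashValue = handler.Hash(dataCacheData);
    handler.Register(token, hashValue, dataCacheData.size());

    // prepareModelFromCache: after reading the blob back from the handle.
    if (!handler.Validate(token, handler.Hash(dataCacheData)))
    {
        // Stale or foreign cache entry: fall back to a full compile.
    }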
diff --git a/Utils.cpp b/Utils.cpp
index 9b52f5eb..f910cd49 100644
--- a/Utils.cpp
+++ b/Utils.cpp
@@ -554,37 +554,59 @@ std::string ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimize
return fileName;
}
-std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir)
+std::string SerializeNetwork(const armnn::INetwork& network,
+ const std::string& dumpDir,
+ std::vector<uint8_t>& dataCacheData,
+ bool dataCachingActive)
{
std::string fileName;
- // The dump directory must exist in advance.
+ bool bSerializeToFile = true;
if (dumpDir.empty())
{
- return fileName;
+ bSerializeToFile = false;
}
-
- std::string timestamp = GetFileTimestamp();
- if (timestamp.empty())
+ else
+ {
+ std::string timestamp = GetFileTimestamp();
+ if (timestamp.empty())
+ {
+ bSerializeToFile = false;
+ }
+ }
+ if (!bSerializeToFile && !dataCachingActive)
{
return fileName;
}
auto serializer(armnnSerializer::ISerializer::Create());
-
// Serialize the Network
serializer->Serialize(network);
+ if (dataCachingActive)
+ {
+ std::stringstream stream;
+ auto serialized = serializer->SaveSerializedToStream(stream);
+ if (serialized)
+ {
+ std::string const serializedString{stream.str()};
+ std::copy(serializedString.begin(), serializedString.end(), std::back_inserter(dataCacheData));
+ }
+ }
- // Set the name of the output .armnn file.
- fs::path dumpPath = dumpDir;
- fs::path tempFilePath = dumpPath / (timestamp + "_network.armnn");
- fileName = tempFilePath.string();
-
- // Save serialized network to a file
- std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
- bool serialized = serializer->SaveSerializedToStream(serializedFile);
- if (!serialized)
+ if (bSerializeToFile)
{
- ALOGW("An error occurred when serializing to file %s", fileName.c_str());
+ // Set the name of the output .armnn file.
+ fs::path dumpPath = dumpDir;
+ std::string timestamp = GetFileTimestamp();
+ fs::path tempFilePath = dumpPath / (timestamp + "_network.armnn");
+ fileName = tempFilePath.string();
+
+ // Save serialized network to a file
+ std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
+ auto serialized = serializer->SaveSerializedToStream(serializedFile);
+ if (!serialized)
+ {
+ ALOGW("An error occurred when serializing to file %s", fileName.c_str());
+ }
}
return fileName;
}
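SerializeNetwork() now feeds two consumers from a single serializer pass: the in-memory buffer used for the NNAPI data cache and, when a dump directory is configured, the same timestamped .armnn file as before (note the file branch calls GetFileTimestamp() a second time, having already consulted it to decide whether the file path is viable). A usage sketch of the cache-only path, with network assumed in scope:

    // Hypothetical sketch -- 'network' (an armnn::INetwork) is assumed in
    // scope. An empty dump directory skips the file branch, so only the
    // buffer is filled and the returned file name is empty.
    std::vector<uint8_t> dataCacheData;
    const std::string fileName = SerializeNetwork(*network, "", dataCacheData, true);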
diff --git a/Utils.hpp b/Utils.hpp
index da101535..9bd28ba6 100644
--- a/Utils.hpp
+++ b/Utils.hpp
@@ -139,7 +139,10 @@ void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
std::string ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork,
const std::string& dumpDir);
-std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir);
+std::string SerializeNetwork(const armnn::INetwork& network,
+ const std::string& dumpDir,
+ std::vector<uint8_t>& dataCacheData,
+ bool dataCachingActive = true);
void RenameExportedFiles(const std::string& existingSerializedFileName,
const std::string& existingDotFileName,