From 0a2dfabd76a45c58d0a14567f0503369c4e6fbf3 Mon Sep 17 00:00:00 2001
From: Sadik Armagan <sadik.armagan@arm.com>
Date: Wed, 6 Oct 2021 16:41:44 +0100
Subject: IVGCVSW-5636 'Implement NNAPI caching functions'

* Cached serialized ArmNN model.

!armnn:6384

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Signed-off-by: Kevin May <kevin.may@arm.com>
Change-Id: I78120a7f8ea892a28c0ff25f1b54e67a4f912574
---
 1.2/ArmnnDriver.hpp        |  40 +++--
 1.2/ArmnnDriverImpl.cpp    | 390 ++++++++++++++++++++++++++++++++++++++++++--
 1.2/ArmnnDriverImpl.hpp    |  30 +++-
 1.3/ArmnnDriver.hpp        |  86 ++++++----
 1.3/ArmnnDriverImpl.cpp    | 391 +++++++++++++++++++++++++++++++++++++++++++--
 1.3/ArmnnDriverImpl.hpp    |  29 +++-
 Android.mk                 |   2 +
 ArmnnDriverImpl.cpp        |   6 +-
 ArmnnPreparedModel_1_2.cpp |  61 ++++++-
 ArmnnPreparedModel_1_2.hpp |  11 +-
 ArmnnPreparedModel_1_3.cpp |  77 +++++++--
 ArmnnPreparedModel_1_3.hpp |  12 +-
 CacheDataHandler.cpp       |  63 ++++++++
 CacheDataHandler.hpp       |  68 ++++++++
 Utils.cpp                  |  56 +++++--
 Utils.hpp                  |   5 +-
 16 files changed, 1201 insertions(+), 126 deletions(-)
 create mode 100644 CacheDataHandler.cpp
 create mode 100644 CacheDataHandler.hpp

diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp
index a350d3f4..c855b527 100644
--- a/1.2/ArmnnDriver.hpp
+++ b/1.2/ArmnnDriver.hpp
@@ -19,6 +19,8 @@
 #include "../1.0/ArmnnDriverImpl.hpp"
 #include "../1.0/HalPolicy.hpp"
 
+#include <armnn/BackendHelper.hpp>
+
 #include <log/log.h>
 
 namespace armnn_driver
@@ -135,20 +137,26 @@ public:
     }
 
     Return<V1_0::ErrorStatus> prepareModelFromCache(
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const HidlToken&,
-        const android::sp<V1_2::IPreparedModelCallback>& callback)
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_2::ArmnnDriver::prepareModelFromCache()");
-        callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
-        return V1_0::ErrorStatus::GENERAL_FAILURE;
+        return ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+                                                      m_Options,
+                                                      modelCacheHandle,
+                                                      dataCacheHandle,
+                                                      token,
+                                                      cb);
     }
 
-    Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
-                                               const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-                                               const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
-                                               const android::sp<V1_2::IPreparedModelCallback>& cb)
+    Return<V1_0::ErrorStatus> prepareModel_1_2(
+        const V1_2::Model& model, V1_1::ExecutionPreference preference,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()");
 
@@ -165,6 +173,9 @@ public:
                                                        m_ClTunedParameters,
                                                        m_Options,
                                                        model,
+                                                       modelCacheHandle,
+                                                       dataCacheHandle,
+                                                       token,
                                                        cb,
                                                        model.relaxComputationFloat32toFloat16
                                                        && m_Options.GetFp16Enabled());
@@ -198,9 +209,12 @@ public:
     Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
     {
         ALOGV("hal_1_2::ArmnnDriver::getSupportedExtensions()");
-
-        // Set both numbers to be 0 for cache not supported.
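// The hunk here replaces the old "no caching" stub: the model-cache count is
// now the per-backend sum of GetNumberOfCacheFiles() (GpuAcc contributes one
// file for its compiled kernels) and the data-cache count is a constant 1,
// reserved for the serialized Arm NN model. Equivalent stand-alone
// computation (illustrative sketch, not part of the patch):
//
//     unsigned int numberOfCachedModelFiles = 0;
//     for (auto& backend : options.GetBackends())
//     {
//         numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
//     }
//     cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul);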
- cb(V1_0::ErrorStatus::NONE, 0, 0); + unsigned int numberOfCachedModelFiles = 0; + for (auto& backend : m_Options.GetBackends()) + { + numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); + } + cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul); return Void(); } }; diff --git a/1.2/ArmnnDriverImpl.cpp b/1.2/ArmnnDriverImpl.cpp index 01b3ab51..b3bc5cd1 100644 --- a/1.2/ArmnnDriverImpl.cpp +++ b/1.2/ArmnnDriverImpl.cpp @@ -8,7 +8,10 @@ #include "../ModelToINetworkConverter.hpp" #include "../SystemPropertiesUtils.hpp" +#include + #include +#include namespace { @@ -90,6 +93,9 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_2( const armnn::IGpuAccTunedParametersPtr& clTunedParameters, const DriverOptions& options, const V1_2::Model& model, + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, const android::sp& cb, bool float32ToFloat16) { @@ -127,8 +133,13 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_2( // Serialize the network graph to a .armnn file if an output directory // has been specified in the drivers' arguments. + std::vector dataCacheData; + bool serializeToFile = dataCacheHandle.size() < 1 ? false : true; auto serializedNetworkFileName = - SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir()); + SerializeNetwork(*modelConverter.GetINetwork(), + options.GetRequestInputsAndOutputsDumpDir(), + dataCacheData, + serializeToFile); // Optimize the network armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); @@ -136,12 +147,41 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_2( OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16; OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled(); + int cachedFd = -1; + bool saveCachedNetwork = options.SaveCachedNetwork(); + + unsigned int numberOfCachedModelFiles = 0; + if (modelCacheHandle.size() > 0) + { + unsigned int index = 0; + for (auto& backend : options.GetBackends()) + { + // modelCacheHandle size should be equal to numberOfCachedModelFiles + // modelCacheHandle vector should be in same order as backends + auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); + if (numberOfCacheFiles > 0) + { + numberOfCachedModelFiles += numberOfCacheFiles; + if (modelCacheHandle[index]->numFds == 1) + { + if (backend == armnn::Compute::GpuAcc) + { + cachedFd = modelCacheHandle[index]->data[0]; + saveCachedNetwork = true; + } + } + index += numberOfCachedModelFiles; + } + } + } + armnn::BackendOptions gpuAcc("GpuAcc", { { "FastMathEnabled", options.IsFastMathEnabled() }, - { "SaveCachedNetwork", options.SaveCachedNetwork() }, + { "SaveCachedNetwork", saveCachedNetwork }, { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() }, - { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() } + { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }, + { "CachedFileDescriptor", cachedFd } }); armnn::BackendOptions cpuAcc("CpuAcc", @@ -192,12 +232,16 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_2( std::string msg; armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), MemorySource::Undefined, - MemorySource::Undefined); + MemorySource::Undefined, + options.IsGpuProfilingEnabled()); + + auto numInputs = getMainModel(model).inputIndexes.size(); + auto numOutputs = getMainModel(model).outputIndexes.size(); try { if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) { - return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, 
"Network could not be loaded", cb); + return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb); } } catch (std::exception& e) @@ -227,28 +271,344 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_2( // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if // this is enabled) before the first 'real' inference which removes the overhead of the first inference. - if (!preparedModel->ExecuteWithDummyInputs()) + // Only run this if the GpuAcc backend has been added to options + if (std::find(options.GetBackends().begin(), + options.GetBackends().end(), + armnn::Compute::GpuAcc) != options.GetBackends().end()) { - return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); + if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs)) + { + return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); + } + + if (clTunedParameters && + options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + { + // Now that we've done one inference the CL kernel parameters will have been tuned, + // so save the updated file. + try + { + clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + } + catch (std::exception& error) + { + ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", + options.GetClTunedParametersFile().c_str(), error.what()); + } + } + } + + size_t hashValue = 0; + // Cache the model + if (dataCacheHandle.size() > 0) + { + // Cache the Arm NN model, should be only 1 + if (dataCacheHandle.size() != 1) + { + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; + } + + if (dataCacheHandle[0]->numFds != 1) + { + ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1."); + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; + } + int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; + if (dataCacheFileAccessMode != O_RDWR) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_2(): Invalid Access Mode."); + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; + } + + write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size()); + hashValue = CacheDataHandlerInstance().Hash(dataCacheData); } - if (clTunedParameters && - options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + if (modelCacheHandle.size() > 0) { - // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file. 
- try + if (modelCacheHandle.size() != numberOfCachedModelFiles) { - clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; } - catch (std::exception& error) + for (uint32_t i = 0; i < modelCacheHandle.size(); ++i) { - ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", - options.GetClTunedParametersFile().c_str(), error.what()); + if (modelCacheHandle[i]->numFds == 1) + { + int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE; + if (modelCacheFileAccessMode != O_RDONLY) + { + struct stat statBuffer; + if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0) + { + long modelDataSize = statBuffer.st_size; + if (modelDataSize > 0) + { + std::vector modelData(modelDataSize); + pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0); + hashValue ^= CacheDataHandlerInstance().Hash(modelData); + } + } + } + } } } + if (hashValue != 0) + { + CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size()); + } NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; +} + +Return ArmnnDriverImpl::prepareModelFromCache( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, + const android::sp& cb, + bool float32ToFloat16) +{ + ALOGV("ArmnnDriverImpl::prepareModelFromCache()"); + + if (cb.get() == nullptr) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid callback passed to prepareModel"); + return V1_0::ErrorStatus::INVALID_ARGUMENT; + } + if (!runtime) + { + return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb); + } + + if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) + { + FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid token passed!", cb); + return V1_0::ErrorStatus::INVALID_ARGUMENT; + } + + // DataCacheHandle size should always be 1 + // Arm NN model + if (dataCacheHandle.size() != 1) + { + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + // Check if model files cached they match the expected value + unsigned int numberOfCachedModelFiles = 0; + for (auto& backend : options.GetBackends()) + { + numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); + } + if (modelCacheHandle.size() != numberOfCachedModelFiles) + { + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid model cache!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + if (dataCacheHandle[0]->numFds != 1) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, numFds != 1."); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; + if (dataCacheFileAccessMode != O_RDWR) + { + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + auto dataSize = CacheDataHandlerInstance().GetCacheSize(token); + if (dataSize == 0) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!"); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid 
data to deserialize!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + int offset = 0; + { + struct stat statBuffer; + if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0) + { + unsigned long bufferSize = statBuffer.st_size; + if (bufferSize <= 0) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!"); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + if (bufferSize > dataSize) + { + offset = bufferSize - dataSize; + } + } + } + std::vector dataCacheData(dataSize); + pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset); + auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData); + + int gpuAccCachedFd = -1; + bool saveCachedNetwork = false; + if (modelCacheHandle.size() > 0) + { + unsigned int index = 0; + for (auto& backend : options.GetBackends()) + { + // modelCacheHandle size should be equal to numberOfCachedModelFiles + // modelCacheHandle vector should be in same order as backends + auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); + if (numberOfCacheFiles > 0) + { + if (modelCacheHandle[index]->numFds != 1) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the model cache, numFds != 1."); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, + "Cannot read from the model cache, numFds != 1.", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + auto cachedFd = modelCacheHandle[index]->data[0]; + + int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE; + if (modelCacheFileAccessMode != O_RDWR) + { + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + struct stat statBuffer; + if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0) + { + long modelDataSize = statBuffer.st_size; + if (modelDataSize > 0) + { + std::vector modelData(modelDataSize); + pread(cachedFd, modelData.data(), modelData.size(), 0); + hashValue ^= CacheDataHandlerInstance().Hash(modelData); + + // For GpuAcc numberOfCachedFiles is 1 + if (backend == armnn::Compute::GpuAcc) + { + gpuAccCachedFd = cachedFd; + } + } + } + index += numberOfCacheFiles; + } + } + } + + if (!CacheDataHandlerInstance().Validate(token, hashValue)) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: ValidateHash() failed!"); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ValidateHash Failed!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + // Deserialize the network.. 
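// Illustrative summary (not driver code) of the contract validated above:
// prepareArmnnModel_1_2 registers the token against the combined hash and the
// blob size at compile time, and this function recomputes the hash from the
// supplied file descriptors before trusting the cached bytes.
//
//     // at prepare time:
//     CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
//
//     // at prepare-from-cache time:
//     if (!CacheDataHandlerInstance().Validate(token, recomputedHash))
//     {
//         return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ValidateHash Failed!", cb);
//     }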
+ auto network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData); + + // Optimize the network + armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); + armnn::OptimizerOptions OptOptions; + OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16; + OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled(); + + armnn::BackendOptions gpuAcc("GpuAcc", + { + {"FastMathEnabled", options.IsFastMathEnabled()}, + {"SaveCachedNetwork", saveCachedNetwork}, + {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()}, + {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()}, + {"CachedFileDescriptor", gpuAccCachedFd} + }); + + armnn::BackendOptions cpuAcc("CpuAcc", + { + {"FastMathEnabled", options.IsFastMathEnabled()}, + {"NumberOfThreads", options.GetNumberOfThreads()} + }); + OptOptions.m_ModelOptions.push_back(gpuAcc); + OptOptions.m_ModelOptions.push_back(cpuAcc); + + std::vector errMessages; + try + { + optNet = armnn::Optimize(*network.get(), + options.GetBackends(), + runtime->GetDeviceSpec(), + OptOptions, + errMessages); + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from optimize."; + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_0::ErrorStatus::NONE; + } + + // Check that the optimized network is valid. + if (!optNet) + { + std::stringstream message; + message << "Invalid optimized network"; + for (const std::string& msg : errMessages) + { + message << "\n" << msg; + } + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_0::ErrorStatus::NONE; + } + + // Export the optimized network graph to a dot file if an output dump directory + // has been specified in the drivers' arguments. + std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet, + options.GetRequestInputsAndOutputsDumpDir()); + + // Load it into the runtime. 
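// The fstat/pread pattern used repeatedly above, distilled into a stand-alone
// helper (illustrative; the helper name is not part of this patch):
//
//     #include <sys/stat.h>
//     #include <unistd.h>
//
//     bool ReadAllFromFd(int fd, std::vector<uint8_t>& out)
//     {
//         struct stat statBuffer;
//         if (fd < 0 || fstat(fd, &statBuffer) != 0 || statBuffer.st_size <= 0)
//         {
//             return false;
//         }
//         out.resize(static_cast<size_t>(statBuffer.st_size));
//         // pread() leaves the file offset untouched, so the runtime can keep using the fd
//         return pread(fd, out.data(), out.size(), 0) == statBuffer.st_size;
//     }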
+ armnn::NetworkId netId = 0; + std::string msg; + armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), + MemorySource::Undefined, + MemorySource::Undefined, + options.IsGpuProfilingEnabled()); + + try + { + if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) + { + return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb); + } + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from LoadNetwork."; + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_0::ErrorStatus::NONE; + } + + std::unique_ptr> preparedModel( + new ArmnnPreparedModel_1_2( + netId, + runtime.get(), + options.GetRequestInputsAndOutputsDumpDir(), + options.IsGpuProfilingEnabled(), + options.isAsyncModelExecutionEnabled(), + options.getNoOfArmnnThreads(), + true)); + + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); return V1_0::ErrorStatus::NONE; } diff --git a/1.2/ArmnnDriverImpl.hpp b/1.2/ArmnnDriverImpl.hpp index eeb491b6..70f46cba 100644 --- a/1.2/ArmnnDriverImpl.hpp +++ b/1.2/ArmnnDriverImpl.hpp @@ -7,10 +7,13 @@ #include +#include "../CacheDataHandler.hpp" #include "../DriverOptions.hpp" #include +#include + #ifdef ARMNN_ANDROID_R using namespace android::nn::hal; #endif @@ -30,12 +33,27 @@ namespace hal_1_2 class ArmnnDriverImpl { public: - static Return prepareArmnnModel_1_2(const armnn::IRuntimePtr& runtime, - const armnn::IGpuAccTunedParametersPtr& clTunedParameters, - const DriverOptions& options, - const V1_2::Model& model, - const android::sp& cb, - bool float32ToFloat16 = false); + using HidlToken = android::hardware::hidl_array; + + static Return prepareArmnnModel_1_2( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const V1_2::Model& model, + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, + const android::sp& cb, + bool float32ToFloat16 = false); + + static Return prepareModelFromCache( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, + const android::sp& cb, + bool float32ToFloat16 = false); static Return getCapabilities_1_2(const armnn::IRuntimePtr& runtime, V1_2::IDevice::getCapabilities_1_2_cb cb); diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp index fd4aa74c..6d2e0b7a 100644 --- a/1.3/ArmnnDriver.hpp +++ b/1.3/ArmnnDriver.hpp @@ -21,6 +21,8 @@ #include "../1.0/ArmnnDriverImpl.hpp" #include "../1.0/HalPolicy.hpp" +#include + #include namespace armnn_driver @@ -31,6 +33,7 @@ namespace hal_1_3 class ArmnnDriver : public ArmnnDevice, public V1_3::IDevice { public: + using HidlToken = android::hardware::hidl_array; ArmnnDriver(DriverOptions options) : ArmnnDevice(std::move(options)) @@ -39,9 +42,7 @@ public: } ~ArmnnDriver() {} - using HidlToken = android::hardware::hidl_array; -public: Return getCapabilities(V1_0::IDevice::getCapabilities_cb cb) override { ALOGV("hal_1_3::ArmnnDriver::getCapabilities()"); @@ -131,10 +132,13 @@ public: cb); } - Return prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference, - const android::hardware::hidl_vec&, - const android::hardware::hidl_vec&, const HidlToken&, - const android::sp& cb) + Return 
prepareModel_1_2( + const V1_2::Model& model, + V1_1::ExecutionPreference preference, + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, + const android::sp& cb) { ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_2()"); @@ -151,6 +155,9 @@ public: m_ClTunedParameters, m_Options, model, + modelCacheHandle, + dataCacheHandle, + token, cb, model.relaxComputationFloat32toFloat16 && m_Options.GetFp16Enabled()); @@ -174,14 +181,15 @@ public: cb); } - Return prepareModel_1_3(const V1_3::Model& model, - V1_1::ExecutionPreference preference, - V1_3::Priority priority, - const V1_3::OptionalTimePoint&, - const android::hardware::hidl_vec&, - const android::hardware::hidl_vec&, - const HidlToken&, - const android::sp& cb) + Return prepareModel_1_3( + const V1_3::Model& model, + V1_1::ExecutionPreference preference, + V1_3::Priority priority, + const V1_3::OptionalTimePoint&, + const android::hardware::hidl_vec& modelCache, + const android::hardware::hidl_vec& dataCache, + const HidlToken& token, + const android::sp& cb) { ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_3()"); @@ -199,11 +207,13 @@ public: return V1_3::ErrorStatus::INVALID_ARGUMENT; } - return ArmnnDriverImpl::prepareArmnnModel_1_3(m_Runtime, m_ClTunedParameters, m_Options, model, + modelCache, + dataCache, + token, cb, model.relaxComputationFloat32toFloat16 && m_Options.GetFp16Enabled(), @@ -219,10 +229,13 @@ public: Return getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) { - ALOGV("hal_1_3::ArmnnDriver::getSupportedExtensions()"); - - // Set both numbers to be 0 for cache not supported. - cb(V1_0::ErrorStatus::NONE, 0, 0); + ALOGV("hal_1_3::ArmnnDriver::getNumberOfCacheFilesNeeded()"); + unsigned int numberOfCachedModelFiles = 0; + for (auto& backend : m_Options.GetBackends()) + { + numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); + } + cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul); return Void(); } @@ -250,26 +263,35 @@ public: } Return prepareModelFromCache( - const android::hardware::hidl_vec&, - const android::hardware::hidl_vec&, - const HidlToken&, - const android::sp& callback) + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, + const android::sp& cb) { ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()"); - callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr); - return V1_0::ErrorStatus::GENERAL_FAILURE; + return hal_1_2::ArmnnDriverImpl::prepareModelFromCache(m_Runtime, + m_Options, + modelCacheHandle, + dataCacheHandle, + token, + cb); } Return prepareModelFromCache_1_3( const V1_3::OptionalTimePoint&, - const android::hardware::hidl_vec&, - const android::hardware::hidl_vec&, - const HidlToken&, - const android::sp& callback) + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, + const android::sp& cb) { - ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()"); - callback->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); - return V1_3::ErrorStatus::GENERAL_FAILURE; + ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache_1_3()"); + + return ArmnnDriverImpl::prepareModelFromCache_1_3(m_Runtime, + m_Options, + modelCacheHandle, + dataCacheHandle, + token, + cb); } Return allocate(const V1_3::BufferDesc& /*desc*/, diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp index 3ecd2f82..e1d65f92 100644 --- 
a/1.3/ArmnnDriverImpl.cpp +++ b/1.3/ArmnnDriverImpl.cpp @@ -8,8 +8,12 @@ #include "../ModelToINetworkConverter.hpp" #include "../SystemPropertiesUtils.hpp" +#include + #include +#include + namespace { const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime"; @@ -100,6 +104,9 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_3( const armnn::IGpuAccTunedParametersPtr& clTunedParameters, const DriverOptions& options, const V1_3::Model& model, + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, const android::sp& cb, bool float32ToFloat16, V1_3::Priority priority) @@ -138,8 +145,13 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_3( // Serialize the network graph to a .armnn file if an output directory // has been specified in the drivers' arguments. + std::vector dataCacheData; + bool serializeToFile = dataCacheHandle.size() < 1 ? false : true; auto serializedNetworkFileName = - SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir()); + SerializeNetwork(*modelConverter.GetINetwork(), + options.GetRequestInputsAndOutputsDumpDir(), + dataCacheData, + serializeToFile); // Optimize the network armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); @@ -147,12 +159,42 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_3( OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16; OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled(); + int cachedFd = -1; + bool saveCachedNetwork = options.SaveCachedNetwork(); + + unsigned int numberOfCachedModelFiles = 0; + if (modelCacheHandle.size() > 0) + { + unsigned int index = 0; + for (auto& backend : options.GetBackends()) + { + // modelCacheHandle size should be equal to numberOfCachedModelFiles + // modelCacheHandle vector should be in same order as backends + auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); + if (numberOfCacheFiles > 0) + { + numberOfCachedModelFiles += numberOfCacheFiles; + if (modelCacheHandle[index]->numFds == 1) + { + // For GpuAcc numberOfCachedFiles is 1 + if (backend == armnn::Compute::GpuAcc) + { + cachedFd = modelCacheHandle[index]->data[0]; + saveCachedNetwork = true; + } + } + index += numberOfCachedModelFiles; + } + } + } + armnn::BackendOptions gpuAcc("GpuAcc", { { "FastMathEnabled", options.IsFastMathEnabled() }, - { "SaveCachedNetwork", options.SaveCachedNetwork() }, + { "SaveCachedNetwork", saveCachedNetwork }, { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() }, - { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() } + { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }, + { "CachedFileDescriptor", cachedFd } }); armnn::BackendOptions cpuAcc("CpuAcc", @@ -203,7 +245,11 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_3( std::string msg; armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), MemorySource::Undefined, - MemorySource::Undefined); + MemorySource::Undefined, + options.IsGpuProfilingEnabled()); + + auto numInputs = getMainModel(model).inputIndexes.size(); + auto numOutputs = getMainModel(model).outputIndexes.size(); try { if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) @@ -239,28 +285,345 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_3( // Run a single 'dummy' inference of the model. 
This means that CL kernels will get compiled (and tuned if // this is enabled) before the first 'real' inference which removes the overhead of the first inference. - if (!preparedModel->ExecuteWithDummyInputs()) + // Only run this if the GpuAcc backend has been added to options + if (std::find(options.GetBackends().begin(), + options.GetBackends().end(), + armnn::Compute::GpuAcc) != options.GetBackends().end()) { - return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); + if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs)) + { + return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); + } + + if (clTunedParameters && + options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + { + // Now that we've done one inference the CL kernel parameters will have been tuned, + // so save the updated file. + try + { + clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + } + catch (std::exception& error) + { + ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", + options.GetClTunedParametersFile().c_str(), error.what()); + } + } + } + size_t hashValue = 0; + // Cache the model + if (dataCacheHandle.size() > 0) + { + // Cache the Arm NN model + if (dataCacheHandle.size() != 1) + { + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; + } + + if (dataCacheHandle[0]->numFds != 1) + { + ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1."); + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; + } + int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; + if (dataCacheFileAccessMode != O_RDWR) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Invalid Access Mode."); + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; + } + + write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size()); + hashValue = CacheDataHandlerInstance().Hash(dataCacheData); } - if (clTunedParameters && - options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + // Cache the model data + if (modelCacheHandle.size() > 0) { - // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file. 
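// The numFds / access-mode guard applied to each cache handle above, as a
// stand-alone predicate (illustrative; NNAPI hands the driver cache files
// opened O_RDWR at prepare time, so any other mode means "skip caching"):
//
//     #include <fcntl.h>
//
//     bool IsWritableCacheHandle(int numFds, int fd)
//     {
//         return numFds == 1 && ((fcntl(fd, F_GETFL) & O_ACCMODE) == O_RDWR);
//     }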
- try + if (modelCacheHandle.size() != numberOfCachedModelFiles) { - clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; } - catch (std::exception& error) + + for (uint32_t i = 0; i < modelCacheHandle.size(); ++i) { - ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", - options.GetClTunedParametersFile().c_str(), error.what()); + if (modelCacheHandle[i]->numFds == 1) + { + int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE; + if (modelCacheFileAccessMode != O_RDONLY) + { + struct stat statBuffer; + if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0) + { + long modelDataSize = statBuffer.st_size; + if (modelDataSize > 0) + { + std::vector modelData(modelDataSize); + pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0); + hashValue ^= CacheDataHandlerInstance().Hash(modelData); + } + } + } + } } } + if (hashValue != 0) + { + CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size()); + } NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; +} + +Return ArmnnDriverImpl::prepareModelFromCache_1_3( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, + const android::sp& cb) +{ + ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()"); + if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) + { + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + if (cb.get() == nullptr) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3"); + return V1_3::ErrorStatus::INVALID_ARGUMENT; + } + + if (!runtime) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable"); + return V1_3::ErrorStatus::DEVICE_UNAVAILABLE; + } + + // DataCacheHandle size should always be 1 + // Arm NN model + if (dataCacheHandle.size() != 1) + { + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + // Check if model files cached they match the expected value + unsigned int numberOfCachedModelFiles = 0; + for (auto& backend : options.GetBackends()) + { + numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); + } + if (modelCacheHandle.size() != numberOfCachedModelFiles) + { + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + if (dataCacheHandle[0]->numFds != 1) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1."); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; + if (dataCacheFileAccessMode != O_RDWR) + { + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + auto dataSize = CacheDataHandlerInstance().GetCacheSize(token); + if (dataSize == 0) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!"); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + int 
offset = 0; + { + struct stat statBuffer; + if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0) + { + unsigned long bufferSize = statBuffer.st_size; + if (bufferSize <= 0) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!"); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + if (bufferSize > dataSize) + { + offset = bufferSize - dataSize; + } + } + } + std::vector dataCacheData(dataSize); + pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset); + auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData); + + int gpuAccCachedFd = -1; + bool saveCachedNetwork = false; + if (modelCacheHandle.size() > 0) + { + unsigned int index = 0; + for (auto& backend : options.GetBackends()) + { + // modelCacheHandle size should be equal to numberOfCachedModelFiles + // modelCacheHandle vector should be in same order as backends + auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); + if (numberOfCacheFiles > 0) + { + if (modelCacheHandle[index]->numFds != 1) + { + ALOGW( + "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1."); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + auto cachedFd = modelCacheHandle[index]->data[0]; + + int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE; + if (modelCacheFileAccessMode != O_RDWR) + { + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + struct stat statBuffer; + if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0) + { + long modelDataSize = statBuffer.st_size; + if (modelDataSize > 0) + { + std::vector modelData(modelDataSize); + pread(cachedFd, modelData.data(), modelData.size(), 0); + hashValue ^= CacheDataHandlerInstance().Hash(modelData); + + // For GpuAcc numberOfCachedFiles is 1 + if (backend == armnn::Compute::GpuAcc) + { + gpuAccCachedFd = cachedFd; + } + } + } + index += numberOfCacheFiles; + } + } + } + + if (!CacheDataHandlerInstance().Validate(token, hashValue)) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!"); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + // Deserialize the network.. 
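// Worked example of the offset computation above, assuming another writer has
// appended to the data cache file: with a 4096-byte file and a registered blob
// of 4000 bytes, the Arm NN model is taken from the tail of the file.
//
//     long bufferSize = statBuffer.st_size;                              // 4096
//     auto dataSize   = CacheDataHandlerInstance().GetCacheSize(token);  // 4000
//     int  offset     = bufferSize > dataSize ? bufferSize - dataSize : 0;  // -> 96
//     pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataSize, offset);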
+ auto network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData); + + // Optimize the network + armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); + armnn::OptimizerOptions OptOptions; + OptOptions.m_ReduceFp32ToFp16 = options.GetFp16Enabled(); + OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled(); + + armnn::BackendOptions gpuAcc("GpuAcc", + { + {"FastMathEnabled", options.IsFastMathEnabled()}, + {"SaveCachedNetwork", saveCachedNetwork}, + {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()}, + {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()}, + {"CachedFileDescriptor", gpuAccCachedFd} + }); + + armnn::BackendOptions cpuAcc("CpuAcc", + { + {"FastMathEnabled", options.IsFastMathEnabled()}, + {"NumberOfThreads", options.GetNumberOfThreads()} + }); + OptOptions.m_ModelOptions.push_back(gpuAcc); + OptOptions.m_ModelOptions.push_back(cpuAcc); + + std::vector errMessages; + try + { + optNet = armnn::Optimize(*network.get(), + options.GetBackends(), + runtime->GetDeviceSpec(), + OptOptions, + errMessages); + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from optimize."; + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_3::ErrorStatus::NONE; + } + + // Check that the optimized network is valid. + if (!optNet) + { + std::stringstream message; + message << "Invalid optimized network"; + for (const std::string& msg : errMessages) + { + message << "\n" << msg; + } + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_3::ErrorStatus::NONE; + } + + // Export the optimized network graph to a dot file if an output dump directory + // has been specified in the drivers' arguments. + std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet, + options.GetRequestInputsAndOutputsDumpDir()); + + // Load it into the runtime. 
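// The INetworkProperties constructed below, annotated (the fourth argument is
// the profiling flag this patch adds; both memory sources remain Undefined
// because no buffer import is used on this path):
//
//     armnn::INetworkProperties networkProperties(
//         options.isAsyncModelExecutionEnabled(),   // execute via the thread pool
//         armnn::MemorySource::Undefined,           // input memory source
//         armnn::MemorySource::Undefined,           // output memory source
//         options.IsGpuProfilingEnabled());         // forward profiling into the runtime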
+ armnn::NetworkId netId = 0; + std::string msg; + armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), + MemorySource::Undefined, + MemorySource::Undefined, + options.IsGpuProfilingEnabled()); + + try + { + if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) + { + return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb); + } + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from LoadNetwork."; + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_3::ErrorStatus::NONE; + } + + std::unique_ptr> preparedModel( + new ArmnnPreparedModel_1_3(netId, + runtime.get(), + options.GetRequestInputsAndOutputsDumpDir(), + options.IsGpuProfilingEnabled(), + V1_3::Priority::MEDIUM, + options.isAsyncModelExecutionEnabled(), + options.getNoOfArmnnThreads(), + true)); + + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); return V1_3::ErrorStatus::NONE; } diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp index 3c094fe5..a482edac 100644 --- a/1.3/ArmnnDriverImpl.hpp +++ b/1.3/ArmnnDriverImpl.hpp @@ -7,6 +7,7 @@ #include +#include "../CacheDataHandler.hpp" #include "../DriverOptions.hpp" #include @@ -31,13 +32,27 @@ namespace hal_1_3 class ArmnnDriverImpl { public: - static Return prepareArmnnModel_1_3(const armnn::IRuntimePtr& runtime, - const armnn::IGpuAccTunedParametersPtr& clTunedParameters, - const DriverOptions& options, - const V1_3::Model& model, - const android::sp& cb, - bool float32ToFloat16 = false, - V1_3::Priority priority = V1_3::Priority::MEDIUM); + using HidlToken = android::hardware::hidl_array; + + static Return prepareArmnnModel_1_3( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const V1_3::Model& model, + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, + const android::sp& cb, + bool float32ToFloat16 = false, + V1_3::Priority priority = V1_3::Priority::MEDIUM); + + static Return prepareModelFromCache_1_3( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const android::hardware::hidl_vec& modelCacheHandle, + const android::hardware::hidl_vec& dataCacheHandle, + const HidlToken& token, + const android::sp& cb); static Return getCapabilities_1_3(const armnn::IRuntimePtr& runtime, V1_3::IDevice::getCapabilities_1_3_cb cb); diff --git a/Android.mk b/Android.mk index 1d61930c..d0f4493a 100644 --- a/Android.mk +++ b/Android.mk @@ -469,6 +469,7 @@ LOCAL_SRC_FILES := \ ArmnnDriverImpl.cpp \ ArmnnPreparedModel.cpp \ ArmnnPreparedModel_1_2.cpp \ + CacheDataHandler.cpp \ ConversionUtils.cpp \ DriverOptions.cpp \ ModelToINetworkConverter.cpp \ @@ -599,6 +600,7 @@ LOCAL_SRC_FILES := \ ArmnnPreparedModel.cpp \ ArmnnPreparedModel_1_2.cpp \ ArmnnPreparedModel_1_3.cpp \ + CacheDataHandler.cpp \ ConversionUtils.cpp \ DriverOptions.cpp \ ModelToINetworkConverter.cpp \ diff --git a/ArmnnDriverImpl.cpp b/ArmnnDriverImpl.cpp index 78ef12fe..0b3b9191 100644 --- a/ArmnnDriverImpl.cpp +++ b/ArmnnDriverImpl.cpp @@ -102,8 +102,12 @@ Return ArmnnDriverImpl::prepareModel( // Serialize the network graph to a .armnn file if an output directory // has been specified in the drivers' arguments. 
+ std::vector dataCacheData; auto serializedNetworkFileName = - SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir()); + SerializeNetwork(*modelConverter.GetINetwork(), + options.GetRequestInputsAndOutputsDumpDir(), + dataCacheData, + false); // Optimize the network armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp index 2e378801..7cc75473 100644 --- a/ArmnnPreparedModel_1_2.cpp +++ b/ArmnnPreparedModel_1_2.cpp @@ -159,6 +159,47 @@ ArmnnPreparedModel_1_2::ArmnnPreparedModel_1_2(armnn::NetworkId netw , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) , m_GpuProfilingEnabled(gpuProfilingEnabled) , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled) + , m_PreparedFromCache(false) +{ + // Enable profiling if required. + m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); + + if (m_AsyncModelExecutionEnabled) + { + std::vector> memHandles; + for (unsigned int i=0; i < numberOfThreads; ++i) + { + memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId)); + } + + if (!m_Threadpool) + { + m_Threadpool = std::make_unique(numberOfThreads, runtime, memHandles); + } + else + { + m_Threadpool->LoadMemHandles(memHandles); + } + + m_WorkingMemHandle = memHandles.back(); + } +} + +template +ArmnnPreparedModel_1_2::ArmnnPreparedModel_1_2(armnn::NetworkId networkId, + armnn::IRuntime* runtime, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled, + const bool asyncModelExecutionEnabled, + const unsigned int numberOfThreads, + const bool preparedFromCache) + : m_NetworkId(networkId) + , m_Runtime(runtime) + , m_RequestCount(0) + , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) + , m_GpuProfilingEnabled(gpuProfilingEnabled) + , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled) + , m_PreparedFromCache(preparedFromCache) { // Enable profiling if required. 
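// Call-site sketch for the new overload defined here, with the template
// arguments written out (see prepareModelFromCache above): preparedFromCache
// = true later skips request-vs-model validation, since a cache-loaded
// network carries no V1_2::Model to validate against.
//
//     std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
//         new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(netId,
//                                                        runtime.get(),
//                                                        options.GetRequestInputsAndOutputsDumpDir(),
//                                                        options.IsGpuProfilingEnabled(),
//                                                        options.isAsyncModelExecutionEnabled(),
//                                                        options.getNoOfArmnnThreads(),
//                                                        /*preparedFromCache=*/ true));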
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); @@ -384,7 +425,10 @@ Return ArmnnPreparedModel_1_2::executeSynchronously(const V1_0 V1_2::MeasureTiming measureTiming, executeSynchronously_cb cb) { - ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str()); + if (!m_PreparedFromCache) + { + ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str()); + } m_RequestCount++; if (cb == nullptr) @@ -400,7 +444,7 @@ Return ArmnnPreparedModel_1_2::executeSynchronously(const V1_0 driverStart = Now(); } - if (!android::nn::validateRequest(request, m_Model)) + if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model)) { ALOGE("ArmnnPreparedModel_1_2::executeSynchronously invalid request model"); cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming); @@ -530,11 +574,11 @@ bool ArmnnPreparedModel_1_2::ExecuteGraph( } template -bool ArmnnPreparedModel_1_2::ExecuteWithDummyInputs() +bool ArmnnPreparedModel_1_2::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs) { std::vector> storage; armnn::InputTensors inputTensors; - for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++) + for (unsigned int i = 0; i < numInputs; i++) { const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i); storage.emplace_back(inputTensorInfo.GetNumBytes()); @@ -544,7 +588,7 @@ bool ArmnnPreparedModel_1_2::ExecuteWithDummyInputs() } armnn::OutputTensors outputTensors; - for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++) + for (unsigned int i = 0; i < numOutputs; i++) { const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i); storage.emplace_back(outputTensorInfo.GetNumBytes()); @@ -576,10 +620,13 @@ Return ArmnnPreparedModel_1_2::Execute(const V1_ ctx.driverStart = Now(); } - ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str()); + if (!m_PreparedFromCache) + { + ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str()); + } m_RequestCount++; - if (!android::nn::validateRequest(request, m_Model)) + if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model)) { callback(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return V1_0::ErrorStatus::INVALID_ARGUMENT; diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp index c64c891e..255fc187 100644 --- a/ArmnnPreparedModel_1_2.hpp +++ b/ArmnnPreparedModel_1_2.hpp @@ -49,6 +49,14 @@ public: const bool asyncModelExecutionEnabled = false, const unsigned int numberOfThreads = 1); + ArmnnPreparedModel_1_2(armnn::NetworkId networkId, + armnn::IRuntime* runtime, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled, + const bool asyncModelExecutionEnabled = false, + const unsigned int numberOfThreads = 1, + const bool preparedFromCache = false); + virtual ~ArmnnPreparedModel_1_2(); virtual Return execute(const V1_0::Request& request, @@ -76,7 +84,7 @@ public: /// Executes this model with dummy inputs (e.g. all zeroes). 
/// \return false on failure, otherwise true - bool ExecuteWithDummyInputs(); + bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs); private: @@ -156,6 +164,7 @@ private: static std::unique_ptr m_Threadpool; std::shared_ptr m_WorkingMemHandle; const bool m_AsyncModelExecutionEnabled; + const bool m_PreparedFromCache; }; } diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp index a5032360..e963d4e4 100644 --- a/ArmnnPreparedModel_1_3.cpp +++ b/ArmnnPreparedModel_1_3.cpp @@ -181,6 +181,49 @@ ArmnnPreparedModel_1_3::ArmnnPreparedModel_1_3(armnn::NetworkId netw , m_GpuProfilingEnabled(gpuProfilingEnabled) , m_ModelPriority(priority) , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled) + , m_PreparedFromCache(false) +{ + // Enable profiling if required. + m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); + + if (m_AsyncModelExecutionEnabled) + { + std::vector> memHandles; + for (unsigned int i=0; i < numberOfThreads; ++i) + { + memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId)); + } + + if (!m_Threadpool) + { + m_Threadpool = std::make_unique(numberOfThreads, runtime, memHandles); + } + else + { + m_Threadpool->LoadMemHandles(memHandles); + } + + m_WorkingMemHandle = memHandles.back(); + } +} + +template +ArmnnPreparedModel_1_3::ArmnnPreparedModel_1_3(armnn::NetworkId networkId, + armnn::IRuntime* runtime, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled, + V1_3::Priority priority, + const bool asyncModelExecutionEnabled, + const unsigned int numberOfThreads, + const bool preparedFromCache) + : m_NetworkId(networkId) + , m_Runtime(runtime) + , m_RequestCount(0) + , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) + , m_GpuProfilingEnabled(gpuProfilingEnabled) + , m_ModelPriority(priority) + , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled) + , m_PreparedFromCache(preparedFromCache) { // Enable profiling if required. 
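// The asynchronous-execution setup this constructor performs, with the
// template arguments written out (a sketch, assuming the armnn::Threadpool
// constructor shape: one working-memory handle per thread, shared through
// the static thread pool):
//
//     std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
//     for (unsigned int i = 0; i < numberOfThreads; ++i)
//     {
//         memHandles.emplace_back(runtime->CreateWorkingMemHandle(networkId));
//     }
//     m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
//     m_WorkingMemHandle = memHandles.back();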
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); @@ -343,7 +386,7 @@ Return ArmnnPreparedModel_1_3::executeFenced(const V1_3::Reque ALOGW("ArmnnPreparedModel_1_3::executeFenced parameter loopTimeoutDuration is set but not supported."); } - if (!android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false)) + if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false)) { ALOGV("ArmnnPreparedModel_1_3::executeFenced outputs must be specified for fenced execution "); cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr); @@ -357,7 +400,10 @@ Return ArmnnPreparedModel_1_3::executeFenced(const V1_3::Reque ctx.driverStart = Now(); } - ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str()); + if (!m_PreparedFromCache) + { + ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str()); + } m_RequestCount++; if (!m_RequestInputsAndOutputsDumpDir.empty()) @@ -587,7 +633,7 @@ Return ArmnnPreparedModel_1_3::ExecuteSynchronously(const V1_3 cbCtx.ctx.driverStart = Now(); } - if (!android::nn::validateRequest(convertToV1_3(request), m_Model)) + if (!m_PreparedFromCache && !android::nn::validateRequest(convertToV1_3(request), m_Model)) { ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model"); cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT, @@ -597,7 +643,7 @@ Return ArmnnPreparedModel_1_3::ExecuteSynchronously(const V1_3 return Void(); } - if (!android::nn::validateRequest(request, m_Model)) + if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model)) { ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model"); cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT, @@ -634,7 +680,10 @@ Return ArmnnPreparedModel_1_3::executeSynchronously(const V1_0 V1_2::MeasureTiming measureTiming, executeSynchronously_cb cb) { - ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str()); + if (!m_PreparedFromCache) + { + ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str()); + } m_RequestCount++; if (cb == nullptr) @@ -667,7 +716,10 @@ Return ArmnnPreparedModel_1_3::executeSynchronously_1_3( const V1_3::OptionalTimeoutDuration& loopTimeoutDuration, executeSynchronously_1_3_cb cb) { - ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str()); + if (!m_PreparedFromCache) + { + ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str()); + } m_RequestCount++; if (cb == nullptr) @@ -854,11 +906,11 @@ void ArmnnPreparedModel_1_3::ScheduleGraphForExecution( } template -bool ArmnnPreparedModel_1_3::ExecuteWithDummyInputs() +bool ArmnnPreparedModel_1_3::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs) { std::vector> storage; armnn::InputTensors inputTensors; - for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++) + for (unsigned int i = 0; i < numInputs; i++) { const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i); storage.emplace_back(inputTensorInfo.GetNumBytes()); @@ -868,7 +920,7 @@ bool ArmnnPreparedModel_1_3::ExecuteWithDummyInputs() } armnn::OutputTensors outputTensors; - for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++) + for (unsigned int i = 0; i < numOutputs; i++) { const armnn::TensorInfo outputTensorInfo = 
m_Runtime->GetOutputTensorInfo(m_NetworkId, i); storage.emplace_back(outputTensorInfo.GetNumBytes()); @@ -902,10 +954,13 @@ Return ArmnnPreparedModel_1_3::Execute(const V1_ ctx.driverStart = Now(); } - ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str()); + if (!m_PreparedFromCache) + { + ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str()); + } m_RequestCount++; - if (!android::nn::validateRequest(request, m_Model)) + if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model)) { callback(V1_3::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute"); return V1_3::ErrorStatus::INVALID_ARGUMENT; diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp index a245cc4c..cd5fc0ed 100644 --- a/ArmnnPreparedModel_1_3.hpp +++ b/ArmnnPreparedModel_1_3.hpp @@ -57,6 +57,15 @@ public: const bool asyncModelExecutionEnabled = false, const unsigned int numberOfThreads = 1); + ArmnnPreparedModel_1_3(armnn::NetworkId networkId, + armnn::IRuntime* runtime, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled, + V1_3::Priority priority = V1_3::Priority::MEDIUM, + const bool asyncModelExecutionEnabled = false, + const unsigned int numberOfThreads = 1, + const bool preparedFromCache = false); + virtual ~ArmnnPreparedModel_1_3(); Return execute(const V1_0::Request& request, @@ -108,7 +117,7 @@ public: /// Executes this model with dummy inputs (e.g. all zeroes). /// \return false on failure, otherwise true - bool ExecuteWithDummyInputs(); + bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs); V1_3::Priority GetModelPriority(); @@ -192,6 +201,7 @@ private: static std::unique_ptr m_Threadpool; std::shared_ptr m_WorkingMemHandle; const bool m_AsyncModelExecutionEnabled; + const bool m_PreparedFromCache; }; } diff --git a/CacheDataHandler.cpp b/CacheDataHandler.cpp new file mode 100644 index 00000000..36881629 --- /dev/null +++ b/CacheDataHandler.cpp @@ -0,0 +1,63 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "CacheDataHandler.hpp"
+
+#include <log/log.h>
+
+namespace armnn_driver
+{
+
+CacheDataHandler& CacheDataHandlerInstance()
+{
+    static CacheDataHandler instance;
+    return instance;
+}
+
+void CacheDataHandler::Register(const HidlToken token, const size_t hashValue, const size_t cacheSize)
+{
+    if (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end())
+    {
+        ALOGV("CacheHandler::Register() Token has been already registered.");
+        return;
+    }
+    CacheHandle cacheHandle(token, cacheSize);
+    m_CacheDataMap.insert({hashValue, cacheHandle});
+}
+
+bool CacheDataHandler::Validate(const HidlToken token, const size_t hashValue) const
+{
+    return (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end()
+            && m_CacheDataMap.at(hashValue).GetToken() == token);
+}
+
+size_t CacheDataHandler::Hash(std::vector<uint8_t>& cacheData)
+{
+    std::size_t hash = cacheData.size();
+    for (auto& i : cacheData)
+    {
+        hash ^= std::hash<uint8_t>{}(i);
+    }
+    return hash;
+}
+
+size_t CacheDataHandler::GetCacheSize(HidlToken token)
+{
+    for (auto i = m_CacheDataMap.begin(); i != m_CacheDataMap.end(); ++i)
+    {
+        if (i->second.GetToken() == token)
+        {
+            return i->second.GetCacheSize();
+        }
+    }
+    return 0;
+}
+
+void CacheDataHandler::Clear()
+{
+    m_CacheDataMap.clear();
+}
+
+} // armnn_driver
diff --git a/CacheDataHandler.hpp b/CacheDataHandler.hpp
new file mode 100644
index 00000000..cea73d20
--- /dev/null
+++ b/CacheDataHandler.hpp
@@ -0,0 +1,68 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <HalInterfaces.h>
+
+#include <vector>
+#include <unordered_map>
+
+#include <NeuralNetworks.h>
+
+namespace armnn_driver
+{
+
+using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+class CacheHandle
+{
+public:
+    CacheHandle(const HidlToken token, const size_t cacheSize)
+    : m_HidlToken(token), m_CacheSize(cacheSize) {}
+
+    ~CacheHandle() {};
+
+    HidlToken GetToken() const
+    {
+        return m_HidlToken;
+    }
+
+    size_t GetCacheSize() const
+    {
+        return m_CacheSize;
+    }
+
+private:
+    const HidlToken m_HidlToken;
+    const size_t m_CacheSize;
+};
+
+class CacheDataHandler
+{
+public:
+    CacheDataHandler() {}
+    ~CacheDataHandler() {}
+
+    void Register(const HidlToken token, const size_t hashValue, const size_t cacheSize);
+
+    bool Validate(const HidlToken token, const size_t hashValue) const;
+
+    size_t Hash(std::vector<uint8_t>& cacheData);
+
+    size_t GetCacheSize(HidlToken token);
+
+    void Clear();
+
+private:
+    CacheDataHandler(const CacheDataHandler&) = delete;
+    CacheDataHandler& operator=(const CacheDataHandler&) = delete;
+
+    std::unordered_map<size_t, CacheHandle> m_CacheDataMap;
+};
+
+CacheDataHandler& CacheDataHandlerInstance();
+
+} // armnn_driver
diff --git a/Utils.cpp b/Utils.cpp
index 9b52f5eb..f910cd49 100644
--- a/Utils.cpp
+++ b/Utils.cpp
@@ -554,37 +554,59 @@ std::string ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimize
     return fileName;
 }
 
-std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir)
+std::string SerializeNetwork(const armnn::INetwork& network,
+                             const std::string& dumpDir,
+                             std::vector<uint8_t>& dataCacheData,
+                             bool dataCachingActive)
 {
     std::string fileName;
-    // The dump directory must exist in advance.
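// What the rewritten body below amounts to (illustrative summary): serialize
// the network once, then route the result to up to two sinks.
//
//     auto serializer = armnnSerializer::ISerializer::Create();
//     serializer->Serialize(network);
//
//     std::stringstream memSink;                     // NNAPI data cache, if dataCachingActive
//     serializer->SaveSerializedToStream(memSink);
//
//     std::ofstream fileSink(fileName, std::ios::out | std::ios::binary);
//     serializer->SaveSerializedToStream(fileSink);  // optional .armnn dump, if dumpDir is set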
+    bool bSerializeToFile = true;
     if (dumpDir.empty())
     {
-        return fileName;
+        bSerializeToFile = false;
     }
-
-    std::string timestamp = GetFileTimestamp();
-    if (timestamp.empty())
+    else
+    {
+        std::string timestamp = GetFileTimestamp();
+        if (timestamp.empty())
+        {
+            bSerializeToFile = false;
+        }
+    }
+    if (!bSerializeToFile && !dataCachingActive)
     {
         return fileName;
     }
 
     auto serializer(armnnSerializer::ISerializer::Create());
-
     // Serialize the Network
     serializer->Serialize(network);
+    if (dataCachingActive)
+    {
+        std::stringstream stream;
+        auto serialized = serializer->SaveSerializedToStream(stream);
+        if (serialized)
+        {
+            std::string const serializedString{stream.str()};
+            std::copy(serializedString.begin(), serializedString.end(), std::back_inserter(dataCacheData));
+        }
+    }
 
-    // Set the name of the output .armnn file.
-    fs::path dumpPath = dumpDir;
-    fs::path tempFilePath = dumpPath / (timestamp + "_network.armnn");
-    fileName = tempFilePath.string();
-
-    // Save serialized network to a file
-    std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
-    bool serialized = serializer->SaveSerializedToStream(serializedFile);
-    if (!serialized)
+    if (bSerializeToFile)
     {
-        ALOGW("An error occurred when serializing to file %s", fileName.c_str());
+        // Set the name of the output .armnn file.
+        fs::path dumpPath = dumpDir;
+        std::string timestamp = GetFileTimestamp();
+        fs::path tempFilePath = dumpPath / (timestamp + "_network.armnn");
+        fileName = tempFilePath.string();
+
+        // Save serialized network to a file
+        std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
+        auto serialized = serializer->SaveSerializedToStream(serializedFile);
+        if (!serialized)
+        {
+            ALOGW("An error occurred when serializing to file %s", fileName.c_str());
+        }
     }
     return fileName;
 }
diff --git a/Utils.hpp b/Utils.hpp
index da101535..9bd28ba6 100644
--- a/Utils.hpp
+++ b/Utils.hpp
@@ -139,7 +139,10 @@ void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
 std::string ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork,
                                         const std::string& dumpDir);
 
-std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir);
+std::string SerializeNetwork(const armnn::INetwork& network,
+                             const std::string& dumpDir,
+                             std::vector<uint8_t>& dataCacheData,
+                             bool dataCachingActive = true);
 
 void RenameExportedFiles(const std::string& existingSerializedFileName,
                          const std::string& existingDotFileName,
-- 
cgit v1.2.1
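Taken together, the patch gives the NNAPI runtime a complete caching round
trip. A hypothetical harness (handle setup and variable names are
illustrative, not part of the patch) would drive the new entry points like
this:

    uint32_t numModelCacheFiles = 0;
    uint32_t numDataCacheFiles  = 0;
    driver->getNumberOfCacheFilesNeeded(
        [&](V1_0::ErrorStatus, uint32_t modelFiles, uint32_t dataFiles)
        {
            numModelCacheFiles = modelFiles;  // one per backend that supports caching
            numDataCacheFiles  = dataFiles;   // always 1: the serialized Arm NN model
        });

    // First compilation: convert, optimize and load the model, then write the
    // serialized network (and the GpuAcc kernel blob) through the supplied fds.
    driver->prepareModel_1_2(model, preference, modelCacheHandles, dataCacheHandles, token, callback);

    // Subsequent compilations with the same token: conversion is skipped and the
    // network is deserialized and reloaded from the cache files.
    driver->prepareModelFromCache(modelCacheHandles, dataCacheHandles, token, callback);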