diff options
Diffstat (limited to '1.3/ArmnnDriverImpl.cpp')
-rw-r--r-- | 1.3/ArmnnDriverImpl.cpp | 452 |
1 files changed, 432 insertions, 20 deletions
diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp index 6d8fbe64..ec176d59 100644 --- a/1.3/ArmnnDriverImpl.cpp +++ b/1.3/ArmnnDriverImpl.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd. All rights reserved. +// Copyright © 2020, 2023 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // @@ -8,8 +8,13 @@ #include "../ModelToINetworkConverter.hpp" #include "../SystemPropertiesUtils.hpp" +#include <armnnDeserializer/IDeserializer.hpp> + #include <log/log.h> +#include <sys/stat.h> +#include <chrono> + namespace { const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime"; @@ -100,12 +105,17 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3( const armnn::IGpuAccTunedParametersPtr& clTunedParameters, const DriverOptions& options, const V1_3::Model& model, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, + const HidlToken& token, const android::sp<V1_3::IPreparedModelCallback>& cb, bool float32ToFloat16, V1_3::Priority priority) { ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()"); + std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now(); + if (cb.get() == nullptr) { ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel"); @@ -138,20 +148,56 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3( // Serialize the network graph to a .armnn file if an output directory // has been specified in the drivers' arguments. + std::vector<uint8_t> dataCacheData; + bool serializeToFile = dataCacheHandle.size() < 1 ? false : true; auto serializedNetworkFileName = - SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir()); + SerializeNetwork(*modelConverter.GetINetwork(), + options.GetRequestInputsAndOutputsDumpDir(), + dataCacheData, + serializeToFile); // Optimize the network armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); - armnn::OptimizerOptions OptOptions; - OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16; + armnn::OptimizerOptionsOpaque OptOptions; + OptOptions.SetReduceFp32ToFp16(float32ToFloat16); + OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled()); + + int cachedFd = -1; + bool saveCachedNetwork = options.SaveCachedNetwork(); + + unsigned int numberOfCachedModelFiles = 0; + if (modelCacheHandle.size() > 0) + { + unsigned int index = 0; + for (auto& backend : options.GetBackends()) + { + // modelCacheHandle size should be equal to numberOfCachedModelFiles + // modelCacheHandle vector should be in same order as backends + auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); + if (numberOfCacheFiles > 0) + { + numberOfCachedModelFiles += numberOfCacheFiles; + if (modelCacheHandle[index]->numFds == 1) + { + // For GpuAcc numberOfCachedFiles is 1 + if (backend == armnn::Compute::GpuAcc) + { + cachedFd = modelCacheHandle[index]->data[0]; + saveCachedNetwork = true; + } + } + index += numberOfCachedModelFiles; + } + } + } armnn::BackendOptions gpuAcc("GpuAcc", { { "FastMathEnabled", options.IsFastMathEnabled() }, - { "SaveCachedNetwork", options.SaveCachedNetwork() }, + { "SaveCachedNetwork", saveCachedNetwork }, { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() }, - { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() } + { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }, + { "CachedFileDescriptor", cachedFd } }); armnn::BackendOptions cpuAcc("CpuAcc", @@ -159,8 +205,8 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3( { "FastMathEnabled", options.IsFastMathEnabled() }, { "NumberOfThreads", options.GetNumberOfThreads() } }); - OptOptions.m_ModelOptions.push_back(gpuAcc); - OptOptions.m_ModelOptions.push_back(cpuAcc); + OptOptions.AddModelOption(gpuAcc); + OptOptions.AddModelOption(cpuAcc); std::vector<std::string> errMessages; try @@ -199,9 +245,17 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3( // Load it into the runtime. armnn::NetworkId netId = 0; + std::string msg; + armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), + MemorySource::Undefined, + MemorySource::Undefined, + options.IsGpuProfilingEnabled()); + + auto numInputs = getMainModel(model).inputIndexes.size(); + auto numOutputs = getMainModel(model).outputIndexes.size(); try { - if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success) + if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) { return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb); } @@ -228,32 +282,390 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3( model, options.GetRequestInputsAndOutputsDumpDir(), options.IsGpuProfilingEnabled(), - priority)); + priority, + options.isAsyncModelExecutionEnabled(), + options.getNoOfArmnnThreads(), + options.isImportEnabled(), + options.isExportEnabled())); // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if // this is enabled) before the first 'real' inference which removes the overhead of the first inference. - if (!preparedModel->ExecuteWithDummyInputs()) + // Only run this if the GpuAcc backend has been added to options + if (std::find(options.GetBackends().begin(), + options.GetBackends().end(), + armnn::Compute::GpuAcc) != options.GetBackends().end()) + { + if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs)) + { + return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); + } + + if (clTunedParameters && + options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + { + // Now that we've done one inference the CL kernel parameters will have been tuned, + // so save the updated file. + try + { + clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + } + catch (std::exception& error) + { + ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", + options.GetClTunedParametersFile().c_str(), error.what()); + } + } + } + size_t hashValue = 0; + // Cache the model + if (dataCacheHandle.size() > 0) + { + // Cache the Arm NN model + if (dataCacheHandle.size() != 1) + { + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; + } + + if (dataCacheHandle[0]->numFds != 1) + { + ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1."); + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; + } + + if (dataCacheHandle[0]->data[0] < 0) + { + ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0"); + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; + } + + int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; + if (dataCacheFileAccessMode != O_RDWR) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Invalid Access Mode."); + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; + } + + write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size()); + hashValue = CacheDataHandlerInstance().Hash(dataCacheData); + } + + // Cache the model data + if (modelCacheHandle.size() > 0) + { + if (modelCacheHandle.size() != numberOfCachedModelFiles) + { + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + return V1_3::ErrorStatus::NONE; + } + + for (uint32_t i = 0; i < modelCacheHandle.size(); ++i) + { + if (modelCacheHandle[i]->numFds == 1) + { + int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE; + if (modelCacheFileAccessMode != O_RDONLY) + { + struct stat statBuffer; + if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0) + { + long modelDataSize = statBuffer.st_size; + if (modelDataSize > 0) + { + std::vector<uint8_t> modelData(modelDataSize); + pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0); + hashValue ^= CacheDataHandlerInstance().Hash(modelData); + } + } + } + } + } + } + if (hashValue != 0) + { + CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size()); + } + + NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + + ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds> + (std::chrono::system_clock::now() - prepareModelTimepoint).count()); + + + return V1_3::ErrorStatus::NONE; +} + +Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache_1_3( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, + const HidlToken& token, + const android::sp<V1_3::IPreparedModelCallback>& cb) +{ + ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()"); + std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now(); + + if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) + { + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + if (cb.get() == nullptr) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3"); + return V1_3::ErrorStatus::INVALID_ARGUMENT; + } + + if (!runtime) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable"); + return V1_3::ErrorStatus::DEVICE_UNAVAILABLE; + } + + // DataCacheHandle size should always be 1 + // Arm NN model + if (dataCacheHandle.size() != 1) + { + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + // Check if model files cached they match the expected value + unsigned int numberOfCachedModelFiles = 0; + for (auto& backend : options.GetBackends()) + { + numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); + } + if (modelCacheHandle.size() != numberOfCachedModelFiles) + { + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + if (dataCacheHandle[0]->numFds != 1) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1."); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + if (dataCacheHandle[0]->data[0] < 0) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, fd < 0"); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; + if (dataCacheFileAccessMode != O_RDWR) { - return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; } - if (clTunedParameters && - options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + auto dataSize = CacheDataHandlerInstance().GetCacheSize(token); + if (dataSize == 0) { - // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file. - try + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!"); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + int offset = 0; + { + struct stat statBuffer; + if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0) + { + unsigned long bufferSize = statBuffer.st_size; + if (bufferSize != dataSize) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!"); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + } + } + std::vector<uint8_t> dataCacheData(dataSize); + pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset); + auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData); + + int gpuAccCachedFd = -1; + bool saveCachedNetwork = false; + if (modelCacheHandle.size() > 0) + { + unsigned int index = 0; + for (auto& backend : options.GetBackends()) + { + // modelCacheHandle size should be equal to numberOfCachedModelFiles + // modelCacheHandle vector should be in same order as backends + auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); + if (numberOfCacheFiles > 0) + { + if (modelCacheHandle[index]->numFds != 1) + { + ALOGW( + "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1."); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + auto cachedFd = modelCacheHandle[index]->data[0]; + + int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE; + if (modelCacheFileAccessMode != O_RDWR) + { + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + struct stat statBuffer; + if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0) + { + long modelDataSize = statBuffer.st_size; + if (modelDataSize <= 0) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Wrong cached model size!"); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::NONE; + } + std::vector<uint8_t> modelData(modelDataSize); + pread(cachedFd, modelData.data(), modelData.size(), 0); + hashValue ^= CacheDataHandlerInstance().Hash(modelData); + + // For GpuAcc numberOfCachedFiles is 1 + if (backend == armnn::Compute::GpuAcc) + { + gpuAccCachedFd = cachedFd; + } + } + index += numberOfCacheFiles; + } + } + } + + if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size())) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!"); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + // Deserialize the network.. + armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){}); + try + { + network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData); + } + catch (std::exception&) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Exception caught from Deserializer!"); + cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); + return V1_3::ErrorStatus::GENERAL_FAILURE; + } + + // Optimize the network + armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); + armnn::OptimizerOptionsOpaque OptOptions; + OptOptions.SetReduceFp32ToFp16(options.GetFp16Enabled()); + OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled()); + + armnn::BackendOptions gpuAcc("GpuAcc", + { + {"FastMathEnabled", options.IsFastMathEnabled()}, + {"SaveCachedNetwork", saveCachedNetwork}, + {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()}, + {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()}, + {"CachedFileDescriptor", gpuAccCachedFd} + }); + + armnn::BackendOptions cpuAcc("CpuAcc", + { + {"FastMathEnabled", options.IsFastMathEnabled()}, + {"NumberOfThreads", options.GetNumberOfThreads()} + }); + OptOptions.AddModelOption(gpuAcc); + OptOptions.AddModelOption(cpuAcc); + + std::vector<std::string> errMessages; + try + { + optNet = armnn::Optimize(*network.get(), + options.GetBackends(), + runtime->GetDeviceSpec(), + OptOptions, + errMessages); + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from optimize."; + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_3::ErrorStatus::NONE; + } + + // Check that the optimized network is valid. + if (!optNet) + { + std::stringstream message; + message << "Invalid optimized network"; + for (const std::string& msg : errMessages) { - clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + message << "\n" << msg; } - catch (std::exception& error) + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_3::ErrorStatus::NONE; + } + + // Export the optimized network graph to a dot file if an output dump directory + // has been specified in the drivers' arguments. + std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet, + options.GetRequestInputsAndOutputsDumpDir()); + + // Load it into the runtime. + armnn::NetworkId netId = 0; + std::string msg; + armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), + MemorySource::Undefined, + MemorySource::Undefined, + options.IsGpuProfilingEnabled()); + + try + { + if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) { - ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", - options.GetClTunedParametersFile().c_str(), error.what()); + return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb); } } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from LoadNetwork."; + FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_3::ErrorStatus::NONE; + } + + std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel( + new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(netId, + runtime.get(), + options.GetRequestInputsAndOutputsDumpDir(), + options.IsGpuProfilingEnabled(), + V1_3::Priority::MEDIUM, + options.isAsyncModelExecutionEnabled(), + options.getNoOfArmnnThreads(), + options.isImportEnabled(), + options.isExportEnabled(), + true)); NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); + ALOGV("ArmnnDriverImpl::prepareModelFromCache timing = %lld µs", + std::chrono::duration_cast<std::chrono::microseconds> + (std::chrono::system_clock::now() - modelFromCacheTimepoint).count()); + return V1_3::ErrorStatus::NONE; } |