diff options
Diffstat (limited to '1.2')
-rw-r--r-- | 1.2/ArmnnDriver.hpp | 44 | ||||
-rw-r--r-- | 1.2/ArmnnDriverImpl.cpp | 450 | ||||
-rw-r--r-- | 1.2/ArmnnDriverImpl.hpp | 30 | ||||
-rw-r--r-- | 1.2/HalPolicy.cpp | 145 | ||||
-rw-r--r-- | 1.2/HalPolicy.hpp | 35 |
5 files changed, 602 insertions, 102 deletions
diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp index 5227272f..c855b527 100644 --- a/1.2/ArmnnDriver.hpp +++ b/1.2/ArmnnDriver.hpp @@ -19,6 +19,8 @@ #include "../1.0/ArmnnDriverImpl.hpp" #include "../1.0/HalPolicy.hpp" +#include <armnn/BackendHelper.hpp> + #include <log/log.h> namespace armnn_driver @@ -129,26 +131,32 @@ public: Return<void> getType(getType_cb cb) { ALOGV("hal_1_2::ArmnnDriver::getType()"); - - cb(V1_0::ErrorStatus::NONE, V1_2::DeviceType::CPU); + const auto device_type = hal_1_2::HalPolicy::GetDeviceTypeFromOptions(this->m_Options); + cb(V1_0::ErrorStatus::NONE, device_type); return Void(); } Return<V1_0::ErrorStatus> prepareModelFromCache( - const android::hardware::hidl_vec<android::hardware::hidl_handle>&, - const android::hardware::hidl_vec<android::hardware::hidl_handle>&, - const HidlToken&, - const android::sp<V1_2::IPreparedModelCallback>& callback) + const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, + const HidlToken& token, + const android::sp<V1_2::IPreparedModelCallback>& cb) { ALOGV("hal_1_2::ArmnnDriver::prepareModelFromCache()"); - callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr); - return V1_0::ErrorStatus::GENERAL_FAILURE; + return ArmnnDriverImpl::prepareModelFromCache(m_Runtime, + m_Options, + modelCacheHandle, + dataCacheHandle, + token, + cb); } - Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference, - const android::hardware::hidl_vec<android::hardware::hidl_handle>&, - const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&, - const android::sp<V1_2::IPreparedModelCallback>& cb) + Return<V1_0::ErrorStatus> prepareModel_1_2( + const V1_2::Model& model, V1_1::ExecutionPreference preference, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, + const HidlToken& token, + const android::sp<V1_2::IPreparedModelCallback>& cb) { ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()"); @@ -165,6 +173,9 @@ public: m_ClTunedParameters, m_Options, model, + modelCacheHandle, + dataCacheHandle, + token, cb, model.relaxComputationFloat32toFloat16 && m_Options.GetFp16Enabled()); @@ -198,9 +209,12 @@ public: Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) { ALOGV("hal_1_2::ArmnnDriver::getSupportedExtensions()"); - - // Set both numbers to be 0 for cache not supported. - cb(V1_0::ErrorStatus::NONE, 0, 0); + unsigned int numberOfCachedModelFiles = 0; + for (auto& backend : m_Options.GetBackends()) + { + numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); + } + cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul); return Void(); } }; diff --git a/1.2/ArmnnDriverImpl.cpp b/1.2/ArmnnDriverImpl.cpp index ccf82d0e..f0a426fa 100644 --- a/1.2/ArmnnDriverImpl.cpp +++ b/1.2/ArmnnDriverImpl.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // @@ -8,7 +8,11 @@ #include "../ModelToINetworkConverter.hpp" #include "../SystemPropertiesUtils.hpp" +#include <armnnDeserializer/IDeserializer.hpp> + #include <log/log.h> +#include <sys/stat.h> +#include <chrono> namespace { @@ -90,11 +94,16 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2( const armnn::IGpuAccTunedParametersPtr& clTunedParameters, const DriverOptions& options, const V1_2::Model& model, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, + const HidlToken& token, const android::sp<V1_2::IPreparedModelCallback>& cb, bool float32ToFloat16) { ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_2()"); + std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now(); + if (cb.get() == nullptr) { ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel"); @@ -127,20 +136,55 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2( // Serialize the network graph to a .armnn file if an output directory // has been specified in the drivers' arguments. + std::vector<uint8_t> dataCacheData; + bool serializeToFile = dataCacheHandle.size() < 1 ? false : true; auto serializedNetworkFileName = - SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir()); + SerializeNetwork(*modelConverter.GetINetwork(), + options.GetRequestInputsAndOutputsDumpDir(), + dataCacheData, + serializeToFile); // Optimize the network armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); - armnn::OptimizerOptions OptOptions; - OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16; + armnn::OptimizerOptionsOpaque OptOptions; + OptOptions.SetReduceFp32ToFp16(float32ToFloat16); + OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled()); + + int cachedFd = -1; + bool saveCachedNetwork = options.SaveCachedNetwork(); + + unsigned int numberOfCachedModelFiles = 0; + if (modelCacheHandle.size() > 0) + { + unsigned int index = 0; + for (auto& backend : options.GetBackends()) + { + // modelCacheHandle size should be equal to numberOfCachedModelFiles + // modelCacheHandle vector should be in same order as backends + auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); + if (numberOfCacheFiles > 0) + { + numberOfCachedModelFiles += numberOfCacheFiles; + if (modelCacheHandle[index]->numFds == 1) + { + if (backend == armnn::Compute::GpuAcc) + { + cachedFd = modelCacheHandle[index]->data[0]; + saveCachedNetwork = true; + } + } + index += numberOfCachedModelFiles; + } + } + } armnn::BackendOptions gpuAcc("GpuAcc", { { "FastMathEnabled", options.IsFastMathEnabled() }, - { "SaveCachedNetwork", options.SaveCachedNetwork() }, + { "SaveCachedNetwork", saveCachedNetwork }, { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() }, - { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() } + { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }, + { "CachedFileDescriptor", cachedFd } }); armnn::BackendOptions cpuAcc("CpuAcc", @@ -148,8 +192,8 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2( { "FastMathEnabled", options.IsFastMathEnabled() }, { "NumberOfThreads", options.GetNumberOfThreads() } }); - OptOptions.m_ModelOptions.push_back(gpuAcc); - OptOptions.m_ModelOptions.push_back(cpuAcc); + OptOptions.AddModelOption(gpuAcc); + OptOptions.AddModelOption(cpuAcc); std::vector<std::string> errMessages; try @@ -188,11 +232,19 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2( // Load it into the runtime. armnn::NetworkId netId = 0; + std::string msg; + armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), + MemorySource::Undefined, + MemorySource::Undefined, + options.IsGpuProfilingEnabled()); + + auto numInputs = getMainModel(model).inputIndexes.size(); + auto numOutputs = getMainModel(model).outputIndexes.size(); try { - if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success) + if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) { - return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb); + return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb); } } catch (std::exception& e) @@ -216,32 +268,388 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2( runtime.get(), model, options.GetRequestInputsAndOutputsDumpDir(), - options.IsGpuProfilingEnabled())); + options.IsGpuProfilingEnabled(), + options.isAsyncModelExecutionEnabled(), + options.getNoOfArmnnThreads(), + options.isImportEnabled(), + options.isExportEnabled())); // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if // this is enabled) before the first 'real' inference which removes the overhead of the first inference. - if (!preparedModel->ExecuteWithDummyInputs()) + // Only run this if the GpuAcc backend has been added to options + if (std::find(options.GetBackends().begin(), + options.GetBackends().end(), + armnn::Compute::GpuAcc) != options.GetBackends().end()) { - return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); + if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs)) + { + return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); + } + + if (clTunedParameters && + options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + { + // Now that we've done one inference the CL kernel parameters will have been tuned, + // so save the updated file. + try + { + clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + } + catch (std::exception& error) + { + ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", + options.GetClTunedParametersFile().c_str(), error.what()); + } + } } - if (clTunedParameters && - options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + size_t hashValue = 0; + // Cache the model + if (dataCacheHandle.size() > 0) { - // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file. - try + // Cache the Arm NN model, should be only 1 + if (dataCacheHandle.size() != 1) + { + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; + } + + if (dataCacheHandle[0]->numFds != 1) { - clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1."); + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; } - catch (std::exception& error) + + if (dataCacheHandle[0]->data[0] < 0) + { + ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0"); + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; + } + + int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; + if (dataCacheFileAccessMode != O_RDWR) { - ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", - options.GetClTunedParametersFile().c_str(), error.what()); + ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_2(): Invalid Access Mode."); + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; } + + write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size()); + hashValue = CacheDataHandlerInstance().Hash(dataCacheData); + } + + if (modelCacheHandle.size() > 0) + { + if (modelCacheHandle.size() != numberOfCachedModelFiles) + { + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + return V1_0::ErrorStatus::NONE; + } + for (uint32_t i = 0; i < modelCacheHandle.size(); ++i) + { + if (modelCacheHandle[i]->numFds == 1) + { + int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE; + if (modelCacheFileAccessMode != O_RDONLY) + { + struct stat statBuffer; + if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0) + { + long modelDataSize = statBuffer.st_size; + if (modelDataSize > 0) + { + std::vector <uint8_t> modelData(modelDataSize); + pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0); + hashValue ^= CacheDataHandlerInstance().Hash(modelData); + } + } + } + } + } + } + if (hashValue != 0) + { + CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size()); } NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds> + (std::chrono::system_clock::now() - prepareModelTimepoint).count()); + + return V1_0::ErrorStatus::NONE; +} + +Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, + const HidlToken& token, + const android::sp<V1_2::IPreparedModelCallback>& cb, + bool float32ToFloat16) +{ + ALOGV("ArmnnDriverImpl::prepareModelFromCache()"); + std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now(); + + if (cb.get() == nullptr) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid callback passed to prepareModel"); + return V1_0::ErrorStatus::INVALID_ARGUMENT; + } + + if (!runtime) + { + return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb); + } + + if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) + { + FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid token passed!", cb); + return V1_0::ErrorStatus::INVALID_ARGUMENT; + } + + // DataCacheHandle size should always be 1 + // Arm NN model + if (dataCacheHandle.size() != 1) + { + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + // Check if model files cached they match the expected value + unsigned int numberOfCachedModelFiles = 0; + for (auto& backend : options.GetBackends()) + { + numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); + } + if (modelCacheHandle.size() != numberOfCachedModelFiles) + { + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid model cache!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + if (dataCacheHandle[0]->numFds != 1) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, numFds != 1."); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + if (dataCacheHandle[0]->data[0] < 0) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, fd < 0"); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; + if (dataCacheFileAccessMode != O_RDWR) + { + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + auto dataSize = CacheDataHandlerInstance().GetCacheSize(token); + if (dataSize == 0) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!"); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + int offset = 0; + { + struct stat statBuffer; + if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0) + { + unsigned long bufferSize = statBuffer.st_size; + if (bufferSize != dataSize) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!"); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + } + } + std::vector<uint8_t> dataCacheData(dataSize); + pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset); + auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData); + + int gpuAccCachedFd = -1; + bool saveCachedNetwork = false; + if (modelCacheHandle.size() > 0) + { + unsigned int index = 0; + for (auto& backend : options.GetBackends()) + { + // modelCacheHandle size should be equal to numberOfCachedModelFiles + // modelCacheHandle vector should be in same order as backends + auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); + if (numberOfCacheFiles > 0) + { + if (modelCacheHandle[index]->numFds != 1) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the model cache, numFds != 1."); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, + "Cannot read from the model cache, numFds != 1.", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + auto cachedFd = modelCacheHandle[index]->data[0]; + + int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE; + if (modelCacheFileAccessMode != O_RDWR) + { + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + struct stat statBuffer; + if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0) + { + long modelDataSize = statBuffer.st_size; + if (modelDataSize <= 0) + { + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Wrong cached model size!", cb); + return V1_0::ErrorStatus::NONE; + } + std::vector<uint8_t> modelData(modelDataSize); + pread(cachedFd, modelData.data(), modelData.size(), 0); + hashValue ^= CacheDataHandlerInstance().Hash(modelData); + + // For GpuAcc numberOfCachedFiles is 1 + if (backend == armnn::Compute::GpuAcc) + { + gpuAccCachedFd = cachedFd; + } + } + index += numberOfCacheFiles; + } + } + } + + if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size())) + { + ALOGW("ArmnnDriverImpl::prepareModelFromCache: ValidateHash() failed!"); + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ValidateHash Failed!", cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + // Deserialize the network.. + armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){}); + try + { + network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData); + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from Deserializer."; + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_0::ErrorStatus::GENERAL_FAILURE; + } + + // Optimize the network + armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); + armnn::OptimizerOptionsOpaque OptOptions; + OptOptions.SetReduceFp32ToFp16(float32ToFloat16); + OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled()); + + armnn::BackendOptions gpuAcc("GpuAcc", + { + {"FastMathEnabled", options.IsFastMathEnabled()}, + {"SaveCachedNetwork", saveCachedNetwork}, + {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()}, + {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()}, + {"CachedFileDescriptor", gpuAccCachedFd} + }); + + armnn::BackendOptions cpuAcc("CpuAcc", + { + {"FastMathEnabled", options.IsFastMathEnabled()}, + {"NumberOfThreads", options.GetNumberOfThreads()} + }); + OptOptions.AddModelOption(gpuAcc); + OptOptions.AddModelOption(cpuAcc); + + std::vector<std::string> errMessages; + try + { + optNet = armnn::Optimize(*network.get(), + options.GetBackends(), + runtime->GetDeviceSpec(), + OptOptions, + errMessages); + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from optimize."; + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_0::ErrorStatus::NONE; + } + + // Check that the optimized network is valid. + if (!optNet) + { + std::stringstream message; + message << "Invalid optimized network"; + for (const std::string& msg : errMessages) + { + message << "\n" << msg; + } + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_0::ErrorStatus::NONE; + } + + // Export the optimized network graph to a dot file if an output dump directory + // has been specified in the drivers' arguments. + std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet, + options.GetRequestInputsAndOutputsDumpDir()); + + // Load it into the runtime. + armnn::NetworkId netId = 0; + std::string msg; + armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), + MemorySource::Undefined, + MemorySource::Undefined, + options.IsGpuProfilingEnabled()); + + try + { + if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) + { + return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb); + } + } + catch (std::exception& e) + { + std::stringstream message; + message << "Exception (" << e.what() << ") caught from LoadNetwork."; + FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return V1_0::ErrorStatus::NONE; + } + + std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel( + new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>( + netId, + runtime.get(), + options.GetRequestInputsAndOutputsDumpDir(), + options.IsGpuProfilingEnabled(), + options.isAsyncModelExecutionEnabled(), + options.getNoOfArmnnThreads(), + options.isImportEnabled(), + options.isExportEnabled(), + true)); + + NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); + + ALOGV("ArmnnDriverImpl::prepareModelFromCache cache timing = %lld µs", + std::chrono::duration_cast<std::chrono::microseconds> + (std::chrono::system_clock::now() - modelFromCacheTimepoint).count()); + return V1_0::ErrorStatus::NONE; } diff --git a/1.2/ArmnnDriverImpl.hpp b/1.2/ArmnnDriverImpl.hpp index eeb491b6..70f46cba 100644 --- a/1.2/ArmnnDriverImpl.hpp +++ b/1.2/ArmnnDriverImpl.hpp @@ -7,10 +7,13 @@ #include <HalInterfaces.h> +#include "../CacheDataHandler.hpp" #include "../DriverOptions.hpp" #include <armnn/ArmNN.hpp> +#include <NeuralNetworks.h> + #ifdef ARMNN_ANDROID_R using namespace android::nn::hal; #endif @@ -30,12 +33,27 @@ namespace hal_1_2 class ArmnnDriverImpl { public: - static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2(const armnn::IRuntimePtr& runtime, - const armnn::IGpuAccTunedParametersPtr& clTunedParameters, - const DriverOptions& options, - const V1_2::Model& model, - const android::sp<V1_2::IPreparedModelCallback>& cb, - bool float32ToFloat16 = false); + using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>; + + static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const V1_2::Model& model, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, + const HidlToken& token, + const android::sp<V1_2::IPreparedModelCallback>& cb, + bool float32ToFloat16 = false); + + static Return<V1_0::ErrorStatus> prepareModelFromCache( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, + const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, + const HidlToken& token, + const android::sp<V1_2::IPreparedModelCallback>& cb, + bool float32ToFloat16 = false); static Return<void> getCapabilities_1_2(const armnn::IRuntimePtr& runtime, V1_2::IDevice::getCapabilities_1_2_cb cb); diff --git a/1.2/HalPolicy.cpp b/1.2/HalPolicy.cpp index e96c4cb0..9c44003f 100644 --- a/1.2/HalPolicy.cpp +++ b/1.2/HalPolicy.cpp @@ -1,9 +1,10 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2019-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "HalPolicy.hpp" +#include "DriverOptions.hpp" namespace armnn_driver { @@ -17,6 +18,33 @@ namespace } // anonymous namespace +HalPolicy::DeviceType HalPolicy::GetDeviceTypeFromOptions(const DriverOptions& options) +{ + // Query backends list from the options + auto backends = options.GetBackends(); + // Return first backend + if(backends.size()>0) + { + const auto &first_backend = backends[0]; + if(first_backend.IsCpuAcc()||first_backend.IsCpuRef()) + { + return V1_2::DeviceType::CPU; + } + else if(first_backend.IsGpuAcc()) + { + return V1_2::DeviceType::GPU; + } + else + { + return V1_2::DeviceType::ACCELERATOR; + } + } + else + { + return V1_2::DeviceType::CPU; + } +} + bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, ConversionData& data) { switch (operation.type) @@ -24,7 +52,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, case V1_2::OperationType::ABS: return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Abs); case V1_2::OperationType::ADD: - return ConvertAdd(operation, model, data); + return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Add); case V1_2::OperationType::ARGMAX: return ConvertArgMinMax(operation, model, data, ArgMinMaxFunction::Max); case V1_2::OperationType::ARGMIN: @@ -33,6 +61,10 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, return ConvertAveragePool2d(operation, model, data); case V1_2::OperationType::BATCH_TO_SPACE_ND: return ConvertBatchToSpaceNd(operation, model, data); + case V1_2::OperationType::CAST: + return ConvertCast(operation, model, data); + case V1_2::OperationType::CHANNEL_SHUFFLE: + return ConvertChannelShuffle(operation, model, data); case V1_2::OperationType::CONCATENATION: return ConvertConcatenation(operation, model, data); case V1_2::OperationType::CONV_2D: @@ -44,7 +76,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, case V1_2::OperationType::DEQUANTIZE: return ConvertDequantize(operation, model, data); case V1_2::OperationType::DIV: - return ConvertDiv(operation, model, data); + return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Div); case V1_2::OperationType::EQUAL: return ConvertComparison(operation, model, data, ComparisonOperation::Equal); case V1_2::OperationType::EXP: @@ -75,6 +107,8 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, return ConvertComparison(operation, model, data, ComparisonOperation::LessOrEqual); case V1_2::OperationType::LOCAL_RESPONSE_NORMALIZATION: return ConvertLocalResponseNormalization(operation, model, data); + case V1_2::OperationType::LOG: + return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Log); case V1_2::OperationType::LOGISTIC: return ConvertLogistic(operation, model, data); case V1_2::OperationType::LOG_SOFTMAX: @@ -84,13 +118,13 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, case V1_2::OperationType::MAX_POOL_2D: return ConvertMaxPool2d(operation, model, data); case V1_2::OperationType::MAXIMUM: - return ConvertMaximum(operation, model, data); + return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Maximum); case V1_2::OperationType::MEAN: return ConvertMean(operation, model, data); case V1_2::OperationType::MINIMUM: - return ConvertMinimum(operation, model, data); + return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Minimum); case V1_2::OperationType::MUL: - return ConvertMul(operation, model, data); + return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Mul); case V1_2::OperationType::NEG: return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Neg); case V1_2::OperationType::NOT_EQUAL: @@ -99,6 +133,10 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, return ConvertPad(operation, model, data); case V1_2::OperationType::PAD_V2: return ConvertPadV2(operation, model, data); +// There's a problem with the combination of Hal 1.2, Android Q and the POW operator. The problem does not happen +// with Hal 1.3. +// case V1_2::OperationType::POW: +// return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Power); case V1_2::OperationType::PRELU: return ConvertPrelu(operation, model, data); case V1_2::OperationType::QUANTIZE: @@ -109,6 +147,8 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, return ConvertReduce(operation, model, data, ReduceOperation::Max); case V1_2::OperationType::REDUCE_MIN: return ConvertReduce(operation, model, data, ReduceOperation::Min); + case V1_2::OperationType::REDUCE_PROD: + return ConvertReduce(operation, model, data, ReduceOperation::Prod); case V1_2::OperationType::REDUCE_SUM: return ConvertReduce(operation, model, data, ReduceOperation::Sum); case V1_2::OperationType::RELU: @@ -125,38 +165,40 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, return ConvertResize(operation, model, data, ResizeMethod::NearestNeighbor); case V1_2::OperationType::RSQRT: return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Rsqrt); + case V1_2::OperationType::SIN: + return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Sin); + case V1_2::OperationType::SOFTMAX: + return ConvertSoftmax(operation, model, data); + case V1_2::OperationType::SPACE_TO_BATCH_ND : + return ConvertSpaceToBatchNd(operation, model, data); + case V1_2::OperationType::SPACE_TO_DEPTH: + return ConvertSpaceToDepth(operation, model, data); + case V1_2::OperationType::SPLIT: + return ConvertSplit(operation, model, data); case V1_2::OperationType::SQRT: return ConvertSqrt(operation, model, data); case V1_2::OperationType::SQUEEZE: return ConvertSqueeze(operation, model, data); case V1_2::OperationType::STRIDED_SLICE: return ConvertStridedSlice(operation, model, data); + case V1_2::OperationType::SUB: + return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Sub); case V1_2::OperationType::TRANSPOSE: return ConvertTranspose(operation, model, data); case V1_2::OperationType::TRANSPOSE_CONV_2D: return ConvertTransposeConv2d(operation, model, data); - case V1_2::OperationType::SOFTMAX: - return ConvertSoftmax(operation, model, data); - case V1_2::OperationType::SPACE_TO_BATCH_ND : - return ConvertSpaceToBatchNd(operation, model, data); - case V1_2::OperationType::SPACE_TO_DEPTH: - return ConvertSpaceToDepth(operation, model, data); - case V1_2::OperationType::SUB: - return ConvertSub(operation, model, data); case V1_2::OperationType::TANH: return ConvertTanH(operation, model, data); + case V1_2::OperationType::TILE: + return ConvertTile(operation, model, data); + case V1_2::OperationType::UNIDIRECTIONAL_SEQUENCE_LSTM: + return ConvertUnidirectionalSequenceLstm(operation, model, data); default: return Fail("%s: Operation type %s not supported in ArmnnDriver", __func__, toString(operation.type).c_str()); } } -bool HalPolicy::ConvertAdd(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertAdd()"); - return ::ConvertAdd<hal_1_2::HalPolicy>(operation, model, data); -} - bool HalPolicy::ConvertArgMinMax(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data, @@ -178,6 +220,18 @@ bool HalPolicy::ConvertBatchToSpaceNd(const Operation& operation, const Model& m return ::ConvertBatchToSpaceNd<hal_1_2::HalPolicy>(operation, model, data); } +bool HalPolicy::ConvertCast(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertCast()"); + return ::ConvertCast<hal_1_2::HalPolicy>(operation, model, data); +} + +bool HalPolicy::ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertChannelShuffle()"); + return ::ConvertChannelShuffle<hal_1_2::HalPolicy>(operation, model, data); +} + bool HalPolicy::ConvertComparison(const Operation& operation, const Model& model, ConversionData& data, @@ -217,10 +271,13 @@ bool HalPolicy::ConvertDequantize(const Operation& operation, const Model& model return ::ConvertDequantize_1_2<hal_1_2::HalPolicy>(operation, model, data); } -bool HalPolicy::ConvertDiv(const Operation& operation, const Model& model, ConversionData& data) +bool HalPolicy::ConvertElementwiseBinary(const Operation& operation, + const Model& model, + ConversionData& data, + BinaryOperation binaryOperation) { - ALOGV("hal_1_2::HalPolicy::ConvertDiv()"); - return ::ConvertDiv<hal_1_2::HalPolicy>(operation, model, data); + ALOGV("hal_1_2::HalPolicy::ConvertElementwiseBinary()"); + return ::ConvertElementwiseBinary<hal_1_2::HalPolicy>(operation, model, data, binaryOperation); } bool HalPolicy::ConvertElementwiseUnary(const Operation& operation, @@ -306,30 +363,12 @@ bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model, return ConvertPooling2d<hal_1_2::HalPolicy>(operation, __func__, PoolingAlgorithm::Max, model, data); } -bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertMaximum()"); - return ::ConvertMaximum<hal_1_2::HalPolicy>(operation, model, data); -} - bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertMean()"); return ::ConvertMean<hal_1_2::HalPolicy>(operation, model, data); } -bool HalPolicy::ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertMinimum()"); - return ::ConvertMinimum<hal_1_2::HalPolicy>(operation, model, data); -} - -bool HalPolicy::ConvertMul(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertMul()"); - return ::ConvertMul<hal_1_2::HalPolicy>(operation, model, data); -} - bool HalPolicy::ConvertPad(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertPad()"); @@ -420,12 +459,6 @@ bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, C return ::ConvertSoftmax<hal_1_2::HalPolicy>(operation, model, data); } -bool HalPolicy::ConvertSub(const Operation& operation, const Model& model, ConversionData& data) -{ - ALOGV("hal_1_2::HalPolicy::ConvertSub()"); - return ::ConvertSub<hal_1_2::HalPolicy>(operation, model, data); -} - bool HalPolicy::ConvertTanH(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertTanH()"); @@ -438,6 +471,12 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv return ::ConvertLstm<hal_1_2::HalPolicy>(operation, model, data); } +bool HalPolicy::ConvertSplit(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertSplit()"); + return ::ConvertSplit<hal_1_2::HalPolicy>(operation, model, data); +} + bool HalPolicy::ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data) { ALOGV("hal_1_2::HalPolicy::ConvertSqrt()"); @@ -471,5 +510,17 @@ bool HalPolicy::ConvertTransposeConv2d(const Operation& operation, const Model& return ::ConvertTransposeConv2d<hal_1_2::HalPolicy>(operation, model, data); } +bool HalPolicy::ConvertTile(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertTile()"); + return ::ConvertTile<hal_1_2::HalPolicy>(operation, model, data); +} + +bool HalPolicy::ConvertUnidirectionalSequenceLstm(const Operation& operation, const Model& model, ConversionData& data) +{ + ALOGV("hal_1_2::HalPolicy::ConvertUnidirectionalSequenceLstm()"); + return ::ConvertUnidirectionalSequenceLstm<hal_1_2::HalPolicy>(operation, model, data); +} + } // namespace hal_1_2 } // namespace armnn_driver diff --git a/1.2/HalPolicy.hpp b/1.2/HalPolicy.hpp index abd60e72..4d77dfe5 100644 --- a/1.2/HalPolicy.hpp +++ b/1.2/HalPolicy.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2019-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -16,6 +16,7 @@ namespace V1_2 = ::android::hardware::neuralnetworks::V1_2; namespace armnn_driver { +class DriverOptions; namespace hal_1_2 { @@ -31,12 +32,13 @@ public: using ExecutionCallback = V1_2::IExecutionCallback; using getSupportedOperations_cb = V1_2::IDevice::getSupportedOperations_1_2_cb; using ErrorStatus = V1_0::ErrorStatus; + using DeviceType = V1_2::DeviceType; + + static DeviceType GetDeviceTypeFromOptions(const DriverOptions& options); static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data); private: - static bool ConvertAdd(const Operation& operation, const Model& model, ConversionData& data); - static bool ConvertArgMinMax(const Operation& operation, const Model& model, ConversionData& data, @@ -46,6 +48,10 @@ private: static bool ConvertBatchToSpaceNd(const Operation& operation, const Model& model, ConversionData& data); + static bool ConvertCast(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data); + static bool ConvertComparison(const Operation& operation, const Model& model, ConversionData& data, @@ -61,10 +67,13 @@ private: static bool ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data); - static bool ConvertDiv(const Operation& operation, const Model& model, ConversionData& data); - static bool ConvertExpandDims(const Operation& operation, const Model& model, ConversionData& data); + static bool ConvertElementwiseBinary(const Operation& operation, + const Model& model, + ConversionData& data, + armnn::BinaryOperation binaryOperation); + static bool ConvertElementwiseUnary(const Operation& operation, const Model& model, ConversionData& data, @@ -96,14 +105,8 @@ private: static bool ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data); - static bool ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data); - static bool ConvertMean(const Operation& operation, const Model& model, ConversionData& data); - static bool ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data); - - static bool ConvertMul(const Operation& operation, const Model& model, ConversionData& data); - static bool ConvertPad(const Operation& operation, const Model& model, ConversionData& data); static bool ConvertPadV2(const Operation& operation, const Model& model, ConversionData& data); @@ -138,19 +141,25 @@ private: static bool ConvertSpaceToDepth(const Operation& operation, const Model& model, ConversionData& data); + static bool ConvertSplit(const Operation& operation, const Model& model, ConversionData& data); + static bool ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data); static bool ConvertSqueeze(const Operation& operation, const Model& model, ConversionData& data); static bool ConvertStridedSlice(const Operation& operation, const Model& model, ConversionData& data); - static bool ConvertSub(const Operation& operation, const Model& model, ConversionData& data); - static bool ConvertTanH(const Operation& operation, const Model& model, ConversionData& data); static bool ConvertTranspose(const Operation& operation, const Model& model, ConversionData& data); static bool ConvertTransposeConv2d(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertTile(const Operation& operation, const Model& model, ConversionData& data); + + static bool ConvertUnidirectionalSequenceLstm(const Operation& operation, + const Model& model, + ConversionData& data); }; } // namespace hal_1_2 |