// // Copyright © 2020, 2023 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #include "ArmnnDriverImpl.hpp" #include "../ArmnnPreparedModel_1_3.hpp" #include "../ModelToINetworkConverter.hpp" #include "../SystemPropertiesUtils.hpp" #include #include #include #include namespace { const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime"; const char *g_RelaxedFloat32toFloat16PerformancePowerUsage = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage"; const char *g_ifPerformanceExecTime = "ArmNN.ifPerformance.execTime"; const char *g_ifPerformancePowerUsage = "ArmNN.ifPerformance.powerUsage"; const char *g_whilePerformanceExecTime = "ArmNN.whilePerformance.execTime"; const char *g_whilePerformancePowerUsage = "ArmNN.whilePerformance.powerUsage"; const char *g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime"; const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage"; const char *g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime"; const char *g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage"; const char *g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime"; const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage"; const char *g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime"; const char *g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage"; const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime = "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime"; const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage = "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage"; const char *g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime = "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime"; const char *g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage = "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage"; const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime = "Armnn.operandTypeTensorQuant16SymmPerformance.execTime"; const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage = "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage"; const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime = "Armnn.operandTypeTensorQuant8SymmPerformance.execTime"; const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage = "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage"; const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime = "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime"; const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage = "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage"; const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime"; const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage"; const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime"; const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage"; void NotifyCallbackAndCheck(const android::sp& callback, V1_3::ErrorStatus errorStatus, const android::sp& preparedModelPtr) { Return returned = callback->notify_1_3(errorStatus, preparedModelPtr); // This check is required, if the callback fails and it isn't checked it will bring down the service if (!returned.isOk()) { ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ", returned.description().c_str()); } } Return FailPrepareModel(V1_3::ErrorStatus error, const std::string& message, const android::sp& callback) { ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str()); NotifyCallbackAndCheck(callback, error, nullptr); return error; } } // anonymous namespace namespace armnn_driver { namespace hal_1_3 { Return ArmnnDriverImpl::prepareArmnnModel_1_3( const armnn::IRuntimePtr& runtime, const armnn::IGpuAccTunedParametersPtr& clTunedParameters, const DriverOptions& options, const V1_3::Model& model, const android::hardware::hidl_vec& modelCacheHandle, const android::hardware::hidl_vec& dataCacheHandle, const HidlToken& token, const android::sp& cb, bool float32ToFloat16, V1_3::Priority priority) { ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()"); std::chrono::time_point prepareModelTimepoint = std::chrono::system_clock::now(); if (cb.get() == nullptr) { ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel"); return V1_3::ErrorStatus::INVALID_ARGUMENT; } if (!runtime) { return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb); } if (!android::nn::validateModel(model)) { return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb); } // Deliberately ignore any unsupported operations requested by the options - // at this point we're being asked to prepare a model that we've already declared support for // and the operation indices may be different to those in getSupportedOperations anyway. std::set unsupportedOperations; ModelToINetworkConverter modelConverter(options.GetBackends(), model, unsupportedOperations); if (modelConverter.GetConversionResult() != ConversionResult::Success) { FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb); return V1_3::ErrorStatus::NONE; } // Serialize the network graph to a .armnn file if an output directory // has been specified in the drivers' arguments. std::vector dataCacheData; bool serializeToFile = dataCacheHandle.size() < 1 ? false : true; auto serializedNetworkFileName = SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir(), dataCacheData, serializeToFile); // Optimize the network armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); armnn::OptimizerOptionsOpaque OptOptions; OptOptions.SetReduceFp32ToFp16(float32ToFloat16); OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled()); int cachedFd = -1; bool saveCachedNetwork = options.SaveCachedNetwork(); unsigned int numberOfCachedModelFiles = 0; if (modelCacheHandle.size() > 0) { unsigned int index = 0; for (auto& backend : options.GetBackends()) { // modelCacheHandle size should be equal to numberOfCachedModelFiles // modelCacheHandle vector should be in same order as backends auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); if (numberOfCacheFiles > 0) { numberOfCachedModelFiles += numberOfCacheFiles; if (modelCacheHandle[index]->numFds == 1) { // For GpuAcc numberOfCachedFiles is 1 if (backend == armnn::Compute::GpuAcc) { cachedFd = modelCacheHandle[index]->data[0]; saveCachedNetwork = true; } } index += numberOfCachedModelFiles; } } } armnn::BackendOptions gpuAcc("GpuAcc", { { "FastMathEnabled", options.IsFastMathEnabled() }, { "SaveCachedNetwork", saveCachedNetwork }, { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() }, { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }, { "CachedFileDescriptor", cachedFd } }); armnn::BackendOptions cpuAcc("CpuAcc", { { "FastMathEnabled", options.IsFastMathEnabled() }, { "NumberOfThreads", options.GetNumberOfThreads() } }); OptOptions.AddModelOption(gpuAcc); OptOptions.AddModelOption(cpuAcc); std::vector errMessages; try { optNet = armnn::Optimize(*modelConverter.GetINetwork(), options.GetBackends(), runtime->GetDeviceSpec(), OptOptions, errMessages); } catch (std::exception& e) { std::stringstream message; message << "Exception (" << e.what() << ") caught from optimize."; FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); return V1_3::ErrorStatus::NONE; } // Check that the optimized network is valid. if (!optNet) { std::stringstream message; message << "Invalid optimized network"; for (const std::string& msg : errMessages) { message << "\n" << msg; } FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); return V1_3::ErrorStatus::NONE; } // Export the optimized network graph to a dot file if an output dump directory // has been specified in the drivers' arguments. std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet, options.GetRequestInputsAndOutputsDumpDir()); // Load it into the runtime. armnn::NetworkId netId = 0; std::string msg; armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), MemorySource::Undefined, MemorySource::Undefined, options.IsGpuProfilingEnabled()); auto numInputs = getMainModel(model).inputIndexes.size(); auto numOutputs = getMainModel(model).outputIndexes.size(); try { if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) { return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb); } } catch (std::exception& e) { std::stringstream message; message << "Exception (" << e.what()<< ") caught from LoadNetwork."; FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); return V1_3::ErrorStatus::NONE; } // Now that we have a networkId for the graph rename the exported files to use it // so that we can associate the graph file and the input/output tensor exported files RenameExportedFiles(serializedNetworkFileName, dotGraphFileName, options.GetRequestInputsAndOutputsDumpDir(), netId); std::unique_ptr> preparedModel( new ArmnnPreparedModel_1_3( netId, runtime.get(), model, options.GetRequestInputsAndOutputsDumpDir(), options.IsGpuProfilingEnabled(), priority, options.isAsyncModelExecutionEnabled(), options.getNoOfArmnnThreads(), options.isImportEnabled(), options.isExportEnabled())); // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if // this is enabled) before the first 'real' inference which removes the overhead of the first inference. // Only run this if the GpuAcc backend has been added to options if (std::find(options.GetBackends().begin(), options.GetBackends().end(), armnn::Compute::GpuAcc) != options.GetBackends().end()) { if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs)) { return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); } if (clTunedParameters && options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) { // Now that we've done one inference the CL kernel parameters will have been tuned, // so save the updated file. try { clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); } catch (std::exception& error) { ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", options.GetClTunedParametersFile().c_str(), error.what()); } } } size_t hashValue = 0; // Cache the model if (dataCacheHandle.size() > 0) { // Cache the Arm NN model if (dataCacheHandle.size() != 1) { NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); return V1_3::ErrorStatus::NONE; } if (dataCacheHandle[0]->numFds != 1) { ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1."); NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); return V1_3::ErrorStatus::NONE; } if (dataCacheHandle[0]->data[0] < 0) { ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0"); NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); return V1_3::ErrorStatus::NONE; } int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; if (dataCacheFileAccessMode != O_RDWR) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Invalid Access Mode."); NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); return V1_3::ErrorStatus::NONE; } write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size()); hashValue = CacheDataHandlerInstance().Hash(dataCacheData); } // Cache the model data if (modelCacheHandle.size() > 0) { if (modelCacheHandle.size() != numberOfCachedModelFiles) { NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); return V1_3::ErrorStatus::NONE; } for (uint32_t i = 0; i < modelCacheHandle.size(); ++i) { if (modelCacheHandle[i]->numFds == 1) { int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE; if (modelCacheFileAccessMode != O_RDONLY) { struct stat statBuffer; if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0) { long modelDataSize = statBuffer.st_size; if (modelDataSize > 0) { std::vector modelData(modelDataSize); pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0); hashValue ^= CacheDataHandlerInstance().Hash(modelData); } } } } } } if (hashValue != 0) { CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size()); } NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast (std::chrono::system_clock::now() - prepareModelTimepoint).count()); return V1_3::ErrorStatus::NONE; } Return ArmnnDriverImpl::prepareModelFromCache_1_3( const armnn::IRuntimePtr& runtime, const DriverOptions& options, const android::hardware::hidl_vec& modelCacheHandle, const android::hardware::hidl_vec& dataCacheHandle, const HidlToken& token, const android::sp& cb) { ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()"); std::chrono::time_point modelFromCacheTimepoint = std::chrono::system_clock::now(); if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) { cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } if (cb.get() == nullptr) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3"); return V1_3::ErrorStatus::INVALID_ARGUMENT; } if (!runtime) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable"); return V1_3::ErrorStatus::DEVICE_UNAVAILABLE; } // DataCacheHandle size should always be 1 // Arm NN model if (dataCacheHandle.size() != 1) { cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } // Check if model files cached they match the expected value unsigned int numberOfCachedModelFiles = 0; for (auto& backend : options.GetBackends()) { numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); } if (modelCacheHandle.size() != numberOfCachedModelFiles) { cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } if (dataCacheHandle[0]->numFds != 1) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1."); cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } if (dataCacheHandle[0]->data[0] < 0) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, fd < 0"); cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; if (dataCacheFileAccessMode != O_RDWR) { cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } auto dataSize = CacheDataHandlerInstance().GetCacheSize(token); if (dataSize == 0) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!"); cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } int offset = 0; { struct stat statBuffer; if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0) { unsigned long bufferSize = statBuffer.st_size; if (bufferSize != dataSize) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!"); cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } } } std::vector dataCacheData(dataSize); pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset); auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData); int gpuAccCachedFd = -1; bool saveCachedNetwork = false; if (modelCacheHandle.size() > 0) { unsigned int index = 0; for (auto& backend : options.GetBackends()) { // modelCacheHandle size should be equal to numberOfCachedModelFiles // modelCacheHandle vector should be in same order as backends auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); if (numberOfCacheFiles > 0) { if (modelCacheHandle[index]->numFds != 1) { ALOGW( "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1."); cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } auto cachedFd = modelCacheHandle[index]->data[0]; int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE; if (modelCacheFileAccessMode != O_RDWR) { cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } struct stat statBuffer; if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0) { long modelDataSize = statBuffer.st_size; if (modelDataSize <= 0) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Wrong cached model size!"); cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::NONE; } std::vector modelData(modelDataSize); pread(cachedFd, modelData.data(), modelData.size(), 0); hashValue ^= CacheDataHandlerInstance().Hash(modelData); // For GpuAcc numberOfCachedFiles is 1 if (backend == armnn::Compute::GpuAcc) { gpuAccCachedFd = cachedFd; } } index += numberOfCacheFiles; } } } if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size())) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!"); cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } // Deserialize the network.. armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){}); try { network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData); } catch (std::exception&) { ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Exception caught from Deserializer!"); cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr); return V1_3::ErrorStatus::GENERAL_FAILURE; } // Optimize the network armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); armnn::OptimizerOptionsOpaque OptOptions; OptOptions.SetReduceFp32ToFp16(options.GetFp16Enabled()); OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled()); armnn::BackendOptions gpuAcc("GpuAcc", { {"FastMathEnabled", options.IsFastMathEnabled()}, {"SaveCachedNetwork", saveCachedNetwork}, {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()}, {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()}, {"CachedFileDescriptor", gpuAccCachedFd} }); armnn::BackendOptions cpuAcc("CpuAcc", { {"FastMathEnabled", options.IsFastMathEnabled()}, {"NumberOfThreads", options.GetNumberOfThreads()} }); OptOptions.AddModelOption(gpuAcc); OptOptions.AddModelOption(cpuAcc); std::vector errMessages; try { optNet = armnn::Optimize(*network.get(), options.GetBackends(), runtime->GetDeviceSpec(), OptOptions, errMessages); } catch (std::exception& e) { std::stringstream message; message << "Exception (" << e.what() << ") caught from optimize."; FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); return V1_3::ErrorStatus::NONE; } // Check that the optimized network is valid. if (!optNet) { std::stringstream message; message << "Invalid optimized network"; for (const std::string& msg : errMessages) { message << "\n" << msg; } FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); return V1_3::ErrorStatus::NONE; } // Export the optimized network graph to a dot file if an output dump directory // has been specified in the drivers' arguments. std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet, options.GetRequestInputsAndOutputsDumpDir()); // Load it into the runtime. armnn::NetworkId netId = 0; std::string msg; armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), MemorySource::Undefined, MemorySource::Undefined, options.IsGpuProfilingEnabled()); try { if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) { return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb); } } catch (std::exception& e) { std::stringstream message; message << "Exception (" << e.what() << ") caught from LoadNetwork."; FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb); return V1_3::ErrorStatus::NONE; } std::unique_ptr> preparedModel( new ArmnnPreparedModel_1_3(netId, runtime.get(), options.GetRequestInputsAndOutputsDumpDir(), options.IsGpuProfilingEnabled(), V1_3::Priority::MEDIUM, options.isAsyncModelExecutionEnabled(), options.getNoOfArmnnThreads(), options.isImportEnabled(), options.isExportEnabled(), true)); NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release()); ALOGV("ArmnnDriverImpl::prepareModelFromCache timing = %lld µs", std::chrono::duration_cast (std::chrono::system_clock::now() - modelFromCacheTimepoint).count()); return V1_3::ErrorStatus::NONE; } Return ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime, V1_3::IDevice::getCapabilities_1_3_cb cb) { ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()"); V1_3::Capabilities capabilities; float defaultValue = .1f; if (runtime) { capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue); capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue); capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue); capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue); capabilities.ifPerformance.execTime = ParseSystemProperty(g_ifPerformanceExecTime, defaultValue); capabilities.ifPerformance.powerUsage = ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue); capabilities.whilePerformance.execTime = ParseSystemProperty(g_whilePerformanceExecTime, defaultValue); capabilities.whilePerformance.powerUsage = ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue); // Set the base value for all operand types capabilities.operandPerformance = nonExtensionOperandPerformance({FLT_MAX, FLT_MAX}); // Load supported operand types update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32, { .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32, { .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16, { .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16, { .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM, { .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM, { .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED, { .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM, { .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL, { .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32, { .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue) }); update(&capabilities.operandPerformance, V1_3::OperandType::INT32, { .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue), .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue) }); cb(V1_3::ErrorStatus::NONE, capabilities); } else { capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0; capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0; capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0; capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0; capabilities.ifPerformance.execTime = 0; capabilities.ifPerformance.powerUsage = 0; capabilities.whilePerformance.execTime = 0; capabilities.whilePerformance.powerUsage = 0; // Set the base value for all operand types capabilities.operandPerformance = nonExtensionOperandPerformance({0.f, 0.0f}); cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities); } return Void(); } } // namespace hal_1_3 } // namespace armnn_driver