Diffstat (limited to 'shim/sl/canonical/ArmnnDriverImpl.cpp')
-rw-r--r-- | shim/sl/canonical/ArmnnDriverImpl.cpp | 561
1 file changed, 561 insertions, 0 deletions
diff --git a/shim/sl/canonical/ArmnnDriverImpl.cpp b/shim/sl/canonical/ArmnnDriverImpl.cpp
new file mode 100644
index 0000000000..3223d9e8bf
--- /dev/null
+++ b/shim/sl/canonical/ArmnnDriverImpl.cpp
@@ -0,0 +1,561 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ArmnnDriverImpl.hpp"
+#include "ArmnnPreparedModel.hpp"
+#include "CacheDataHandler.hpp"
+#include "ModelToINetworkTransformer.hpp"
+#include "SystemPropertiesUtils.hpp"
+
+#include <armnnDeserializer/IDeserializer.hpp>
+
+#include <log/log.h>
+#include <sys/stat.h>
+
+namespace
+{
+
+Capabilities GenerateCapabilities()
+{
+    VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";
+
+    float defaultPerfValue = .1f;
+    const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
+                                                            /* powerUsage */ defaultPerfValue };
+    std::vector<OperandType> operandsTypes({
+        OperandType::FLOAT32,
+        OperandType::INT32,
+        OperandType::UINT32,
+        OperandType::TENSOR_FLOAT32,
+        OperandType::TENSOR_INT32,
+        OperandType::TENSOR_QUANT8_ASYMM,
+        OperandType::BOOL,
+        OperandType::TENSOR_QUANT16_SYMM,
+        OperandType::TENSOR_FLOAT16,
+        OperandType::TENSOR_BOOL8,
+        OperandType::FLOAT16,
+        OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+        OperandType::TENSOR_QUANT16_ASYMM,
+        OperandType::TENSOR_QUANT8_SYMM,
+        OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
+    });
+
+    std::vector<Capabilities::OperandPerformance> operandPerformances;
+    operandPerformances.reserve(operandsTypes.size());
+
+    for (auto opType : operandsTypes)
+    {
+        operandPerformances.push_back(
+            Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
+    }
+
+    auto operandPerformanceTable =
+        Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();
+
+    return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
+             /* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
+             /* operandPerformance */ std::move(operandPerformanceTable),
+             /* ifPerformance */ defaultPerfInfo,
+             /* whilePerformance */ defaultPerfInfo };
+}
+
+} // anonymous namespace
+
+using namespace android::nn;
+
+namespace armnn_driver
+{
+
+bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
+{
+    bool valid = true;
+
+    if (*sharedHandle < 0)
+    {
+        return !valid;
+    }
+
+    int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
+    if (dataCacheFileAccessMode != O_RDWR)
+    {
+        return !valid;
+    }
+
+    return valid;
+}
+
+bool ArmnnDriverImpl::ValidateDataCacheHandle(const std::vector<SharedHandle>& dataCacheHandle, const size_t dataSize)
+{
+    bool valid = true;
+    // DataCacheHandle size should always be 1 for ArmNN model
+    if (dataCacheHandle.size() != 1)
+    {
+        return !valid;
+    }
+
+    if (dataSize == 0)
+    {
+        return !valid;
+    }
+
+    struct stat statBuffer;
+    if (fstat(*dataCacheHandle[0], &statBuffer) == 0)
+    {
+        unsigned long bufferSize = statBuffer.st_size;
+        if (bufferSize != dataSize)
+        {
+            return !valid;
+        }
+    }
+
+    return ValidateSharedHandle(dataCacheHandle[0]);
+}
+
+std::vector<armnn::NetworkId>& ArmnnDriverImpl::GetLoadedNetworks()
+{
+    return m_NetworkIDs;
+}
+
+GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
+    const armnn::IRuntimePtr& runtime,
+    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+    const DriverOptions& options,
+    const Model& model,
+    const std::vector<SharedHandle>& modelCacheHandle,
+    const std::vector<SharedHandle>& dataCacheHandle,
+    const CacheToken& token,
+    bool float32ToFloat16,
+    Priority priority)
+{
+    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";
+
+    if (!runtime)
+    {
+        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
+    }
+
+    if (const auto result = validate(model); !result.ok())
+    {
+        return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
+    }
+
+    // Deliberately ignore any unsupported operations requested by the options -
+    // at this point we're being asked to prepare a model that we've already declared support for
+    // and the operation indices may be different to those in getSupportedOperations anyway.
+    std::set<unsigned int> unsupportedOperations;
+    ModelToINetworkTransformer modelConverter(options.GetBackends(),
+                                              model,
+                                              unsupportedOperations);
+
+    if (modelConverter.GetConversionResult() != ConversionResult::Success)
+    {
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
+    }
+
+    // Serialize the network graph to a .armnn file if an output directory
+    // has been specified in the drivers' arguments.
+    std::vector<uint8_t> dataCacheData;
+    bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
+    auto serializedNetworkFileName =
+        SerializeNetwork(*modelConverter.GetINetwork(),
+                         options.GetRequestInputsAndOutputsDumpDir(),
+                         dataCacheData,
+                         serializeToFile);
+
+    // Optimize the network
+    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+    armnn::OptimizerOptions OptOptions;
+    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
+    int cachedFd = -1;
+    bool saveCachedNetwork = options.SaveCachedNetwork();
+
+    unsigned int numberOfCachedModelFiles = 0;
+    if (modelCacheHandle.size() > 0)
+    {
+        unsigned int index = 0;
+        for (auto& backend : options.GetBackends())
+        {
+            // modelCacheHandle size should be equal to numberOfCachedModelFiles
+            // modelCacheHandle vector should be in same order as backends
+            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+            if (numberOfCacheFiles > 0)
+            {
+                numberOfCachedModelFiles += numberOfCacheFiles;
+                // For GpuAcc numberOfCachedFiles is 1
+                if (backend == armnn::Compute::GpuAcc)
+                {
+                    cachedFd = *modelCacheHandle[index];
+                    saveCachedNetwork = true;
+                }
+                index += numberOfCachedModelFiles;
+            }
+        }
+    }
+
+    armnn::BackendOptions gpuAcc("GpuAcc",
+    {
+        { "FastMathEnabled", options.IsFastMathEnabled() },
+        { "SaveCachedNetwork", saveCachedNetwork },
+        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
+        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+        { "CachedFileDescriptor", cachedFd }
+    });
+
+    armnn::BackendOptions cpuAcc("CpuAcc",
+    {
+        { "FastMathEnabled", options.IsFastMathEnabled() },
+        { "NumberOfThreads", options.GetNumberOfThreads() }
+    });
+    OptOptions.m_ModelOptions.push_back(gpuAcc);
+    OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+    std::vector<std::string> errMessages;
+    try
+    {
+        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
+                                 options.GetBackends(),
+                                 runtime->GetDeviceSpec(),
+                                 OptOptions,
+                                 errMessages);
+    }
+    catch (std::exception& e)
+    {
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
+    }
+
+    // Check that the optimized network is valid.
+    if (!optNet)
+    {
+        std::stringstream message;
+        message << "Invalid optimized network";
+        for (const std::string& msg : errMessages)
+        {
+            message << "\n" << msg;
+        }
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+    }
+
+    // Export the optimized network graph to a dot file if an output dump directory
+    // has been specified in the drivers' arguments.
+    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+                                                               options.GetRequestInputsAndOutputsDumpDir());
+
+    // Load it into the runtime.
+    armnn::NetworkId netId = 0;
+    std::string msg;
+    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+                                                MemorySource::Undefined,
+                                                MemorySource::Undefined,
+                                                options.IsGpuProfilingEnabled());
+    auto numInputs = getMainModel(model).inputIndexes.size();
+    auto numOutputs = getMainModel(model).outputIndexes.size();
+    try
+    {
+        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+        {
+            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
+        }
+    }
+    catch (std::exception& e)
+    {
+        std::stringstream message;
+        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+    }
+
+    // Now that we have a networkId for the graph rename the exported files to use it
+    // so that we can associate the graph file and the input/output tensor exported files
+    RenameExportedFiles(serializedNetworkFileName,
+                        dotGraphFileName,
+                        options.GetRequestInputsAndOutputsDumpDir(),
+                        netId);
+
+    // Cache the model
+    size_t hashValue = 0;
+    if (dataCacheHandle.size() == 1)
+    {
+        write(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size());
+        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+    }
+
+    // Cache the model data
+    if (modelCacheHandle.size() > 0)
+    {
+        if (modelCacheHandle.size() == numberOfCachedModelFiles)
+        {
+            for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
+            {
+                int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
+                if (modelCacheFileAccessMode != O_RDONLY)
+                {
+                    struct stat statBuffer;
+                    if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
+                    {
+                        long modelDataSize = statBuffer.st_size;
+                        if (modelDataSize > 0)
+                        {
+                            std::vector<uint8_t> modelData(modelDataSize);
+                            pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
+                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    if (hashValue != 0)
+    {
+        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+    }
+
+    bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
+                                             options.GetBackends().end(),
+                                             armnn::Compute::GpuAcc) != options.GetBackends().end());
+
+    m_NetworkIDs.push_back(netId);
+    auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
+                                                                    runtime.get(),
+                                                                    model,
+                                                                    options.GetRequestInputsAndOutputsDumpDir(),
+                                                                    options.IsGpuProfilingEnabled(),
+                                                                    priority);
+
+    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
+    // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
+    // Only run this if the GpuAcc backend has been added to options
+    if (std::find(options.GetBackends().begin(),
+                  options.GetBackends().end(),
+                  armnn::Compute::GpuAcc) != options.GetBackends().end())
+    {
+        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+        {
+            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
+        }
+
+        if (clTunedParameters &&
+            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+        {
+            // Now that we've done one inference the CL kernel parameters will have been tuned,
+            // so save the updated file.
+            try
+            {
+                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+            }
+            catch (std::exception& error)
+            {
+                VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file "
+                             << options.GetClTunedParametersFile().c_str() << error.what();
+            }
+        }
+    }
+    return std::move(preparedModel);
+}
+
+std::vector<armnn::NetworkId> ArmnnDriverImpl::m_NetworkIDs = {};
+
+GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
+    const armnn::IRuntimePtr& runtime,
+    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+    const DriverOptions& options,
+    const std::vector<SharedHandle>& modelCacheHandle,
+    const std::vector<SharedHandle>& dataCacheHandle,
+    const CacheToken& token,
+    bool float32ToFloat16)
+{
+    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";
+
+    if (!runtime)
+    {
+        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
+            << "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
+    }
+
+    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+    {
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+            << "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
+    }
+
+    // Validate dataCacheHandle
+    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+    if (!ValidateDataCacheHandle(dataCacheHandle, dataSize))
+    {
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+            << "ArmnnDriverImpl::prepareModelFromCache(): Not valid data cache handle!";
+    }
+
+    // Check that the number of cached model files matches the expected value
+    unsigned int numberOfCachedModelFiles = 0;
+    for (auto& backend : options.GetBackends())
+    {
+        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+    }
+    if (modelCacheHandle.size() != numberOfCachedModelFiles)
+    {
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+            << "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
+    }
+
+    // Read the model
+    std::vector<uint8_t> dataCacheData(dataSize);
+    pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), 0);
+    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+    int gpuAccCachedFd = -1;
+    if (modelCacheHandle.size() > 0)
+    {
+        unsigned int index = 0;
+        for (auto& backend : options.GetBackends())
+        {
+            // modelCacheHandle size should be equal to numberOfCachedModelFiles
+            // modelCacheHandle vector should be in same order as backends
+            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+            if (numberOfCacheFiles > 0)
+            {
+                if (!ValidateSharedHandle(modelCacheHandle[index]))
+                {
+                    return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+                        << "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
+                }
+                int cachedFd = *modelCacheHandle[index];
+                struct stat statBuffer;
+                if (fstat(cachedFd, &statBuffer) == 0)
+                {
+                    long modelDataSize = statBuffer.st_size;
+                    if (modelDataSize > 0)
+                    {
+                        std::vector<uint8_t> modelData(modelDataSize);
+                        pread(cachedFd, modelData.data(), modelData.size(), 0);
+                        hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+                        if (backend == armnn::Compute::GpuAcc)
+                        {
+                            gpuAccCachedFd = cachedFd;
+                        }
+                    }
+                }
+                index += numberOfCacheFiles;
+            }
+        }
+    }
+
+    if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
+    {
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+            << "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
+    }
+
+    // Deserialize the network.
+    armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
+    try
+    {
+        network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+    }
+    catch (std::exception&)
+    {
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+            << "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
+    }
+
+    // Optimize the network
+    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+    armnn::OptimizerOptions OptOptions;
+    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
+    armnn::BackendOptions gpuAcc("GpuAcc",
+    {
+        { "FastMathEnabled", options.IsFastMathEnabled() },
+        { "SaveCachedNetwork", false },
+        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
+        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+        { "CachedFileDescriptor", gpuAccCachedFd }
+    });
+
+    armnn::BackendOptions cpuAcc("CpuAcc",
+    {
+        { "FastMathEnabled", options.IsFastMathEnabled() },
+        { "NumberOfThreads", options.GetNumberOfThreads() }
+    });
+    OptOptions.m_ModelOptions.push_back(gpuAcc);
+    OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+    std::vector<std::string> errMessages;
+    try
+    {
+        optNet = armnn::Optimize(*network.get(),
+                                 options.GetBackends(),
+                                 runtime->GetDeviceSpec(),
+                                 OptOptions,
+                                 errMessages);
+    }
+    catch (std::exception& e)
+    {
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
+    }
+
+    // Check that the optimized network is valid.
+    if (!optNet)
+    {
+        std::stringstream message;
+        message << "Invalid optimized network";
+        for (const std::string& msg : errMessages)
+        {
+            message << "\n" << msg;
+        }
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+    }
+
+    // Export the optimized network graph to a dot file if an output dump directory
+    // has been specified in the drivers' arguments.
+    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+                                                               options.GetRequestInputsAndOutputsDumpDir());
+
+    // Load it into the runtime.
+    armnn::NetworkId netId = 0;
+    std::string msg;
+    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+                                                MemorySource::Undefined,
+                                                MemorySource::Undefined,
+                                                options.IsGpuProfilingEnabled());
+    try
+    {
+        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+        {
+            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
+        }
+    }
+    catch (std::exception& e)
+    {
+        std::stringstream message;
+        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+    }
+
+    m_NetworkIDs.push_back(netId);
+    return std::make_shared<const ArmnnPreparedModel>(netId,
+                                                      runtime.get(),
+                                                      options.GetRequestInputsAndOutputsDumpDir(),
+                                                      options.IsGpuProfilingEnabled(),
+                                                      Priority::MEDIUM,
+                                                      true);
+}
+
+const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
+{
+    VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
+    static const Capabilities theCapabilities = GenerateCapabilities();
+    return theCapabilities;
+}
+
+void ArmnnDriverImpl::ClearNetworks()
+{
+    m_NetworkIDs.clear();
+}
+
+} // namespace armnn_driver
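
The ValidateSharedHandle and ValidateDataCacheHandle helpers above rely on plain POSIX descriptor introspection: fcntl(F_GETFL) confirms the cache file was opened read-write (it is written at prepare time and read back on the cached path), and fstat confirms the on-disk size matches what the CacheDataHandler recorded. A minimal standalone sketch of the same checks; CheckCacheFd and expectedSize are illustrative names, not driver API:

    // Sketch only: CheckCacheFd is an illustrative helper, not part of the driver.
    #include <cstddef>
    #include <fcntl.h>
    #include <sys/stat.h>

    bool CheckCacheFd(int fd, size_t expectedSize)
    {
        if (fd < 0)
        {
            return false; // not an open descriptor
        }
        // Must be read-write: the driver writes the serialized network here
        // and reads it back in PrepareArmnnModelFromCache.
        if ((fcntl(fd, F_GETFL) & O_ACCMODE) != O_RDWR)
        {
            return false;
        }
        // The size on disk must match the size recorded for the cache token.
        struct stat statBuffer;
        if (fstat(fd, &statBuffer) == 0 &&
            static_cast<size_t>(statBuffer.st_size) != expectedSize)
        {
            return false;
        }
        return true;
    }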
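Cache integrity hinges on one combined value: the hash of the serialized ArmNN network, XORed with the hash of each backend cache blob, is registered against the NNAPI cache token in PrepareArmnnModel and re-derived in PrepareArmnnModelFromCache before anything is deserialized. CacheDataHandler::Hash is defined outside this diff; the sketch below substitutes std::hash over the raw bytes purely to show the combination scheme:

    // Sketch of the combined-hash scheme. HashBytes stands in for
    // CacheDataHandler::Hash, whose implementation is not part of this diff.
    #include <cstdint>
    #include <functional>
    #include <string_view>
    #include <vector>

    size_t HashBytes(const std::vector<uint8_t>& data)
    {
        return std::hash<std::string_view>{}(
            std::string_view(reinterpret_cast<const char*>(data.data()), data.size()));
    }

    size_t CombinedCacheHash(const std::vector<uint8_t>& dataCacheData,
                             const std::vector<std::vector<uint8_t>>& modelCacheBlobs)
    {
        size_t hashValue = HashBytes(dataCacheData);
        for (const auto& blob : modelCacheBlobs)
        {
            // XOR is order-independent, so backend iteration order
            // cannot break validation on reload.
            hashValue ^= HashBytes(blob);
        }
        return hashValue;
    }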
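Both prepare paths pair options.GetBackends() with contiguous slices of modelCacheHandle, where each backend's slice length is GetNumberOfCacheFiles(backend); only GpuAcc (one cache file, per the comment in PrepareArmnnModel) actually consumes a descriptor. Note that the two loops advance index differently: PrepareArmnnModelFromCache steps by the per-backend numberOfCacheFiles, while PrepareArmnnModel steps by the running total numberOfCachedModelFiles, and the two only coincide while a single backend reports cache files. A sketch of the per-backend stepping, with a stubbed GetNumberOfCacheFiles that is an assumption for self-containment, not the driver's implementation:

    // Sketch of the backend-to-cache-handle pairing. The Backend enum and
    // GetNumberOfCacheFiles stub are assumptions made for this example.
    #include <vector>

    enum class Backend { GpuAcc, CpuAcc, CpuRef };

    unsigned int GetNumberOfCacheFiles(Backend backend)
    {
        return backend == Backend::GpuAcc ? 1u : 0u; // GpuAcc: one cache file
    }

    int FindGpuAccCacheFd(const std::vector<Backend>& backends,
                          const std::vector<int>& modelCacheFds)
    {
        unsigned int index = 0; // start of the current backend's slice
        for (auto backend : backends)
        {
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                if (backend == Backend::GpuAcc)
                {
                    return modelCacheFds[index]; // GpuAcc uses a single file
                }
                index += numberOfCacheFiles; // skip this backend's slice
            }
        }
        return -1; // no GpuAcc cache file supplied
    }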