Diffstat (limited to 'shim/sl/canonical/ArmnnDriverImpl.cpp')
-rw-r--r--  shim/sl/canonical/ArmnnDriverImpl.cpp  561
1 file changed, 561 insertions, 0 deletions
diff --git a/shim/sl/canonical/ArmnnDriverImpl.cpp b/shim/sl/canonical/ArmnnDriverImpl.cpp
new file mode 100644
index 0000000000..3223d9e8bf
--- /dev/null
+++ b/shim/sl/canonical/ArmnnDriverImpl.cpp
@@ -0,0 +1,561 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ArmnnDriverImpl.hpp"
+#include "ArmnnPreparedModel.hpp"
+#include "CacheDataHandler.hpp"
+#include "ModelToINetworkTransformer.hpp"
+#include "SystemPropertiesUtils.hpp"
+
+#include <armnnDeserializer/IDeserializer.hpp>
+
+#include <log/log.h>
+#include <sys/stat.h>
+
+namespace
+{
+
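+// Builds the Capabilities advertised to the NNAPI runtime: one flat default
+// performance figure (execTime and powerUsage of 0.1) for every supported operand
+// type, and the same figure for relaxed-precision, IF and WHILE performance.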
+Capabilities GenerateCapabilities()
+{
+ VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";
+
+ float defaultPerfValue = .1f;
+ const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
+ /* powerUsage */ defaultPerfValue
+ };
+ std::vector<OperandType> operandsTypes({
+ OperandType::FLOAT32,
+ OperandType::INT32,
+ OperandType::UINT32,
+ OperandType::TENSOR_FLOAT32,
+ OperandType::TENSOR_INT32,
+ OperandType::TENSOR_QUANT8_ASYMM,
+ OperandType::BOOL,
+ OperandType::TENSOR_QUANT16_SYMM,
+ OperandType::TENSOR_FLOAT16,
+ OperandType::TENSOR_BOOL8,
+ OperandType::FLOAT16,
+ OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+ OperandType::TENSOR_QUANT16_ASYMM,
+ OperandType::TENSOR_QUANT8_SYMM,
+ OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
+ });
+
+ std::vector<Capabilities::OperandPerformance> operandPerformances;
+ operandPerformances.reserve(operandsTypes.size());
+
+ for (auto opType : operandsTypes)
+ {
+ operandPerformances.push_back(
+ Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
+ }
+
+ auto operandPerformanceTable =
+ Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();
+
+ return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
+ /* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
+ /* operandPerformance */ std::move(operandPerformanceTable),
+ /* ifPerformance */ defaultPerfInfo,
+ /* whilePerformance */ defaultPerfInfo };
+}
+
+} // anonymous namespace
+
+using namespace android::nn;
+
+namespace armnn_driver
+{
+
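+// A shared handle is considered valid if it wraps a non-negative file descriptor
+// that was opened read/write (O_RDWR).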
+bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
+{
+ bool valid = true;
+
+ if (*sharedHandle < 0)
+ {
+ return !valid;
+ }
+
+ int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ return !valid;
+ }
+
+ return valid;
+}
+
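+// The data cache must hold exactly one handle whose file size matches the expected
+// dataSize, and the handle must itself pass ValidateSharedHandle().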
+bool ArmnnDriverImpl::ValidateDataCacheHandle(const std::vector<SharedHandle>& dataCacheHandle, const size_t dataSize)
+{
+ bool valid = true;
+ // DataCacheHandle size should always be 1 for an ArmNN model
+ if (dataCacheHandle.size() != 1)
+ {
+ return !valid;
+ }
+
+ if (dataSize == 0)
+ {
+ return !valid;
+ }
+
+ struct stat statBuffer;
+ if (fstat(*dataCacheHandle[0], &statBuffer) == 0)
+ {
+ unsigned long bufferSize = statBuffer.st_size;
+ if (bufferSize != dataSize)
+ {
+ return !valid;
+ }
+ }
+
+ return ValidateSharedHandle(dataCacheHandle[0]);
+}
+
+std::vector<armnn::NetworkId>& ArmnnDriverImpl::GetLoadedNetworks()
+{
+ return m_NetworkIDs;
+}
+
+GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const Model& model,
+ const std::vector<SharedHandle>& modelCacheHandle,
+ const std::vector<SharedHandle>& dataCacheHandle,
+ const CacheToken& token,
+ bool float32ToFloat16,
+ Priority priority)
+{
+ VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";
+
+ if (!runtime)
+ {
+ return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
+ }
+
+ if (const auto result = validate(model); !result.ok())
+ {
+ return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
+ }
+
+ // Deliberately ignore any unsupported operations requested by the options -
+ // at this point we're being asked to prepare a model that we've already declared support for
+ // and the operation indices may be different to those in getSupportedOperations anyway.
+ std::set<unsigned int> unsupportedOperations;
+ ModelToINetworkTransformer modelConverter(options.GetBackends(),
+ model,
+ unsupportedOperations);
+
+ if (modelConverter.GetConversionResult() != ConversionResult::Success)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
+ }
+
+ // Serialize the network graph to a .armnn file if an output directory
+ // has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
+ bool serializeToFile = !dataCacheHandle.empty();
+ auto serializedNetworkFileName =
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ serializeToFile);
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptions OptOptions;
+ OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
+ int cachedFd = -1;
+ bool saveCachedNetwork = options.SaveCachedNetwork();
+
+ unsigned int numberOfCachedModelFiles = 0;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // modelCacheHandle vector should be in same order as backends
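+ // Illustrative layout (an assumption for clarity, not taken from this file):
+ // with backends {GpuAcc, CpuAcc} and cache-file counts {1, 0}, the caller
+ // passes modelCacheHandle = { <GpuAcc fd> } and 'index' walks that vector
+ // in backend order.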
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ numberOfCachedModelFiles += numberOfCacheFiles;
+ // For GpuAcc, numberOfCacheFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ cachedFd = *modelCacheHandle[index];
+ saveCachedNetwork = true;
+ }
+ // Advance by this backend's own cache-file count (as in
+ // PrepareArmnnModelFromCache below), not by the running total
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
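+ // Backend-specific options: GpuAcc is handed the cached-network file descriptor
+ // (if one was found above) with SaveCachedNetwork forced on, presumably so the
+ // backend can write its compiled network to that descriptor; CpuAcc only gets
+ // the fast-math and thread-count settings.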
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "SaveCachedNetwork", saveCachedNetwork },
+ { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", cachedFd }
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "NumberOfThreads", options.GetNumberOfThreads() }
+ });
+ OptOptions.m_ModelOptions.push_back(gpuAcc);
+ OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*modelConverter.GetINetwork(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
+ {
+ message << "\n" << msg;
+ }
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
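+ // Assumption, not stated in this file: MemorySource::Undefined for inputs and
+ // outputs means the runtime will not try to import caller memory.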
+ auto numInputs = getMainModel(model).inputIndexes.size();
+ auto numOutputs = getMainModel(model).outputIndexes.size();
+ try
+ {
+ if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+ }
+
+ // Now that we have a networkId for the graph rename the exported files to use it
+ // so that we can associate the graph file and the input/output tensor exported files
+ RenameExportedFiles(serializedNetworkFileName,
+ dotGraphFileName,
+ options.GetRequestInputsAndOutputsDumpDir(),
+ netId);
+
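+ // Caching scheme, as implemented below: the serialized network is written to the
+ // single data cache fd and hashed; each writable model cache fd is then hashed and
+ // XORed in; the combined hash is registered against the cache token so that
+ // PrepareArmnnModelFromCache() can verify the same files later.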
+ // Cache the model
+ size_t hashValue = 0;
+ if (dataCacheHandle.size() == 1)
+ {
+ write(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size());
+ hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+ }
+
+ // Fold the contents of each writable model cache file into the hash
+ if (modelCacheHandle.size() > 0)
+ {
+ if (modelCacheHandle.size() == numberOfCachedModelFiles)
+ {
+ for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
+ {
+ int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDONLY)
+ {
+ struct stat statBuffer;
+ if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+ }
+ }
+ }
+ }
+ }
+ }
+ if (hashValue != 0)
+ {
+ CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+ }
+
+ bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end());
+
+ m_NetworkIDs.push_back(netId);
+ auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
+ runtime.get(),
+ model,
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ priority);
+
+ // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
+ // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
+ // Only run this if the GpuAcc backend has been added to options
+ if (executeWithDummyInputs)
+ {
+ if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
+ }
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ {
+ // Now that we've done one inference the CL kernel parameters will have been tuned,
+ // so save the updated file.
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file"
+ << options.GetClTunedParametersFile().c_str() << error.what();
+ }
+ }
+ }
+ return std::move(preparedModel);
+}
+
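+// Ids of every network this driver has loaded into the runtime; exposed through
+// GetLoadedNetworks() and reset by ClearNetworks().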
+std::vector<armnn::NetworkId> ArmnnDriverImpl::m_NetworkIDs = {};
+
+GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const std::vector<SharedHandle>& modelCacheHandle,
+ const std::vector<SharedHandle>& dataCacheHandle,
+ const CacheToken& token,
+ bool float32ToFloat16)
+{
+ VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";
+
+ if (!runtime)
+ {
+ return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
+ }
+
+ if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
+ }
+
+ // Validate dataCacheHandle
+ auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+ if (!ValidateDataCacheHandle(dataCacheHandle, dataSize))
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Not valid data cache handle!";
+ }
+
+ // Check that the number of cached model files matches the expected value
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
+ }
+
+ // Read the model
+ std::vector<uint8_t> dataCacheData(dataSize);
+ pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), 0);
+ auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
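+ // Rebuild the hash registered at prepare time: the data cache contents first,
+ // then each model cache file XORed in below, before checking it against the
+ // token via Validate().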
+
+ int gpuAccCachedFd = -1;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // modelCacheHandle vector should be in same order as backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ if (!ValidateSharedHandle(modelCacheHandle[index]))
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
+ }
+ int cachedFd = *modelCacheHandle[index];
+ struct stat statBuffer;
+ if (fstat(cachedFd, &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(cachedFd, modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ gpuAccCachedFd = cachedFd;
+ }
+ }
+ }
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
+ if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
+ }
+
+ // Deserialize the network.
+ armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
+ try
+ {
+ network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+ }
+ catch (std::exception&)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
+ }
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptions OptOptions;
+ OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
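+ // When loading from cache, SaveCachedNetwork stays false and GpuAcc is handed the
+ // previously written cache fd (gpuAccCachedFd) rather than a fresh one.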
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "SaveCachedNetwork", false },
+ { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", gpuAccCachedFd }
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "NumberOfThreads", options.GetNumberOfThreads() }
+ });
+ OptOptions.m_ModelOptions.push_back(gpuAcc);
+ OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*network.get(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
+ {
+ message << "\n" << msg;
+ }
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+ try
+ {
+ if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+ }
+
+ m_NetworkIDs.push_back(netId);
+ return std::make_shared<const ArmnnPreparedModel>(netId,
+ runtime.get(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ Priority::MEDIUM,
+ true);
+}
+
+const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
+{
+ VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
+ static const Capabilities theCapabilities = GenerateCapabilities();
+ return theCapabilities;
+}
+
+void ArmnnDriverImpl::ClearNetworks()
+{
+ m_NetworkIDs.clear();
+}
+
+} // namespace armnn_driver