From ae050524109f1ce827962665436ef7430f2ac479 Mon Sep 17 00:00:00 2001
From: David Monahan
Date: Wed, 22 Mar 2023 16:48:58 +0000
Subject: IVGCVSW-7255 Update Doxygen Documentation and publish on GitHub.

* Updating Doxygen documentation for 23.02 release.

Signed-off-by: David Monahan
Change-Id: I545574ff7664b4595d2fe6a91a3c35d2ad55df82
---
 23.02/_armnn_driver_impl_8cpp_source.xhtml | 669 ++++++++++++++++++++++++++---
 1 file changed, 615 insertions(+), 54 deletions(-)

(limited to '23.02/_armnn_driver_impl_8cpp_source.xhtml')

diff --git a/23.02/_armnn_driver_impl_8cpp_source.xhtml b/23.02/_armnn_driver_impl_8cpp_source.xhtml
index 2b64734cf2..b180b55071 100644
--- a/23.02/_armnn_driver_impl_8cpp_source.xhtml
+++ b/23.02/_armnn_driver_impl_8cpp_source.xhtml

ArmNN: shim/sl/canonical/ArmnnDriverImpl.cpp Source File
ArmnnDriverImpl.cpp
1 //
2 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ArmnnDriverImpl.hpp"
7 #include "ArmnnPreparedModel.hpp"
10 
12 
13 #include <log/log.h>
14 #include <sys/stat.h>
15 
16 namespace
17 {
18 
19 Capabilities GenerateCapabilities()
20 {
21  VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";
22 
23  float defaultPerfValue = .1f;
24  const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
25  /* powerUsage */ defaultPerfValue
26  };
27  std::vector<OperandType> operandsTypes({
28  OperandType::FLOAT32,
29  OperandType::INT32,
30  OperandType::UINT32,
31  OperandType::TENSOR_FLOAT32,
32  OperandType::TENSOR_INT32,
33  OperandType::TENSOR_QUANT8_ASYMM,
34  OperandType::BOOL,
35  OperandType::TENSOR_QUANT16_SYMM,
36  OperandType::TENSOR_FLOAT16,
37  OperandType::TENSOR_BOOL8,
38  OperandType::FLOAT16,
39  OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
40  OperandType::TENSOR_QUANT16_ASYMM,
41  OperandType::TENSOR_QUANT8_SYMM,
42  OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
43  });
44 
45  std::vector<Capabilities::OperandPerformance> operandPerformances;
46  operandPerformances.reserve(operandsTypes.size());
47 
48  for (auto opType : operandsTypes)
49  {
50  operandPerformances.push_back(
51  Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
52  }
53 
54  auto operandPerformanceTable =
55  Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();
56 
57  return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
58  /* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
59  /* operandPerformance */ std::move(operandPerformanceTable),
60  /* ifPerformance */ defaultPerfInfo,
61  /* whilePerformance */ defaultPerfInfo };
62 }
63 
64 size_t Hash(std::vector<uint8_t>& cacheData)
65 {
66  std::size_t hash = cacheData.size();
67  for (auto& i : cacheData)
68  {
69  hash = ((hash << 5) - hash) + i;
70  }
71  return hash;
72 }
73 
74 } // anonymous namespace
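The two helpers above are self-contained: GenerateCapabilities() reports the same default performance figure for every operand type, and Hash() is a multiply-by-31 rolling hash ((hash << 5) - hash is hash * 31) seeded with the buffer size. A minimal standalone sketch of the same hash, outside the driver (the sample bytes are made up):

#include <cstdint>
#include <cstdio>
#include <vector>

// Same rolling hash as the anonymous-namespace Hash() above:
// seed with the byte count, then hash = hash * 31 + byte for each byte.
std::size_t RollingHash(const std::vector<std::uint8_t>& data)
{
    std::size_t hash = data.size();
    for (std::uint8_t byte : data)
    {
        hash = ((hash << 5) - hash) + byte;
    }
    return hash;
}

int main()
{
    const std::vector<std::uint8_t> sample = { 0x01, 0x02, 0x03, 0x04 }; // made-up data
    std::printf("hash = %zu\n", RollingHash(sample));
    return 0;
}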
75 
76 using namespace android::nn;
77 
78 namespace armnn_driver
79 {
80 
81 bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
82 {
83  bool valid = true;
84 
85  if (*sharedHandle < 0)
86  {
87  return !valid;
88  }
89 
90  int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
91  if (dataCacheFileAccessMode != O_RDWR)
92  {
93  return !valid;
94  }
95 
96  return valid;
97 }
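ValidateSharedHandle() accepts a handle only when the wrapped file descriptor is non-negative and its access mode (the O_ACCMODE bits of the F_GETFL flags) is O_RDWR, i.e. the cache file was opened read/write. A standalone sketch of the same check against an ordinary file (the path is only an example):

#include <cstdio>
#include <fcntl.h>
#include <unistd.h>

// Mirrors the access-mode test in ValidateSharedHandle(): the descriptor
// must be valid and must have been opened with O_RDWR.
bool IsReadWriteFd(int fd)
{
    if (fd < 0)
    {
        return false;
    }
    const int accessMode = fcntl(fd, F_GETFL) & O_ACCMODE;
    return accessMode == O_RDWR;
}

int main()
{
    const int fd = open("/tmp/example_cache.bin", O_RDWR | O_CREAT, 0600); // example path
    std::printf("read/write: %s\n", IsReadWriteFd(fd) ? "yes" : "no");
    if (fd >= 0)
    {
        close(fd);
    }
    return 0;
}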
98 
99 GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
100  const armnn::IRuntimePtr& runtime,
101  const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
102  const DriverOptions& options,
103  const Model& model,
104  const std::vector<SharedHandle>& modelCacheHandle,
105  const std::vector<SharedHandle>& dataCacheHandle,
106  const CacheToken& token,
107  bool float32ToFloat16,
108  Priority priority)
109 {
110  VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";
111 
112  if (!runtime)
113  {
114  return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
115  }
116 
117  if (const auto result = validate(model); !result.ok())
118  {
119  return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
120  }
121 
122  // Deliberately ignore any unsupported operations requested by the options -
123  // at this point we're being asked to prepare a model that we've already declared support for
124  // and the operation indices may be different to those in getSupportedOperations anyway.
125  std::set<unsigned int> unsupportedOperations;
126  ModelToINetworkTransformer modelConverter(options.GetBackends(),
127  model,
128  unsupportedOperations);
129 
130  if (modelConverter.GetConversionResult() != ConversionResult::Success)
131  {
132  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
133  }
134 
135  // Serialize the network graph to a .armnn file if an output directory
136  // has been specified in the drivers' arguments.
137  std::vector<uint8_t> dataCacheData;
138  bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
139  auto serializedNetworkFileName =
140  SerializeNetwork(*modelConverter.GetINetwork(),
141  options.GetRequestInputsAndOutputsDumpDir(),
142  dataCacheData,
143  serializeToFile);
144 
145  // Optimize the network
146  armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
147  armnn::OptimizerOptions OptOptions;
148  OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
149  OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
150 
151  int cachedFd = -1;
152  bool saveCachedNetwork = options.SaveCachedNetwork();
153 
154  unsigned int numberOfCachedModelFiles = 0;
155  if (modelCacheHandle.size() > 0)
156  {
157  unsigned int index = 0;
158  for (auto& backend : options.GetBackends())
159  {
160  // modelCacheHandle size should be equal to numberOfCachedModelFiles
161  // modelCacheHandle vector should be in same order as backends
162  auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
163  if (numberOfCacheFiles > 0)
164  {
165  numberOfCachedModelFiles += numberOfCacheFiles;
166  // For GpuAcc numberOfCachedFiles is 1
167  if (backend == armnn::Compute::GpuAcc)
168  {
169  cachedFd = *modelCacheHandle[index];
170  saveCachedNetwork = true;
171  }
172  index += numberOfCachedModelFiles;
173  }
174  }
175  }
176 
177  armnn::BackendOptions gpuAcc("GpuAcc",
178  {
179  { "FastMathEnabled", options.IsFastMathEnabled() },
180  { "SaveCachedNetwork", saveCachedNetwork },
181  { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
182  { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
183  { "CachedFileDescriptor", cachedFd }
184  });
185 
186  armnn::BackendOptions cpuAcc("CpuAcc",
187  {
188  { "FastMathEnabled", options.IsFastMathEnabled() },
189  { "NumberOfThreads", options.GetNumberOfThreads() }
190  });
191  OptOptions.m_ModelOptions.push_back(gpuAcc);
192  OptOptions.m_ModelOptions.push_back(cpuAcc);
193 
194  std::vector<std::string> errMessages;
195  try
196  {
197  optNet = armnn::Optimize(*modelConverter.GetINetwork(),
198  options.GetBackends(),
199  runtime->GetDeviceSpec(),
200  OptOptions,
201  errMessages);
202  }
203  catch (std::exception& e)
204  {
205  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
206  }
207 
208  // Check that the optimized network is valid.
209  if (!optNet)
210  {
211  std::stringstream message;
212  message << "Invalid optimized network";
213  for (const std::string& msg : errMessages)
214  {
215  message << "\n" << msg;
216  }
217  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
218  }
219 
220  // Export the optimized network graph to a dot file if an output dump directory
221  // has been specified in the drivers' arguments.
222  std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
223  options.GetRequestInputsAndOutputsDumpDir());
224 
225  // Load it into the runtime.
226  armnn::NetworkId netId = 0;
227  std::string msg;
228  armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
229  MemorySource::Undefined,
230  MemorySource::Undefined,
231  options.IsGpuProfilingEnabled());
232  auto numInputs = getMainModel(model).inputIndexes.size();
233  auto numOutputs = getMainModel(model).outputIndexes.size();
234  try
235  {
236  if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
237  {
238  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
239  }
240  }
241  catch (std::exception& e)
242  {
243  std::stringstream message;
244  message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
245  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
246  }
247 
248  // Now that we have a networkId for the graph rename the exported files to use it
249  // so that we can associate the graph file and the input/output tensor exported files
250  RenameExportedFiles(serializedNetworkFileName,
251  dotGraphFileName,
252  options.GetRequestInputsAndOutputsDumpDir(),
253  netId);
254 
255  // Cache the model
256  size_t hashValue = 0;
257  if (dataCacheHandle.size() == 1 )
258  {
259  hashValue = Hash(dataCacheData);
260  }
261 
262  // Cache the model data
263  if (modelCacheHandle.size() > 0)
264  {
265  if (modelCacheHandle.size() == numberOfCachedModelFiles)
266  {
267  for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
268  {
269  int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
270  if (modelCacheFileAccessMode != O_RDONLY)
271  {
272  struct stat statBuffer;
273  if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
274  {
275  long modelDataSize = statBuffer.st_size;
276  if (modelDataSize > 0)
277  {
278  std::vector<uint8_t> modelData(modelDataSize);
279  pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
280  hashValue ^= Hash(modelData);
281  }
282  }
283  }
284  }
285  }
286  }
287  if (dataCacheHandle.size() == 1 && hashValue != 0)
288  {
289  std::vector<uint8_t> theHashValue(sizeof(hashValue));
290  ::memcpy(theHashValue.data(), &hashValue, sizeof(hashValue));
291 
292  write(*dataCacheHandle[0], theHashValue.data(), theHashValue.size());
293  pwrite(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), theHashValue.size());
294  }
295 
296  bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
297  options.GetBackends().end(),
298  armnn::Compute::GpuAcc) != options.GetBackends().end());
299 
300  auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
301  runtime.get(),
302  model,
303  options.GetRequestInputsAndOutputsDumpDir(),
304  options.IsGpuProfilingEnabled(),
305  priority);
306 
307  // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
308  // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
309  // Only run this if the GpuAcc backend has been added to options
310  if (std::find(options.GetBackends().begin(),
311  options.GetBackends().end(),
312  armnn::Compute::GpuAcc) != options.GetBackends().end())
313  {
314  if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
315  {
316  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
317  }
318 
319  if (clTunedParameters &&
320  options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
321  {
322  // Now that we've done one inference the CL kernel parameters will have been tuned,
323  // so save the updated file.
324  try
325  {
326  clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
327  }
328  catch (std::exception& error)
329  {
330  VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file"
331  << options.GetClTunedParametersFile().c_str() << error.what();
332  }
333  }
334  }
335  return std::move(preparedModel);
336 }
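When a single data cache handle is supplied, PrepareArmnnModel() stores the blob as the raw hash value followed immediately by the serialized network bytes: write() emits the sizeof(size_t) hash header and pwrite() appends the payload at that offset (lines 289-293 above). A standalone sketch of the same layout, assuming an ordinary file stands in for the cache handle (the path is only an example):

#include <cstdint>
#include <cstring>
#include <fcntl.h>
#include <unistd.h>
#include <vector>

// Rolling hash as in the anonymous-namespace Hash() helper above.
static std::size_t RollingHash(const std::vector<std::uint8_t>& data)
{
    std::size_t hash = data.size();
    for (std::uint8_t byte : data)
    {
        hash = ((hash << 5) - hash) + byte;
    }
    return hash;
}

// Write the blob as [sizeof(std::size_t) hash bytes][payload bytes],
// matching the write()/pwrite() sequence at the end of PrepareArmnnModel().
bool WriteCacheBlob(int fd, const std::vector<std::uint8_t>& payload)
{
    const std::size_t hash = RollingHash(payload);
    std::vector<std::uint8_t> header(sizeof(hash));
    std::memcpy(header.data(), &hash, sizeof(hash));

    if (write(fd, header.data(), header.size()) != static_cast<ssize_t>(header.size()))
    {
        return false;
    }
    return pwrite(fd, payload.data(), payload.size(), header.size())
               == static_cast<ssize_t>(payload.size());
}

int main()
{
    const std::vector<std::uint8_t> payload = { 0xAA, 0xBB, 0xCC }; // stand-in for serialized network data
    const int fd = open("/tmp/example_data_cache.bin", O_RDWR | O_CREAT | O_TRUNC, 0600); // example path
    const bool ok = (fd >= 0) && WriteCacheBlob(fd, payload);
    if (fd >= 0)
    {
        close(fd);
    }
    return ok ? 0 : 1;
}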
337 
338 GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
339  const armnn::IRuntimePtr& runtime,
340  const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
341  const DriverOptions& options,
342  const std::vector<SharedHandle>& modelCacheHandle,
343  const std::vector<SharedHandle>& dataCacheHandle,
344  const CacheToken& token,
345  bool float32ToFloat16)
346 {
347  VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";
348 
349  if (!runtime)
350  {
351  return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
352  << "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
353  }
354 
355  if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
356  {
357  return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
358  << "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
359  }
360 
361  // Validate dataCacheHandle
362  if (dataCacheHandle.size() != 1)
363  {
364  return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
365  << "ArmnnDriverImpl::prepareModelFromCache(): Not valid data cache handle!";
366  }
367 
368  if (!ValidateSharedHandle(dataCacheHandle[0]))
369  {
370  return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
371  << "ArmnnDriverImpl::prepareModelFromCache(): Not valid data cache handle!";
372  }
373 
374  size_t cachedDataSize = 0;
375  struct stat dataStatBuffer;
376  if (fstat(*dataCacheHandle[0], &dataStatBuffer) == 0)
377  {
378  cachedDataSize = dataStatBuffer.st_size;
379  }
380  if (cachedDataSize == 0)
381  {
382  return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
383  << "ArmnnDriverImpl::prepareModelFromCache(): Not valid cached data!";
384  }
385 
386  // If model files are cached, check that their number matches the expected value
387  unsigned int numberOfCachedModelFiles = 0;
388  for (auto& backend : options.GetBackends())
389  {
390  numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
391  }
392  if (modelCacheHandle.size() != numberOfCachedModelFiles)
393  {
394  return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
395  << "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
396  }
397 
398  // Read the hashValue
399  std::vector<uint8_t> hashValue(sizeof(size_t));
400  pread(*dataCacheHandle[0], hashValue.data(), hashValue.size(), 0);
401 
402  // Read the model
403  std::vector<uint8_t> dataCacheData(cachedDataSize - hashValue.size());
404  pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), hashValue.size());
405  auto calculatedHashValue = Hash(dataCacheData);
406 
407  int gpuAccCachedFd = -1;
408  if (modelCacheHandle.size() > 0)
409  {
410  unsigned int index = 0;
411  for (auto& backend : options.GetBackends())
412  {
413  // modelCacheHandle size should be equal to numberOfCachedModelFiles
414  // modelCacheHandle vector should be in same order as backends
415  auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
416  if (numberOfCacheFiles > 0)
417  {
418  if (!ValidateSharedHandle(modelCacheHandle[index]))
419  {
420  return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
421  << "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
422  }
423  int cachedFd = *modelCacheHandle[index];
424  struct stat statBuffer;
425  if (fstat(cachedFd, &statBuffer) == 0)
426  {
427  long modelDataSize = statBuffer.st_size;
428  if (modelDataSize > 0)
429  {
430  std::vector<uint8_t> modelData(modelDataSize);
431  pread(cachedFd, modelData.data(), modelData.size(), 0);
432  calculatedHashValue ^= Hash(modelData);
433 
434  if (backend == armnn::Compute::GpuAcc)
435  {
436  gpuAccCachedFd = cachedFd;
437  }
438  }
439  }
440  index += numberOfCacheFiles;
441  }
442  }
443  }
444 
445  std::vector<uint8_t> calculatedHashData(sizeof(calculatedHashValue));
446  ::memcpy(calculatedHashData.data(), &calculatedHashValue, sizeof(calculatedHashValue));
447  if (hashValue != calculatedHashData)
448  {
449  return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
450  << "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
451  }
452 
453  // Deserialize the network.
454  armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
455  try
456  {
457  network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
458  }
459  catch (std::exception&)
460  {
461  return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
462  << "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
463  }
464 
465  // Optimize the network
466  armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
467  armnn::OptimizerOptions OptOptions;
468  OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
469  OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
470 
471  armnn::BackendOptions gpuAcc("GpuAcc",
472  {
473  { "FastMathEnabled", options.IsFastMathEnabled() },
474  { "SaveCachedNetwork", false },
475  { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
476  { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
477  { "CachedFileDescriptor", gpuAccCachedFd }
478  });
479 
480  armnn::BackendOptions cpuAcc("CpuAcc",
481  {
482  { "FastMathEnabled", options.IsFastMathEnabled() },
483  { "NumberOfThreads", options.GetNumberOfThreads() }
484  });
485  OptOptions.m_ModelOptions.push_back(gpuAcc);
486  OptOptions.m_ModelOptions.push_back(cpuAcc);
487 
488  std::vector<std::string> errMessages;
489  try
490  {
491  optNet = armnn::Optimize(*network.get(),
492  options.GetBackends(),
493  runtime->GetDeviceSpec(),
494  OptOptions,
495  errMessages);
496  }
497  catch (std::exception& e)
498  {
499  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
500  }
501 
502  // Check that the optimized network is valid.
503  if (!optNet)
504  {
505  std::stringstream message;
506  message << "Invalid optimized network";
507  for (const std::string& msg : errMessages)
508  {
509  message << "\n" << msg;
510  }
511  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
512  }
513 
514  // Export the optimized network graph to a dot file if an output dump directory
515  // has been specified in the drivers' arguments.
516  std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
517  options.GetRequestInputsAndOutputsDumpDir());
518 
519  // Load it into the runtime.
520  armnn::NetworkId netId = 0;
521  std::string msg;
522  armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
523  MemorySource::Undefined,
524  MemorySource::Undefined,
525  options.IsGpuProfilingEnabled());
526  try
527  {
528  if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
529  {
530  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
531  }
532  }
533  catch (std::exception& e)
534  {
535  std::stringstream message;
536  message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
537  return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
538  }
539 
540  auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
541  runtime.get(),
542  options.GetRequestInputsAndOutputsDumpDir(),
543  options.IsGpuProfilingEnabled(),
544  Priority::MEDIUM,
545  true);
546  return std::move(preparedModel);
547 }
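PrepareArmnnModelFromCache() reverses that layout: it pread()s the hash header at offset 0, pread()s the remaining bytes as the serialized network, then recomputes the rolling hash and compares the two byte-for-byte before deserializing (lines 398-405 and 445-451 above). A standalone sketch of that validation, reusing the assumptions from the previous sketch (example path, same rolling hash):

#include <cstdint>
#include <cstring>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <vector>

// Rolling hash as in the anonymous-namespace Hash() helper above.
static std::size_t RollingHash(const std::vector<std::uint8_t>& data)
{
    std::size_t hash = data.size();
    for (std::uint8_t byte : data)
    {
        hash = ((hash << 5) - hash) + byte;
    }
    return hash;
}

// Read [hash header][payload] and check the header against a recomputed hash,
// mirroring the pread() calls and byte-wise comparison in PrepareArmnnModelFromCache().
bool ValidateCacheBlob(int fd)
{
    struct stat statBuffer;
    if (fstat(fd, &statBuffer) != 0 || statBuffer.st_size <= static_cast<off_t>(sizeof(std::size_t)))
    {
        return false;
    }

    std::vector<std::uint8_t> storedHash(sizeof(std::size_t));
    pread(fd, storedHash.data(), storedHash.size(), 0);

    std::vector<std::uint8_t> payload(statBuffer.st_size - storedHash.size());
    pread(fd, payload.data(), payload.size(), storedHash.size());

    const std::size_t recomputed = RollingHash(payload);
    std::vector<std::uint8_t> recomputedBytes(sizeof(recomputed));
    std::memcpy(recomputedBytes.data(), &recomputed, sizeof(recomputed));

    return storedHash == recomputedBytes;
}

int main()
{
    const int fd = open("/tmp/example_data_cache.bin", O_RDONLY); // example path from the sketch above
    const bool valid = (fd >= 0) && ValidateCacheBlob(fd);
    if (fd >= 0)
    {
        close(fd);
    }
    return valid ? 0 : 1;
}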
548 
549 const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
550 {
551  VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
552  static const Capabilities theCapabilities = GenerateCapabilities();
553  return theCapabilities;
554 }
555 
556 } // namespace armnn_driver