Diffstat (limited to 'shim/sl/canonical/ArmnnDriverImpl.cpp')
-rw-r--r--  shim/sl/canonical/ArmnnDriverImpl.cpp  561
1 file changed, 561 insertions, 0 deletions
diff --git a/shim/sl/canonical/ArmnnDriverImpl.cpp b/shim/sl/canonical/ArmnnDriverImpl.cpp
new file mode 100644
index 0000000000..3223d9e8bf
--- /dev/null
+++ b/shim/sl/canonical/ArmnnDriverImpl.cpp
@@ -0,0 +1,561 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ArmnnDriverImpl.hpp"
+#include "ArmnnPreparedModel.hpp"
+#include "CacheDataHandler.hpp"
+#include "ModelToINetworkTransformer.hpp"
+#include "SystemPropertiesUtils.hpp"
+
+#include <armnnDeserializer/IDeserializer.hpp>
+
+#include <log/log.h>
+#include <sys/stat.h>
+
+namespace
+{
+
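+// Builds the Capabilities advertised to the NNAPI runtime: one flat default
+// performance figure (execTime and powerUsage of 0.1) for every supported operand
+// type, and the same figure for relaxed-precision, IF and WHILE performance.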
+Capabilities GenerateCapabilities()
+{
+ VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";
+
+ float defaultPerfValue = .1f;
+ const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
+ /* powerUsage */ defaultPerfValue
+ };
+ std::vector<OperandType> operandsTypes({
+ OperandType::FLOAT32,
+ OperandType::INT32,
+ OperandType::UINT32,
+ OperandType::TENSOR_FLOAT32,
+ OperandType::TENSOR_INT32,
+ OperandType::TENSOR_QUANT8_ASYMM,
+ OperandType::BOOL,
+ OperandType::TENSOR_QUANT16_SYMM,
+ OperandType::TENSOR_FLOAT16,
+ OperandType::TENSOR_BOOL8,
+ OperandType::FLOAT16,
+ OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+ OperandType::TENSOR_QUANT16_ASYMM,
+ OperandType::TENSOR_QUANT8_SYMM,
+ OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
+ });
+
+ std::vector<Capabilities::OperandPerformance> operandPerformances;
+ operandPerformances.reserve(operandsTypes.size());
+
+ for (auto opType : operandsTypes)
+ {
+ operandPerformances.push_back(
+ Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
+ }
+
+ auto operandPerformanceTable =
+ Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();
+
+ return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
+ /* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
+ /* operandPerformance */ std::move(operandPerformanceTable),
+ /* ifPerformance */ defaultPerfInfo,
+ /* whilePerformance */ defaultPerfInfo };
+}
+
+} // anonymous namespace
+
+using namespace android::nn;
+
+namespace armnn_driver
+{
+
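+// A shared handle is considered valid if it wraps a non-negative file descriptor
+// that was opened read/write (O_RDWR).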
+bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
+{
+ bool valid = true;
+
+ if (*sharedHandle < 0)
+ {
+ return !valid;
+ }
+
+ int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ return !valid;
+ }
+
+ return valid;
+}
+
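+// The data cache must hold exactly one handle whose file size matches the expected
+// dataSize, and the handle must itself pass ValidateSharedHandle().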
+bool ArmnnDriverImpl::ValidateDataCacheHandle(const std::vector<SharedHandle>& dataCacheHandle, const size_t dataSize)
+{
+ bool valid = true;
+ // DataCacheHandle size should always be 1 for an ArmNN model
+ if (dataCacheHandle.size() != 1)
+ {
+ return !valid;
+ }
+
+ if (dataSize == 0)
+ {
+ return !valid;
+ }
+
+ struct stat statBuffer;
+ if (fstat(*dataCacheHandle[0], &statBuffer) == 0)
+ {
+ unsigned long bufferSize = statBuffer.st_size;
+ if (bufferSize != dataSize)
+ {
+ return !valid;
+ }
+ }
+
+ return ValidateSharedHandle(dataCacheHandle[0]);
+}
+
+std::vector<armnn::NetworkId>& ArmnnDriverImpl::GetLoadedNetworks()
+{
+ return m_NetworkIDs;
+}
+
+GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const Model& model,
+ const std::vector<SharedHandle>& modelCacheHandle,
+ const std::vector<SharedHandle>& dataCacheHandle,
+ const CacheToken& token,
+ bool float32ToFloat16,
+ Priority priority)
+{
+ VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";
+
+ if (!runtime)
+ {
+ return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
+ }
+
+ if (const auto result = validate(model); !result.ok())
+ {
+ return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
+ }
+
+ // Deliberately ignore any unsupported operations requested by the options -
+ // at this point we're being asked to prepare a model that we've already declared support for
+ // and the operation indices may be different to those in getSupportedOperations anyway.
+ std::set<unsigned int> unsupportedOperations;
+ ModelToINetworkTransformer modelConverter(options.GetBackends(),
+ model,
+ unsupportedOperations);
+
+ if (modelConverter.GetConversionResult() != ConversionResult::Success)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
+ }
+
+ // Serialize the network graph to a .armnn file if an output directory
+ // has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
+ bool serializeToFile = !dataCacheHandle.empty();
+ auto serializedNetworkFileName =
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ serializeToFile);
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptions OptOptions;
+ OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
+ int cachedFd = -1;
+ bool saveCachedNetwork = options.SaveCachedNetwork();
+
+ unsigned int numberOfCachedModelFiles = 0;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // modelCacheHandle vector should be in same order as backends
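+ // Illustrative layout (an assumption for clarity, not taken from this file):
+ // with backends {GpuAcc, CpuAcc} and cache-file counts {1, 0}, the caller
+ // passes modelCacheHandle = { <GpuAcc fd> } and 'index' walks that vector
+ // in backend order.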
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ numberOfCachedModelFiles += numberOfCacheFiles;
+ // For GpuAcc, numberOfCacheFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ cachedFd = *modelCacheHandle[index];
+ saveCachedNetwork = true;
+ }
+ // Advance by this backend's own cache-file count (as in
+ // PrepareArmnnModelFromCache below), not by the running total
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
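+ // Backend-specific options: GpuAcc is handed the cached-network file descriptor
+ // (if one was found above) with SaveCachedNetwork forced on, presumably so the
+ // backend can write its compiled network to that descriptor; CpuAcc only gets
+ // the fast-math and thread-count settings.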
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "SaveCachedNetwork", saveCachedNetwork },
+ { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", cachedFd }
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "NumberOfThreads", options.GetNumberOfThreads() }
+ });
+ OptOptions.m_ModelOptions.push_back(gpuAcc);
+ OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*modelConverter.GetINetwork(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
+ {
+ message << "\n" << msg;
+ }
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
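+ // Assumption, not stated in this file: MemorySource::Undefined for inputs and
+ // outputs means the runtime will not try to import caller memory.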
+ auto numInputs = getMainModel(model).inputIndexes.size();
+ auto numOutputs = getMainModel(model).outputIndexes.size();
+ try
+ {
+ if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+ }
+
+ // Now that we have a networkId for the graph rename the exported files to use it
+ // so that we can associate the graph file and the input/output tensor exported files
+ RenameExportedFiles(serializedNetworkFileName,
+ dotGraphFileName,
+ options.GetRequestInputsAndOutputsDumpDir(),
+ netId);
+
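+ // Caching scheme, as implemented below: the serialized network is written to the
+ // single data cache fd and hashed; each writable model cache fd is then hashed and
+ // XORed in; the combined hash is registered against the cache token so that
+ // PrepareArmnnModelFromCache() can verify the same files later.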
+ // Cache the model
+ size_t hashValue = 0;
+ if (dataCacheHandle.size() == 1)
+ {
+ write(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size());
+ hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+ }
+
+ // Fold the contents of each writable model cache file into the hash
+ if (modelCacheHandle.size() > 0)
+ {
+ if (modelCacheHandle.size() == numberOfCachedModelFiles)
+ {
+ for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
+ {
+ int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDONLY)
+ {
+ struct stat statBuffer;
+ if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+ }
+ }
+ }
+ }
+ }
+ }
+ if (hashValue != 0)
+ {
+ CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+ }
+
+ bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end());
+
+ m_NetworkIDs.push_back(netId);
+ auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
+ runtime.get(),
+ model,
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ priority);
+
+ // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
+ // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
+ // Only run this if the GpuAcc backend has been added to options
+ if (executeWithDummyInputs)
+ {
+ if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
+ }
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ {
+ // Now that we've done one inference the CL kernel parameters will have been tuned,
+ // so save the updated file.
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file"
+ << options.GetClTunedParametersFile().c_str() << error.what();
+ }
+ }
+ }
+ return std::move(preparedModel);
+}
+
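+// Ids of every network this driver has loaded into the runtime; exposed through
+// GetLoadedNetworks() and reset by ClearNetworks().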
+std::vector<armnn::NetworkId> ArmnnDriverImpl::m_NetworkIDs = {};
+
+GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const std::vector<SharedHandle>& modelCacheHandle,
+ const std::vector<SharedHandle>& dataCacheHandle,
+ const CacheToken& token,
+ bool float32ToFloat16)
+{
+ VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";
+
+ if (!runtime)
+ {
+ return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
+ }
+
+ if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
+ }
+
+ // Validate dataCacheHandle
+ auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+ if (!ValidateDataCacheHandle(dataCacheHandle, dataSize))
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Not valid data cache handle!";
+ }
+
+ // Check that the number of cached model files matches the expected value
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
+ }
+
+ // Read the model
+ std::vector<uint8_t> dataCacheData(dataSize);
+ pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), 0);
+ auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
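+ // Rebuild the hash registered at prepare time: the data cache contents first,
+ // then each model cache file XORed in below, before checking it against the
+ // token via Validate().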
+
+ int gpuAccCachedFd = -1;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // modelCacheHandle vector should be in same order as backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ if (!ValidateSharedHandle(modelCacheHandle[index]))
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
+ }
+ int cachedFd = *modelCacheHandle[index];
+ struct stat statBuffer;
+ if (fstat(cachedFd, &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(cachedFd, modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ gpuAccCachedFd = cachedFd;
+ }
+ }
+ }
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
+ if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
+ }
+
+ // Deserialize the network.
+ armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
+ try
+ {
+ network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+ }
+ catch (std::exception&)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
+ << "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
+ }
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptions OptOptions;
+ OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
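+ // When loading from cache, SaveCachedNetwork stays false and GpuAcc is handed the
+ // previously written cache fd (gpuAccCachedFd) rather than a fresh one.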
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "SaveCachedNetwork", false },
+ { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", gpuAccCachedFd }
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "NumberOfThreads", options.GetNumberOfThreads() }
+ });
+ OptOptions.m_ModelOptions.push_back(gpuAcc);
+ OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*network.get(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
+ {
+ message << "\n" << msg;
+ }
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+ try
+ {
+ if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
+ {
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
+ return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
+ }
+
+ m_NetworkIDs.push_back(netId);
+ return std::make_shared<const ArmnnPreparedModel>(netId,
+ runtime.get(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ Priority::MEDIUM,
+ true);
+}
+
+const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
+{
+ VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
+ static const Capabilities theCapabilities = GenerateCapabilities();
+ return theCapabilities;
+}
+
+void ArmnnDriverImpl::ClearNetworks()
+{
+ m_NetworkIDs.clear();
+}
+
+} // namespace armnn_driver