Diffstat (limited to 'ArmnnDriverImpl.cpp')
 ArmnnDriverImpl.cpp | 73 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 25 deletions(-)
diff --git a/ArmnnDriverImpl.cpp b/ArmnnDriverImpl.cpp
index 3e4aab3c..dd60cc7b 100644
--- a/ArmnnDriverImpl.cpp
+++ b/ArmnnDriverImpl.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -23,6 +23,7 @@
 #include <ValidateHal.h>
 
 #include <log/log.h>
+#include <chrono>
 
 using namespace std;
 using namespace android;
@@ -70,6 +71,8 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
 {
     ALOGV("ArmnnDriverImpl::prepareModel()");
 
+    std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();
+
     if (cb.get() == nullptr)
     {
         ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
@@ -102,13 +105,17 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
 
     // Serialize the network graph to a .armnn file if an output directory
     // has been specified in the drivers' arguments.
+    std::vector<uint8_t> dataCacheData;
     auto serializedNetworkFileName =
-        SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+        SerializeNetwork(*modelConverter.GetINetwork(),
+                         options.GetRequestInputsAndOutputsDumpDir(),
+                         dataCacheData,
+                         false);
 
     // Optimize the network
     armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
-    armnn::OptimizerOptions OptOptions;
-    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+    armnn::OptimizerOptionsOpaque OptOptions;
+    OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
 
     armnn::BackendOptions gpuAcc("GpuAcc",
     {
@@ -124,8 +131,8 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
         { "FastMathEnabled", options.IsFastMathEnabled() },
         { "NumberOfThreads", options.GetNumberOfThreads() }
     });
-    OptOptions.m_ModelOptions.push_back(gpuAcc);
-    OptOptions.m_ModelOptions.push_back(cpuAcc);
+    OptOptions.AddModelOption(gpuAcc);
+    OptOptions.AddModelOption(cpuAcc);
 
     std::vector<std::string> errMessages;
     try
@@ -163,9 +170,14 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
 
     // Load it into the runtime.
     armnn::NetworkId netId = 0;
+    std::string msg;
+    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+                                                armnn::MemorySource::Undefined,
+                                                armnn::MemorySource::Undefined);
+
     try
     {
-        if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
+        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
         {
             return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
         }
@@ -191,32 +203,43 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
             runtime.get(),
             model,
             options.GetRequestInputsAndOutputsDumpDir(),
-            options.IsGpuProfilingEnabled()));
-
-    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
-    // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
-    if (!preparedModel->ExecuteWithDummyInputs())
+            options.IsGpuProfilingEnabled(),
+            options.isAsyncModelExecutionEnabled(),
+            options.getNoOfArmnnThreads(),
+            options.isImportEnabled(),
+            options.isExportEnabled()));
+
+    if (std::find(options.GetBackends().begin(),
+                  options.GetBackends().end(),
+                  armnn::Compute::GpuAcc) != options.GetBackends().end())
     {
-        return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
-    }
-
-    if (clTunedParameters &&
-        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
-    {
-        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
-        try
+        // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
+        // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
+        if (!preparedModel->ExecuteWithDummyInputs())
         {
-            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+            return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
         }
-        catch (std::exception& error)
+
+        if (clTunedParameters &&
+            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
         {
-            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
-                  options.GetClTunedParametersFile().c_str(), error.what());
+            // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file
+            try
+            {
+                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+            }
+            catch (std::exception& error)
+            {
+                ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+                      options.GetClTunedParametersFile().c_str(), error.what());
+            }
        }
     }
-
     NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel);
 
+    ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
+          (std::chrono::system_clock::now() - prepareModelTimepoint).count());
+
     return V1_0::ErrorStatus::NONE;
 }
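
A note on the optimizer change above, since the same migration applies anywhere armnn::OptimizerOptions is still in use: OptimizerOptionsOpaque hides the options struct behind accessors, so the public m_ReduceFp32ToFp16 and m_ModelOptions members become SetReduceFp32ToFp16() and AddModelOption(). Below is a minimal sketch of the new call pattern, assuming an Arm NN release that ships OptimizerOptionsOpaque and the armnn::Optimize overload that accepts it; the function name, backend list, and NumberOfThreads value are illustrative, not part of this patch.

#include <armnn/ArmNN.hpp>

#include <vector>

// Sketch only: optimize `network` using the accessor-based options type.
armnn::IOptimizedNetworkPtr OptimizeWithOpaqueOptions(const armnn::INetwork& network,
                                                      armnn::IRuntime& runtime,
                                                      bool float32ToFloat16)
{
    armnn::OptimizerOptionsOpaque optOptions;
    optOptions.SetReduceFp32ToFp16(float32ToFloat16); // was: optOptions.m_ReduceFp32ToFp16 = ...

    // Backend tuning is attached via AddModelOption() instead of m_ModelOptions.push_back().
    armnn::BackendOptions cpuAcc("CpuAcc", {{ "NumberOfThreads", 4u }});
    optOptions.AddModelOption(cpuAcc);

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
    return armnn::Optimize(network, backends, runtime.GetDeviceSpec(), optOptions);
}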
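
The new timing log is a standard <chrono> bracket: capture a time_point when prepareModel() starts and log the microsecond delta at the end. The same pattern in isolation (printf stands in for ALOGV; everything else is plain C++11). The explicit cast matters: std::chrono::microseconds::rep is only guaranteed to be a signed integer of at least 55 bits, so casting to long long keeps the %lld format portable. The patch measures with system_clock; steady_clock is the usual choice for intervals because it cannot jump with wall-clock adjustments, but the sketch mirrors the patch.

#include <chrono>
#include <cstdio>

int main()
{
    const auto start = std::chrono::system_clock::now(); // as in the patch

    // ... the work being timed: serialize, optimize, load, warm-up inference ...

    const long long elapsedUs = static_cast<long long>(
        std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now() - start).count());

    std::printf("prepareModel timing = %lld us\n", elapsedUs);
    return 0;
}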