author     Mike Kelly <mike.kelly@arm.com>    2019-07-22 14:06:00 +0100
committer  Mike Kelly <mike.kelly@arm.com>    2019-07-22 14:06:00 +0100
commit     65c42dc4d68ac163b77a3139feee3e7d4530b5c5 (patch)
tree       b3ae65334c2dbfc1cc3d5a2cb336a0244b1ececc
parent     9843c014726028b9082d5a9901db80b9dc519121 (diff)
IVGCVSW-3463 Fix Hal 1.2 Dynamic Output Shape VTS test failures
* Updating ArmnnPreparedModel_1_2 to work with output shapes and timing.

Change-Id: I06c4ecaf1e2c36ef77a0731ece4885fc3997cd3b
Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
-rw-r--r--  ArmnnPreparedModel.cpp       22
-rw-r--r--  ArmnnPreparedModel.hpp       22
-rw-r--r--  ArmnnPreparedModel_1_2.cpp  272
-rw-r--r--  ArmnnPreparedModel_1_2.hpp   35
-rw-r--r--  RequestThread.cpp            34
-rw-r--r--  RequestThread.hpp            12
6 files changed, 313 insertions, 84 deletions
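Note: the core of this change is that the asynchronous execution path no longer hands the raw HIDL callback interface down to RequestThread; each HAL level instead wraps its callback in a small descriptor struct, and RequestThread is templated on that descriptor type. A minimal sketch of the two descriptors follows; Status, Shape, Timing, Measure and Clock are stand-ins for the real HAL types (V1_0::ErrorStatus, V1_2::OutputShape, V1_2::Timing, MeasureTiming, std::chrono::steady_clock), not the driver's actual definitions.

    #include <chrono>
    #include <cstdint>
    #include <functional>
    #include <string>
    #include <vector>

    enum class Status { NONE, GENERAL_FAILURE };
    struct Shape  { std::vector<uint32_t> dimensions; bool isSufficient; };
    struct Timing { uint64_t timeOnDevice; uint64_t timeInDriver; };
    enum class Measure { NO, YES };
    using Clock = std::chrono::steady_clock;

    // 1.0 path: only a status and the calling function are reported back.
    struct Callback_1_0
    {
        std::function<void(Status, std::string)> callback;
    };

    // 1.2 path: output shapes and timing are reported as well, so the descriptor
    // also carries what the worker thread needs to fill in the timing.
    struct Callback_1_2
    {
        std::function<void(Status, std::vector<Shape>, const Timing&, std::string)> callback;
        Clock::time_point driverStart;   // captured in Execute(), before PostMsg
        Measure           measureTiming;
    };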
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
index 3256836e..462970aa 100644
--- a/ArmnnPreparedModel.cpp
+++ b/ArmnnPreparedModel.cpp
@@ -87,9 +87,8 @@ using namespace android::hardware;
namespace armnn_driver
{
-
template<typename HalVersion>
-RequestThread<ArmnnPreparedModel, HalVersion> ArmnnPreparedModel<HalVersion>::m_RequestThread;
+RequestThread<ArmnnPreparedModel, HalVersion, ArmnnCallback_1_0> ArmnnPreparedModel<HalVersion>::m_RequestThread;
template<typename HalVersion>
template <typename TensorBindingCollection>
@@ -218,10 +217,17 @@ Return<ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(const Request& reque
}
ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
+
+ auto cb = [callback](ErrorStatus errorStatus, std::string callingFunction)
+ {
+ NotifyCallbackAndCheck(callback, errorStatus, callingFunction);
+ };
+
+ ArmnnCallback_1_0 armnnCb;
+ armnnCb.callback = cb;
// post the request for asynchronous execution
- m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, callback);
+ m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb);
ALOGV("ArmnnPreparedModel::execute(...) after PostMsg");
-
return ErrorStatus::NONE; // successfully queued
}
@@ -230,7 +236,7 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
std::shared_ptr<armnn::InputTensors>& pInputTensors,
std::shared_ptr<armnn::OutputTensors>& pOutputTensors,
- const ::android::sp<V1_0::IExecutionCallback>& callback)
+ ArmnnCallback_1_0 cb)
{
ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
@@ -243,14 +249,14 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
if (status != armnn::Status::Success)
{
ALOGW("EnqueueWorkload failed");
- NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
+ cb.callback(ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
return;
}
}
catch (armnn::Exception& e)
{
ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
- NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
+ cb.callback(ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
return;
}
@@ -264,7 +270,7 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
pool.update();
}
- NotifyCallbackAndCheck(callback, ErrorStatus::NONE, "ExecuteGraph");
+ cb.callback(ErrorStatus::NONE, "ExecuteGraph");
}
template<typename HalVersion>
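Note: in the 1.0 execute() above, the HIDL callback proxy is captured by a lambda and stored in the descriptor, so ExecuteGraph() only ever sees a std::function. Below is a self-contained sketch of that wrapping pattern; IExecutionCallback, NotifyCallbackAndCheck and Wrap are simplified stand-ins (the real proxy is an ::android::sp<V1_0::IExecutionCallback>), not the driver's exact signatures.

    #include <functional>
    #include <memory>
    #include <string>

    enum class Status { NONE, GENERAL_FAILURE };

    // Stand-in for the generated HIDL proxy interface.
    struct IExecutionCallback
    {
        virtual void notify(Status) = 0;
        virtual ~IExecutionCallback() = default;
    };

    struct Callback_1_0 { std::function<void(Status, std::string)> callback; };

    // Stand-in for the driver's NotifyCallbackAndCheck helper.
    void NotifyCallbackAndCheck(const std::shared_ptr<IExecutionCallback>& cb, Status s, std::string)
    {
        cb->notify(s);
    }

    Callback_1_0 Wrap(const std::shared_ptr<IExecutionCallback>& callback)
    {
        // Capture the proxy by value so it stays alive until the worker thread runs.
        Callback_1_0 armnnCb;
        armnnCb.callback = [callback](Status errorStatus, std::string callingFunction)
        {
            NotifyCallbackAndCheck(callback, errorStatus, callingFunction);
        };
        return armnnCb;
    }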
diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp
index 275af316..33be972f 100644
--- a/ArmnnPreparedModel.hpp
+++ b/ArmnnPreparedModel.hpp
@@ -17,6 +17,12 @@
namespace armnn_driver
{
+using armnnExecuteCallback_1_0 = std::function<void(V1_0::ErrorStatus status, std::string callingFunction)>;
+
+struct ArmnnCallback_1_0
+{
+ armnnExecuteCallback_1_0 callback;
+};
template <typename HalVersion>
class ArmnnPreparedModel : public V1_0::IPreparedModel
@@ -39,7 +45,7 @@ public:
void ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
std::shared_ptr<armnn::InputTensors>& pInputTensors,
std::shared_ptr<armnn::OutputTensors>& pOutputTensors,
- const ::android::sp<V1_0::IExecutionCallback>& callback);
+ ArmnnCallback_1_0 callback);
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
@@ -49,15 +55,15 @@ private:
template <typename TensorBindingCollection>
void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- HalModel m_Model;
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ HalModel m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
- static RequestThread<ArmnnPreparedModel, HalVersion> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
+ static RequestThread<ArmnnPreparedModel, HalVersion, ArmnnCallback_1_0> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
};
}
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index f6b44621..a7997c72 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -37,7 +37,10 @@ unsigned long MicrosecondsDuration(TimePoint endPoint, TimePoint startPoint)
endPoint - startPoint).count());
}
-void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callback, ErrorStatus errorStatus,
+void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callback,
+ ErrorStatus errorStatus,
+ std::vector<OutputShape>,
+ const Timing,
std::string callingFunction)
{
Return<void> returned = callback->notify(errorStatus);
@@ -49,10 +52,13 @@ void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callb
}
}
-void NotifyCallbackAndCheck(const ::android::sp<V1_2::IExecutionCallback>& callback, ErrorStatus errorStatus,
+void NotifyCallbackAndCheck(const ::android::sp<V1_2::IExecutionCallback>& callback,
+ ErrorStatus errorStatus,
+ std::vector<OutputShape> outputShapes,
+ const Timing timing,
std::string callingFunction)
{
- Return<void> returned = callback->notify(errorStatus);
+ Return<void> returned = callback->notify_1_2(errorStatus, outputShapes, timing);
// This check is required, if the callback fails and it isn't checked it will bring down the service
if (!returned.isOk())
{
@@ -111,7 +117,8 @@ namespace armnn_driver
{
template<typename HalVersion>
-RequestThread<ArmnnPreparedModel_1_2, HalVersion> ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread;
+RequestThread<ArmnnPreparedModel_1_2, HalVersion, ArmnnCallback_1_2>
+ ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread;
template<typename HalVersion>
template<typename TensorBindingCollection>
@@ -165,15 +172,43 @@ template<typename HalVersion>
Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::execute(const Request& request,
const ::android::sp<V1_0::IExecutionCallback>& callback)
{
- return Execute<V1_0::IExecutionCallback>(request, callback);
+ if (callback.get() == nullptr)
+ {
+ ALOGE("ArmnnPreparedModel_1_2::execute invalid callback passed");
+ return ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ auto cb = [callback](ErrorStatus errorStatus,
+ std::vector<OutputShape> outputShapes,
+ const Timing& timing,
+ std::string callingFunction)
+ {
+ NotifyCallbackAndCheck(callback, errorStatus, outputShapes, timing, callingFunction);
+ };
+
+ return Execute(request, MeasureTiming::NO, cb);
}
template<typename HalVersion>
Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::execute_1_2(const Request& request,
- MeasureTiming,
+ MeasureTiming measureTiming,
const sp<V1_2::IExecutionCallback>& callback)
{
- return Execute<V1_2::IExecutionCallback>(request, callback);
+ if (callback.get() == nullptr)
+ {
+ ALOGE("ArmnnPreparedModel_1_2::execute_1_2 invalid callback passed");
+ return ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ auto cb = [callback](ErrorStatus errorStatus,
+ std::vector<OutputShape> outputShapes,
+ const Timing& timing,
+ std::string callingFunction)
+ {
+ NotifyCallbackAndCheck(callback, errorStatus, outputShapes, timing, callingFunction);
+ };
+
+ return Execute(request, measureTiming, cb);
}
template<typename HalVersion>
@@ -217,8 +252,8 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const Requ
cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming);
return Void();
}
+ std::vector<OutputShape> outputShapes(request.outputs.size());
- // add the inputs and outputs with their data
try
{
pInputTensors->reserve(request.inputs.size());
@@ -238,8 +273,8 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const Requ
pInputTensors->emplace_back(i, inputTensor);
}
-
pOutputTensors->reserve(request.outputs.size());
+
for (unsigned int i = 0; i < request.outputs.size(); i++)
{
const auto& outputArg = request.outputs[i];
@@ -253,6 +288,28 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const Requ
cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming);
return Void();
}
+ const size_t outputSize = outputTensorInfo.GetNumBytes();
+ const size_t bufferSize = pMemPools->at(outputArg.location.poolIndex).getHidlMemory().size();
+
+ hidl_vec<uint32_t> dimensions;
+
+ armnn::TensorShape tensorShape = outputTensorInfo.GetShape();
+ const unsigned int numDims = tensorShape.GetNumDimensions();
+ dimensions.resize(numDims);
+
+ for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx)
+ {
+ dimensions[outputIdx] = tensorShape[outputIdx];
+ }
+ outputShapes[i].dimensions = dimensions;
+ outputShapes[i].isSufficient = bufferSize >= outputSize;
+
+ if (bufferSize < outputSize)
+ {
+ ALOGW("ArmnnPreparedModel_1_2::Execute failed");
+ cb(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, outputShapes, g_NoTiming);
+ return Void();
+ }
pOutputTensors->emplace_back(i, outputTensor);
}
@@ -314,15 +371,82 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const Requ
timing.timeInDriver = MicrosecondsDuration(driverEnd, driverStart);
ALOGV("ArmnnPreparedModel_1_2::executeSynchronously timing Device = %lu Driver = %lu", timing.timeOnDevice,
timing.timeInDriver);
- cb(ErrorStatus::NONE, {}, timing);
+ cb(ErrorStatus::NONE, outputShapes, timing);
}
else
{
- cb(ErrorStatus::NONE, {}, g_NoTiming);
+ cb(ErrorStatus::NONE, outputShapes, g_NoTiming);
}
return Void();
}
+class ArmnnBurstExecutorWithCache : public ExecutionBurstServer::IBurstExecutorWithCache {
+public:
+ ArmnnBurstExecutorWithCache(IPreparedModel* preparedModel)
+ : m_PreparedModel(preparedModel)
+ {}
+
+ bool isCacheEntryPresent(int32_t slot) const override
+ {
+ const auto it = m_MemoryCache.find(slot);
+ return (it != m_MemoryCache.end()) && it->second.valid();
+ }
+
+ void addCacheEntry(const hidl_memory& memory, int32_t slot) override
+ {
+ m_MemoryCache[slot] = memory;
+ }
+
+ void removeCacheEntry(int32_t slot) override
+ {
+ m_MemoryCache.erase(slot);
+ }
+
+ std::tuple<ErrorStatus, hidl_vec<OutputShape>, Timing> execute(
+ const Request& request, const std::vector<int32_t>& slots,
+ MeasureTiming measure) override
+ {
+ ALOGV("ArmnnPreparedModel_1_2::BurstExecutorWithCache::execute");
+ hidl_vec<hidl_memory> pools(slots.size());
+
+ std::transform(slots.begin(), slots.end(), pools.begin(), [this](int32_t slot)
+ {
+ return m_MemoryCache[slot];
+ });
+
+ Request fullRequest = request;
+ fullRequest.pools = std::move(pools);
+
+ // Setup Callback
+ ErrorStatus returnedStatus = ErrorStatus::GENERAL_FAILURE;
+ hidl_vec<OutputShape> returnedOutputShapes;
+ Timing returnedTiming;
+ auto cb = [&returnedStatus, &returnedOutputShapes, &returnedTiming](ErrorStatus status,
+ const hidl_vec<OutputShape>& outputShapes,
+ const Timing& timing)
+ {
+ returnedStatus = status;
+ returnedOutputShapes = outputShapes;
+ returnedTiming = timing;
+ };
+
+ // Execute
+ ALOGV("ArmnnPreparedModel_1_2::BurstExecutorWithCache executing");
+ const Return<void> ret = m_PreparedModel->executeSynchronously(fullRequest, measure, cb);
+
+ if (!ret.isOk() || returnedStatus != ErrorStatus::NONE)
+ {
+ ALOGE("ArmnnPreparedModel_1_2::BurstExecutorWithCache::error executing");
+ }
+ return std::make_tuple(returnedStatus, std::move(returnedOutputShapes), returnedTiming);
+ }
+
+private:
+ IPreparedModel* const m_PreparedModel;
+ std::map<int, hidl_memory> m_MemoryCache;
+};
+
+
template<typename HalVersion>
Return<void> ArmnnPreparedModel_1_2<HalVersion>::configureExecutionBurst(
const sp<V1_2::IBurstCallback>& callback,
@@ -331,7 +455,12 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::configureExecutionBurst(
V1_2::IPreparedModel::configureExecutionBurst_cb cb)
{
ALOGV("ArmnnPreparedModel_1_2::configureExecutionBurst");
- const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(callback, requestChannel, resultChannel, this);
+ const std::shared_ptr<ArmnnBurstExecutorWithCache> executorWithCache =
+ std::make_shared<ArmnnBurstExecutorWithCache>(this);
+ const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(callback,
+ requestChannel,
+ resultChannel,
+ executorWithCache);
if (burst == nullptr)
{
@@ -349,27 +478,64 @@ void ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
std::shared_ptr<armnn::InputTensors>& pInputTensors,
std::shared_ptr<armnn::OutputTensors>& pOutputTensors,
- const ::android::sp<V1_0::IExecutionCallback>& callback)
+ ArmnnCallback_1_2 cb)
{
ALOGV("ArmnnPreparedModel_1_2::ExecuteGraph(...)");
+ TimePoint driverEnd, deviceStart, deviceEnd;
+
DumpTensorsIfRequired("Input", *pInputTensors);
+ std::vector<std::pair<int, armnn::Tensor> > outputTensors = *pOutputTensors.get();
+ std::vector<OutputShape> outputShapes(outputTensors.size());
+
+ for (unsigned int i = 0; i < outputTensors.size(); i++)
+ {
+ std::pair<int, armnn::Tensor> outputTensorPair = outputTensors[i];
+ const armnn::Tensor outputTensor = outputTensorPair.second;
+ const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo();
+
+ hidl_vec<uint32_t> dimensions;
+
+ armnn::TensorShape tensorShape = outputTensorInfo.GetShape();
+ const unsigned int numDims = tensorShape.GetNumDimensions();
+ dimensions.resize(numDims);
+
+ for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx)
+ {
+ dimensions[outputIdx] = tensorShape[outputIdx];
+ }
+ outputShapes[i].dimensions = dimensions;
+ outputShapes[i].isSufficient = true;
+ }
+
// run it
try
{
+ if (cb.measureTiming == MeasureTiming::YES)
+ {
+ deviceStart = Now();
+ }
+
armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, *pInputTensors, *pOutputTensors);
+
+ if (cb.measureTiming == MeasureTiming::YES)
+ {
+ deviceEnd = Now();
+ }
if (status != armnn::Status::Success)
{
ALOGW("EnqueueWorkload failed");
- NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::ExecuteGraph");
+ cb.callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming,
+ "ArmnnPreparedModel_1_2::ExecuteGraph");
return;
}
}
catch (armnn::Exception& e)
{
ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
- NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::ExecuteGraph");
+ cb.callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming,
+ "ArmnnPreparedModel_1_2::ExecuteGraph");
return;
}
@@ -383,7 +549,16 @@ void ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
pool.update();
}
- NotifyCallbackAndCheck(callback, ErrorStatus::NONE, "ExecuteGraph");
+ if (cb.measureTiming == MeasureTiming::YES)
+ {
+ driverEnd = Now();
+ Timing timing;
+ timing.timeOnDevice = MicrosecondsDuration(deviceEnd, deviceStart);
+ timing.timeInDriver = MicrosecondsDuration(driverEnd, cb.driverStart);
+ cb.callback(ErrorStatus::NONE, outputShapes, timing, "ExecuteGraph");
+ } else {
+ cb.callback(ErrorStatus::NONE, outputShapes, g_NoTiming, "ExecuteGraph");
+ }
}
template<typename HalVersion>
@@ -428,28 +603,29 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs()
}
template<typename HalVersion>
-template<typename ExecutionCallback>
Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request& request,
- const sp<ExecutionCallback>& callback)
+ MeasureTiming measureTiming,
+ armnnExecuteCallback_1_2 callback)
{
- ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
- m_RequestCount++;
+ TimePoint driverStart;
- if (callback.get() == nullptr)
+ if (measureTiming == MeasureTiming::YES)
{
- ALOGE("ArmnnPreparedModel_1_2::execute invalid callback passed");
- return ErrorStatus::INVALID_ARGUMENT;
+ driverStart = Now();
}
+ ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
+ m_RequestCount++;
+
if (!android::nn::validateRequest(request, m_Model))
{
- NotifyCallbackAndCheck(callback, ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel_1_2::execute");
+ callback(ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
return ErrorStatus::INVALID_ARGUMENT;
}
if (!m_RequestInputsAndOutputsDumpDir.empty())
{
- ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get()));
+ ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(&callback));
}
// allocate the tensors on the heap, as they are passed to the request thread
@@ -462,7 +638,7 @@ Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request&
if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
{
- NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::execute");
+ callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
return ErrorStatus::GENERAL_FAILURE;
}
@@ -480,8 +656,7 @@ Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request&
if (inputTensor.GetMemoryArea() == nullptr)
{
ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
- NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE,
- "ArmnnPreparedModel_1_2::execute");
+ callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
return ErrorStatus::GENERAL_FAILURE;
}
@@ -489,6 +664,8 @@ Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request&
}
pOutputTensors->reserve(request.outputs.size());
+ std::vector<OutputShape> outputShapes(request.outputs.size());
+
for (unsigned int i = 0; i < request.outputs.size(); i++)
{
const auto& outputArg = request.outputs[i];
@@ -496,33 +673,58 @@ Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request&
const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
if (outputTensor.GetMemoryArea() == nullptr)
-
{
ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
- NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE,
- "ArmnnPreparedModel_1_2::execute");
+ callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
return ErrorStatus::GENERAL_FAILURE;
}
+ const size_t outputSize = outputTensorInfo.GetNumBytes();
+ const size_t bufferSize = pMemPools->at(outputArg.location.poolIndex).getHidlMemory().size();
pOutputTensors->emplace_back(i, outputTensor);
+
+ hidl_vec<uint32_t> dimensions;
+
+ armnn::TensorShape tensorShape = outputTensorInfo.GetShape();
+ const unsigned int numDims = tensorShape.GetNumDimensions();
+ dimensions.resize(numDims);
+
+ for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx)
+ {
+ dimensions[outputIdx] = tensorShape[outputIdx];
+ }
+ outputShapes[i].dimensions = dimensions;
+ outputShapes[i].isSufficient = bufferSize >= outputSize;
+
+ if (bufferSize < outputSize)
+ {
+ ALOGW("ArmnnPreparedModel_1_2::Execute failed");
+ callback(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE,
+ outputShapes,
+ g_NoTiming,
+ "ArmnnPreparedModel_1_2::Execute");
+ return ErrorStatus::NONE;
+ }
}
}
catch (armnn::Exception& e)
{
ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what());
- NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::execute");
+ callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
return ErrorStatus::GENERAL_FAILURE;
}
ALOGV("ArmnnPreparedModel_1_2::execute(...) before PostMsg");
// post the request for asynchronous execution
- m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, callback);
+ ArmnnCallback_1_2 armnnCb;
+ armnnCb.callback = callback;
+ armnnCb.measureTiming = measureTiming;
+ armnnCb.driverStart = driverStart;
+ m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb);
ALOGV("ArmnnPreparedModel_1_2::execute(...) after PostMsg");
-
return ErrorStatus::NONE;
}
-
#ifdef ARMNN_ANDROID_NN_V1_2
template class ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>;
#endif
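Note: configureExecutionBurst now passes an IBurstExecutorWithCache implementation to ExecutionBurstServer::create instead of the prepared model itself; the executor caches memory per slot and rebuilds the request pools from the cached entries before calling executeSynchronously. A reduced sketch of that slot cache is below, with std::vector<uint8_t> standing in for hidl_memory and SlotMemoryCache/resolvePools as illustrative names only.

    #include <cstdint>
    #include <map>
    #include <vector>

    using Memory = std::vector<uint8_t>;   // stand-in for hidl_memory

    class SlotMemoryCache
    {
    public:
        bool isCacheEntryPresent(int32_t slot) const
        {
            const auto it = m_MemoryCache.find(slot);
            return (it != m_MemoryCache.end()) && !it->second.empty();
        }

        void addCacheEntry(const Memory& memory, int32_t slot) { m_MemoryCache[slot] = memory; }
        void removeCacheEntry(int32_t slot)                    { m_MemoryCache.erase(slot); }

        // Resolve the memory pools for a burst request from the cached slots,
        // mirroring the std::transform over slots in the class added above.
        std::vector<Memory> resolvePools(const std::vector<int32_t>& slots)
        {
            std::vector<Memory> pools;
            pools.reserve(slots.size());
            for (int32_t slot : slots)
            {
                pools.push_back(m_MemoryCache[slot]);
            }
            return pools;
        }

    private:
        std::map<int32_t, Memory> m_MemoryCache;
    };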
diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp
index 4e883b6b..b97895e8 100644
--- a/ArmnnPreparedModel_1_2.hpp
+++ b/ArmnnPreparedModel_1_2.hpp
@@ -19,6 +19,18 @@
namespace armnn_driver
{
+typedef std::function<void(::android::hardware::neuralnetworks::V1_0::ErrorStatus status,
+ std::vector<::android::hardware::neuralnetworks::V1_2::OutputShape> outputShapes,
+ const ::android::hardware::neuralnetworks::V1_2::Timing& timing,
+ std::string callingFunction)> armnnExecuteCallback_1_2;
+
+struct ArmnnCallback_1_2
+{
+ armnnExecuteCallback_1_2 callback;
+ TimePoint driverStart;
+ MeasureTiming measureTiming;
+};
+
template <typename HalVersion>
class ArmnnPreparedModel_1_2 : public V1_2::IPreparedModel
{
@@ -34,7 +46,7 @@ public:
virtual ~ArmnnPreparedModel_1_2();
virtual Return<ErrorStatus> execute(const Request& request,
- const ::android::sp<V1_0::IExecutionCallback>& callback) override;
+ const sp<V1_0::IExecutionCallback>& callback) override;
virtual Return<ErrorStatus> execute_1_2(const Request& request, MeasureTiming measure,
const sp<V1_2::IExecutionCallback>& callback) override;
@@ -53,28 +65,29 @@ public:
void ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
std::shared_ptr<armnn::InputTensors>& pInputTensors,
std::shared_ptr<armnn::OutputTensors>& pOutputTensors,
- const ::android::sp<V1_0::IExecutionCallback>& callback);
+ ArmnnCallback_1_2 callbackDescriptor);
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
bool ExecuteWithDummyInputs();
private:
- template <typename ExecutionCallback>
- Return <ErrorStatus> Execute(const Request &request, const sp <ExecutionCallback> &callback);
+ Return <ErrorStatus> Execute(const Request& request,
+ MeasureTiming measureTiming,
+ armnnExecuteCallback_1_2 callback);
template <typename TensorBindingCollection>
void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- V1_2::Model m_Model;
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ V1_2::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
- static RequestThread<ArmnnPreparedModel_1_2, HalVersion> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
+ static RequestThread<ArmnnPreparedModel_1_2, HalVersion, ArmnnCallback_1_2> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
};
}
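Note: both Execute() and executeSynchronously() now populate one OutputShape per output from the ArmNN tensor info and compare the client's pool size against the tensor's byte size, reporting OUTPUT_INSUFFICIENT_SIZE together with the shapes gathered so far when a buffer is too small, which appears to be what the dynamic-output-shape VTS cases exercise. A minimal sketch of that check; MakeOutputShape is a hypothetical helper and Shape mirrors V1_2::OutputShape, with the arguments standing in for the armnn::TensorInfo queries.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Shape
    {
        std::vector<uint32_t> dimensions;
        bool isSufficient;
    };

    // Build the reported shape and flag whether the client-supplied buffer can hold the output.
    Shape MakeOutputShape(const std::vector<uint32_t>& tensorDims, size_t outputBytes, size_t bufferBytes)
    {
        Shape shape;
        shape.dimensions.assign(tensorDims.begin(), tensorDims.end());
        shape.isSufficient = bufferBytes >= outputBytes;
        return shape;
    }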
diff --git a/RequestThread.cpp b/RequestThread.cpp
index 4b646034..052c5c11 100644
--- a/RequestThread.cpp
+++ b/RequestThread.cpp
@@ -21,15 +21,15 @@ using namespace android;
namespace armnn_driver
{
-template <template <typename HalVersion> class PreparedModel, typename HalVersion>
-RequestThread<PreparedModel, HalVersion>::RequestThread()
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback>
+RequestThread<PreparedModel, HalVersion, Callback>::RequestThread()
{
ALOGV("RequestThread::RequestThread()");
m_Thread = std::make_unique<std::thread>(&RequestThread::Process, this);
}
-template <template <typename HalVersion> class PreparedModel, typename HalVersion>
-RequestThread<PreparedModel, HalVersion>::~RequestThread()
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback>
+RequestThread<PreparedModel, HalVersion, Callback>::~RequestThread()
{
ALOGV("RequestThread::~RequestThread()");
@@ -54,12 +54,12 @@ RequestThread<PreparedModel, HalVersion>::~RequestThread()
catch (const std::exception&) { } // Swallow any exception.
}
-template <template <typename HalVersion> class PreparedModel, typename HalVersion>
-void RequestThread<PreparedModel, HalVersion>::PostMsg(PreparedModel<HalVersion>* model,
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback>
+void RequestThread<PreparedModel, HalVersion, Callback>::PostMsg(PreparedModel<HalVersion>* model,
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools,
std::shared_ptr<armnn::InputTensors>& inputTensors,
std::shared_ptr<armnn::OutputTensors>& outputTensors,
- const ::android::sp<V1_0::IExecutionCallback>& callback)
+ Callback callback)
{
ALOGV("RequestThread::PostMsg(...)");
auto data = std::make_shared<AsyncExecuteData>(model,
@@ -71,8 +71,8 @@ void RequestThread<PreparedModel, HalVersion>::PostMsg(PreparedModel<HalVersion>
PostMsg(pMsg);
}
-template <template <typename HalVersion> class PreparedModel, typename HalVersion>
-void RequestThread<PreparedModel, HalVersion>::PostMsg(std::shared_ptr<ThreadMsg>& pMsg)
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback>
+void RequestThread<PreparedModel, HalVersion, Callback>::PostMsg(std::shared_ptr<ThreadMsg>& pMsg)
{
ALOGV("RequestThread::PostMsg(pMsg)");
// Add a message to the queue and notify the request thread
@@ -81,8 +81,8 @@ void RequestThread<PreparedModel, HalVersion>::PostMsg(std::shared_ptr<ThreadMsg
m_Cv.notify_one();
}
-template <template <typename HalVersion> class PreparedModel, typename HalVersion>
-void RequestThread<PreparedModel, HalVersion>::Process()
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback>
+void RequestThread<PreparedModel, HalVersion, Callback>::Process()
{
ALOGV("RequestThread::Process()");
while (true)
@@ -111,7 +111,7 @@ void RequestThread<PreparedModel, HalVersion>::Process()
model->ExecuteGraph(pMsg->data->m_MemPools,
pMsg->data->m_InputTensors,
pMsg->data->m_OutputTensors,
- pMsg->data->m_callback);
+ pMsg->data->m_Callback);
break;
}
@@ -139,16 +139,16 @@ void RequestThread<PreparedModel, HalVersion>::Process()
/// Class template specializations
///
-template class RequestThread<ArmnnPreparedModel, hal_1_0::HalPolicy>;
+template class RequestThread<ArmnnPreparedModel, hal_1_0::HalPolicy, ArmnnCallback_1_0>;
#ifdef ARMNN_ANDROID_NN_V1_1
-template class RequestThread<armnn_driver::ArmnnPreparedModel, hal_1_1::HalPolicy>;
+template class RequestThread<armnn_driver::ArmnnPreparedModel, hal_1_1::HalPolicy, ArmnnCallback_1_0>;
#endif
#ifdef ARMNN_ANDROID_NN_V1_2
-template class RequestThread<ArmnnPreparedModel, hal_1_1::HalPolicy>;
-template class RequestThread<ArmnnPreparedModel, hal_1_2::HalPolicy>;
-template class RequestThread<ArmnnPreparedModel_1_2, hal_1_2::HalPolicy>;
+template class RequestThread<ArmnnPreparedModel, hal_1_1::HalPolicy, ArmnnCallback_1_0>;
+template class RequestThread<ArmnnPreparedModel, hal_1_2::HalPolicy, ArmnnCallback_1_0>;
+template class RequestThread<ArmnnPreparedModel_1_2, hal_1_2::HalPolicy, ArmnnCallback_1_2>;
#endif
} // namespace armnn_driver
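Note: RequestThread gains a third template parameter for the callback descriptor, so the same worker-thread machinery serves both the 1.0 and 1.2 prepared models, with the explicit instantiations above selecting the matching descriptor per HAL policy. A stripped-down sketch of that parameterisation follows; it omits the thread and condition variable, and Policy10/Policy12/Model/Model12 are hypothetical stand-ins for the real HAL policies and prepared-model templates.

    #include <functional>
    #include <queue>
    #include <string>

    // Minimal stand-ins for the two callback descriptors.
    struct Callback_1_0 { std::function<void(int, std::string)> callback; };
    struct Callback_1_2 { std::function<void(int, std::string)> callback; /* plus shapes/timing */ };

    // The prepared-model types are themselves templates on a HAL policy, hence the
    // template-template parameter; Callback selects which descriptor travels with a message.
    template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback>
    class RequestThread
    {
    public:
        void PostMsg(PreparedModel<HalVersion>* model, Callback callback)
        {
            m_Queue.push({model, callback});
        }

    private:
        struct Msg { PreparedModel<HalVersion>* model; Callback callback; };
        std::queue<Msg> m_Queue;
    };

    // Sketch of the explicit instantiation pattern used at the end of RequestThread.cpp.
    struct Policy10 {}; struct Policy12 {};
    template <typename HalVersion> class Model   {};
    template <typename HalVersion> class Model12 {};

    template class RequestThread<Model,   Policy10, Callback_1_0>;
    template class RequestThread<Model12, Policy12, Callback_1_2>;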
diff --git a/RequestThread.hpp b/RequestThread.hpp
index dc1b535a..253d104c 100644
--- a/RequestThread.hpp
+++ b/RequestThread.hpp
@@ -18,8 +18,10 @@
namespace armnn_driver
{
+using TimePoint = std::chrono::steady_clock::time_point;
+static const TimePoint g_Min = std::chrono::steady_clock::time_point::min();
-template<template <typename HalVersion> class PreparedModel, typename HalVersion>
+template<template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback>
class RequestThread
{
public:
@@ -39,7 +41,7 @@ public:
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools,
std::shared_ptr<armnn::InputTensors>& inputTensors,
std::shared_ptr<armnn::OutputTensors>& outputTensors,
- const ::android::sp<V1_0::IExecutionCallback>& callback);
+ Callback callback);
private:
RequestThread(const RequestThread&) = delete;
@@ -52,12 +54,12 @@ private:
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools,
std::shared_ptr<armnn::InputTensors>& inputTensors,
std::shared_ptr<armnn::OutputTensors>& outputTensors,
- const ::android::sp<V1_0::IExecutionCallback>& cb)
+ Callback callback)
: m_Model(model)
, m_MemPools(memPools)
, m_InputTensors(inputTensors)
, m_OutputTensors(outputTensors)
- , m_callback(cb)
+ , m_Callback(callback)
{
}
@@ -65,7 +67,7 @@ private:
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
std::shared_ptr<armnn::InputTensors> m_InputTensors;
std::shared_ptr<armnn::OutputTensors> m_OutputTensors;
- const ::android::sp<V1_0::IExecutionCallback> m_callback;
+ Callback m_Callback;
};
enum class ThreadMsgType
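Note: the timing reported through notify_1_2 is derived from std::chrono::steady_clock; driverStart is captured at the top of Execute() and carried in the callback descriptor, while the device interval brackets EnqueueWorkload on the worker thread. A small runnable sketch of the duration helper under those assumptions; Now and MicrosecondsDuration mirror the helpers in ArmnnPreparedModel_1_2.cpp, and the sleep merely stands in for the workload.

    #include <chrono>
    #include <cstdint>
    #include <iostream>
    #include <thread>

    using TimePoint = std::chrono::steady_clock::time_point;

    TimePoint Now() { return std::chrono::steady_clock::now(); }

    uint64_t MicrosecondsDuration(TimePoint endPoint, TimePoint startPoint)
    {
        return static_cast<uint64_t>(
            std::chrono::duration_cast<std::chrono::microseconds>(endPoint - startPoint).count());
    }

    int main()
    {
        const TimePoint driverStart = Now();   // captured in Execute(), before PostMsg
        const TimePoint deviceStart = Now();   // worker thread, just before EnqueueWorkload
        std::this_thread::sleep_for(std::chrono::milliseconds(2)); // placeholder for the workload
        const TimePoint deviceEnd   = Now();
        const TimePoint driverEnd   = Now();   // worker thread, after the memory pools are updated

        std::cout << "timeOnDevice(us) = " << MicrosecondsDuration(deviceEnd, deviceStart) << '\n'
                  << "timeInDriver(us) = " << MicrosecondsDuration(driverEnd, driverStart) << '\n';
        return 0;
    }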