aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sadik Armagan <sadik.armagan@arm.com> 2020-04-23 12:56:05 +0100
committer Sadik Armagan <sadik.armagan@arm.com> 2020-04-23 12:56:05 +0100
commit d7be72ee37f5f4de1d981a400bf6d79b08ac0b2a (patch)
tree 68ade07ae1e7b8c59b0e8ec936dfd530deb7e056
parent 51e0b138f9fd2bd5eb2f22d9e861e77d33397680 (diff)
download android-nn-driver-d7be72ee37f5f4de1d981a400bf6d79b08ac0b2a.tar.gz
IVGCVSW-4677 Fix FencedComputeTest Hal 1.3
* Implemented executeFenced() function in HAL 1.3 Driver

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I11f8f532e9688d4e194992b46dbed575a19be3c5
-rw-r--r-- Android.mk 8
-rw-r--r-- ArmnnPreparedModel_1_3.cpp 166
-rw-r--r-- ArmnnPreparedModel_1_3.hpp 13
-rw-r--r-- test/Android.mk 3
4 files changed, 164 insertions, 26 deletions
diff --git a/Android.mk b/Android.mk
index 5431a74c..bf8bc65b 100644
--- a/Android.mk
+++ b/Android.mk
@@ -184,6 +184,7 @@ endif # Q or later
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
+ libsync \
android.hardware.neuralnetworks@1.3
endif # R or later
@@ -307,6 +308,7 @@ endif # PLATFORM_VERSION == Q
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
+ libsync \
android.hardware.neuralnetworks@1.3
endif # R or later
@@ -421,6 +423,7 @@ LOCAL_SHARED_LIBRARIES := \
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
+ libsync \
android.hardware.neuralnetworks@1.3
endif # R or later
@@ -527,6 +530,7 @@ LOCAL_SHARED_LIBRARIES := \
libcutils \
android.hidl.allocator@1.0 \
android.hidl.memory@1.0 \
+ libsync \
android.hardware.neuralnetworks@1.0 \
android.hardware.neuralnetworks@1.1 \
android.hardware.neuralnetworks@1.2 \
@@ -631,6 +635,7 @@ endif # PLATFORM_VERSION == Q
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
+ libsync \
android.hardware.neuralnetworks@1.3
endif # R or later
@@ -729,6 +734,7 @@ endif # PLATFORM_VERSION == Q
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
+ libsync \
android.hardware.neuralnetworks@1.3
endif # PLATFORM_VERSION == R
@@ -816,6 +822,7 @@ LOCAL_SHARED_LIBRARIES := \
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
+ libsync \
android.hardware.neuralnetworks@1.3
endif # R or later
@@ -890,6 +897,7 @@ LOCAL_SHARED_LIBRARIES := \
libui \
libfmq \
libcutils \
+ libsync \
android.hidl.allocator@1.0 \
android.hidl.memory@1.0 \
android.hardware.neuralnetworks@1.0 \
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index 5b45b4a1..6c4aec9a 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -2,6 +2,10 @@
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
+// Note: the ArmnnFencedExecutionCallback class and the code snippet in the executeFenced() function
+// in this file are based on Android code
+// under the Apache 2.0 license. See comments below for details.
+//
#define LOG_TAG "ArmnnDriver"
@@ -9,6 +13,7 @@
#include "Utils.hpp"
#include <Utils.h>
+#include <android/sync.h>
#include <boost/format.hpp>
#include <log/log.h>
#include <OperationsUtils.h>
@@ -254,10 +259,31 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::execute_1_3(
return Execute(request, measureTiming, cb);
}
+/// This class is inspired by the sample implementation in Android named SampleFencedExecutionCallback.
+/// The original code is licensed under Apache-2.0 and can be found at the following link:
+/// https://android.googlesource.com/platform/frameworks/ml/+/master/nn/driver/sample/SampleDriver.h
+class ArmnnFencedExecutionCallback : public V1_3::IFencedExecutionCallback // holds one status and two Timing values fixed at construction
+{
+public:
+ ArmnnFencedExecutionCallback(V1_3::ErrorStatus errorStatus, Timing timing, Timing fenceTiming) // all three are taken by value
+ : m_ErrorStatus(errorStatus), m_Timing(timing), m_FenceTiming(fenceTiming) {} // copied into the members; nothing else is done
+ ~ArmnnFencedExecutionCallback() {} // empty body: the class defines no cleanup of its own
+ // getExecutionInfo: hands the stored status and both timings to the supplied callback, unchanged.
+ Return<void> getExecutionInfo(getExecutionInfo_cb callback) override
+ {
+ callback(m_ErrorStatus, m_Timing, m_FenceTiming); // argument order: status, timing, fence timing
+ return Void();
+ }
+private:
+ V1_3::ErrorStatus m_ErrorStatus; // status passed back through getExecutionInfo()
+ Timing m_Timing; // second argument given to the getExecutionInfo() callback
+ Timing m_FenceTiming; // third argument given to the getExecutionInfo() callback
+};
+
template<typename HalVersion>
-Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Request&,
- const hidl_vec<hidl_handle>&,
- MeasureTiming,
+Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Request& request,
+ const hidl_vec<hidl_handle>& fenceWaitFor,
+ MeasureTiming measureTiming,
const OptionalTimePoint& deadline,
const OptionalTimeoutDuration& loopTimeoutDuration,
const OptionalTimeoutDuration&,
@@ -281,7 +307,104 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
ALOGW("ArmnnPreparedModel_1_3::executeFenced parameter loopTimeoutDuration is set but not supported.");
}
- cb(ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
+ ExecutionContext_1_3 ctx;
+ if (measureTiming == MeasureTiming::YES)
+ {
+ ctx.measureTimings = measureTiming;
+ ctx.driverStart = Now();
+ }
+
+ ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str());
+ m_RequestCount++;
+
+ if (!android::nn::validateRequest(request, m_Model))
+ {
+ cb(ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
+ return Void();
+ }
+
+ if (!m_RequestInputsAndOutputsDumpDir.empty())
+ {
+ ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(&cb));
+ }
+
+ // This code snippet is inspired by the sample implementation in Android named SampleDriver::executeFenced()
+ // function. The original code is licensed under Apache-2.0 and can be found at the following link:
+ // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/driver/sample/SampleDriver.cpp
+ const auto fenceSize = fenceWaitFor.size();
+ for (unsigned int index = 0; index < fenceSize; ++index)
+ {
+ auto fenceNativeHandle = fenceWaitFor[index].getNativeHandle();
+ if (!fenceNativeHandle)
+ {
+ cb(ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
+ return Void();
+ }
+
+ if (sync_wait(fenceNativeHandle->data[0], -1) < 0)
+ {
+ ALOGE("ArmnnPreparedModel_1_3::executeFenced sync fence failed.");
+ cb(ErrorStatus::GENERAL_FAILURE, hidl_handle(nullptr), nullptr);
+ return Void();
+ }
+ }
+
+ TimePoint fenceExecutionStart;
+ if (measureTiming == MeasureTiming::YES)
+ {
+ fenceExecutionStart = Now();
+ }
+
+ // map the memory pool into shared pointers
+ // use a shared memory pools vector on the heap, as it is passed to the request thread
+ auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
+
+ // allocate the tensors on the heap, as they are passed to the request thread
+ auto inputs = std::make_shared<armnn::InputTensors>();
+ auto outputs = std::make_shared<armnn::OutputTensors>();
+
+ auto [status, outShapes, timings, message] = PrepareMemoryForIO(*inputs, *outputs, *memPools, request);
+ if (status != V1_3::ErrorStatus::NONE)
+ {
+ cb(ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
+ return Void();
+ }
+
+ ALOGV("ArmnnPreparedModel_1_3::executeFenced(...) before ExecuteGraph");
+
+ // call it with nullCallback for now as we will report the error status from here..
+ auto nullCallback = [](V1_3::ErrorStatus, std::vector<OutputShape>, const Timing&, std::string) {};
+ CallbackContext_1_3 cbCtx;
+ cbCtx.callback = nullCallback;
+ cbCtx.ctx = ctx;
+
+ auto errorStatus = ExecuteGraph(memPools, *inputs, *outputs, cbCtx);
+ if (errorStatus != V1_3::ErrorStatus::NONE)
+ {
+ cb(errorStatus, hidl_handle(nullptr), nullptr);
+ return Void();
+ }
+ ALOGV("ArmnnPreparedModel_1_3::executeFenced(...) after ExecuteGraph");
+
+ Timing timing = g_NoTiming;
+ Timing fenceTiming = g_NoTiming;
+ if (measureTiming == MeasureTiming::YES)
+ {
+ TimePoint driverEnd = Now();
+ timing.timeOnDevice = MicrosecondsDuration(ctx.deviceEnd, ctx.deviceStart);
+ timing.timeInDriver = MicrosecondsDuration(driverEnd, ctx.driverStart);
+ ALOGV("ArmnnPreparedModel_1_2::fenceExecutionTiming - Device = %lu Driver = %lu",
+ timing.timeOnDevice, timing.timeInDriver);
+
+ fenceTiming.timeOnDevice = MicrosecondsDuration(ctx.deviceEnd, ctx.deviceStart);
+ fenceTiming.timeInDriver = MicrosecondsDuration(driverEnd, fenceExecutionStart);
+ ALOGV("ArmnnPreparedModel_1_2::fenceFinishExecutionTiming - Device = %lu Driver = %lu",
+ fenceTiming.timeOnDevice, fenceTiming.timeInDriver);
+ }
+
+ sp<ArmnnFencedExecutionCallback> armnnFencedExecutionCallback =
+ new ArmnnFencedExecutionCallback(ErrorStatus::NONE, timing, fenceTiming);
+ cb(ErrorStatus::NONE, hidl_handle(nullptr), armnnFencedExecutionCallback);
return Void();
}
@@ -540,7 +663,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::configureExecutionBurst(
template<typename HalVersion>
template<typename CallbackContext>
-bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
+Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
armnn::InputTensors& inputTensors,
armnn::OutputTensors& outputTensors,
@@ -567,34 +690,33 @@ bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
{
if (cb.ctx.measureTimings == MeasureTiming::YES)
{
- deviceStart = Now();
+ cb.ctx.deviceStart = Now();
}
armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
if (cb.ctx.measureTimings == MeasureTiming::YES)
{
- deviceEnd = Now();
+ cb.ctx.deviceEnd = Now();
}
if (status != armnn::Status::Success)
{
ALOGW("EnqueueWorkload failed");
- cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming,
- "ArmnnPreparedModel_1_3::ExecuteGraph");
- return false;
+ cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
catch (armnn::Exception& e)
{
ALOGW("armnn:Exception caught from EnqueueWorkload: %s", e.what());
cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
- return false;
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
}
catch (std::exception& e)
{
ALOGE("std::exception caught from EnqueueWorkload: %s", e.what());
cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
- return false;
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
}
CommitPools(*pMemPools);
@@ -605,16 +727,16 @@ bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
{
driverEnd = Now();
Timing timing;
- timing.timeOnDevice = MicrosecondsDuration(deviceEnd, deviceStart);
+ timing.timeOnDevice = MicrosecondsDuration(cb.ctx.deviceEnd, cb.ctx.deviceStart);
timing.timeInDriver = MicrosecondsDuration(driverEnd, cb.ctx.driverStart);
ALOGV("ArmnnPreparedModel_1_2::execute timing - Device = %lu Driver = %lu", timing.timeOnDevice,
timing.timeInDriver);
cb.callback(V1_3::ErrorStatus::NONE, outputShapes, timing, "ArmnnPreparedModel_1_3::ExecuteGraph");
- } else {
+ } else
+ {
cb.callback(V1_3::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
}
-
- return true;
+ return V1_3::ErrorStatus::NONE;
}
template<typename HalVersion>
@@ -646,10 +768,12 @@ bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs()
callbackContext.callback = nullCallback;
callbackContext.ctx.measureTimings = MeasureTiming::NO;
auto memPools = std::make_shared<std::vector<::android::nn::RunTimePoolInfo>>();
- return ExecuteGraph(memPools,
- inputTensors,
- outputTensors,
- callbackContext);
+
+ auto errorStatus = ExecuteGraph(memPools,
+ inputTensors,
+ outputTensors,
+ callbackContext);
+ return errorStatus == V1_3::ErrorStatus::NONE;
}
template<typename HalVersion>
@@ -716,7 +840,7 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::Execute(const V1_
#ifdef ARMNN_ANDROID_NN_V1_3
template class ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>;
-template bool ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::ExecuteGraph<CallbackContext_1_3>(
+template Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::ExecuteGraph<CallbackContext_1_3>(
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
armnn::InputTensors& pInputTensors,
armnn::OutputTensors& pOutputTensors,
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index 47529aa8..fa674058 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -29,6 +29,8 @@ struct ExecutionContext_1_3
::android::hardware::neuralnetworks::V1_2::MeasureTiming measureTimings =
::android::hardware::neuralnetworks::V1_2::MeasureTiming::NO;
TimePoint driverStart;
+ TimePoint deviceStart;
+ TimePoint deviceEnd;
};
using CallbackContext_1_3 = CallbackContext<CallbackAsync_1_3, ExecutionContext_1_3>;
@@ -74,7 +76,7 @@ public:
V1_3::IPreparedModel::executeSynchronously_1_3_cb cb) override;
Return<void> executeFenced(const V1_3::Request& request,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>& wait_for,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& fenceWaitFor,
MeasureTiming measure,
const V1_3::OptionalTimePoint& deadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
@@ -92,10 +94,11 @@ public:
/// execute the graph prepared from the request
template<typename CallbackContext>
- bool ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
- armnn::InputTensors& inputTensors,
- armnn::OutputTensors& outputTensors,
- CallbackContext callback);
+ Return <V1_3::ErrorStatus> ExecuteGraph(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ armnn::InputTensors& inputTensors,
+ armnn::OutputTensors& outputTensors,
+ CallbackContext callback);
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
diff --git a/test/Android.mk b/test/Android.mk
index 17def765..af04c838 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -109,6 +109,7 @@ endif # PLATFORM_VERSION == Q
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
+ libsync \
android.hardware.neuralnetworks@1.3
endif # R or later
@@ -218,6 +219,7 @@ endif # PLATFORM_VERSION == Q
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
+ libsync \
android.hardware.neuralnetworks@1.3
endif # R or later
@@ -320,6 +322,7 @@ LOCAL_SHARED_LIBRARIES := \
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
+ libsync \
android.hardware.neuralnetworks@1.3
endif # R or later