From d7be72ee37f5f4de1d981a400bf6d79b08ac0b2a Mon Sep 17 00:00:00 2001 From: Sadik Armagan Date: Thu, 23 Apr 2020 12:56:05 +0100 Subject: IVGCVSW-4677 Fix FencedComputeTest Hal 1.3 * Implemented executeFenced() function in HAL 1.3 Driver Signed-off-by: Sadik Armagan Change-Id: I11f8f532e9688d4e194992b46dbed575a19be3c5 --- Android.mk | 8 +++ ArmnnPreparedModel_1_3.cpp | 166 +++++++++++++++++++++++++++++++++++++++------ ArmnnPreparedModel_1_3.hpp | 13 ++-- test/Android.mk | 3 + 4 files changed, 164 insertions(+), 26 deletions(-) diff --git a/Android.mk b/Android.mk index 5431a74c..bf8bc65b 100644 --- a/Android.mk +++ b/Android.mk @@ -184,6 +184,7 @@ endif # Q or later ifeq ($(R_OR_LATER),1) LOCAL_SHARED_LIBRARIES+= \ + libsync \ android.hardware.neuralnetworks@1.3 endif # R or later @@ -307,6 +308,7 @@ endif # PLATFORM_VERSION == Q ifeq ($(R_OR_LATER),1) LOCAL_SHARED_LIBRARIES+= \ + libsync \ android.hardware.neuralnetworks@1.3 endif # R or later @@ -421,6 +423,7 @@ LOCAL_SHARED_LIBRARIES := \ ifeq ($(R_OR_LATER),1) LOCAL_SHARED_LIBRARIES+= \ + libsync \ android.hardware.neuralnetworks@1.3 endif # R or later @@ -527,6 +530,7 @@ LOCAL_SHARED_LIBRARIES := \ libcutils \ android.hidl.allocator@1.0 \ android.hidl.memory@1.0 \ + libsync \ android.hardware.neuralnetworks@1.0 \ android.hardware.neuralnetworks@1.1 \ android.hardware.neuralnetworks@1.2 \ @@ -631,6 +635,7 @@ endif # PLATFORM_VERSION == Q ifeq ($(R_OR_LATER),1) LOCAL_SHARED_LIBRARIES+= \ + libsync \ android.hardware.neuralnetworks@1.3 endif # R or later @@ -729,6 +734,7 @@ endif # PLATFORM_VERSION == Q ifeq ($(R_OR_LATER),1) LOCAL_SHARED_LIBRARIES+= \ + libsync \ android.hardware.neuralnetworks@1.3 endif # PLATFORM_VERSION == R @@ -816,6 +822,7 @@ LOCAL_SHARED_LIBRARIES := \ ifeq ($(R_OR_LATER),1) LOCAL_SHARED_LIBRARIES+= \ + libsync \ android.hardware.neuralnetworks@1.3 endif # R or later @@ -890,6 +897,7 @@ LOCAL_SHARED_LIBRARIES := \ libui \ libfmq \ libcutils \ + libsync \ android.hidl.allocator@1.0 \ android.hidl.memory@1.0 \ android.hardware.neuralnetworks@1.0 \ diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp index 5b45b4a1..6c4aec9a 100644 --- a/ArmnnPreparedModel_1_3.cpp +++ b/ArmnnPreparedModel_1_3.cpp @@ -2,6 +2,10 @@ // Copyright © 2020 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // +// Note: the ArmnnFencedExecutionCallback and code snippet in the executeFenced() function +// in this file is based on Android code +// under the Apache 2.0 license. See comments below for details. +// #define LOG_TAG "ArmnnDriver" @@ -9,6 +13,7 @@ #include "Utils.hpp" #include +#include #include #include #include @@ -254,10 +259,31 @@ Return ArmnnPreparedModel_1_3::execute_1_3( return Execute(request, measureTiming, cb); } +/// This class is inspired by the sample implementation in Android named SampleFencedExecutionCallback. +/// The original code is licensed under Apache-2.0 and can be found at the following link: +/// https://android.googlesource.com/platform/frameworks/ml/+/master/nn/driver/sample/SampleDriver.h +class ArmnnFencedExecutionCallback : public V1_3::IFencedExecutionCallback +{ +public: + ArmnnFencedExecutionCallback(V1_3::ErrorStatus errorStatus, Timing timing, Timing fenceTiming) + : m_ErrorStatus(errorStatus), m_Timing(timing), m_FenceTiming(fenceTiming) {} + ~ArmnnFencedExecutionCallback() {} + + Return getExecutionInfo(getExecutionInfo_cb callback) override + { + callback(m_ErrorStatus, m_Timing, m_FenceTiming); + return Void(); + } +private: + V1_3::ErrorStatus m_ErrorStatus; + Timing m_Timing; + Timing m_FenceTiming; +}; + template -Return ArmnnPreparedModel_1_3::executeFenced(const V1_3::Request&, - const hidl_vec&, - MeasureTiming, +Return ArmnnPreparedModel_1_3::executeFenced(const V1_3::Request& request, + const hidl_vec& fenceWaitFor, + MeasureTiming measureTiming, const OptionalTimePoint& deadline, const OptionalTimeoutDuration& loopTimeoutDuration, const OptionalTimeoutDuration&, @@ -281,7 +307,104 @@ Return ArmnnPreparedModel_1_3::executeFenced(const V1_3::Reque ALOGW("ArmnnPreparedModel_1_3::executeFenced parameter loopTimeoutDuration is set but not supported."); } - cb(ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr); + ExecutionContext_1_3 ctx; + if (measureTiming == MeasureTiming::YES) + { + ctx.measureTimings = measureTiming; + ctx.driverStart = Now(); + } + + ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str()); + m_RequestCount++; + + if (!android::nn::validateRequest(request, m_Model)) + { + cb(ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr); + return Void(); + } + + if (!m_RequestInputsAndOutputsDumpDir.empty()) + { + ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast(&cb)); + } + + // This code snippet is inspired by the sample implementation in Android named SampleDriver::executeFenced() + // function. The original code is licensed under Apache-2.0 and can be found at the following link: + // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/driver/sample/SampleDriver.cpp + const auto fenceSize = fenceWaitFor.size(); + for (unsigned int index = 0; index < fenceSize; ++index) + { + auto fenceNativeHandle = fenceWaitFor[index].getNativeHandle(); + if (!fenceNativeHandle) + { + cb(ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr); + return Void(); + } + + if (sync_wait(fenceNativeHandle->data[0], -1) < 0) + { + ALOGE("ArmnnPreparedModel_1_3::executeFenced sync fence failed."); + cb(ErrorStatus::GENERAL_FAILURE, hidl_handle(nullptr), nullptr); + return Void(); + } + } + + TimePoint fenceExecutionStart; + if (measureTiming == MeasureTiming::YES) + { + fenceExecutionStart = Now(); + } + + // map the memory pool into shared pointers + // use a shared memory pools vector on the heap, as it is passed to the request thread + auto memPools = std::make_shared>(); + + // allocate the tensors on the heap, as they are passed to the request thread + auto inputs = std::make_shared(); + auto outputs = std::make_shared(); + + auto [status, outShapes, timings, message] = PrepareMemoryForIO(*inputs, *outputs, *memPools, request); + if (status != V1_3::ErrorStatus::NONE) + { + cb(ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr); + return Void(); + } + + ALOGV("ArmnnPreparedModel_1_3::executeFenced(...) before ExecuteGraph"); + + // call it with nullCallback for now as we will report the error status from here.. + auto nullCallback = [](V1_3::ErrorStatus, std::vector, const Timing&, std::string) {}; + CallbackContext_1_3 cbCtx; + cbCtx.callback = nullCallback; + cbCtx.ctx = ctx; + + auto errorStatus = ExecuteGraph(memPools, *inputs, *outputs, cbCtx); + if (errorStatus != V1_3::ErrorStatus::NONE) + { + cb(errorStatus, hidl_handle(nullptr), nullptr); + return Void(); + } + ALOGV("ArmnnPreparedModel_1_3::executeFenced(...) after ExecuteGraph"); + + Timing timing = g_NoTiming; + Timing fenceTiming = g_NoTiming; + if (measureTiming == MeasureTiming::YES) + { + TimePoint driverEnd = Now(); + timing.timeOnDevice = MicrosecondsDuration(ctx.deviceEnd, ctx.deviceStart); + timing.timeInDriver = MicrosecondsDuration(driverEnd, ctx.driverStart); + ALOGV("ArmnnPreparedModel_1_2::fenceExecutionTiming - Device = %lu Driver = %lu", + timing.timeOnDevice, timing.timeInDriver); + + fenceTiming.timeOnDevice = MicrosecondsDuration(ctx.deviceEnd, ctx.deviceStart); + fenceTiming.timeInDriver = MicrosecondsDuration(driverEnd, fenceExecutionStart); + ALOGV("ArmnnPreparedModel_1_2::fenceFinishExecutionTiming - Device = %lu Driver = %lu", + fenceTiming.timeOnDevice, fenceTiming.timeInDriver); + } + + sp armnnFencedExecutionCallback = + new ArmnnFencedExecutionCallback(ErrorStatus::NONE, timing, fenceTiming); + cb(ErrorStatus::NONE, hidl_handle(nullptr), armnnFencedExecutionCallback); return Void(); } @@ -540,7 +663,7 @@ Return ArmnnPreparedModel_1_3::configureExecutionBurst( template template -bool ArmnnPreparedModel_1_3::ExecuteGraph( +Return ArmnnPreparedModel_1_3::ExecuteGraph( std::shared_ptr>& pMemPools, armnn::InputTensors& inputTensors, armnn::OutputTensors& outputTensors, @@ -567,34 +690,33 @@ bool ArmnnPreparedModel_1_3::ExecuteGraph( { if (cb.ctx.measureTimings == MeasureTiming::YES) { - deviceStart = Now(); + cb.ctx.deviceStart = Now(); } armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors); if (cb.ctx.measureTimings == MeasureTiming::YES) { - deviceEnd = Now(); + cb.ctx.deviceEnd = Now(); } if (status != armnn::Status::Success) { ALOGW("EnqueueWorkload failed"); - cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, - "ArmnnPreparedModel_1_3::ExecuteGraph"); - return false; + cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph"); + return V1_3::ErrorStatus::GENERAL_FAILURE; } } catch (armnn::Exception& e) { ALOGW("armnn:Exception caught from EnqueueWorkload: %s", e.what()); cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph"); - return false; + return V1_3::ErrorStatus::GENERAL_FAILURE; } catch (std::exception& e) { ALOGE("std::exception caught from EnqueueWorkload: %s", e.what()); cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph"); - return false; + return V1_3::ErrorStatus::GENERAL_FAILURE; } CommitPools(*pMemPools); @@ -605,16 +727,16 @@ bool ArmnnPreparedModel_1_3::ExecuteGraph( { driverEnd = Now(); Timing timing; - timing.timeOnDevice = MicrosecondsDuration(deviceEnd, deviceStart); + timing.timeOnDevice = MicrosecondsDuration(cb.ctx.deviceEnd, cb.ctx.deviceStart); timing.timeInDriver = MicrosecondsDuration(driverEnd, cb.ctx.driverStart); ALOGV("ArmnnPreparedModel_1_2::execute timing - Device = %lu Driver = %lu", timing.timeOnDevice, timing.timeInDriver); cb.callback(V1_3::ErrorStatus::NONE, outputShapes, timing, "ArmnnPreparedModel_1_3::ExecuteGraph"); - } else { + } else + { cb.callback(V1_3::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph"); } - - return true; + return V1_3::ErrorStatus::NONE; } template @@ -646,10 +768,12 @@ bool ArmnnPreparedModel_1_3::ExecuteWithDummyInputs() callbackContext.callback = nullCallback; callbackContext.ctx.measureTimings = MeasureTiming::NO; auto memPools = std::make_shared>(); - return ExecuteGraph(memPools, - inputTensors, - outputTensors, - callbackContext); + + auto errorStatus = ExecuteGraph(memPools, + inputTensors, + outputTensors, + callbackContext); + return errorStatus == V1_3::ErrorStatus::NONE; } template @@ -716,7 +840,7 @@ Return ArmnnPreparedModel_1_3::Execute(const V1_ #ifdef ARMNN_ANDROID_NN_V1_3 template class ArmnnPreparedModel_1_3; -template bool ArmnnPreparedModel_1_3::ExecuteGraph( +template Return ArmnnPreparedModel_1_3::ExecuteGraph( std::shared_ptr>& pMemPools, armnn::InputTensors& pInputTensors, armnn::OutputTensors& pOutputTensors, diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp index 47529aa8..fa674058 100644 --- a/ArmnnPreparedModel_1_3.hpp +++ b/ArmnnPreparedModel_1_3.hpp @@ -29,6 +29,8 @@ struct ExecutionContext_1_3 ::android::hardware::neuralnetworks::V1_2::MeasureTiming measureTimings = ::android::hardware::neuralnetworks::V1_2::MeasureTiming::NO; TimePoint driverStart; + TimePoint deviceStart; + TimePoint deviceEnd; }; using CallbackContext_1_3 = CallbackContext; @@ -74,7 +76,7 @@ public: V1_3::IPreparedModel::executeSynchronously_1_3_cb cb) override; Return executeFenced(const V1_3::Request& request, - const android::hardware::hidl_vec& wait_for, + const android::hardware::hidl_vec& fenceWaitFor, MeasureTiming measure, const V1_3::OptionalTimePoint& deadline, const V1_3::OptionalTimeoutDuration& loopTimeoutDuration, @@ -92,10 +94,11 @@ public: /// execute the graph prepared from the request template - bool ExecuteGraph(std::shared_ptr>& pMemPools, - armnn::InputTensors& inputTensors, - armnn::OutputTensors& outputTensors, - CallbackContext callback); + Return ExecuteGraph( + std::shared_ptr>& pMemPools, + armnn::InputTensors& inputTensors, + armnn::OutputTensors& outputTensors, + CallbackContext callback); /// Executes this model with dummy inputs (e.g. all zeroes). /// \return false on failure, otherwise true diff --git a/test/Android.mk b/test/Android.mk index 17def765..af04c838 100644 --- a/test/Android.mk +++ b/test/Android.mk @@ -109,6 +109,7 @@ endif # PLATFORM_VERSION == Q ifeq ($(R_OR_LATER),1) LOCAL_SHARED_LIBRARIES+= \ + libsync \ android.hardware.neuralnetworks@1.3 endif # R or later @@ -218,6 +219,7 @@ endif # PLATFORM_VERSION == Q ifeq ($(R_OR_LATER),1) LOCAL_SHARED_LIBRARIES+= \ + libsync \ android.hardware.neuralnetworks@1.3 endif # R or later @@ -320,6 +322,7 @@ LOCAL_SHARED_LIBRARIES := \ ifeq ($(R_OR_LATER),1) LOCAL_SHARED_LIBRARIES+= \ + libsync \ android.hardware.neuralnetworks@1.3 endif # R or later -- cgit v1.2.1