From 65c42dc4d68ac163b77a3139feee3e7d4530b5c5 Mon Sep 17 00:00:00 2001 From: Mike Kelly Date: Mon, 22 Jul 2019 14:06:00 +0100 Subject: IVGCVSW-3463 Fix Hal 1.2 Dynamic Output Shape VTS test failures *Updating ArmnnPreparedModel_1_2 to work with output shapes and timing. Change-Id: I06c4ecaf1e2c36ef77a0731ece4885fc3997cd3b Signed-off-by: Sadik Armagan Signed-off-by: Mike Kelly --- ArmnnPreparedModel_1_2.cpp | 272 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 237 insertions(+), 35 deletions(-) (limited to 'ArmnnPreparedModel_1_2.cpp') diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp index f6b44621..a7997c72 100644 --- a/ArmnnPreparedModel_1_2.cpp +++ b/ArmnnPreparedModel_1_2.cpp @@ -37,7 +37,10 @@ unsigned long MicrosecondsDuration(TimePoint endPoint, TimePoint startPoint) endPoint - startPoint).count()); } -void NotifyCallbackAndCheck(const ::android::sp& callback, ErrorStatus errorStatus, +void NotifyCallbackAndCheck(const ::android::sp& callback, + ErrorStatus errorStatus, + std::vector, + const Timing, std::string callingFunction) { Return returned = callback->notify(errorStatus); @@ -49,10 +52,13 @@ void NotifyCallbackAndCheck(const ::android::sp& callb } } -void NotifyCallbackAndCheck(const ::android::sp& callback, ErrorStatus errorStatus, +void NotifyCallbackAndCheck(const ::android::sp& callback, + ErrorStatus errorStatus, + std::vector outputShapes, + const Timing timing, std::string callingFunction) { - Return returned = callback->notify(errorStatus); + Return returned = callback->notify_1_2(errorStatus, outputShapes, timing); // This check is required, if the callback fails and it isn't checked it will bring down the service if (!returned.isOk()) { @@ -111,7 +117,8 @@ namespace armnn_driver { template -RequestThread ArmnnPreparedModel_1_2::m_RequestThread; +RequestThread + ArmnnPreparedModel_1_2::m_RequestThread; template template @@ -165,15 +172,43 @@ template Return ArmnnPreparedModel_1_2::execute(const Request& request, const ::android::sp& callback) { - return Execute(request, callback); + if (callback.get() == nullptr) + { + ALOGE("ArmnnPreparedModel_1_2::execute invalid callback passed"); + return ErrorStatus::INVALID_ARGUMENT; + } + + auto cb = [callback](ErrorStatus errorStatus, + std::vector outputShapes, + const Timing& timing, + std::string callingFunction) + { + NotifyCallbackAndCheck(callback, errorStatus, outputShapes, timing, callingFunction); + }; + + return Execute(request, MeasureTiming::NO, cb); } template Return ArmnnPreparedModel_1_2::execute_1_2(const Request& request, - MeasureTiming, + MeasureTiming measureTiming, const sp& callback) { - return Execute(request, callback); + if (callback.get() == nullptr) + { + ALOGE("ArmnnPreparedModel_1_2::execute_1_2 invalid callback passed"); + return ErrorStatus::INVALID_ARGUMENT; + } + + auto cb = [callback](ErrorStatus errorStatus, + std::vector outputShapes, + const Timing& timing, + std::string callingFunction) + { + NotifyCallbackAndCheck(callback, errorStatus, outputShapes, timing, callingFunction); + }; + + return Execute(request, measureTiming, cb); } template @@ -217,8 +252,8 @@ Return ArmnnPreparedModel_1_2::executeSynchronously(const Requ cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming); return Void(); } + std::vector outputShapes(request.outputs.size()); - // add the inputs and outputs with their data try { pInputTensors->reserve(request.inputs.size()); @@ -238,8 +273,8 @@ Return ArmnnPreparedModel_1_2::executeSynchronously(const Requ pInputTensors->emplace_back(i, inputTensor); } - pOutputTensors->reserve(request.outputs.size()); + for (unsigned int i = 0; i < request.outputs.size(); i++) { const auto& outputArg = request.outputs[i]; @@ -253,6 +288,28 @@ Return ArmnnPreparedModel_1_2::executeSynchronously(const Requ cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming); return Void(); } + const size_t outputSize = outputTensorInfo.GetNumBytes(); + const size_t bufferSize = pMemPools->at(outputArg.location.poolIndex).getHidlMemory().size(); + + hidl_vec dimensions; + + armnn::TensorShape tensorShape = outputTensorInfo.GetShape(); + const unsigned int numDims = tensorShape.GetNumDimensions(); + dimensions.resize(numDims); + + for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx) + { + dimensions[outputIdx] = tensorShape[outputIdx]; + } + outputShapes[i].dimensions = dimensions; + outputShapes[i].isSufficient = bufferSize >= outputSize; + + if (bufferSize < outputSize) + { + ALOGW("ArmnnPreparedModel_1_2::Execute failed"); + cb(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, outputShapes, g_NoTiming); + return Void(); + } pOutputTensors->emplace_back(i, outputTensor); } @@ -314,15 +371,82 @@ Return ArmnnPreparedModel_1_2::executeSynchronously(const Requ timing.timeInDriver = MicrosecondsDuration(driverEnd, driverStart); ALOGV("ArmnnPreparedModel_1_2::executeSynchronously timing Device = %lu Driver = %lu", timing.timeOnDevice, timing.timeInDriver); - cb(ErrorStatus::NONE, {}, timing); + cb(ErrorStatus::NONE, outputShapes, timing); } else { - cb(ErrorStatus::NONE, {}, g_NoTiming); + cb(ErrorStatus::NONE, outputShapes, g_NoTiming); } return Void(); } +class ArmnnBurstExecutorWithCache : public ExecutionBurstServer::IBurstExecutorWithCache { +public: + ArmnnBurstExecutorWithCache(IPreparedModel* preparedModel) + : m_PreparedModel(preparedModel) + {} + + bool isCacheEntryPresent(int32_t slot) const override + { + const auto it = m_MemoryCache.find(slot); + return (it != m_MemoryCache.end()) && it->second.valid(); + } + + void addCacheEntry(const hidl_memory& memory, int32_t slot) override + { + m_MemoryCache[slot] = memory; + } + + void removeCacheEntry(int32_t slot) override + { + m_MemoryCache.erase(slot); + } + + std::tuple, Timing> execute( + const Request& request, const std::vector& slots, + MeasureTiming measure) override + { + ALOGV("ArmnnPreparedModel_1_2::BurstExecutorWithCache::execute"); + hidl_vec pools(slots.size()); + + std::transform(slots.begin(), slots.end(), pools.begin(), [this](int32_t slot) + { + return m_MemoryCache[slot]; + }); + + Request fullRequest = request; + fullRequest.pools = std::move(pools); + + // Setup Callback + ErrorStatus returnedStatus = ErrorStatus::GENERAL_FAILURE; + hidl_vec returnedOutputShapes; + Timing returnedTiming; + auto cb = [&returnedStatus, &returnedOutputShapes, &returnedTiming](ErrorStatus status, + const hidl_vec& outputShapes, + const Timing& timing) + { + returnedStatus = status; + returnedOutputShapes = outputShapes; + returnedTiming = timing; + }; + + // Execute + ALOGV("ArmnnPreparedModel_1_2::BurstExecutorWithCache executing"); + const Return ret = m_PreparedModel->executeSynchronously(fullRequest, measure, cb); + + if (!ret.isOk() || returnedStatus != ErrorStatus::NONE) + { + ALOGE("ArmnnPreparedModel_1_2::BurstExecutorWithCache::error executing"); + } + return std::make_tuple(returnedStatus, std::move(returnedOutputShapes), returnedTiming); + } + +private: + IPreparedModel* const m_PreparedModel; + std::map m_MemoryCache; +}; + + template Return ArmnnPreparedModel_1_2::configureExecutionBurst( const sp& callback, @@ -331,7 +455,12 @@ Return ArmnnPreparedModel_1_2::configureExecutionBurst( V1_2::IPreparedModel::configureExecutionBurst_cb cb) { ALOGV("ArmnnPreparedModel_1_2::configureExecutionBurst"); - const sp burst = ExecutionBurstServer::create(callback, requestChannel, resultChannel, this); + const std::shared_ptr executorWithCache = + std::make_shared(this); + const sp burst = ExecutionBurstServer::create(callback, + requestChannel, + resultChannel, + executorWithCache); if (burst == nullptr) { @@ -349,27 +478,64 @@ void ArmnnPreparedModel_1_2::ExecuteGraph( std::shared_ptr>& pMemPools, std::shared_ptr& pInputTensors, std::shared_ptr& pOutputTensors, - const ::android::sp& callback) + ArmnnCallback_1_2 cb) { ALOGV("ArmnnPreparedModel_1_2::ExecuteGraph(...)"); + TimePoint driverEnd, deviceStart, deviceEnd; + DumpTensorsIfRequired("Input", *pInputTensors); + std::vector > outputTensors = *pOutputTensors.get(); + std::vector outputShapes(outputTensors.size()); + + for (unsigned int i = 0; i < outputTensors.size(); i++) + { + std::pair outputTensorPair = outputTensors[i]; + const armnn::Tensor outputTensor = outputTensorPair.second; + const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo(); + + hidl_vec dimensions; + + armnn::TensorShape tensorShape = outputTensorInfo.GetShape(); + const unsigned int numDims = tensorShape.GetNumDimensions(); + dimensions.resize(numDims); + + for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx) + { + dimensions[outputIdx] = tensorShape[outputIdx]; + } + outputShapes[i].dimensions = dimensions; + outputShapes[i].isSufficient = true; + } + // run it try { + if (cb.measureTiming == MeasureTiming::YES) + { + deviceStart = Now(); + } + armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, *pInputTensors, *pOutputTensors); + + if (cb.measureTiming == MeasureTiming::YES) + { + deviceEnd = Now(); + } if (status != armnn::Status::Success) { ALOGW("EnqueueWorkload failed"); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::ExecuteGraph"); + cb.callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, + "ArmnnPreparedModel_1_2::ExecuteGraph"); return; } } catch (armnn::Exception& e) { ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what()); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::ExecuteGraph"); + cb.callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, + "ArmnnPreparedModel_1_2::ExecuteGraph"); return; } @@ -383,7 +549,16 @@ void ArmnnPreparedModel_1_2::ExecuteGraph( pool.update(); } - NotifyCallbackAndCheck(callback, ErrorStatus::NONE, "ExecuteGraph"); + if (cb.measureTiming == MeasureTiming::YES) + { + driverEnd = Now(); + Timing timing; + timing.timeOnDevice = MicrosecondsDuration(deviceEnd, deviceStart); + timing.timeInDriver = MicrosecondsDuration(driverEnd, cb.driverStart); + cb.callback(ErrorStatus::NONE, outputShapes, timing, "ExecuteGraph"); + } else { + cb.callback(ErrorStatus::NONE, outputShapes, g_NoTiming, "ExecuteGraph"); + } } template @@ -428,28 +603,29 @@ bool ArmnnPreparedModel_1_2::ExecuteWithDummyInputs() } template -template Return ArmnnPreparedModel_1_2::Execute(const Request& request, - const sp& callback) + MeasureTiming measureTiming, + armnnExecuteCallback_1_2 callback) { - ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str()); - m_RequestCount++; + TimePoint driverStart; - if (callback.get() == nullptr) + if (measureTiming == MeasureTiming::YES) { - ALOGE("ArmnnPreparedModel_1_2::execute invalid callback passed"); - return ErrorStatus::INVALID_ARGUMENT; + driverStart = Now(); } + ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str()); + m_RequestCount++; + if (!android::nn::validateRequest(request, m_Model)) { - NotifyCallbackAndCheck(callback, ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::INVALID_ARGUMENT; } if (!m_RequestInputsAndOutputsDumpDir.empty()) { - ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast(callback.get())); + ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast(&callback)); } // allocate the tensors on the heap, as they are passed to the request thread @@ -462,7 +638,7 @@ Return ArmnnPreparedModel_1_2::Execute(const Request& if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools)) { - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::GENERAL_FAILURE; } @@ -480,8 +656,7 @@ Return ArmnnPreparedModel_1_2::Execute(const Request& if (inputTensor.GetMemoryArea() == nullptr) { ALOGE("Cannot execute request. Error converting request input %u to tensor", i); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, - "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::GENERAL_FAILURE; } @@ -489,6 +664,8 @@ Return ArmnnPreparedModel_1_2::Execute(const Request& } pOutputTensors->reserve(request.outputs.size()); + std::vector outputShapes(request.outputs.size()); + for (unsigned int i = 0; i < request.outputs.size(); i++) { const auto& outputArg = request.outputs[i]; @@ -496,33 +673,58 @@ Return ArmnnPreparedModel_1_2::Execute(const Request& const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i); const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools); if (outputTensor.GetMemoryArea() == nullptr) - { ALOGE("Cannot execute request. Error converting request output %u to tensor", i); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, - "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::GENERAL_FAILURE; } + const size_t outputSize = outputTensorInfo.GetNumBytes(); + const size_t bufferSize = pMemPools->at(outputArg.location.poolIndex).getHidlMemory().size(); pOutputTensors->emplace_back(i, outputTensor); + + hidl_vec dimensions; + + armnn::TensorShape tensorShape = outputTensorInfo.GetShape(); + const unsigned int numDims = tensorShape.GetNumDimensions(); + dimensions.resize(numDims); + + for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx) + { + dimensions[outputIdx] = tensorShape[outputIdx]; + } + outputShapes[i].dimensions = dimensions; + outputShapes[i].isSufficient = bufferSize >= outputSize; + + if (bufferSize < outputSize) + { + ALOGW("ArmnnPreparedModel_1_2::Execute failed"); + callback(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, + outputShapes, + g_NoTiming, + "ArmnnPreparedModel_1_2::Execute"); + return ErrorStatus::NONE; + } } } catch (armnn::Exception& e) { ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what()); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::GENERAL_FAILURE; } ALOGV("ArmnnPreparedModel_1_2::execute(...) before PostMsg"); // post the request for asynchronous execution - m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, callback); + ArmnnCallback_1_2 armnnCb; + armnnCb.callback = callback; + armnnCb.measureTiming = measureTiming; + armnnCb.driverStart = driverStart; + m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb); ALOGV("ArmnnPreparedModel_1_2::execute(...) after PostMsg"); - return ErrorStatus::NONE; } - #ifdef ARMNN_ANDROID_NN_V1_2 template class ArmnnPreparedModel_1_2; #endif -- cgit v1.2.1