From 5a15bf4c86fad79523517afff5f2df2f6298d8da Mon Sep 17 00:00:00 2001 From: Kristofer Jonsson Date: Thu, 27 Jan 2022 17:36:55 +0100 Subject: Set TFLu external context Remove PMU configuration from the InferenceJob struct and add an external context parameter instead. The external context is passed to the TFLu interpreter and will be returned in the ethosu_inference_begin() and ethosu_inference_end() callbacks. Change-Id: I6dab04c0ab5088b1325be365d77d65d1182e7441 --- .../include/inference_process.hpp | 12 +-- .../inference_process/src/inference_process.cpp | 32 ++----- .../include/layer_by_layer_profiler.hpp | 21 ++-- .../src/layer_by_layer_profiler.cpp | 106 ++------------------- 4 files changed, 27 insertions(+), 144 deletions(-) diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp index 6ab453c..9635884 100644 --- a/applications/inference_process/include/inference_process.hpp +++ b/applications/inference_process/include/inference_process.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. All rights reserved. + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -53,10 +53,7 @@ struct InferenceJob { std::vector output; std::vector expectedOutput; size_t numBytesToPrint; - std::vector pmuEventConfig; - bool pmuCycleCounterEnable; - std::vector pmuEventCount; - uint64_t pmuCycleCounterCount; + void *externalContext; InferenceJob(); InferenceJob(const std::string &name, @@ -64,9 +61,8 @@ struct InferenceJob { const std::vector &input, const std::vector &output, const std::vector &expectedOutput, - size_t numBytesToPrint, - const std::vector &pmuEventConfig, - const bool pmuCycleCounterEnable); + const size_t numBytesToPrint = 0, + void *externalContext = nullptr); void invalidate(); void clean(); diff --git a/applications/inference_process/src/inference_process.cpp b/applications/inference_process/src/inference_process.cpp index ebd9d6c..4c65005 100644 --- a/applications/inference_process/src/inference_process.cpp +++ b/applications/inference_process/src/inference_process.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. All rights reserved. + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -100,20 +100,18 @@ char *DataPtr::end() const { return static_cast(data) + size; } -InferenceJob::InferenceJob() : numBytesToPrint(0) {} +InferenceJob::InferenceJob() : numBytesToPrint(0), externalContext(nullptr) {} InferenceJob::InferenceJob(const string &_name, const DataPtr &_networkModel, const vector &_input, const vector &_output, const vector &_expectedOutput, - size_t _numBytesToPrint, - const vector &_pmuEventConfig, - const bool _pmuCycleCounterEnable) : + const size_t _numBytesToPrint, + void *_externalContext) : name(_name), networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput), - numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(_pmuCycleCounterEnable), - pmuEventCount(), pmuCycleCounterCount(0) {} + numBytesToPrint(_numBytesToPrint), externalContext(_externalContext) {} void InferenceJob::invalidate() { networkModel.invalidate(); @@ -167,16 +165,14 @@ bool InferenceProcess::runJob(InferenceJob &job) { // Create the TFL micro interpreter tflite::AllOpsResolver resolver; -#ifdef LAYER_BY_LAYER_PROFILER - tflite::LayerByLayerProfiler profiler(job.pmuEventConfig, job.pmuCycleCounterEnable); -#else tflite::ArmProfiler profiler; -#endif - tflite::MicroErrorReporter errorReporter; tflite::MicroInterpreter interpreter( model, resolver, tensorArena, tensorArenaSize, &errorReporter, nullptr, &profiler); + // Set external context + interpreter.SetMicroExternalContext(job.externalContext); + // Allocate tensors TfLiteStatus status = interpreter.AllocateTensors(); if (status != kTfLiteOk) { @@ -196,14 +192,6 @@ bool InferenceProcess::runJob(InferenceJob &job) { return true; } -#ifdef LAYER_BY_LAYER_PROFILER - if (job.pmuCycleCounterEnable) { - job.pmuCycleCounterCount = profiler.GetPmuCycleCounterCount(); - } - - job.pmuEventCount.assign(profiler.GetPmuEventCount().begin(), profiler.GetPmuEventCount().end()); -#endif - LOG("Inference 
runtime: %" PRId32 " cycles\n", profiler.GetTotalTicks()); // Copy output data from TFLu arena to job descriptor @@ -333,10 +321,6 @@ bool InferenceProcess::compareOfm(InferenceJob &job, tflite::MicroInterpreter &i } void InferenceProcess::printJob(InferenceJob &job, tflite::MicroInterpreter &interpreter) { - for (size_t i = 0; i < job.pmuEventCount.size(); i++) { - LOG("ethosu_pmu_cntr%zu : %" PRIu32 "\n", i, job.pmuEventCount[i]); - } - LOG("arena_used_bytes : %zu\n", interpreter.arena_used_bytes()); // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes, diff --git a/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp b/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp index a547576..0c50bc8 100644 --- a/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp +++ b/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. All rights reserved. + * Copyright (c) 2021-2022 Arm Limited. All rights reserved. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -22,7 +22,6 @@ #include "EventRecorder.h" #include "tensorflow/lite/kernels/internal/compatibility.h" #include -#include #include // NOTE: This profiler only works on systems with 1 NPU due to the use of @@ -31,29 +30,21 @@ namespace tflite { class LayerByLayerProfiler : public MicroProfiler { public: enum Backend { PRINTF, EVENT_RECORDER }; - LayerByLayerProfiler(const std::vector &event_config = {}, - bool pmu_cycle_counter_enable = true, - size_t max_events = 200, - Backend backend = PRINTF, - int32_t event_id = EventID(EventLevelError, EvtStatistics_No, EventRecordNone)); + + LayerByLayerProfiler(size_t max_events = 200, + Backend backend = PRINTF, + int32_t event_id = EventID(EventLevelError, EvtStatistics_No, EventRecordNone)); + uint32_t BeginEvent(const char *tag); void EndEvent(uint32_t event_handle); int32_t GetTotalTicks() const; void Log() const; - uint64_t GetPmuCycleCounterCount() const; - const std::vector &GetPmuEventCount() const; - private: std::unique_ptr tags_; std::unique_ptr start_ticks_; std::unique_ptr end_ticks_; - std::vector pmu_event_config; - std::vector pmu_event_count; - bool pmu_cycle_counter_enable; - uint64_t pmu_cycle_counter_count; - size_t max_events_; Backend backend; int32_t event_id; diff --git a/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp b/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp index a4f67d6..4f525ee 100644 --- a/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp +++ b/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. All rights reserved. + * Copyright (c) 2021-2022 Arm Limited. All rights reserved. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -25,27 +25,12 @@ #include "ethosu_log.h" #include "layer_by_layer_profiler.hpp" -#include #include #include -namespace { - -uint64_t GetCurrentEthosuTicks(struct ethosu_driver *drv) { - return ETHOSU_PMU_Get_CCNTR(drv); -} - -} // namespace - namespace tflite { -LayerByLayerProfiler::LayerByLayerProfiler(const std::vector &event_config, - bool _pmu_cycle_counter_enable, - size_t max_events, - Backend _backend, - int32_t _event_id) : - pmu_event_config(event_config), - pmu_event_count(), pmu_cycle_counter_enable(_pmu_cycle_counter_enable), pmu_cycle_counter_count(0), +LayerByLayerProfiler::LayerByLayerProfiler(size_t max_events, Backend _backend, int32_t _event_id) : max_events_(max_events), backend(_backend), event_id(_event_id), num_events_(0) { tags_ = std::make_unique(max_events); @@ -60,47 +45,11 @@ uint32_t LayerByLayerProfiler::BeginEvent(const char *tag) { num_events_ = 0; } - tags_[num_events_] = tag; - - if (strcmp("ethos-u", tag) == 0) { - struct ethosu_driver *drv = ethosu_reserve_driver(); - size_t numEventCounters = ETHOSU_PMU_Get_NumEventCounters(); - - if (pmu_event_config.size() > numEventCounters) { - LOG_WARN("PMU event config list is bigger (%zu) than available PMU event counters (%zu)", - pmu_event_config.size(), - numEventCounters); - LOG_WARN("PMU event config list will be truncated"); - pmu_event_config.resize(numEventCounters); - } - // Enable PMU - ETHOSU_PMU_Enable(drv); - - for (size_t i = 0; i < pmu_event_config.size(); i++) { - ETHOSU_PMU_Set_EVTYPER(drv, i, static_cast(pmu_event_config[i])); - } - - ETHOSU_PMU_CNTR_Enable(drv, (1 << pmu_event_config.size()) - 1); - ETHOSU_PMU_EVCNTR_ALL_Reset(drv); - - // Configure the cycle counter - if (pmu_cycle_counter_enable) { - ETHOSU_PMU_CNTR_Disable(drv, ETHOSU_PMU_CCNT_Msk); - ETHOSU_PMU_CYCCNT_Reset(drv); - - ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(drv, ETHOSU_PMU_NPU_IDLE); - ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(drv, ETHOSU_PMU_NPU_ACTIVE); - - 
ETHOSU_PMU_CNTR_Enable(drv, ETHOSU_PMU_CCNT_Msk); - } - start_ticks_[num_events_] = 0; // Hardware cycle counter has been reset above, thus starts at 0 - ethosu_release_driver(drv); - } else { - start_ticks_[num_events_] = GetCurrentTimeTicks(); - } - + tags_[num_events_] = tag; + start_ticks_[num_events_] = GetCurrentTimeTicks(); end_ticks_[num_events_] = start_ticks_[num_events_]; // NOTE: In case an EndEvent() doesn't trigger, cycles reports as 0 + return num_events_++; } @@ -108,41 +57,12 @@ uint32_t LayerByLayerProfiler::BeginEvent(const char *tag) { void LayerByLayerProfiler::EndEvent(uint32_t event_handle) { TFLITE_DCHECK(event_handle < max_events_); - if (strcmp("ethos-u", tags_[event_handle]) == 0) { - struct ethosu_driver *drv = ethosu_reserve_driver(); - - end_ticks_[event_handle] = GetCurrentEthosuTicks(drv); - // Get the cycle count - if (pmu_cycle_counter_enable) { - pmu_cycle_counter_count = end_ticks_[event_handle]; - } - - // Save the PMU counter values - // NOTE: If multiple ethos-u layers, only the latest will be saved - pmu_event_count.resize(pmu_event_config.size()); - for (size_t i = 0; i < pmu_event_config.size(); i++) { - pmu_event_count[i] = ETHOSU_PMU_Get_EVCNTR(drv, i); - } - - // Shut down the PMU - ETHOSU_PMU_Disable(drv); - - ethosu_release_driver(drv); - } else { - end_ticks_[event_handle] = GetCurrentTimeTicks(); - } + end_ticks_[event_handle] = GetCurrentTimeTicks(); if (backend == PRINTF) { - if (strcmp("ethos-u", tags_[event_handle]) == 0) { - for (size_t i = 0; i < pmu_event_count.size(); i++) { - LOG("ethos-u : ethosu_pmu_cntr%zu : %" PRIu32 "\n", i, pmu_event_count[i]); - } - LOG("ethos-u : cycle_cnt : %" PRIu64 " cycles\n", pmu_cycle_counter_count); - } else { - LOG("%s : cycle_cnt : %" PRIu64 " cycles\n", - tags_[event_handle], - end_ticks_[event_handle] - start_ticks_[event_handle]); - } + LOG("%s : cycle_cnt : %" PRIu64 " cycles\n", + tags_[event_handle], + end_ticks_[event_handle] - start_ticks_[event_handle]); } else { 
EventRecord2(event_id, (int32_t)event_handle, end_ticks_[event_handle] - start_ticks_[event_handle]); } @@ -158,14 +78,6 @@ int32_t LayerByLayerProfiler::GetTotalTicks() const { return ticks; } -uint64_t LayerByLayerProfiler::GetPmuCycleCounterCount() const { - return pmu_cycle_counter_count; -} - -const std::vector &LayerByLayerProfiler::GetPmuEventCount() const { - return pmu_event_count; -} - void LayerByLayerProfiler::Log() const { #if !defined(TF_LITE_STRIP_ERROR_STRINGS) -- cgit v1.2.1