diff options
4 files changed, 27 insertions, 144 deletions
diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp index 6ab453c..9635884 100644 --- a/applications/inference_process/include/inference_process.hpp +++ b/applications/inference_process/include/inference_process.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. All rights reserved. + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -53,10 +53,7 @@ struct InferenceJob { std::vector<DataPtr> output; std::vector<DataPtr> expectedOutput; size_t numBytesToPrint; - std::vector<uint8_t> pmuEventConfig; - bool pmuCycleCounterEnable; - std::vector<uint32_t> pmuEventCount; - uint64_t pmuCycleCounterCount; + void *externalContext; InferenceJob(); InferenceJob(const std::string &name, @@ -64,9 +61,8 @@ struct InferenceJob { const std::vector<DataPtr> &input, const std::vector<DataPtr> &output, const std::vector<DataPtr> &expectedOutput, - size_t numBytesToPrint, - const std::vector<uint8_t> &pmuEventConfig, - const bool pmuCycleCounterEnable); + const size_t numBytesToPrint = 0, + void *externalContext = nullptr); void invalidate(); void clean(); diff --git a/applications/inference_process/src/inference_process.cpp b/applications/inference_process/src/inference_process.cpp index ebd9d6c..4c65005 100644 --- a/applications/inference_process/src/inference_process.cpp +++ b/applications/inference_process/src/inference_process.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. All rights reserved. + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -100,20 +100,18 @@ char *DataPtr::end() const { return static_cast<char *>(data) + size; } -InferenceJob::InferenceJob() : numBytesToPrint(0) {} +InferenceJob::InferenceJob() : numBytesToPrint(0), externalContext(nullptr) {} InferenceJob::InferenceJob(const string &_name, const DataPtr &_networkModel, const vector<DataPtr> &_input, const vector<DataPtr> &_output, const vector<DataPtr> &_expectedOutput, - size_t _numBytesToPrint, - const vector<uint8_t> &_pmuEventConfig, - const bool _pmuCycleCounterEnable) : + const size_t _numBytesToPrint, + void *_externalContext) : name(_name), networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput), - numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(_pmuCycleCounterEnable), - pmuEventCount(), pmuCycleCounterCount(0) {} + numBytesToPrint(_numBytesToPrint), externalContext(_externalContext) {} void InferenceJob::invalidate() { networkModel.invalidate(); @@ -167,16 +165,14 @@ bool InferenceProcess::runJob(InferenceJob &job) { // Create the TFL micro interpreter tflite::AllOpsResolver resolver; -#ifdef LAYER_BY_LAYER_PROFILER - tflite::LayerByLayerProfiler profiler(job.pmuEventConfig, job.pmuCycleCounterEnable); -#else tflite::ArmProfiler profiler; -#endif - tflite::MicroErrorReporter errorReporter; tflite::MicroInterpreter interpreter( model, resolver, tensorArena, tensorArenaSize, &errorReporter, nullptr, &profiler); + // Set external context + interpreter.SetMicroExternalContext(job.externalContext); + // Allocate tensors TfLiteStatus status = interpreter.AllocateTensors(); if (status != kTfLiteOk) { @@ -196,14 +192,6 @@ bool InferenceProcess::runJob(InferenceJob &job) { return true; } -#ifdef LAYER_BY_LAYER_PROFILER - if (job.pmuCycleCounterEnable) { - job.pmuCycleCounterCount = profiler.GetPmuCycleCounterCount(); - } - - job.pmuEventCount.assign(profiler.GetPmuEventCount().begin(), profiler.GetPmuEventCount().end()); -#endif - LOG("Inference runtime: %" PRId32 " cycles\n", profiler.GetTotalTicks()); // Copy output data from TFLu arena to job descriptor @@ -333,10 +321,6 @@ bool InferenceProcess::compareOfm(InferenceJob &job, tflite::MicroInterpreter &i } void InferenceProcess::printJob(InferenceJob &job, tflite::MicroInterpreter &interpreter) { - for (size_t i = 0; i < job.pmuEventCount.size(); i++) { - LOG("ethosu_pmu_cntr%zu : %" PRIu32 "\n", i, job.pmuEventCount[i]); - } - LOG("arena_used_bytes : %zu\n", interpreter.arena_used_bytes()); // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes, diff --git a/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp b/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp index a547576..0c50bc8 100644 --- a/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp +++ b/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. All rights reserved. + * Copyright (c) 2021-2022 Arm Limited. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,7 +22,6 @@ #include "EventRecorder.h" #include "tensorflow/lite/kernels/internal/compatibility.h" #include <memory> -#include <pmu_ethosu.h> #include <vector> // NOTE: This profiler only works on systems with 1 NPU due to the use of @@ -31,29 +30,21 @@ namespace tflite { class LayerByLayerProfiler : public MicroProfiler { public: enum Backend { PRINTF, EVENT_RECORDER }; - LayerByLayerProfiler(const std::vector<uint8_t> &event_config = {}, - bool pmu_cycle_counter_enable = true, - size_t max_events = 200, - Backend backend = PRINTF, - int32_t event_id = EventID(EventLevelError, EvtStatistics_No, EventRecordNone)); + + LayerByLayerProfiler(size_t max_events = 200, + Backend backend = PRINTF, + int32_t event_id = EventID(EventLevelError, EvtStatistics_No, EventRecordNone)); + uint32_t BeginEvent(const char *tag); void EndEvent(uint32_t event_handle); int32_t GetTotalTicks() const; void Log() const; - uint64_t GetPmuCycleCounterCount() const; - const std::vector<uint32_t> &GetPmuEventCount() const; - private: std::unique_ptr<const char *[]> tags_; std::unique_ptr<uint64_t[]> start_ticks_; std::unique_ptr<uint64_t[]> end_ticks_; - std::vector<uint8_t> pmu_event_config; - std::vector<uint32_t> pmu_event_count; - bool pmu_cycle_counter_enable; - uint64_t pmu_cycle_counter_count; - size_t max_events_; Backend backend; int32_t event_id; diff --git a/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp b/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp index a4f67d6..4f525ee 100644 --- a/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp +++ b/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. All rights reserved. + * Copyright (c) 2021-2022 Arm Limited. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -25,27 +25,12 @@ #include "ethosu_log.h" #include "layer_by_layer_profiler.hpp" -#include <ethosu_driver.h> #include <inttypes.h> #include <stdio.h> -namespace { - -uint64_t GetCurrentEthosuTicks(struct ethosu_driver *drv) { - return ETHOSU_PMU_Get_CCNTR(drv); -} - -} // namespace - namespace tflite { -LayerByLayerProfiler::LayerByLayerProfiler(const std::vector<uint8_t> &event_config, - bool _pmu_cycle_counter_enable, - size_t max_events, - Backend _backend, - int32_t _event_id) : - pmu_event_config(event_config), - pmu_event_count(), pmu_cycle_counter_enable(_pmu_cycle_counter_enable), pmu_cycle_counter_count(0), +LayerByLayerProfiler::LayerByLayerProfiler(size_t max_events, Backend _backend, int32_t _event_id) : max_events_(max_events), backend(_backend), event_id(_event_id), num_events_(0) { tags_ = std::make_unique<const char *[]>(max_events); @@ -60,47 +45,11 @@ uint32_t LayerByLayerProfiler::BeginEvent(const char *tag) { num_events_ = 0; } - tags_[num_events_] = tag; - - if (strcmp("ethos-u", tag) == 0) { - struct ethosu_driver *drv = ethosu_reserve_driver(); - size_t numEventCounters = ETHOSU_PMU_Get_NumEventCounters(); - - if (pmu_event_config.size() > numEventCounters) { - LOG_WARN("PMU event config list is bigger (%zu) than available PMU event counters (%zu)", - pmu_event_config.size(), - numEventCounters); - LOG_WARN("PMU event config list will be truncated"); - pmu_event_config.resize(numEventCounters); - } - // Enable PMU - ETHOSU_PMU_Enable(drv); - - for (size_t i = 0; i < pmu_event_config.size(); i++) { - ETHOSU_PMU_Set_EVTYPER(drv, i, static_cast<ethosu_pmu_event_type>(pmu_event_config[i])); - } - - ETHOSU_PMU_CNTR_Enable(drv, (1 << pmu_event_config.size()) - 1); - ETHOSU_PMU_EVCNTR_ALL_Reset(drv); - - // Configure the cycle counter - if (pmu_cycle_counter_enable) { - ETHOSU_PMU_CNTR_Disable(drv, ETHOSU_PMU_CCNT_Msk); - ETHOSU_PMU_CYCCNT_Reset(drv); - - ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(drv, ETHOSU_PMU_NPU_IDLE); - ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(drv, ETHOSU_PMU_NPU_ACTIVE); - - ETHOSU_PMU_CNTR_Enable(drv, ETHOSU_PMU_CCNT_Msk); - } - start_ticks_[num_events_] = 0; // Hardware cycle counter has been reset above, thus starts at 0 - ethosu_release_driver(drv); - } else { - start_ticks_[num_events_] = GetCurrentTimeTicks(); - } - + tags_[num_events_] = tag; + start_ticks_[num_events_] = GetCurrentTimeTicks(); end_ticks_[num_events_] = start_ticks_[num_events_]; // NOTE: In case an EndEvent() doesn't trigger, cycles reports as 0 + return num_events_++; } @@ -108,41 +57,12 @@ uint32_t LayerByLayerProfiler::BeginEvent(const char *tag) { void LayerByLayerProfiler::EndEvent(uint32_t event_handle) { TFLITE_DCHECK(event_handle < max_events_); - if (strcmp("ethos-u", tags_[event_handle]) == 0) { - struct ethosu_driver *drv = ethosu_reserve_driver(); - - end_ticks_[event_handle] = GetCurrentEthosuTicks(drv); - // Get the cycle count - if (pmu_cycle_counter_enable) { - pmu_cycle_counter_count = end_ticks_[event_handle]; - } - - // Save the PMU counter values - // NOTE: If multiple ethos-u layers, only the latest will be saved - pmu_event_count.resize(pmu_event_config.size()); - for (size_t i = 0; i < pmu_event_config.size(); i++) { - pmu_event_count[i] = ETHOSU_PMU_Get_EVCNTR(drv, i); - } - - // Shut down the PMU - ETHOSU_PMU_Disable(drv); - - ethosu_release_driver(drv); - } else { - end_ticks_[event_handle] = GetCurrentTimeTicks(); - } + end_ticks_[event_handle] = GetCurrentTimeTicks(); if (backend == PRINTF) { - if (strcmp("ethos-u", tags_[event_handle]) == 0) { - for (size_t i = 0; i < pmu_event_count.size(); i++) { - LOG("ethos-u : ethosu_pmu_cntr%zu : %" PRIu32 "\n", i, pmu_event_count[i]); - } - LOG("ethos-u : cycle_cnt : %" PRIu64 " cycles\n", pmu_cycle_counter_count); - } else { - LOG("%s : cycle_cnt : %" PRIu64 " cycles\n", - tags_[event_handle], - end_ticks_[event_handle] - start_ticks_[event_handle]); - } + LOG("%s : cycle_cnt : %" PRIu64 " cycles\n", + tags_[event_handle], + end_ticks_[event_handle] - start_ticks_[event_handle]); } else { EventRecord2(event_id, (int32_t)event_handle, end_ticks_[event_handle] - start_ticks_[event_handle]); } @@ -158,14 +78,6 @@ int32_t LayerByLayerProfiler::GetTotalTicks() const { return ticks; } -uint64_t LayerByLayerProfiler::GetPmuCycleCounterCount() const { - return pmu_cycle_counter_count; -} - -const std::vector<uint32_t> &LayerByLayerProfiler::GetPmuEventCount() const { - return pmu_event_count; -} - void LayerByLayerProfiler::Log() const { #if !defined(TF_LITE_STRIP_ERROR_STRINGS) |