author      Isabella Gottardi <isabella.gottardi@arm.com>    2021-04-07 17:15:31 +0100
committer   Alexander Efremov <alexander.efremov@arm.com>    2021-04-12 14:00:49 +0000
commit      8df12f37531d57a10cba2f8b2e8b6a9065202dd5 (patch)
tree        ba833d15649c3b0f885d57b40d3916970b3fd2c8
parent      37ce22ebc9cf3e8529d9914c0eed0f718243d961 (diff)
download    ml-embedded-evaluation-kit-8df12f37531d57a10cba2f8b2e8b6a9065202dd5.tar.gz
MLECO-1870: Cherry pick profiling changes from dev to open source repo
* Documentation update

Change-Id: If85e7ebc44498840b291c408f14e66a5a5faa424
Signed-off-by: Isabella Gottardi <isabella.gottardi@arm.com>
-rw-r--r--  docs/sections/customizing.md | 19
-rw-r--r--  docs/sections/deployment.md | 14
-rw-r--r--  docs/sections/testing_benchmarking.md | 4
-rw-r--r--  docs/use_cases/ad.md | 162
-rw-r--r--  docs/use_cases/asr.md | 112
-rw-r--r--  docs/use_cases/img_class.md | 96
-rw-r--r--  docs/use_cases/inference_runner.md | 25
-rw-r--r--  docs/use_cases/kws.md | 92
-rw-r--r--  docs/use_cases/kws_asr.md | 184
-rw-r--r--  source/application/hal/include/timer.h | 8
-rw-r--r--  source/application/hal/platforms/bare-metal/bsp/bsp-core/include/bsp_core_log.h | 10
-rw-r--r--  source/application/hal/platforms/bare-metal/timer/baremetal_timer.c | 131
-rw-r--r--  source/application/hal/platforms/bare-metal/timer/include/baremetal_timer.h | 3
-rw-r--r--  source/application/hal/platforms/native/data_presentation/log/log.c | 20
-rw-r--r--  source/application/hal/platforms/native/utils/include/dummy_log.h | 10
-rw-r--r--  source/application/main/Profiler.cc | 238
-rw-r--r--  source/application/main/UseCaseCommonUtils.cc | 9
-rw-r--r--  source/application/main/include/Profiler.hpp | 35
-rw-r--r--  source/application/main/include/UseCaseCommonUtils.hpp | 4
-rw-r--r--  source/use_case/ad/src/MainLoop.cc | 2
-rw-r--r--  source/use_case/ad/src/UseCaseHandler.cc | 7
-rw-r--r--  source/use_case/asr/src/MainLoop.cc | 2
-rw-r--r--  source/use_case/asr/src/UseCaseHandler.cc | 22
-rw-r--r--  source/use_case/img_class/src/MainLoop.cc | 2
-rw-r--r--  source/use_case/img_class/src/UseCaseHandler.cc | 7
-rw-r--r--  source/use_case/inference_runner/src/MainLoop.cc | 3
-rw-r--r--  source/use_case/inference_runner/src/UseCaseHandler.cc | 6
-rw-r--r--  source/use_case/kws/src/MainLoop.cc | 3
-rw-r--r--  source/use_case/kws/src/UseCaseHandler.cc | 30
-rw-r--r--  source/use_case/kws_asr/src/MainLoop.cc | 3
-rw-r--r--  source/use_case/kws_asr/src/UseCaseHandler.cc | 17
-rw-r--r--  tests/common/ProfilerTests.cc | 95
-rw-r--r--  tests/use_case/img_class/ImgClassificationUCTest.cc | 6
-rw-r--r--  tests/use_case/kws/KWSHandlerTest.cc | 5
34 files changed, 839 insertions, 547 deletions
diff --git a/docs/sections/customizing.md b/docs/sections/customizing.md
index e92c327..346a34c 100644
--- a/docs/sections/customizing.md
+++ b/docs/sections/customizing.md
@@ -243,12 +243,14 @@ Profiler is a helper class assisting in collection of timings and
Ethos-U55 cycle counts for operations. It uses platform timer to get
system timing information.
-| Method name | Description |
-|----------------------|-----------------------------------------------------------|
-| StartProfiling | Starts profiling and records the starting timing data. |
-| StopProfiling | Stops profiling and records the ending timing data. |
-| Reset | Resets the profiler and clears all collected data. |
-| GetResultsAndReset | Gets the results as string and resets the profiler. |
+| Method name | Description |
+|-------------------------|----------------------------------------------------------------|
+| StartProfiling | Starts profiling and records the starting timing data. |
+| StopProfiling | Stops profiling and records the ending timing data. |
+| StopProfilingAndReset | Stops the profiling and internally resets the platform timers. |
+| Reset | Resets the profiler and clears all collected data. |
+| GetAllResultsAndReset | Gets the results as string and resets the profiler. |
+| SetName | Set the profiler name. |
Usage example:
@@ -259,7 +261,7 @@ profiler.StartProfiling();
// Code running inference to profile
profiler.StopProfiling();
-info("%s\n", profiler.GetResultsAndReset().c_str());
+profiler.PrintProfilingResult();
```
## NN Model API
@@ -571,9 +573,8 @@ Profiler profiler{&platform, "Inference"};
profiler.StartProfiling();
model.RunInference();
profiler.StopProfiling();
-std::string profileResults = profiler.GetResultsAndReset();
-info("%s\n", profileResults.c_str());
+profiler.PrintProfilingResult();
```
## Printing to console
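For reference, a minimal sketch of driving the updated Profiler API from the customizing.md example above across several inference runs (not part of this patch; `nIterations` and the surrounding `platform`/`model` objects are assumptions for illustration):

```C++
/* Sketch only: aggregate profiling across several runs using the API added
 * by this patch (SetName, StartProfiling, StopProfiling, PrintProfilingResult). */
Profiler profiler{&platform, "Inference"};
profiler.SetName("inference_loop");        /* Illustrative name. */

for (size_t i = 0; i < nIterations; ++i) { /* nIterations is assumed. */
    profiler.StartProfiling();             /* Record starting PMU counters. */
    model.RunInference();
    profiler.StopProfiling();              /* Record ending PMU counters. */
}

/* 'true' prints Total / Avg. / Min / Max per counter over all samples;
 * the default (false) prints only the averaged values. */
profiler.PrintProfilingResult(true);
```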
diff --git a/docs/sections/deployment.md b/docs/sections/deployment.md
index 354d30b..3d5796f 100644
--- a/docs/sections/deployment.md
+++ b/docs/sections/deployment.md
@@ -267,13 +267,13 @@ off.
7. On the second serial port, output similar to section 2.2 should be visible:
```log
- [INFO] Setting up system tick IRQ (for NPU)
- [INFO] V2M-MPS3 revision C
- [INFO] Application Note AN540, Revision B
- [INFO] FPGA build 1
- [INFO] Core clock has been set to: 32000000 Hz
- [INFO] CPU ID: 0x410fd220
- [INFO] CPU: Cortex-M55 r0p0
+ INFO - Setting up system tick IRQ (for NPU)
+ INFO - V2M-MPS3 revision C
+ INFO - Application Note AN540, Revision B
+ INFO - FPGA build 1
+ INFO - Core clock has been set to: 32000000 Hz
+ INFO - CPU ID: 0x410fd220
+ INFO - CPU: Cortex-M55 r0p0
...
```
diff --git a/docs/sections/testing_benchmarking.md b/docs/sections/testing_benchmarking.md
index 43bb7f4..0c7c675 100644
--- a/docs/sections/testing_benchmarking.md
+++ b/docs/sections/testing_benchmarking.md
@@ -45,8 +45,8 @@ dev_ethosu_eval-<use_case>-tests
```
```log
-[INFO] native platform initialised
-[INFO] ARM Ethos-U55 Evaluation application for MPS3 FPGA Prototyping Board and FastModel
+INFO - native platform initialised
+INFO - ARM Ethos-U55 Evaluation application for MPS3 FPGA Prototyping Board and FastModel
...
===============================================================================
diff --git a/docs/use_cases/ad.md b/docs/use_cases/ad.md
index ca95af8..1ff9c4f 100644
--- a/docs/use_cases/ad.md
+++ b/docs/use_cases/ad.md
@@ -390,41 +390,39 @@ Choice:
4. “Show NN model info” menu option prints information about model data type, input and output tensor sizes:
```log
- [INFO] uTFL version: 2.5.0
- [INFO] Model info:
- [INFO] Model INPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 1024 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 32
- [INFO] 2: 32
- [INFO] 3: 1
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.192437
- [INFO] ZeroPoint[0] = 11
- [INFO] Model OUTPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 8 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 8
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.048891
- [INFO] ZeroPoint[0] = -30
- [INFO] Activation buffer (a.k.a tensor arena) size used: 198016
- [INFO] Number of operators: 1
- [INFO] Operator 0: ethos-u
- [INFO] Use of Arm uNPU is enabled
-
+ INFO - uTFL version: 2.5.0
+ INFO - Model info:
+ INFO - Model INPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 1024 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 32
+ INFO - 2: 32
+ INFO - 3: 1
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.192437
+ INFO - ZeroPoint[0] = 11
+ INFO - Model OUTPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 8 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 8
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.048891
+ INFO - ZeroPoint[0] = -30
+ INFO - Activation buffer (a.k.a tensor arena) size used: 198016
+ INFO - Number of operators: 1
+ INFO - Operator 0: ethos-u
```
5. “List” menu option prints a list of pair ... indexes - the original filenames embedded in the application:
```log
- [INFO] List of Files:
- [INFO] 0 => anomaly_id_00_00000000.wav
- [INFO] 1 => anomaly_id_02_00000076.wav
- [INFO] 2 => normal_id_00_00000004.wav
- [INFO] 3 => normal_id_02_00000001.wav
+ INFO - List of Files:
+ INFO - 0 => anomaly_id_00_00000000.wav
+ INFO - 1 => anomaly_id_02_00000076.wav
+ INFO - 2 => normal_id_00_00000004.wav
+ INFO - 3 => normal_id_02_00000001.wav
```
### Running Anomaly Detection
@@ -434,76 +432,30 @@ Please select the first menu option to execute Anomaly Detection.
The following example illustrates application output:
```log
-[INFO] Running inference on audio clip 0 => anomaly_id_00_00000000.wav
-[INFO] Inference 1/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1081154
- Idle NPU cycles: 1012
-
-[INFO] Inference 2/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1080934
- Idle NPU cycles: 232
-
-[INFO] Inference 3/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1081332
- Idle NPU cycles: 834
-
-[INFO] Inference 4/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1080748
- Idle NPU cycles: 418
-
-[INFO] Inference 5/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1080728
- Idle NPU cycles: 438
-
-[INFO] Inference 6/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1081144
- Idle NPU cycles: 1022
-
-[INFO] Inference 7/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1080924
- Idle NPU cycles: 242
-
-[INFO] Inference 8/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1081322
- Idle NPU cycles: 844
-
-[INFO] Inference 9/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1080738
- Idle NPU cycles: 428
-
-[INFO] Inference 10/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1080718
- Idle NPU cycles: 448
-
-[INFO] Inference 11/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1081134
- Idle NPU cycles: 1032
-
-[INFO] Inference 12/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1080914
- Idle NPU cycles: 252
-
-[INFO] Inference 13/13
-[INFO] Profile for Inference:
- Active NPU cycles: 1081312
- Idle NPU cycles: 854
-
-[INFO] Average anomaly score is: -0.024493
+INFO - Running inference on audio clip 0 => anomaly_id_00_00000000.wav
+INFO - Inference 1/13
+INFO - Inference 2/13
+INFO - Inference 3/13
+INFO - Inference 4/13
+INFO - Inference 5/13
+INFO - Inference 6/13
+INFO - Inference 7/13
+INFO - Inference 8/13
+INFO - Inference 9/13
+INFO - Inference 10/13
+INFO - Inference 11/13
+INFO - Inference 12/13
+INFO - Inference 13/13
+INFO - Average anomaly score is: -0.024493
Anomaly threshold is: -0.800000
Anomaly detected!
-
+INFO - Profile for Inference:
+INFO - NPU AXI0_RD_DATA_BEAT_RECEIVED cycles: 628122
+INFO - NPU AXI0_WR_DATA_BEAT_WRITTEN cycles: 135087
+INFO - NPU AXI1_RD_DATA_BEAT_RECEIVED cycles: 62870
+INFO - NPU ACTIVE cycles: 1081007
+INFO - NPU IDLE cycles: 626
+INFO - NPU total cycles: 1081634
```
As multiple inferences have to be run for one clip it will take around a minute or so for all inferences to complete.
@@ -515,9 +467,19 @@ The profiling section of the log shows that for each inference. For the last inf
- Ethos-U55's PMU report:
- - 1,081,312 active cycles: number of cycles that were used for computation
+ - 1,081,634 total cycle: The number of NPU cycles
+
+ - 1,081,007 active cycles: number of NPU cycles that were used for computation
+
+ - 626 idle cycles: number of cycles for which the NPU was idle
+
+ - 628,122 AXI0 read cycles: The number of cycles the NPU spends to execute AXI0 read transactions.
+ AXI0 is the bus where Ethos-U55 NPU reads and writes to the computation buffers (activation buf/tensor arenas).
+
+ - 135,087 AXI0 write cycles: The number of cycles the NPU spends to execute AXI0 write transactions.
- - 854 idle cycles: number of cycles for which the NPU was idle
+ - 62,870 AXI1 read cycles: The number of cycles the NPU spends to execute AXI1 read transactions.
+ AXI1 is the bus where Ethos-U55 NPU reads the model (read only)
- For FPGA platforms, CPU cycle count can also be enabled. For FVP, however, CPU cycle counters should not be used as
the CPU model is not cycle-approximate or cycle-accurate.
diff --git a/docs/use_cases/asr.md b/docs/use_cases/asr.md
index d224aca..4600698 100644
--- a/docs/use_cases/asr.md
+++ b/docs/use_cases/asr.md
@@ -431,7 +431,7 @@ Choice:
compiled audio.
> **Note:** Note that if the clip is over a certain length, the application will invoke multiple inference runs to
->cover the entire file.
+ >cover the entire file.
2. “Classify audio clip at chosen index” menu option will run inference on the chosen audio clip.
@@ -444,41 +444,40 @@ Choice:
4. “Show NN model info” menu option prints information about model data type, input and output tensor sizes:
```log
- [INFO] uTFL version: 2.5.0
- [INFO] Model info:
- [INFO] Model INPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 11544 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 296
- [INFO] 2: 39
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.110316
- [INFO] ZeroPoint[0] = -11
- [INFO] Model OUTPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 4292 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 1
- [INFO] 2: 148
- [INFO] 3: 29
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.003906
- [INFO] ZeroPoint[0] = -128
- [INFO] Activation buffer (a.k.a tensor arena) size used: 783168
- [INFO] Number of operators: 1
- [INFO] Operator 0: ethos-u
- [INFO] Use of Arm uNPU is enabled
+ INFO - uTFL version: 2.5.0
+ INFO - Model info:
+ INFO - Model INPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 11544 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 296
+ INFO - 2: 39
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.110316
+ INFO - ZeroPoint[0] = -11
+ INFO - Model OUTPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 4292 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 1
+ INFO - 2: 148
+ INFO - 3: 29
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.003906
+ INFO - ZeroPoint[0] = -128
+ INFO - Activation buffer (a.k.a tensor arena) size used: 783168
+ INFO - Number of operators: 1
+ INFO - Operator 0: ethos-u
```
5. “List” menu option prints a list of pair audio clip indexes - the original filenames embedded in the application:
```log
- [INFO] List of Files:
- [INFO] 0 => anotherdoor.wav
- [INFO] 1 => anotherengineer.wav
- [INFO] 2 => itellyou.wav
- [INFO] 3 => testingroutine.wav
+ INFO - List of Files:
+ INFO - 0 => anotherdoor.wav
+ INFO - 1 => anotherengineer.wav
+ INFO - 2 => itellyou.wav
+ INFO - 3 => testingroutine.wav
```
### Running Automatic Speech Recognition
@@ -488,28 +487,21 @@ Please select the first menu option to execute Automatic Speech Recognition.
The following example illustrates application output:
```log
-[INFO] Running inference on audio clip 0 => anotherdoor.wav
-[INFO] Inference 1/2
-[INFO] Profile for pre-processing:
- Active NPU cycles: 0
- Idle NPU cycles: 6
-
-[INFO] Profile for Inference:
- Active NPU cycles: 28924342
- Idle NPU cycles: 824
-
-[INFO] Inference 2/2
-[INFO] Profile for pre-processing:
- Active NPU cycles: 0
- Idle NPU cycles: 6
-
-[INFO] Profile for Inference:
- Active NPU cycles: 28924298
- Idle NPU cycles: 868
-
-[INFO] Result for inf 0: and he walked immediately out o t
-[INFO] Result for inf 1: he aparctment by anoer dor
-[INFO] Final result: and he walked immediately out o the aparctment by anoer dor
+INFO - Running inference on audio clip 0 => another_door.wav
+INFO - Inference 1/2
+INFO - Inference 2/2
+INFO - Final results:
+INFO - Total number of inferences: 2
+INFO - For timestamp: 0.000000 (inference #: 0); label: and he walked immediately out of th
+INFO - For timestamp: 0.000000 (inference #: 1); label: e apartment by another door
+INFO - Complete recognition: and he walked immediately out of the apartment by another door
+INFO - Profile for Inference :
+INFO - NPU AXI0_RD_DATA_BEAT_RECEIVED cycles: 6564262
+INFO - NPU AXI0_WR_DATA_BEAT_WRITTEN cycles: 928889
+INFO - NPU AXI1_RD_DATA_BEAT_RECEIVED cycles: 841712
+INFO - NPU ACTIVE cycles: 28450696
+INFO - NPU IDLE cycles: 476
+INFO - NPU total cycles: 28451172
```
It could take several minutes to complete each inference (average time is 5-7 minutes), and on this audio clip multiple
@@ -519,9 +511,19 @@ The profiling section of the log shows that for the first inference:
- Ethos-U55's PMU report:
- - 28,924,298 active cycles: number of NPU cycles that were used for computation
+ - 28,451,172 total cycle: The number of NPU cycles
- - 868 idle cycles: number of cycles for which the NPU was idle
+ - 28,450,696 active cycles: number of NPU cycles that were used for computation
+
+ - 476 idle cycles: number of cycles for which the NPU was idle
+
+ - 6,564,262 AXI0 read cycles: The number of cycles the NPU spends to execute AXI0 read transactions.
+ AXI0 is the bus where Ethos-U55 NPU reads and writes to the computation buffers (activation buf/tensor arenas).
+
+ - 928,889 AXI0 write cycles: The number of cycles the NPU spends to execute AXI0 write transactions.
+
+ - 841,712 AXI1 read cycles: The number of cycles the NPU spends to execute AXI1 read transactions.
+ AXI1 is the bus where Ethos-U55 NPU reads the model (read only)
- For FPGA platforms, CPU cycle count can also be enabled. For FVP, however, CPU cycle counters should not be used as
the CPU model is not cycle-approximate or cycle-accurate.
diff --git a/docs/use_cases/img_class.md b/docs/use_cases/img_class.md
index 7a409f2..b26b746 100644
--- a/docs/use_cases/img_class.md
+++ b/docs/use_cases/img_class.md
@@ -371,40 +371,39 @@ Choice:
4. “Show NN model info” menu option prints information about model data type, input and output tensor sizes:
```log
- [INFO] uTFL version: 2.5.0
- [INFO] Model info:
- [INFO] Model INPUT tensors:
- [INFO] tensor type is UINT8
- [INFO] tensor occupies 150528 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 224
- [INFO] 2: 224
- [INFO] 3: 3
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.007812
- [INFO] ZeroPoint[0] = 128
- [INFO] Model OUTPUT tensors:
- [INFO] tensor type is UINT8
- [INFO] tensor occupies 1001 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 1001
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.098893
- [INFO] ZeroPoint[0] = 58
- [INFO] Activation buffer (a.k.a tensor arena) size used: 521760
- [INFO] Number of operators: 1
- [INFO] Operator 0: ethos-u
- [INFO] Use of Arm uNPU is enabled
+ INFO - uTFL version: 2.5.0
+ INFO - Model info:
+ INFO - Model INPUT tensors:
+ INFO - tensor type is UINT8
+ INFO - tensor occupies 150528 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 224
+ INFO - 2: 224
+ INFO - 3: 3
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.007812
+ INFO - ZeroPoint[0] = 128
+ INFO - Model OUTPUT tensors:
+ INFO - tensor type is UINT8
+ INFO - tensor occupies 1001 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 1001
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.098893
+ INFO - ZeroPoint[0] = 58
+ INFO - Activation buffer (a.k.a tensor arena) size used: 521760
+ INFO - Number of operators: 1
+ INFO - Operator 0: ethos-u
```
5. “List Images” menu option prints a list of pair image indexes - the original filenames embedded in the application:
```log
- [INFO] List of Files:
- [INFO] 0 => cat.bmp
- [INFO] 1 => dog.bmp
- [INFO] 2 => kimono.bmp
- [INFO] 3 => tiger.bmp
+ INFO - List of Files:
+ INFO - 0 => cat.bmp
+ INFO - 1 => dog.bmp
+ INFO - 2 => kimono.bmp
+ INFO - 3 => tiger.bmp
```
### Running Image Classification
@@ -414,16 +413,21 @@ Please select the first menu option to execute Image Classification.
The following example illustrates application output for classification:
```log
-[INFO] Running inference on image 0 => cat.bmp
-[INFO] Profile for Inference:
- Active NPU cycles: 7622641
- Idle NPU cycles: 525
-
-[INFO] 0) 282 (14.636096) -> tabby, tabby cat
-[INFO] 1) 286 (14.537203) -> Egyptian cat
-[INFO] 2) 283 (12.757138) -> tiger cat
-[INFO] 3) 458 (7.021370) -> bow tie, bow-tie, bowtie
-[INFO] 4) 288 (7.021370) -> lynx, catamount
+INFO - Running inference on image 0 => cat.bmp
+INFO - Final results:
+INFO - Total number of inferences: 1
+INFO - 0) 282 (14.636096) -> tabby, tabby cat
+INFO - 1) 286 (14.537203) -> Egyptian cat
+INFO - 2) 283 (12.757138) -> tiger cat
+INFO - 3) 458 (7.021370) -> bow tie, bow-tie, bowtie
+INFO - 4) 288 (7.021370) -> lynx, catamount
+INFO - Profile for Inference:
+INFO - NPU AXI0_RD_DATA_BEAT_RECEIVED cycles: 2489726
+INFO - NPU AXI0_WR_DATA_BEAT_WRITTEN cycles: 1098726
+INFO - NPU AXI1_RD_DATA_BEAT_RECEIVED cycles: 471129
+INFO - NPU ACTIVE cycles: 7489258
+INFO - NPU IDLE cycles: 914
+INFO - NPU total cycles: 7490172
```
It could take several minutes to complete one inference run (average time is 2-3 minutes).
@@ -435,9 +439,19 @@ The profiling section of the log shows that for this inference:
- Ethos-U55's PMU report:
- - 7,622,641 active cycles: number of NPU cycles that were used for computation
+ - 7,490,172 total cycle: The number of NPU cycles
- - 525 idle cycles: number of cycles for which the NPU was idle
+ - 7,489,258 active cycles: number of NPU cycles that were used for computation
+
+ - 914 idle cycles: number of cycles for which the NPU was idle
+
+ - 2,489,726 AXI0 read cycles: The number of cycles the NPU spends to execute AXI0 read transactions.
+ AXI0 is the bus where Ethos-U55 NPU reads and writes to the computation buffers (activation buf/tensor arenas).
+
+ - 1,098,726 AXI0 write cycles: The number of cycles the NPU spends to execute AXI0 write transactions.
+
+ - 471,129 AXI1 read cycles: The number of cycles the NPU spends to execute AXI1 read transactions.
+ AXI1 is the bus where Ethos-U55 NPU reads the model (read only)
- For FPGA platforms, CPU cycle count can also be enabled. For FVP, however, CPU cycle counters should not be used as
the CPU model is not cycle-approximate or cycle-accurate.
diff --git a/docs/use_cases/inference_runner.md b/docs/use_cases/inference_runner.md
index ffb205e..350c1e8 100644
--- a/docs/use_cases/inference_runner.md
+++ b/docs/use_cases/inference_runner.md
@@ -278,9 +278,14 @@ After the application has started the inference starts immediately and it output
The following example illustrates application output:
```log
-[INFO] Profile for Inference:
- Active NPU cycles: 26976
- Idle NPU cycles: 196
+INFO - Final results:
+INFO - Profile for Inference :
+INFO - NPU AXI0_RD_DATA_BEAT_RECEIVED cycles: 9332
+INFO - NPU AXI0_WR_DATA_BEAT_WRITTEN cycles: 3248
+INFO - NPU AXI1_RD_DATA_BEAT_RECEIVED cycles: 2219
+INFO - NPU ACTIVE cycles: 33145
+INFO - NPU IDLE cycles: 1033
+INFO - NPU total cycles: 34178
```
After running an inference on randomly generated data, the output of the log shows the profiling results that for this
@@ -288,9 +293,19 @@ inference:
- Ethos-U55's PMU report:
- - 26,976 active cycles: number of cycles that were used for computation
+ - 34,178 total cycle: The number of NPU cycles
- - 196 idle cycles: number of cycles for which the NPU was idle
+ - 33,145 active cycles: number of NPU cycles that were used for computation
+
+ - 1,033 idle cycles: number of cycles for which the NPU was idle
+
+ - 2,219 AXI0 read cycles: The number of cycles the NPU spends to execute AXI0 read transactions.
+ AXI0 is the bus where Ethos-U55 NPU reads and writes to the computation buffers (activation buf/tensor arenas).
+
+ - 3,248 AXI0 write cycles: The number of cycles the NPU spends to execute AXI0 write transactions.
+
+ - 9,332 AXI1 read cycles: The number of cycles the NPU spends to execute AXI1 read transactions.
+ AXI1 is the bus where Ethos-U55 NPU reads the model (read only)
- For FPGA platforms, CPU cycle count can also be enabled. For FVP, however, CPU cycle counters should not be used as
the CPU model is not cycle-approximate or cycle-accurate.
diff --git a/docs/use_cases/kws.md b/docs/use_cases/kws.md
index 316b501..4942744 100644
--- a/docs/use_cases/kws.md
+++ b/docs/use_cases/kws.md
@@ -405,41 +405,40 @@ Choice:
4. “Show NN model info” menu option prints information about model data type, input and output tensor sizes:
```log
- [INFO] uTFL version: 2.5.0
- [INFO] Model info:
- [INFO] Model INPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 490 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 1
- [INFO] 2: 49
- [INFO] 3: 10
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 1.107164
- [INFO] ZeroPoint[0] = 95
- [INFO] Model OUTPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 12 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 12
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.003906
- [INFO] ZeroPoint[0] = -128
- [INFO] Activation buffer (a.k.a tensor arena) size used: 72848
- [INFO] Number of operators: 1
- [INFO] Operator 0: ethos-u
- [INFO] Use of Arm uNPU is enabled
+ INFO - uTFL version: 2.5.0
+ INFO - Model info:
+ INFO - Model INPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 490 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 1
+ INFO - 2: 49
+ INFO - 3: 10
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 1.107164
+ INFO - ZeroPoint[0] = 95
+ INFO - Model OUTPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 12 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 12
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.003906
+ INFO - ZeroPoint[0] = -128
+ INFO - Activation buffer (a.k.a tensor arena) size used: 72848
+ INFO - Number of operators: 1
+ INFO - Operator 0: ethos-u
```
5. “List audio clips” menu option prints a list of pair audio indexes - the original filenames embedded in the
application:
```log
- [INFO] List of Files:
- [INFO] 0 => down.wav
- [INFO] 1 => rightleftup.wav
- [INFO] 2 => yes.wav
- [INFO] 3 => yesnogostop.wav
+ INFO - List of Files:
+ INFO - 0 => down.wav
+ INFO - 1 => rightleftup.wav
+ INFO - 2 => yes.wav
+ INFO - 3 => yesnogostop.wav
```
### Running Keyword Spotting
@@ -448,15 +447,18 @@ Selecting the first option will run inference on the first file.
The following example illustrates application output for classification:
-```log
-[INFO] Running inference on audio clip 0 => down.wav
-[INFO] Inference 1/1
-[INFO] Profile for Inference:
- Active NPU cycles: 680400
- Idle NPU cycles: 766
-
-[INFO] For timestamp: 0.000000 (inference #: 0); threshold: 0.900000
-[INFO] label @ 0: down, score: 0.996094
+```log
+INFO - Running inference on audio clip 0 => down.wav
+INFO - Inference 1/1
+INFO - Final results:
+INFO - Total number of inferences: 1
+INFO - For timestamp: 0.000000 (inference #: 0); label: down, score: 0.996094; threshold: 0.900000
+INFO - Profile for Inference:
+INFO - NPU AXI0_RD_DATA_BEAT_RECEIVED cycles: 217385
+INFO - NPU AXI0_WR_DATA_BEAT_WRITTEN cycles: 82607
+INFO - NPU AXI1_RD_DATA_BEAT_RECEIVED cycles: 59608
+INFO - NPU ACTIVE cycles: 680611
+INFO - NPU IDLE cycles: 561
+INFO - NPU total cycles: 681172
```
Each inference should take less than 30 seconds on most systems running Fast Model.
@@ -464,9 +466,19 @@ The profiling section of the log shows that for this inference:
- Ethos-U55's PMU report:
- - 680,400 active cycles: number of cycles that were used for computation
+ - 681,172 total cycle: The number of NPU cycles
+
+ - 680,611 active cycles: The number of NPU cycles that were used for computation
+
+ - 561 idle cycles: number of cycles for which the NPU was idle
+
+ - 217,385 AXI0 read cycles: The number of cycles the NPU spends to execute AXI0 read transactions.
+ AXI0 is the bus where Ethos-U55 NPU reads and writes to the computation buffers (activation buf/tensor arenas).
+
+ - 82,607 write cycles: The number of cycles the NPU spends to execute AXI0 write transactions.
- - 766 idle cycles: number of cycles for which the NPU was idle
+ - 59,608 AXI1 read cycles: The number of cycles the NPU spends to execute AXI1 read transactions.
+ AXI1 is the bus where Ethos-U55 NPU reads the model (read only)
- For FPGA platforms, CPU cycle count can also be enabled. For FVP, however, CPU cycle counters should not be used as
the CPU model is not cycle-approximate or cycle-accurate.
diff --git a/docs/use_cases/kws_asr.md b/docs/use_cases/kws_asr.md
index e79b887..132a82d 100644
--- a/docs/use_cases/kws_asr.md
+++ b/docs/use_cases/kws_asr.md
@@ -468,72 +468,72 @@ Choice:
4. “Show NN model info” menu option prints information about model data type, input and output tensor sizes:
```log
- [INFO] uTFL version: 2.5.0
- [INFO] Model INPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 490 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 1
- [INFO] 2: 49
- [INFO] 3: 10
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 1.107164
- [INFO] ZeroPoint[0] = 95
- [INFO] Model OUTPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 12 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 12
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.003906
- [INFO] ZeroPoint[0] = -128
- [INFO] Activation buffer (a.k.a tensor arena) size used: 123616
- [INFO] Number of operators: 16
- [INFO] Operator 0: RESHAPE
- [INFO] Operator 1: CONV_2D
- [INFO] Operator 2: DEPTHWISE_CONV_2D
- [INFO] Operator 3: CONV_2D
- [INFO] Operator 4: DEPTHWISE_CONV_2D
- [INFO] Operator 5: CONV_2D
- [INFO] Operator 6: DEPTHWISE_CONV_2D
- [INFO] Operator 7: CONV_2D
- [INFO] Operator 8: DEPTHWISE_CONV_2D
- [INFO] Operator 9: CONV_2D
- [INFO] Operator 10: DEPTHWISE_CONV_2D
- [INFO] Operator 11: CONV_2D
- [INFO] Operator 12: AVERAGE_POOL_2D
- [INFO] Operator 13: RESHAPE
- [INFO] Operator 14: FULLY_CONNECTED
- [INFO] Operator 15: SOFTMAX
- [INFO] Model INPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 11544 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 296
- [INFO] 2: 39
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.110316
- [INFO] ZeroPoint[0] = -11
- [INFO] Model OUTPUT tensors:
- [INFO] tensor type is INT8
- [INFO] tensor occupies 4292 bytes with dimensions
- [INFO] 0: 1
- [INFO] 1: 1
- [INFO] 2: 148
- [INFO] 3: 29
- [INFO] Quant dimension: 0
- [INFO] Scale[0] = 0.003906
- [INFO] ZeroPoint[0] = -128
- [INFO] Activation buffer (a.k.a tensor arena) size used: 809808
- [INFO] Number of operators: 1
- [INFO] Operator 0: ethos-u
+ INFO - uTFL version: 2.5.0
+ INFO - Model INPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 490 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 1
+ INFO - 2: 49
+ INFO - 3: 10
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 1.107164
+ INFO - ZeroPoint[0] = 95
+ INFO - Model OUTPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 12 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 12
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.003906
+ INFO - ZeroPoint[0] = -128
+ INFO - Activation buffer (a.k.a tensor arena) size used: 123616
+ INFO - Number of operators: 16
+ INFO - Operator 0: RESHAPE
+ INFO - Operator 1: CONV_2D
+ INFO - Operator 2: DEPTHWISE_CONV_2D
+ INFO - Operator 3: CONV_2D
+ INFO - Operator 4: DEPTHWISE_CONV_2D
+ INFO - Operator 5: CONV_2D
+ INFO - Operator 6: DEPTHWISE_CONV_2D
+ INFO - Operator 7: CONV_2D
+ INFO - Operator 8: DEPTHWISE_CONV_2D
+ INFO - Operator 9: CONV_2D
+ INFO - Operator 10: DEPTHWISE_CONV_2D
+ INFO - Operator 11: CONV_2D
+ INFO - Operator 12: AVERAGE_POOL_2D
+ INFO - Operator 13: RESHAPE
+ INFO - Operator 14: FULLY_CONNECTED
+ INFO - Operator 15: SOFTMAX
+ INFO - Model INPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 11544 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 296
+ INFO - 2: 39
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.110316
+ INFO - ZeroPoint[0] = -11
+ INFO - Model OUTPUT tensors:
+ INFO - tensor type is INT8
+ INFO - tensor occupies 4292 bytes with dimensions
+ INFO - 0: 1
+ INFO - 1: 1
+ INFO - 2: 148
+ INFO - 3: 29
+ INFO - Quant dimension: 0
+ INFO - Scale[0] = 0.003906
+ INFO - ZeroPoint[0] = -128
+ INFO - Activation buffer (a.k.a tensor arena) size used: 809808
+ INFO - Number of operators: 1
+ INFO - Operator 0: ethos-u
```
5. “List” menu option prints a list of pair ... indexes - the original filenames embedded in the application:
```log
- [INFO] List of Files:
- [INFO] 0 => yesnogostop.wav
+ INFO - List of Files:
+ INFO - 0 => yesnogostop.wav
```
### Running Keyword Spotting and Automatic Speech Recognition
@@ -543,29 +543,31 @@ Please select the first menu option to execute Keyword Spotting and Automatic Sp
The following example illustrates application output:
```log
-[INFO] KWS audio data window size 16000
-[INFO] Running KWS inference on audio clip 0 => yesnogostop.wav
-[INFO] Inference 1/7
-[INFO] Profile for Inference:
- Active NPU cycles: 0
- Idle NPU cycles: 6
-
-[INFO] For timestamp: 0.000000 (inference #: 0); threshold: 0.900000
-[INFO] label @ 0: yes, score: 0.996094
-[INFO] Keyword spotted
-[INFO] Inference 1/2
-[INFO] Profile for Inference:
- Active NPU cycles: 28924742
- Idle NPU cycles: 424
-
-[INFO] Inference 2/2
-[INFO] Profile for Inference:
- Active NPU cycles: 28924740
- Idle NPU cycles: 426
-
-[INFO] Result for inf 0: no gow
-[INFO] Result for inf 1: stoppe
-[INFO] Final result: no gow stoppe
+INFO - KWS audio data window size 16000
+INFO - Running KWS inference on audio clip 0 => yesnogostop.wav
+INFO - Inference 1/7
+INFO - For timestamp: 0.000000 (inference #: 0); threshold: 0.900000
+INFO - label @ 0: yes, score: 0.996094
+INFO - Profile for Inference:
+INFO - NPU AXI0_RD_DATA_BEAT_RECEIVED cycles: 217385
+INFO - NPU AXI0_WR_DATA_BEAT_WRITTEN cycles: 82607
+INFO - NPU AXI1_RD_DATA_BEAT_RECEIVED cycles: 59608
+INFO - NPU ACTIVE cycles: 680611
+INFO - NPU IDLE cycles: 561
+INFO - NPU total cycles: 681172
+INFO - Keyword spotted
+INFO - Inference 1/2
+INFO - Inference 2/2
+INFO - Result for inf 0: no gow
+INFO - Result for inf 1: stoppe
+INFO - Final result: no gow stoppe
+INFO - Profile for Inference:
+INFO - NPU AXI0_RD_DATA_BEAT_RECEIVED cycles: 13520864
+INFO - NPU AXI0_WR_DATA_BEAT_WRITTEN cycles: 2841970
+INFO - NPU AXI1_RD_DATA_BEAT_RECEIVED cycles: 2717670
+INFO - NPU ACTIVE cycles: 28909309
+INFO - NPU IDLE cycles: 863
+INFO - NPU total cycles: 28910172
```
It could take several minutes to complete one inference run (average time is 2-3 minutes).
@@ -578,9 +580,19 @@ The profiling section of the log shows that for the ASR inference:
- Ethos-U55's PMU report:
- - 28,924,740 active cycles: number of cycles that were used for computation
+ - 28,910,172 total cycle: The number of NPU cycles
- - 426 idle cycles: number of cycles for which the NPU was idle
+ - 28,909,309 active cycles: number of NPU cycles that were used for computation
+
+ - 863 idle cycles: number of cycles for which the NPU was idle
+
+ - 13,520,864 AXI0 read cycles: The number of cycles the NPU spends to execute AXI0 read transactions.
+ AXI0 is the bus where Ethos-U55 NPU reads and writes to the computation buffers (activation buf/tensor arenas).
+
+ - 2,841,970 AXI0 write cycles: The number of cycles the NPU spends to execute AXI0 write transactions.
+
+ - 2,717,670 AXI1 read cycles: The number of cycles the NPU spends to execute AXI1 read transactions.
+ AXI1 is the bus where Ethos-U55 NPU reads the model (read only)
- For FPGA platforms, CPU cycle count can also be enabled. For FVP, however, CPU cycle counters should not be used as
the CPU model is not cycle-approximate or cycle-accurate.
diff --git a/source/application/hal/include/timer.h b/source/application/hal/include/timer.h
index 2955b7f..4a09fd4 100644
--- a/source/application/hal/include/timer.h
+++ b/source/application/hal/include/timer.h
@@ -56,11 +56,9 @@ typedef struct _platform_timer {
/* Gets difference in CPU cycle counts. */
uint32_t (* get_cpu_cycle_diff)(time_counter *start, time_counter *end);
- /* Gets the difference in terms of total NPU cycle counts. */
- uint64_t (* get_npu_total_cycle_diff)(time_counter *start, time_counter *end);
-
- /* Gets the difference in terms of active NPU cycle counts. */
- uint64_t (* get_npu_active_cycle_diff)(time_counter *start, time_counter *end);
+ /* Gets the difference in terms of cycle counts for collected pmu counters. */
+ int (* get_npu_cycles_diff)(time_counter *start, time_counter *end,
+ uint64_t* pmu_counters_values, size_t size);
/* Wraps get_time_counter function with additional profiling
* initialisation, if required. */
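For reference, a minimal sketch of calling the new `get_npu_cycles_diff` hook (not part of this patch); `timer` is assumed to be an initialised `platform_timer*`, and `start`/`end` are `time_counter` snapshots taken around the profiled workload:

```C++
/* Sketch only: read the six PMU cycle deltas through the new hook. The order
 * used by Profiler.cc in this patch is: total, active, idle, AXI0 read,
 * AXI0 write, AXI1 read. */
const size_t size = 6;
uint64_t pmuCounters[size] = {0};

if (timer->cap.npu_cycles && timer->get_npu_cycles_diff &&
    0 == timer->get_npu_cycles_diff(&start, &end, pmuCounters, size)) {
    /* Success: each element holds the corresponding cycle count difference. */
} else {
    /* NPU cycle counting is unavailable, or the array size did not match. */
}
```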
diff --git a/source/application/hal/platforms/bare-metal/bsp/bsp-core/include/bsp_core_log.h b/source/application/hal/platforms/bare-metal/bsp/bsp-core/include/bsp_core_log.h
index f049209..ffb55e7 100644
--- a/source/application/hal/platforms/bare-metal/bsp/bsp-core/include/bsp_core_log.h
+++ b/source/application/hal/platforms/bare-metal/bsp/bsp-core/include/bsp_core_log.h
@@ -32,31 +32,31 @@
#endif /*LOG_LEVEL*/
#if (LOG_LEVEL == LOG_LEVEL_TRACE)
- #define trace(...) printf("[TRACE] "); printf(__VA_ARGS__)
+ #define trace(...) printf("TRACE - "); printf(__VA_ARGS__)
#else
#define trace(...)
#endif /* LOG_LEVEL == LOG_LEVEL_TRACE */
#if (LOG_LEVEL <= LOG_LEVEL_DEBUG)
- #define debug(...) printf("[DEBUG] "); printf(__VA_ARGS__)
+ #define debug(...) printf("DEBUG - "); printf(__VA_ARGS__)
#else
#define debug(...)
#endif /* LOG_LEVEL > LOG_LEVEL_TRACE */
#if (LOG_LEVEL <= LOG_LEVEL_INFO)
- #define info(...) printf("[INFO] "); printf(__VA_ARGS__)
+ #define info(...) printf("INFO - "); printf(__VA_ARGS__)
#else
#define info(...)
#endif /* LOG_LEVEL > LOG_LEVEL_DEBUG */
#if (LOG_LEVEL <= LOG_LEVEL_WARN)
- #define warn(...) printf("[WARN] "); printf(__VA_ARGS__)
+ #define warn(...) printf("WARN - "); printf(__VA_ARGS__)
#else
#define warn(...)
#endif /* LOG_LEVEL > LOG_LEVEL_INFO */
#if (LOG_LEVEL <= LOG_LEVEL_ERROR)
- #define printf_err(...) printf("[ERROR] "); printf(__VA_ARGS__)
+ #define printf_err(...) printf("ERROR - "); printf(__VA_ARGS__)
#else
#define printf_err(...)
#endif /* LOG_LEVEL > LOG_LEVEL_INFO */
diff --git a/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c b/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c
index 7257c1d..ef31a71 100644
--- a/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c
+++ b/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c
@@ -50,6 +50,53 @@ static uint64_t bm_get_npu_total_cycle_diff(time_counter *st,
static uint64_t bm_get_npu_active_cycle_diff(time_counter *st,
time_counter *end);
+/** @brief Gets the difference in idle NPU cycle counts
+ * @param[in] st Pointer to time_counter value at start time.
+ * @param[in] end Pointer to time_counter value at end.
+ * @return Idle NPU cycle counts difference between the arguments expressed
+ * as unsigned 64 bit integer.
+ **/
+static uint64_t bm_get_npu_idle_cycle_diff(time_counter *st,
+ time_counter *end);
+
+/** @brief Gets the difference in axi0 bus reads cycle counts
+ * @param[in] st Pointer to time_counter value at start time.
+ * @param[in] end Pointer to time_counter value at end.
+ * @return NPU AXI0 read cycle counts difference between the arguments expressed
+ * as unsigned 64 bit integer.
+ **/
+static uint64_t bm_get_npu_axi0_read_cycle_diff(time_counter *st,
+ time_counter *end);
+
+/** @brief Gets the difference in axi0 bus writes cycle counts
+ * @param[in] st Pointer to time_counter value at start time.
+ * @param[in] end Pointer to time_counter value at end.
+ * @return NPU AXI0 write cycle counts difference between the arguments expressed
+ * as unsigned 64 bit integer.
+ **/
+static uint64_t bm_get_npu_axi0_write_cycle_diff(time_counter *st,
+ time_counter *end);
+
+/** @brief Gets the difference in axi1 bus reads cycle counts
+ * @param[in] st Pointer to time_counter value at start time.
+ * @param[in] end Pointer to time_counter value at end.
+ * @return NPU AXI1 read cycle counts difference between the arguments expressed
+ * as unsigned 64 bit integer.
+ **/
+static uint64_t bm_get_npu_axi1_read_cycle_diff(time_counter *st,
+ time_counter *end);
+
+/** @brief Gets the difference for 6 collected cycle counts:
+ * 1) total NPU
+ * 2) active NPU
+ * 3) idle NPU
+ * 4) axi0 read
+ * 5) axi0 write
+ * 6) axi1 read
+ * */
+static int bm_get_npu_cycle_diff(time_counter *st, time_counter *end,
+ uint64_t* pmu_counters_values, const size_t size);
+
#endif /* defined (ARM_NPU) */
#if defined(MPS3_PLATFORM)
@@ -126,8 +173,7 @@ void init_timer(platform_timer *timer)
#if defined (ARM_NPU)
/* We are capable of reporting npu cycle counts. */
timer->cap.npu_cycles = 1;
- timer->get_npu_total_cycle_diff = bm_get_npu_total_cycle_diff;
- timer->get_npu_active_cycle_diff = bm_get_npu_active_cycle_diff;
+ timer->get_npu_cycles_diff = bm_get_npu_cycle_diff;
_init_ethosu_cyclecounter();
#endif /* defined (ARM_NPU) */
@@ -149,37 +195,89 @@ static void _init_ethosu_cyclecounter(void)
/* Reset overflow status. */
ETHOSU_PMU_Set_CNTR_OVS(ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CCNT_Msk);
- /* Set the counter #0 to count idle cycles. */
+ /* We can retrieve only 4 PMU counters: */
ETHOSU_PMU_Set_EVTYPER(0, ETHOSU_PMU_NPU_IDLE);
+ ETHOSU_PMU_Set_EVTYPER(1, ETHOSU_PMU_AXI0_RD_DATA_BEAT_RECEIVED);
+ ETHOSU_PMU_Set_EVTYPER(2, ETHOSU_PMU_AXI0_WR_DATA_BEAT_WRITTEN);
+ ETHOSU_PMU_Set_EVTYPER(3, ETHOSU_PMU_AXI1_RD_DATA_BEAT_RECEIVED);
/* Enable PMU. */
ETHOSU_PMU_Enable();
/* Enable counters for cycle and counter# 0. */
- ETHOSU_PMU_CNTR_Enable(ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CCNT_Msk);
-
+ ETHOSU_PMU_CNTR_Enable(ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CNT2_Msk | ETHOSU_PMU_CNT3_Msk | ETHOSU_PMU_CNT4_Msk| ETHOSU_PMU_CCNT_Msk);
_reset_ethosu_counters();
}
+static int bm_get_npu_cycle_diff(time_counter *st, time_counter *end,
+ uint64_t* pmu_counters_values, const size_t size)
+{
+ if (size == 6) {
+ pmu_counters_values[0] = bm_get_npu_total_cycle_diff(st, end);
+ pmu_counters_values[1] = bm_get_npu_active_cycle_diff(st, end);
+ pmu_counters_values[2] = bm_get_npu_idle_cycle_diff(st, end);
+ pmu_counters_values[3] = bm_get_npu_axi0_read_cycle_diff(st, end);
+ pmu_counters_values[4] = bm_get_npu_axi0_write_cycle_diff(st, end);
+ pmu_counters_values[5] = bm_get_npu_axi1_read_cycle_diff(st, end);
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
static uint64_t bm_get_npu_total_cycle_diff(time_counter *st, time_counter *end)
{
return end->npu_total_ccnt - st->npu_total_ccnt;
}
-static uint64_t bm_get_npu_active_cycle_diff(time_counter *st, time_counter *end)
+static uint32_t counter_overflow(uint32_t pmu_counter_mask)
{
/* Check for overflow: The idle counter is 32 bit while the
total cycle count is 64 bit. */
const uint32_t overflow_status = ETHOSU_PMU_Get_CNTR_OVS();
+ return pmu_counter_mask & overflow_status;
+}
- if (ETHOSU_PMU_CNT1_Msk & overflow_status) {
+static uint64_t bm_get_npu_idle_cycle_diff(time_counter *st, time_counter *end)
+{
+ if (counter_overflow(ETHOSU_PMU_CNT1_Msk)) {
printf_err("EthosU PMU idle counter overflow.\n");
return 0;
}
+ return (uint64_t)(end->npu_idle_ccnt - st->npu_idle_ccnt);
+}
+static uint64_t bm_get_npu_active_cycle_diff(time_counter *st, time_counter *end)
+{
/* Active NPU time = total time - idle time */
- return (bm_get_npu_total_cycle_diff(st, end) +
- (uint64_t)(st->npu_idle_ccnt)) - (uint64_t)(end->npu_idle_ccnt);
+ return bm_get_npu_total_cycle_diff(st, end) - bm_get_npu_idle_cycle_diff(st, end);
+}
+
+static uint64_t bm_get_npu_axi0_read_cycle_diff(time_counter *st, time_counter *end)
+{
+ if (counter_overflow(ETHOSU_PMU_CNT2_Msk)) {
+ printf_err("EthosU PMU axi0 read counter overflow.\n");
+ return 0;
+ }
+ return (uint64_t)(end->npu_axi0_read_ccnt - st->npu_axi0_read_ccnt);
+}
+
+static uint64_t bm_get_npu_axi0_write_cycle_diff(time_counter *st, time_counter *end)
+{
+ if (counter_overflow(ETHOSU_PMU_CNT3_Msk)) {
+ printf_err("EthosU PMU axi0 write counter overflow.\n");
+ return 0;
+ }
+ return (uint64_t)(end->npu_axi0_write_ccnt - st->npu_axi0_write_ccnt);
+}
+
+static uint64_t bm_get_npu_axi1_read_cycle_diff(time_counter *st, time_counter *end)
+{
+ if (counter_overflow(ETHOSU_PMU_CNT4_Msk)) {
+ printf_err("EthosU PMU axi1 read counter overflow.\n");
+ return 0;
+ }
+ return (uint64_t)(end->npu_axi1_read_ccnt - st->npu_axi1_read_ccnt);
}
#endif /* defined (ARM_NPU) */
@@ -199,15 +297,22 @@ static time_counter bm_get_time_counter(void)
.counter = get_time_counter(),
#if defined (ARM_NPU)
- .npu_idle_ccnt = ETHOSU_PMU_Get_EVCNTR(0),
- .npu_total_ccnt = ETHOSU_PMU_Get_CCNTR()
+ .npu_total_ccnt = ETHOSU_PMU_Get_CCNTR(),
+ .npu_idle_ccnt = ETHOSU_PMU_Get_EVCNTR(0),
+ .npu_axi0_read_ccnt = ETHOSU_PMU_Get_EVCNTR(1),
+ .npu_axi0_write_ccnt = ETHOSU_PMU_Get_EVCNTR(2),
+ .npu_axi1_read_ccnt = ETHOSU_PMU_Get_EVCNTR(3)
#endif /* defined (ARM_NPU) */
};
#if defined (ARM_NPU)
- debug("NPU total cc: %llu; NPU idle cc: %u\n",
- t.npu_total_ccnt, t.npu_idle_ccnt);
+ debug("NPU total cc: %llu; NPU idle cc: %u; NPU axi0 read cc: %u; NPU axi0 write cc: %u; NPU axi1 read cc: %u\n",
+ t.npu_total_ccnt,
+ t.npu_idle_ccnt,
+ t.npu_axi0_read_ccnt,
+ t.npu_axi0_write_ccnt,
+ t.npu_axi1_read_ccnt);
#endif /* defined (ARM_NPU) */
return t;
diff --git a/source/application/hal/platforms/bare-metal/timer/include/baremetal_timer.h b/source/application/hal/platforms/bare-metal/timer/include/baremetal_timer.h
index c8fc32c..3020dac 100644
--- a/source/application/hal/platforms/bare-metal/timer/include/baremetal_timer.h
+++ b/source/application/hal/platforms/bare-metal/timer/include/baremetal_timer.h
@@ -34,6 +34,9 @@ typedef struct bm_time_counter {
#if defined (ARM_NPU)
uint64_t npu_total_ccnt;
uint32_t npu_idle_ccnt;
+ uint32_t npu_axi0_read_ccnt;
+ uint32_t npu_axi0_write_ccnt;
+ uint32_t npu_axi1_read_ccnt;
#endif /* ARM_NPU */
} time_counter;
diff --git a/source/application/hal/platforms/native/data_presentation/log/log.c b/source/application/hal/platforms/native/data_presentation/log/log.c
index 48e8b95..6ce6684 100644
--- a/source/application/hal/platforms/native/data_presentation/log/log.c
+++ b/source/application/hal/platforms/native/data_presentation/log/log.c
@@ -30,11 +30,11 @@ int log_display_image(uint8_t* data, const uint32_t width,
const uint32_t pos_x, const uint32_t pos_y,
const uint32_t downsample_factor)
{
- info("Image details\n");
- info("Data: %p\n", data);
- info("WxHxC: %dx%dx%d\n", width, height, channels);
- info("Pos (x,y): (%d,%d)\n", pos_x, pos_y);
- info("Downsampling factor: %u\n", downsample_factor);
+ debug("Image details\n");
+ debug("Data: %p\n", data);
+ debug("WxHxC: %dx%dx%d\n", width, height, channels);
+ debug("Pos (x,y): (%d,%d)\n", pos_x, pos_y);
+ debug("Downsampling factor: %u\n", downsample_factor);
return 0;
}
@@ -43,8 +43,8 @@ int log_display_text(const char* str, const size_t str_sz,
const bool allow_multiple_lines)
{
UNUSED(allow_multiple_lines);
- info("%s\n", str);
- info("Text size: %lu, x: %d, y: %d\n", str_sz, pos_x, pos_y);
+ debug("%s\n", str);
+ debug("Text size: %lu, x: %d, y: %d\n", str_sz, pos_x, pos_y);
return 0;
}
@@ -53,19 +53,19 @@ int log_display_box_icon(const uint32_t pos_x, const uint32_t pos_y,
const uint32_t width, const uint32_t height,
const uint16_t color)
{
- info("Showing rectangular, width: %d, height: %d, color: %d, x: %d, y: %d\n",
+ debug("Showing rectangular, width: %d, height: %d, color: %d, x: %d, y: %d\n",
width, height, color, pos_x, pos_y);
return 0;
}
int log_clear(const uint16_t color)
{
- info("Clearing with color: %d\n", color);
+ debug("Clearing with color: %d\n", color);
return 0;
}
int log_set_text_color (const uint16_t color)
{
- info("Setting text color: %d\n", color);
+ debug("Setting text color: %d\n", color);
return 0;
}
diff --git a/source/application/hal/platforms/native/utils/include/dummy_log.h b/source/application/hal/platforms/native/utils/include/dummy_log.h
index 626436a..3df5c5c 100644
--- a/source/application/hal/platforms/native/utils/include/dummy_log.h
+++ b/source/application/hal/platforms/native/utils/include/dummy_log.h
@@ -32,31 +32,31 @@
#define UNUSED(x) ((void)(x))
#if (LOG_LEVEL == LOG_LEVEL_TRACE)
- #define trace(...) printf("[TRACE] "); printf(__VA_ARGS__)
+ #define trace(...) printf("TRACE - "); printf(__VA_ARGS__)
#else
#define trace(...)
#endif /* LOG_LEVEL == LOG_LEVEL_TRACE */
#if (LOG_LEVEL <= LOG_LEVEL_DEBUG)
- #define debug(...) printf("[DEBUG] "); printf(__VA_ARGS__)
+ #define debug(...) printf("DEBUG - "); printf(__VA_ARGS__)
#else
#define debug(...)
#endif /* LOG_LEVEL > LOG_LEVEL_TRACE */
#if (LOG_LEVEL <= LOG_LEVEL_INFO)
- #define info(...) printf("[INFO] "); printf(__VA_ARGS__)
+ #define info(...) printf("INFO - "); printf(__VA_ARGS__)
#else
#define info(...)
#endif /* LOG_LEVEL > LOG_LEVEL_DEBUG */
#if (LOG_LEVEL <= LOG_LEVEL_WARN)
- #define warn(...) printf("[WARN] "); printf(__VA_ARGS__)
+ #define warn(...) printf("WARN - "); printf(__VA_ARGS__)
#else
#define warn(...)
#endif /* LOG_LEVEL > LOG_LEVEL_INFO */
#if (LOG_LEVEL <= LOG_LEVEL_ERROR)
- #define printf_err(...) printf("[ERROR] "); printf(__VA_ARGS__)
+ #define printf_err(...) printf("ERROR - "); printf(__VA_ARGS__)
#else
#define printf_err(...)
#endif /* LOG_LEVEL > LOG_LEVEL_INFO */
diff --git a/source/application/main/Profiler.cc b/source/application/main/Profiler.cc
index f364759..ce59d9c 100644
--- a/source/application/main/Profiler.cc
+++ b/source/application/main/Profiler.cc
@@ -17,25 +17,10 @@
#include "Profiler.hpp"
#include <cstring>
-#include <string>
-#include <sstream>
+#include <iomanip>
namespace arm {
namespace app {
-
- template<class T>
- static void writeStatLine(std::ostringstream& s,
- const char* desc,
- T total,
- uint32_t samples,
- T min,
- T max)
- {
- s << "\t" << desc << total << " / "
- << ((double)total / samples) << " / "
- << min << " / " << max << std::endl;
- }
-
Profiler::Profiler(hal_platform* platform, const char* name = "Unknown")
: _m_name(name)
{
@@ -90,99 +75,162 @@ namespace app {
void Profiler::Reset()
{
this->_m_started = false;
+ this->_m_series.clear();
memset(&this->_m_tstampSt, 0, sizeof(this->_m_tstampSt));
memset(&this->_m_tstampEnd, 0, sizeof(this->_m_tstampEnd));
}
- std::string Profiler::GetResultsAndReset()
+ void calcProfilingStat(uint64_t currentValue,
+ Statistics& data,
+ uint32_t samples)
{
- std::ostringstream strResults;
+ data.total += currentValue;
+ data.min = std::min(data.min, currentValue);
+ data.max = std::max(data.max, currentValue);
+ data.avrg = ((double)data.total / samples);
+ }
+ void Profiler::GetAllResultsAndReset(std::vector<ProfileResult>& results)
+ {
for (const auto& item: this->_m_series) {
auto name = item.first;
ProfilingSeries series = item.second;
-
- uint32_t samplesNum = series.size();
-
- uint64_t totalNpuCycles = 0; /* Total NPU cycles (idle + active). */
- uint64_t totalActiveNpuCycles = 0; /* Active NPU cycles. */
- uint64_t totalCpuCycles = 0; /* Total CPU cycles. */
- time_t totalTimeMs = 0;
-
- uint64_t minActiveNpuCycles = series[0].activeNpuCycles;
- uint64_t minIdleNpuCycles = series[0].npuCycles - minActiveNpuCycles;
- uint64_t minActiveCpuCycles = series[0].cpuCycles - minActiveNpuCycles;
- time_t minTimeMs = series[0].time;
-
- uint64_t maxIdleNpuCycles = 0;
- uint64_t maxActiveNpuCycles = 0;
- uint64_t maxActiveCpuCycles = 0;
- time_t maxTimeMs = 0;
-
+ ProfileResult result{};
+ result.name = item.first;
+ result.samplesNum = series.size();
+
+ Statistics AXI0_RD {
+ .name = "NPU AXI0_RD_DATA_BEAT_RECEIVED",
+ .unit = "cycles",
+ .total = 0,
+ .avrg = 0.0,
+ .min = series[0].axi0writes,
+ .max = 0
+ };
+ Statistics AXI0_WR {
+ .name = "NPU AXI0_WR_DATA_BEAT_WRITTEN",
+ .unit = "cycles",
+ .total = 0,
+ .avrg = 0.0,
+ .min = series[0].axi0reads,
+ .max = 0
+ };
+ Statistics AXI1_RD {
+ .name = "NPU AXI1_RD_DATA_BEAT_RECEIVED",
+ .unit = "cycles",
+ .total = 0,
+ .avrg = 0.0,
+ .min = series[0].axi1reads,
+ .max = 0
+ };
+ Statistics NPU_ACTIVE {
+ .name = "NPU ACTIVE",
+ .unit = "cycles",
+ .total = 0,
+ .avrg = 0.0,
+ .min = series[0].activeNpuCycles,
+ .max = 0
+ };
+ Statistics NPU_IDLE {
+ .name = "NPU IDLE",
+ .unit = "cycles",
+ .total = 0,
+ .avrg = 0.0,
+ .min = series[0].idleNpuCycles,
+ .max = 0
+ };
+ Statistics NPU_Total {
+ .name = "NPU total",
+ .unit = "cycles",
+ .total = 0,
+ .avrg = 0.0,
+ .min = series[0].npuCycles,
+ .max = 0,
+ };
+#if defined(CPU_PROFILE_ENABLED)
+ Statistics CPU_ACTIVE {
+ .name = "CPU ACTIVE",
+ .unit = "cycles (approx)",
+ .total = 0,
+ .avrg = 0.0,
+ .min = series[0].cpuCycles - NPU_ACTIVE.min,
+ .max = 0
+ };
+ Statistics TIME {
+ .name = "Time",
+ .unit = "ms",
+ .total = 0,
+ .avrg = 0.0,
+ .min = static_cast<uint64_t>(series[0].time),
+ .max = 0
+ };
+#endif
for(ProfilingUnit& unit: series){
- totalNpuCycles += unit.npuCycles;
- totalActiveNpuCycles += unit.activeNpuCycles;
- totalCpuCycles += unit.cpuCycles;
- totalTimeMs += unit.time;
-
- maxActiveNpuCycles = std::max(maxActiveNpuCycles,
- unit.activeNpuCycles);
- maxIdleNpuCycles = std::max(maxIdleNpuCycles,
- unit.npuCycles - maxActiveNpuCycles);
- maxActiveCpuCycles = std::max(maxActiveCpuCycles,
- unit.cpuCycles - maxActiveNpuCycles);
- maxTimeMs = std::max(maxTimeMs, unit.time);
-
- minActiveNpuCycles = std::min(minActiveNpuCycles,
- unit.activeNpuCycles);
- minIdleNpuCycles = std::min(minIdleNpuCycles,
- unit.npuCycles - minActiveNpuCycles);
- minActiveCpuCycles = std::min(minActiveCpuCycles,
- unit.cpuCycles - minActiveNpuCycles);
- minTimeMs = std::min(minTimeMs, unit.time);
- }
- strResults << "Profile for " << name << ": " << std::endl;
+ calcProfilingStat(unit.npuCycles,
+ NPU_Total, result.samplesNum);
- if (samplesNum > 1) {
- strResults << "\tSamples: " << samplesNum << std::endl;
- strResults << "\t Total / Avg./ Min / Max"
- << std::endl;
+ calcProfilingStat(unit.activeNpuCycles,
+ NPU_ACTIVE, result.samplesNum);
- writeStatLine<uint64_t>(strResults, "Active NPU cycles: ",
- totalActiveNpuCycles, samplesNum,
- minActiveNpuCycles, maxActiveNpuCycles);
+ calcProfilingStat(unit.idleNpuCycles,
+ NPU_IDLE, result.samplesNum);
- writeStatLine<uint64_t>(strResults, "Idle NPU cycles: ",
- (totalNpuCycles - totalActiveNpuCycles),
- samplesNum, minIdleNpuCycles, maxIdleNpuCycles);
+ calcProfilingStat(unit.axi0writes,
+ AXI0_WR, result.samplesNum);
-#if defined(CPU_PROFILE_ENABLED)
- writeStatLine<uint64_t>(strResults, "Active CPU cycles (approx): ",
- (totalCpuCycles - totalActiveNpuCycles),
- samplesNum, minActiveCpuCycles,
- maxActiveCpuCycles);
+ calcProfilingStat(unit.axi0reads,
+ AXI0_RD, result.samplesNum);
- writeStatLine<time_t>(strResults, "Time in ms: ",
- totalTimeMs, samplesNum, minTimeMs, maxTimeMs);
-#endif
- } else {
- strResults << "\tActive NPU cycles: " << totalActiveNpuCycles
- << std::endl;
- strResults << "\tIdle NPU cycles: "
- << (totalNpuCycles - totalActiveNpuCycles)
- << std::endl;
+ calcProfilingStat(unit.axi1reads,
+ AXI1_RD, result.samplesNum);
#if defined(CPU_PROFILE_ENABLED)
- strResults << "\tActive CPU cycles: "
- << (totalCpuCycles - totalActiveNpuCycles)
- << " (approx)" << std::endl;
+ calcProfilingStat(static_cast<uint64_t>(unit.time),
+ TIME, result.samplesNum);
- strResults << "\tTime in ms: " << totalTimeMs << std::endl;
+ calcProfilingStat(unit.cpuCycles - unit.activeNpuCycles,
+ CPU_ACTIVE, result.samplesNum);
#endif
}
+ result.data.emplace_back(AXI0_RD);
+ result.data.emplace_back(AXI0_WR);
+ result.data.emplace_back(AXI1_RD);
+ result.data.emplace_back(NPU_ACTIVE);
+ result.data.emplace_back(NPU_IDLE);
+ result.data.emplace_back(NPU_Total);
+#if defined(CPU_PROFILE_ENABLED)
+ result.data.emplace_back(CPU_ACTIVE);
+ result.data.emplace_back(TIME);
+#endif
+ results.emplace_back(result);
}
this->Reset();
- return strResults.str();
+ }
+
+ void printStatisticsHeader(uint32_t samplesNum) {
+ info("Number of samples: %i\n", samplesNum);
+ info("%s\n", "Total / Avg./ Min / Max");
+ }
+
+ void Profiler::PrintProfilingResult(bool printFullStat) {
+ std::vector<ProfileResult> results{};
+ GetAllResultsAndReset(results);
+ for(ProfileResult& result: results) {
+ info("Profile for %s:\n", result.name.c_str());
+
+ if (printFullStat) {
+ printStatisticsHeader(result.samplesNum);
+ }
+
+ for (Statistics &stat: result.data) {
+ if (printFullStat) {
+ info("%s %s: %llu / %.0f / %llu / %llu \n", stat.name.c_str(), stat.unit.c_str(),
+ stat.total, stat.avrg, stat.min, stat.max);
+ } else {
+ info("%s %s: %.0f\n", stat.name.c_str(), stat.unit.c_str(), stat.avrg);
+ }
+ }
+ }
}
void Profiler::SetName(const char* str)
@@ -197,11 +245,19 @@ namespace app {
struct ProfilingUnit unit;
- if (timer->cap.npu_cycles && timer->get_npu_total_cycle_diff &&
- timer->get_npu_active_cycle_diff)
+ if (timer->cap.npu_cycles && timer->get_npu_cycles_diff)
{
- unit.npuCycles = timer->get_npu_total_cycle_diff(&start, &end);
- unit.activeNpuCycles = timer->get_npu_active_cycle_diff(&start, &end);
+ const size_t size = 6;
+ uint64_t pmuCounters[size] = {0};
+ /* 6 values: total cc, active cc, idle cc, axi0 read, axi0 write, axi1 read*/
+ if (0 == timer->get_npu_cycles_diff(&start, &end, pmuCounters, size)) {
+ unit.npuCycles = pmuCounters[0];
+ unit.activeNpuCycles = pmuCounters[1];
+ unit.idleNpuCycles = pmuCounters[2];
+ unit.axi0reads = pmuCounters[3];
+ unit.axi0writes = pmuCounters[4];
+ unit.axi1reads = pmuCounters[5];
+ }
}
if (timer->cap.cpu_cycles && timer->get_cpu_cycle_diff) {
diff --git a/source/application/main/UseCaseCommonUtils.cc b/source/application/main/UseCaseCommonUtils.cc
index 4ea5e4d..3acf53f 100644
--- a/source/application/main/UseCaseCommonUtils.cc
+++ b/source/application/main/UseCaseCommonUtils.cc
@@ -21,16 +21,11 @@
namespace arm {
namespace app {
- bool RunInference(hal_platform& platform, arm::app::Model& model)
+ bool RunInference(arm::app::Model& model, Profiler& profiler)
{
- Profiler profiler{&platform, "Inference"};
- profiler.StartProfiling();
-
+ profiler.StartProfiling("Inference");
bool runInf = model.RunInference();
-
profiler.StopProfiling();
- std::string profileResults = profiler.GetResultsAndReset();
- info("%s\n", profileResults.c_str());
return runInf;
}
diff --git a/source/application/main/include/Profiler.hpp b/source/application/main/include/Profiler.hpp
index b16a63b..d93b257 100644
--- a/source/application/main/include/Profiler.hpp
+++ b/source/application/main/include/Profiler.hpp
@@ -26,10 +26,31 @@
namespace arm {
namespace app {
+ /** Statistics for a profiling metric. */
+ struct Statistics {
+ std::string name;
+ std::string unit;
+ std::uint64_t total;
+ double avrg;
+ std::uint64_t min;
+ std::uint64_t max;
+ };
+
+ /** Profiling results with calculated statistics. */
+ struct ProfileResult {
+ std::string name;
+ std::uint32_t samplesNum;
+ std::vector<Statistics> data;
+ };
+
/** A single profiling unit definition. */
struct ProfilingUnit {
uint64_t npuCycles = 0;
uint64_t activeNpuCycles = 0;
+ uint64_t idleNpuCycles = 0;
+ uint64_t axi0writes = 0;
+ uint64_t axi0reads = 0;
+ uint64_t axi1reads = 0;
uint64_t cpuCycles = 0;
time_t time = 0;
};
@@ -73,18 +94,22 @@ namespace app {
void Reset();
/**
- * @brief Gets the results as string and resets the profiler.
- * @returns Result string.
+ * @brief Collects profiling results statistics and resets the profiler.
+ **/
+ void GetAllResultsAndReset(std::vector<ProfileResult>& results);
+
+ /**
+ * @brief Prints collected profiling results and resets the profiler.
**/
- std::string GetResultsAndReset();
+ void PrintProfilingResult(bool printFullStat = false);
/** @brief Set the profiler name. */
void SetName(const char* str);
private:
ProfilingMap _m_series; /* Profiling series map. */
- time_counter _m_tstampSt; /* Container for a current starting timestamp. */
- time_counter _m_tstampEnd; /* Container for a current ending timestamp. */
+ time_counter _m_tstampSt{}; /* Container for a current starting timestamp. */
+ time_counter _m_tstampEnd{}; /* Container for a current ending timestamp. */
hal_platform * _m_pPlatform = nullptr; /* Platform pointer - to get the timer. */
bool _m_started = false; /* Indicates profiler has been started. */
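For reference, a minimal usage sketch (not part of this patch) of the reworked Profiler API declared above, assuming an initialised hal_platform named platform and a loaded arm::app::Model named model, as in the use-case handlers further down:

    arm::app::Profiler profiler{&platform, "example"};

    /* Each StartProfiling/StopProfiling pair records one sample; reusing a
     * series name accumulates further samples under it. */
    profiler.StartProfiling("Inference");
    model.RunInference();
    profiler.StopProfiling();

    /* Prints per-metric averages (pass true for the full Total / Avg. / Min / Max
     * view) and resets the profiler. */
    profiler.PrintProfilingResult();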
diff --git a/source/application/main/include/UseCaseCommonUtils.hpp b/source/application/main/include/UseCaseCommonUtils.hpp
index 02200e8..7887aea 100644
--- a/source/application/main/include/UseCaseCommonUtils.hpp
+++ b/source/application/main/include/UseCaseCommonUtils.hpp
@@ -38,11 +38,11 @@ namespace app {
* @brief Run inference using given model
* object. If profiling is enabled, it will log the
* statistics too.
- * @param[in] platform Reference to the hal platform object.
* @param[in] model Reference to the initialised model.
+ * @param[in] profiler Reference to the initialised profiler.
* @return true if inference succeeds, false otherwise.
**/
- bool RunInference(hal_platform& platform, arm::app::Model& model);
+ bool RunInference(arm::app::Model& model, Profiler& profiler);
/**
* @brief Read input and return as an integer.
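For reference, a sketch (not part of this patch) of the new calling convention from a use-case handler, with the model and profiler fetched from the ApplicationContext as the handlers below now do:

    auto& model    = ctx.Get<arm::app::Model&>("model");
    auto& profiler = ctx.Get<arm::app::Profiler&>("profiler");
    if (arm::app::RunInference(model, profiler)) {
        profiler.PrintProfilingResult();
    }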
diff --git a/source/use_case/ad/src/MainLoop.cc b/source/use_case/ad/src/MainLoop.cc
index 5455b43..6a7cbe0 100644
--- a/source/use_case/ad/src/MainLoop.cc
+++ b/source/use_case/ad/src/MainLoop.cc
@@ -56,6 +56,8 @@ void main_loop(hal_platform& platform)
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
+ arm::app::Profiler profiler{&platform, "ad"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("clipIndex", 0);
diff --git a/source/use_case/ad/src/UseCaseHandler.cc b/source/use_case/ad/src/UseCaseHandler.cc
index c18a0a4..1c15595 100644
--- a/source/use_case/ad/src/UseCaseHandler.cc
+++ b/source/use_case/ad/src/UseCaseHandler.cc
@@ -76,6 +76,7 @@ namespace app {
bool ClassifyVibrationHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
{
auto& platform = ctx.Get<hal_platform&>("platform");
+ auto& profiler = ctx.Get<Profiler&>("profiler");
constexpr uint32_t dataPsnTxtInfStartX = 20;
constexpr uint32_t dataPsnTxtInfStartY = 40;
@@ -98,7 +99,7 @@ namespace app {
const auto frameLength = ctx.Get<int>("frameLength");
const auto frameStride = ctx.Get<int>("frameStride");
const auto scoreThreshold = ctx.Get<float>("scoreThreshold");
- const float trainingMean = ctx.Get<float>("trainingMean");
+ const auto trainingMean = ctx.Get<float>("trainingMean");
auto startClipIdx = ctx.Get<uint32_t>("clipIndex");
TfLiteTensor* outputTensor = model.GetOutputTensor(0);
@@ -193,7 +194,7 @@ namespace app {
audioDataSlider.TotalStrides() + 1);
/* Run inference over this audio clip sliding window */
- arm::app::RunInference(platform, model);
+ arm::app::RunInference(model, profiler);
/* Use the negative softmax score of the corresponding index as the outlier score */
std::vector<float> dequantOutput = Dequantize<int8_t>(outputTensor);
@@ -219,6 +220,8 @@ namespace app {
return false;
}
+ profiler.PrintProfilingResult();
+
_IncrementAppCtxClipIdx(ctx);
} while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);
diff --git a/source/use_case/asr/src/MainLoop.cc b/source/use_case/asr/src/MainLoop.cc
index ca777be..c5a26a4 100644
--- a/source/use_case/asr/src/MainLoop.cc
+++ b/source/use_case/asr/src/MainLoop.cc
@@ -96,6 +96,8 @@ void main_loop(hal_platform& platform)
GetLabelsVector(labels);
arm::app::AsrClassifier classifier; /* Classifier wrapper object. */
+ arm::app::Profiler profiler{&platform, "asr"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("clipIndex", 0);
diff --git a/source/use_case/asr/src/UseCaseHandler.cc b/source/use_case/asr/src/UseCaseHandler.cc
index e706eb8..5d3157a 100644
--- a/source/use_case/asr/src/UseCaseHandler.cc
+++ b/source/use_case/asr/src/UseCaseHandler.cc
@@ -67,6 +67,8 @@ namespace app {
auto& platform = ctx.Get<hal_platform&>("platform");
platform.data_psn->clear(COLOR_BLACK);
+ auto& profiler = ctx.Get<Profiler&>("profiler");
+
/* If the request has a valid size, set the audio index. */
if (clipIndex < NUMBER_OF_FILES) {
if (!_SetAppCtxClipIdx(ctx, clipIndex)) {
@@ -168,18 +170,11 @@ namespace app {
info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));
- Profiler prepProfiler{&platform, "pre-processing"};
- prepProfiler.StartProfiling();
-
/* Calculate MFCCs, deltas and populate the input tensor. */
prep.Invoke(inferenceWindow, inferenceWindowLen, inputTensor);
- prepProfiler.StopProfiling();
- std::string prepProfileResults = prepProfiler.GetResultsAndReset();
- info("%s\n", prepProfileResults.c_str());
-
/* Run inference over this audio clip sliding window. */
- arm::app::RunInference(platform, model);
+ arm::app::RunInference(model, profiler);
/* Post-process. */
postp.Invoke(outputTensor, reductionAxis, !audioDataSlider.HasNext());
@@ -216,6 +211,8 @@ namespace app {
return false;
}
+ profiler.PrintProfilingResult();
+
_IncrementAppCtxClipIdx(ctx);
} while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);
@@ -256,6 +253,8 @@ namespace app {
platform.data_psn->set_text_color(COLOR_GREEN);
+ info("Final results:\n");
+ info("Total number of inferences: %zu\n", results.size());
/* Results from multiple inferences should be combined before processing. */
std::vector<arm::app::ClassificationResult> combinedResults;
for (auto& result : results) {
@@ -268,8 +267,9 @@ namespace app {
for (const auto & result : results) {
std::string infResultStr = audio::asr::DecodeOutput(result.m_resultVec);
- info("Result for inf %u: %s\n", result.m_inferenceNumber,
- infResultStr.c_str());
+ info("For timestamp: %f (inference #: %u); label: %s\n",
+ result.m_timeStamp, result.m_inferenceNumber,
+ infResultStr.c_str());
}
/* Get the decoded result for the combined result. */
@@ -280,7 +280,7 @@ namespace app {
dataPsnTxtStartX1, dataPsnTxtStartY1,
allow_multiple_lines);
- info("Final result: %s\n", finalResultStr.c_str());
+ info("Complete recognition: %s\n", finalResultStr.c_str());
return true;
}
diff --git a/source/use_case/img_class/src/MainLoop.cc b/source/use_case/img_class/src/MainLoop.cc
index 469907c..66d7064 100644
--- a/source/use_case/img_class/src/MainLoop.cc
+++ b/source/use_case/img_class/src/MainLoop.cc
@@ -58,6 +58,8 @@ void main_loop(hal_platform& platform)
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
+ arm::app::Profiler profiler{&platform, "img_class"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("imgIndex", 0);
diff --git a/source/use_case/img_class/src/UseCaseHandler.cc b/source/use_case/img_class/src/UseCaseHandler.cc
index a412fec..f7e83f5 100644
--- a/source/use_case/img_class/src/UseCaseHandler.cc
+++ b/source/use_case/img_class/src/UseCaseHandler.cc
@@ -74,6 +74,7 @@ namespace app {
bool ClassifyImageHandler(ApplicationContext& ctx, uint32_t imgIndex, bool runAll)
{
auto& platform = ctx.Get<hal_platform&>("platform");
+ auto& profiler = ctx.Get<Profiler&>("profiler");
constexpr uint32_t dataPsnImgDownscaleFactor = 2;
constexpr uint32_t dataPsnImgStartX = 10;
@@ -144,7 +145,7 @@ namespace app {
info("Running inference on image %u => %s\n", ctx.Get<uint32_t>("imgIndex"),
get_filename(ctx.Get<uint32_t>("imgIndex")));
- RunInference(platform, model);
+ RunInference(model, profiler);
/* Erase. */
str_inf = std::string(str_inf.size(), ' ');
@@ -167,6 +168,8 @@ namespace app {
return false;
}
+ profiler.PrintProfilingResult();
+
_IncrementAppCtxImageIdx(ctx);
} while (runAll && ctx.Get<uint32_t>("imgIndex") != curImIdx);
@@ -230,6 +233,8 @@ namespace app {
uint32_t rowIdx1 = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;
uint32_t rowIdx2 = dataPsnTxtStartY2;
+ info("Final results:\n");
+ info("Total number of inferences: 1\n");
for (uint32_t i = 0; i < results.size(); ++i) {
std::string resultStr =
std::to_string(i + 1) + ") " +
diff --git a/source/use_case/inference_runner/src/MainLoop.cc b/source/use_case/inference_runner/src/MainLoop.cc
index b110a24..26a20de 100644
--- a/source/use_case/inference_runner/src/MainLoop.cc
+++ b/source/use_case/inference_runner/src/MainLoop.cc
@@ -38,6 +38,9 @@ void main_loop(hal_platform& platform)
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
+ arm::app::Profiler profiler{&platform, "inference_runner"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
+
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("imgIndex", 0);
diff --git a/source/use_case/inference_runner/src/UseCaseHandler.cc b/source/use_case/inference_runner/src/UseCaseHandler.cc
index ac4ea47..a2b3fb7 100644
--- a/source/use_case/inference_runner/src/UseCaseHandler.cc
+++ b/source/use_case/inference_runner/src/UseCaseHandler.cc
@@ -28,6 +28,7 @@ namespace app {
bool RunInferenceHandler(ApplicationContext& ctx)
{
auto& platform = ctx.Get<hal_platform&>("platform");
+ auto& profiler = ctx.Get<Profiler&>("profiler");
auto& model = ctx.Get<Model&>("model");
constexpr uint32_t dataPsnTxtInfStartX = 150;
@@ -67,7 +68,7 @@ namespace app {
str_inf.c_str(), str_inf.size(),
dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
- RunInference(platform, model);
+ RunInference(model, profiler);
/* Erase. */
str_inf = std::string(str_inf.size(), ' ');
@@ -75,6 +76,9 @@ namespace app {
str_inf.c_str(), str_inf.size(),
dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+ info("Final results:\n");
+ profiler.PrintProfilingResult();
+
#if VERIFY_TEST_OUTPUT
for (size_t outputIndex = 0; outputIndex < model.GetNumOutputs(); outputIndex++) {
arm::app::DumpTensor(model.GetOutputTensor(outputIndex));
diff --git a/source/use_case/kws/src/MainLoop.cc b/source/use_case/kws/src/MainLoop.cc
index 24cb939..f971c30 100644
--- a/source/use_case/kws/src/MainLoop.cc
+++ b/source/use_case/kws/src/MainLoop.cc
@@ -58,6 +58,9 @@ void main_loop(hal_platform& platform)
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
+ arm::app::Profiler profiler{&platform, "kws"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
+
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("clipIndex", 0);
diff --git a/source/use_case/kws/src/UseCaseHandler.cc b/source/use_case/kws/src/UseCaseHandler.cc
index 872d323..d2cba55 100644
--- a/source/use_case/kws/src/UseCaseHandler.cc
+++ b/source/use_case/kws/src/UseCaseHandler.cc
@@ -82,6 +82,7 @@ namespace app {
bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
{
auto& platform = ctx.Get<hal_platform&>("platform");
+ auto& profiler = ctx.Get<Profiler&>("profiler");
constexpr uint32_t dataPsnTxtInfStartX = 20;
constexpr uint32_t dataPsnTxtInfStartY = 40;
@@ -215,7 +216,7 @@ namespace app {
audioDataSlider.TotalStrides() + 1);
/* Run inference over this audio clip sliding window. */
- arm::app::RunInference(platform, model);
+ arm::app::RunInference(model, profiler);
std::vector<ClassificationResult> classificationResult;
auto& classifier = ctx.Get<KwsClassifier&>("classifier");
@@ -243,6 +244,8 @@ namespace app {
return false;
}
+ profiler.PrintProfilingResult();
+
_IncrementAppCtxClipIdx(ctx);
} while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);
@@ -281,6 +284,8 @@ namespace app {
constexpr uint32_t dataPsnTxtYIncr = 16; /* Row index increment. */
platform.data_psn->set_text_color(COLOR_GREEN);
+ info("Final results:\n");
+ info("Total number of inferences: %zu\n", results.size());
/* Display each result */
uint32_t rowIdx1 = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;
@@ -290,7 +295,7 @@ namespace app {
std::string topKeyword{"<none>"};
float score = 0.f;
- if (results[i].m_resultVec.size()) {
+ if (!results[i].m_resultVec.empty()) {
topKeyword = results[i].m_resultVec[0].m_label;
score = results[i].m_resultVec[0].m_normalisedVal;
}
@@ -305,13 +310,20 @@ namespace app {
dataPsnTxtStartX1, rowIdx1, false);
rowIdx1 += dataPsnTxtYIncr;
- info("For timestamp: %f (inference #: %u); threshold: %f\n",
- results[i].m_timeStamp, results[i].m_inferenceNumber,
- results[i].m_threshold);
- for (uint32_t j = 0; j < results[i].m_resultVec.size(); ++j) {
- info("\t\tlabel @ %u: %s, score: %f\n", j,
- results[i].m_resultVec[j].m_label.c_str(),
- results[i].m_resultVec[j].m_normalisedVal);
+ if (results[i].m_resultVec.empty()) {
+ info("For timestamp: %f (inference #: %u); label: %s; threshold: %f\n",
+ results[i].m_timeStamp, results[i].m_inferenceNumber,
+ topKeyword.c_str(),
+ results[i].m_threshold);
+ } else {
+ for (uint32_t j = 0; j < results[i].m_resultVec.size(); ++j) {
+ info("For timestamp: %f (inference #: %u); label: %s, score: %f; threshold: %f\n",
+ results[i].m_timeStamp,
+ results[i].m_inferenceNumber,
+ results[i].m_resultVec[j].m_label.c_str(),
+ results[i].m_resultVec[j].m_normalisedVal,
+ results[i].m_threshold);
+ }
}
}
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
index 37146c9..95e5a8f 100644
--- a/source/use_case/kws_asr/src/MainLoop.cc
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -101,6 +101,9 @@ void main_loop(hal_platform& platform)
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
+ arm::app::Profiler profiler{&platform, "kws_asr"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
+
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("kwsmodel", kwsModel);
caseContext.Set<arm::app::Model&>("asrmodel", asrModel);
diff --git a/source/use_case/kws_asr/src/UseCaseHandler.cc b/source/use_case/kws_asr/src/UseCaseHandler.cc
index c50796f..a428210 100644
--- a/source/use_case/kws_asr/src/UseCaseHandler.cc
+++ b/source/use_case/kws_asr/src/UseCaseHandler.cc
@@ -127,6 +127,7 @@ namespace app {
KWSOutput output;
+ auto& profiler = ctx.Get<Profiler&>("profiler");
auto& kwsModel = ctx.Get<Model&>("kwsmodel");
if (!kwsModel.IsInited()) {
printf_err("KWS model has not been initialised\n");
@@ -243,7 +244,7 @@ namespace app {
audioDataSlider.TotalStrides() + 1);
/* Run inference over this audio clip sliding window. */
- arm::app::RunInference(platform, kwsModel);
+ arm::app::RunInference(kwsModel, profiler);
std::vector<ClassificationResult> kwsClassificationResult;
auto& kwsClassifier = ctx.Get<KwsClassifier&>("kwsclassifier");
@@ -284,6 +285,8 @@ namespace app {
return output;
}
+ profiler.PrintProfilingResult();
+
output.executionSuccess = true;
return output;
}
@@ -300,6 +303,7 @@ namespace app {
constexpr uint32_t dataPsnTxtInfStartX = 20;
constexpr uint32_t dataPsnTxtInfStartY = 40;
+ auto& profiler = ctx.Get<Profiler&>("profiler");
auto& platform = ctx.Get<hal_platform&>("platform");
platform.data_psn->clear(COLOR_BLACK);
@@ -389,18 +393,11 @@ namespace app {
info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));
- Profiler prepProfiler{&platform, "pre-processing"};
- prepProfiler.StartProfiling();
-
/* Calculate MFCCs, deltas and populate the input tensor. */
asrPrep.Invoke(asrInferenceWindow, asrInferenceWindowLen, asrInputTensor);
- prepProfiler.StopProfiling();
- std::string prepProfileResults = prepProfiler.GetResultsAndReset();
- info("%s\n", prepProfileResults.c_str());
-
/* Run inference over this audio clip sliding window. */
- arm::app::RunInference(platform, asrModel);
+ arm::app::RunInference(asrModel, profiler);
/* Post-process. */
asrPostp.Invoke(asrOutputTensor, reductionAxis, !audioDataSlider.HasNext());
@@ -432,6 +429,8 @@ namespace app {
return false;
}
+ profiler.PrintProfilingResult();
+
return true;
}
diff --git a/tests/common/ProfilerTests.cc b/tests/common/ProfilerTests.cc
index caf492b..1435dde 100644
--- a/tests/common/ProfilerTests.cc
+++ b/tests/common/ProfilerTests.cc
@@ -34,28 +34,73 @@ TEST_CASE("Common: Test Profiler")
hal_init(&platform, &data_acq, &data_psn, &timer);
hal_platform_init(&platform);
- /* An invalid profiler shouldn't be of much use. */
- arm::app::Profiler profilerInvalid {nullptr, "test_invalid"};
- REQUIRE(false == profilerInvalid.StartProfiling());
- REQUIRE(false == profilerInvalid.StopProfiling());
-
- arm::app::Profiler profilerValid{&platform, "test_valid"};
- REQUIRE(true == profilerValid.StartProfiling());
- REQUIRE(true == profilerValid.StopProfiling());
-
- std::string strProfile = profilerValid.GetResultsAndReset();
- REQUIRE(std::string::npos != strProfile.find("test_valid"));
-
-#if defined(CPU_PROFILE_ENABLED)
- /* We should have milliseconds elapsed. */
- REQUIRE(std::string::npos != strProfile.find("ms"));
-#endif /* defined(CPU_PROFILE_ENABLED) */
-
- /* Abuse should fail: */
- REQUIRE(false == profilerValid.StopProfiling()); /* We need to start it first. */
- REQUIRE(true == profilerValid.StartProfiling()); /* Should be able to start it fine. */
- REQUIRE(false == profilerValid.StartProfiling()); /* Can't restart it without resetting. */
- profilerValid.Reset(); /* Reset. */
- REQUIRE(true == profilerValid.StartProfiling()); /* Can start it again now. */
- REQUIRE(true == profilerValid.StopProfiling()); /* Can start it again now. */
-}
+ /* An invalid profiler shouldn't be of much use */
+ SECTION("Test invalid profiler") {
+ arm::app::Profiler profilerInvalid{nullptr, "test_invalid"};
+ REQUIRE(false == profilerInvalid.StartProfiling());
+ REQUIRE(false == profilerInvalid.StopProfiling());
+ }
+
+ SECTION("Test valid profiler") {
+ arm::app::Profiler profilerValid{&platform, "test_valid"};
+ REQUIRE(true == profilerValid.StartProfiling());
+ REQUIRE(true == profilerValid.StopProfiling());
+ std::vector<arm::app::ProfileResult> results;
+ profilerValid.GetAllResultsAndReset(results);
+ REQUIRE(results.size() == 1);
+ REQUIRE(results[0].name == "test_valid");
+ /* Abuse should still fail: */
+ REQUIRE(false == profilerValid.StopProfiling()); /* We need to start it first */
+ REQUIRE(true == profilerValid.StartProfiling()); /* Should be able to start it fine */
+ REQUIRE(false == profilerValid.StartProfiling()); /* Can't restart it without resetting */
+ profilerValid.Reset();
+ REQUIRE(true == profilerValid.StartProfiling()); /* Can start it again now. */
+ REQUIRE(true == profilerValid.StopProfiling());
+ }
+
+ SECTION("Test multiple profilers") {
+ arm::app::Profiler profilerValid{&platform, "one"};
+ REQUIRE(true == profilerValid.StartProfiling());
+ REQUIRE(true == profilerValid.StopProfiling());
+
+ REQUIRE(true == profilerValid.StartProfiling("two"));
+ REQUIRE(true == profilerValid.StopProfiling());
+ REQUIRE(true == profilerValid.StartProfiling("two"));
+ REQUIRE(true == profilerValid.StopProfiling());
+
+ std::vector<arm::app::ProfileResult> results;
+ profilerValid.GetAllResultsAndReset(results);
+ REQUIRE(results.size() == 2);
+ REQUIRE(results[0].name == "one");
+ REQUIRE(results[0].samplesNum == 1);
+ REQUIRE(results[1].name == "two");
+ REQUIRE(results[1].samplesNum == 2);
+ }
+
+#if defined (CPU_PROFILE_ENABLED)
+ SECTION("Test CPU profiler") {
+
+ arm::app::Profiler profilerCPU{&platform, "test cpu"};
+ std::vector<arm::app::ProfileResult> results;
+ profilerCPU.StartProfiling();
+ profilerCPU.StopProfiling();
+ profilerCPU.GetAllResultsAndReset(results);
+ REQUIRE(results.size() == 1);
+ bool foundTime = false;
+ bool foundCPU_ACTIVE = false;
+ for(arm::app::Statistics& stat: results[0].data) {
+
+ if (!foundTime) {
+ foundTime = stat.name == "Time";
+ }
+
+ if (!foundCPU_ACTIVE) {
+ foundCPU_ACTIVE = stat.name == "CPU ACTIVE";
+ }
+
+ }
+ REQUIRE(foundTime);
+ REQUIRE(foundCPU_ACTIVE);
+ }
+#endif /* defined (CPU_PROFILE_ENABLED) */
+}
\ No newline at end of file
diff --git a/tests/use_case/img_class/ImgClassificationUCTest.cc b/tests/use_case/img_class/ImgClassificationUCTest.cc
index abfcc44..b989415 100644
--- a/tests/use_case/img_class/ImgClassificationUCTest.cc
+++ b/tests/use_case/img_class/ImgClassificationUCTest.cc
@@ -53,7 +53,7 @@ TEST_CASE("Inference by index", "[.]")
hal_platform_init(&platform);
/* Model wrapper object. */
- arm::app::MobileNetModel model;
+ arm::app::MobileNetModel model;
/* Load the model. */
REQUIRE(model.Init());
@@ -61,6 +61,8 @@ TEST_CASE("Inference by index", "[.]")
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
+ arm::app::Profiler profiler{&platform, "img_class"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("imgIndex", 0);
@@ -99,6 +101,8 @@ TEST_CASE("Inference run all images", "[.]")
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
+ arm::app::Profiler profiler{&platform, "img_class"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("imgIndex", 0);
diff --git a/tests/use_case/kws/KWSHandlerTest.cc b/tests/use_case/kws/KWSHandlerTest.cc
index dee2f6f..50e5a83 100644
--- a/tests/use_case/kws/KWSHandlerTest.cc
+++ b/tests/use_case/kws/KWSHandlerTest.cc
@@ -60,6 +60,9 @@ TEST_CASE("Inference by index")
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
+
+ arm::app::Profiler profiler{&platform, "kws"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<int>("frameLength", g_FrameLength); /* 640 sample length for DSCNN. */
@@ -137,6 +140,8 @@ TEST_CASE("Inference run all clips")
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
+ arm::app::Profiler profiler{&platform, "kws"};
+ caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("clipIndex", 0);