From 4692e11e5af29b97748a1585a092df6800a0a831 Mon Sep 17 00:00:00 2001 From: Kevin May Date: Mon, 18 Oct 2021 14:41:50 +0100 Subject: IVGCVSW-6440 Print new Optimize and LoadedNetwork profiling points * Add parent LoadedNetwork profiling point * Make generic populateParent function to print new descendents in json Signed-off-by: Kevin May Change-Id: I41dc876bffae88e61a16d07fb13b062c321e78a6 --- src/armnn/LoadedNetwork.cpp | 1 + src/armnn/Profiling.cpp | 46 ++++++-- src/armnn/Profiling.hpp | 2 +- src/armnn/layers/Convolution2dLayer.cpp | 2 +- src/armnn/test/ProfilerTests.cpp | 131 +++++++++++++++------ .../backendsCommon/test/JsonPrinterTestImpl.cpp | 7 +- .../cl/workloads/ClConvolution2dWorkload.cpp | 29 +++-- 7 files changed, 158 insertions(+), 60 deletions(-) diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index d6dd5d2ee8..7fb14d0f32 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -122,6 +122,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr net, m_TensorHandleFactoryRegistry(), m_ProfilingService(profilingService) { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadedNetwork"); // Get the profiler and register it for the current thread. const std::shared_ptr& profiler = m_OptimizedNetwork->GetProfiler(); ProfilerManager::GetInstance().RegisterProfiler(profiler.get()); diff --git a/src/armnn/Profiling.cpp b/src/armnn/Profiling.cpp index 7de602fc40..6a3522ba29 100644 --- a/src/armnn/Profiling.cpp +++ b/src/armnn/Profiling.cpp @@ -254,16 +254,16 @@ int CalcLevel(const Event* eventPtr) return level; } -void ProfilerImpl::PopulateInferences(std::vector& outInferences, int& outBaseLevel) const +void ProfilerImpl::PopulateParent(std::vector& outEvents, int& outBaseLevel, std::string parentName) const { - outInferences.reserve(m_EventSequence.size()); + outEvents.reserve(m_EventSequence.size()); for (const auto& event : m_EventSequence) { const Event* eventPtrRaw = event.get(); - if (eventPtrRaw->GetName() == "EnqueueWorkload") + if (eventPtrRaw->GetName() == parentName) { outBaseLevel = (outBaseLevel == -1) ? CalcLevel(eventPtrRaw) : outBaseLevel; - outInferences.push_back(eventPtrRaw); + outEvents.push_back(eventPtrRaw); } } } @@ -362,18 +362,40 @@ void ProfilerImpl::Print(std::ostream& outStream) const outStream.setf(std::ios::fixed); JsonPrinter printer(outStream); - // First find all the "inference" Events and print out duration measurements. + // First find all the parent Events and print out duration measurements. 
int baseLevel = -1; + + std::vector optimizations; + PopulateParent(optimizations, baseLevel, "Optimizer"); + + std::vector loadedNetworks; + PopulateParent(loadedNetworks, baseLevel, "LoadedNetwork"); + std::vector inferences; - PopulateInferences(inferences, baseLevel); + PopulateParent(inferences, baseLevel, "EnqueueWorkload"); // Second map out descendants hierarchy std::map> descendantsMap; PopulateDescendants(descendantsMap); + // Extract json objects for each parent event type + JsonChildObject optimizeObject{ "optimize_measurements" }; + + for (unsigned int optimizeIndex = 0; optimizeIndex < optimizations.size(); ++optimizeIndex) + { + auto optimization = optimizations[optimizeIndex]; + ExtractJsonObjects(optimizeIndex, optimization, optimizeObject, descendantsMap); + } + + JsonChildObject loadedNetworkObject{ "loaded_network_measurements" }; + + for (unsigned int loadedNetworkIndex = 0; loadedNetworkIndex < loadedNetworks.size(); ++loadedNetworkIndex) + { + auto loadedNetwork = loadedNetworks[loadedNetworkIndex]; + ExtractJsonObjects(loadedNetworkIndex, loadedNetwork, loadedNetworkObject, descendantsMap); + } + JsonChildObject inferenceObject{ "inference_measurements" }; - std::vector workloadObjects; - std::map> workloadToKernelObjects; for (unsigned int inferenceIndex = 0; inferenceIndex < inferences.size(); ++inferenceIndex) { @@ -399,6 +421,12 @@ void ProfilerImpl::Print(std::ostream& outStream) const size_t id = 0; if (m_DetailsToStdOutMethod != ProfilingDetailsMethod::DetailsOnly) { + printer.PrintJsonChildObject(optimizeObject, id); + printer.PrintSeparator(); + printer.PrintNewLine(); + printer.PrintJsonChildObject(loadedNetworkObject, id); + printer.PrintSeparator(); + printer.PrintNewLine(); printer.PrintJsonChildObject(inferenceObject, id); } // end of ArmNN @@ -452,7 +480,7 @@ void ProfilerImpl::AnalyzeEventsAndWriteResults(std::ostream& outStream) const int baseLevel = -1; std::vector inferences; - PopulateInferences(inferences, baseLevel); + PopulateParent(inferences, baseLevel, "EnqueueWorkload"); // Second map out descendants hierarchy std::map> descendantsMap; diff --git a/src/armnn/Profiling.hpp b/src/armnn/Profiling.hpp index 42d7f4d638..c6571d1309 100644 --- a/src/armnn/Profiling.hpp +++ b/src/armnn/Profiling.hpp @@ -95,7 +95,7 @@ public: void AnalyzeEventSequenceAndWriteResults(EventIterType first, EventIterType last, std::ostream& outStream) const; std::map CalculateProfilingEventStats() const; - void PopulateInferences(std::vector& outInferences, int& outBaseLevel) const; + void PopulateParent(std::vector& outEvents, int& outBaseLevel, std::string parentName) const; void PopulateDescendants(std::map>& outDescendantsMap) const; std::stack m_Parents; diff --git a/src/armnn/layers/Convolution2dLayer.cpp b/src/armnn/layers/Convolution2dLayer.cpp index ae29d833e8..d73e67badf 100644 --- a/src/armnn/layers/Convolution2dLayer.cpp +++ b/src/armnn/layers/Convolution2dLayer.cpp @@ -50,7 +50,7 @@ std::unique_ptr Convolution2dLayer::CreateWorkload(const IWorkloadFac { // on this level constant data should not be released.. 
ARMNN_ASSERT_MSG(m_Weight != nullptr, "Convolution2dLayer: Weights data should not be null."); - + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Convolution2dLayer_CreateWorkload"); Convolution2dQueueDescriptor descriptor; descriptor.m_Weight = m_Weight.get(); diff --git a/src/armnn/test/ProfilerTests.cpp b/src/armnn/test/ProfilerTests.cpp index b23ac1ccc3..6cb8d7fae6 100644 --- a/src/armnn/test/ProfilerTests.cpp +++ b/src/armnn/test/ProfilerTests.cpp @@ -290,10 +290,9 @@ TEST_CASE("ProfilerJsonPrinter") profiler->EnableProfiling(true); { - // Test scoped macro. ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, armnn::EmptyOptional(), - "EnqueueWorkload", + "Optimizer", TestInstrument()) ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, armnn::EmptyOptional(), @@ -306,21 +305,57 @@ TEST_CASE("ProfilerJsonPrinter") "Level 1A", TestInstrument()) } - + } + } + { + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, + armnn::EmptyOptional(), + "LoadedNetwork", + TestInstrument()) + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, + armnn::EmptyOptional(), + "Level 0", + TestInstrument()) + { { ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, armnn::EmptyOptional(), - "Level 1B", + "Level 1A", TestInstrument()) - + } + } + } + { + // Test scoped macro. + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, + armnn::EmptyOptional(), + "EnqueueWorkload", + TestInstrument()) + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, + armnn::EmptyOptional(), + "Level 0", + TestInstrument()) + { { ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, armnn::EmptyOptional(), - "Level 2A", + "Level 1A", TestInstrument()) } + { + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, + armnn::EmptyOptional(), + "Level 1B", + TestInstrument()) + + { + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, + armnn::EmptyOptional(), + "Level 2A", + TestInstrument()) + } + } } - } } std::stringbuf buffer; @@ -335,38 +370,62 @@ TEST_CASE("ProfilerJsonPrinter") // blessed output validated by a human eyeballing the output to make sure it's ok and then copying it here. 
// validation also included running the blessed output through an online json validation site - std::string blessedOutput("{\n\t\"ArmNN\": {\n\t\t\"inference_measurements_#1\": {\n\t\t\t\"type\": \"" - "Event\",\n\t\t\t\"Measurement1_#1\": {\n\t\t\t\t\"type\": \"" - "Measurement\",\n\t\t\t\t\"raw\": [\n\t\t\t\t\t1.000000\n\t\t\t\t],\n\t\t\t\t\"" - "unit\": \"ms\"\n\t\t\t},\n\t\t\t\"Measurement2_#1\": {\n\t\t\t\t\"type\": \"" - "Measurement\",\n\t\t\t\t\"raw\": [\n\t\t\t\t\t2.000000\n\t\t\t\t],\n\t\t\t\t\"" - "unit\": \"us\"\n\t\t\t},\n\t\t\t\"Level 0_#2\": {\n\t\t\t\t\"type\": \"" - "Event\",\n\t\t\t\t\"Measurement1_#2\": {\n\t\t\t\t\t\"type\": \"" + std::string blessedOutput("{\n\t\"ArmNN\": {\n\t\t\"optimize_measurements_#1\": {\n\t\t\t\"type\": \"Event\"" + ",\n\t\t\t\"Measurement1_#1\": {\n\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\"raw\"" + ": [\n\t\t\t\t\t1.000000\n\t\t\t\t],\n\t\t\t\t\"unit\": \"ms\"\n\t\t\t},\n\t\t\t\"" + "Measurement2_#1\": {\n\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\"raw\": [\n\t\t\t" + "\t\t2.000000\n\t\t\t\t],\n\t\t\t\t\"unit\": \"us\"\n\t\t\t},\n\t\t\t\"Level 0_#2\": {\n" + "\t\t\t\t\"type\": \"Event\",\n\t\t\t\t\"Measurement1_#2\": {\n\t\t\t\t\t\"type\": \"" "Measurement\",\n\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t1.000000\n\t\t\t\t\t],\n\t\t\t\t\t\"" "unit\": \"ms\"\n\t\t\t\t},\n\t\t\t\t\"Measurement2_#2\": {\n\t\t\t\t\t\"type\": \"" "Measurement\",\n\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t2.000000\n\t\t\t\t\t],\n\t\t\t\t\t\"" - "unit\": \"us\"\n\t\t\t\t},\n\t\t\t\t\"Level 1A_#3\": {\n\t\t\t\t\t\"type\": \"" - "Event\",\n\t\t\t\t\t\"Measurement1_#3\": {\n\t\t\t\t\t\t\"type\": \"" - "Measurement\",\n\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t" - "1.000000\n\t\t\t\t\t\t],\n\t\t\t\t\t\t\"" - "unit\": \"ms\"\n\t\t\t\t\t},\n\t\t\t\t\t\"Measurement2_#3\": {\n\t\t\t\t\t\t\"type\": \"" - "Measurement\",\n\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t" - "2.000000\n\t\t\t\t\t\t],\n\t\t\t\t\t\t\"" - "unit\": \"us\"\n\t\t\t\t\t}\n\t\t\t\t},\n\t\t\t\t\"Level 1B_#4\": {\n\t\t\t\t\t\"" - "type\": \"Event\",\n\t\t\t\t\t\"Measurement1_#4\": {\n\t\t\t\t\t\t\"type\": \"" - "Measurement\",\n\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t" - "1.000000\n\t\t\t\t\t\t],\n\t\t\t\t\t\t\"" - "unit\": \"ms\"\n\t\t\t\t\t},\n\t\t\t\t\t\"Measurement2_#4\": {\n\t\t\t\t\t\t\"" - "type\": \"Measurement\",\n\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t" - "2.000000\n\t\t\t\t\t\t],\n\t\t\t\t\t\t\"" - "unit\": \"us\"\n\t\t\t\t\t},\n\t\t\t\t\t\"Level 2A_#5\": {\n\t\t\t\t\t\t\"" - "type\": \"Event\",\n\t\t\t\t\t\t\"Measurement1_#5\": {\n\t\t\t\t\t\t\t\"type\": \"" - "Measurement\",\n\t\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t\t" - "1.000000\n\t\t\t\t\t\t\t],\n\t\t\t\t\t\t\t\"" - "unit\": \"ms\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t\"Measurement2_#5\": {\n\t\t\t\t\t\t\t\"" - "type\": \"Measurement\",\n\t\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t\t" - "2.000000\n\t\t\t\t\t\t\t],\n\t\t\t\t\t\t\t\"" - "unit\": \"us\"\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n}\n"); + "unit\": \"us\"\n\t\t\t\t},\n\t\t\t\t\"Level 1A_#3\": {\n\t\t\t\t\t\"type\": \"Event\",\n" + "\t\t\t\t\t\"Measurement1_#3\": {\n\t\t\t\t\t\t\"type\": \"Measurement\",\n" + "\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t1.000000\n\t\t\t\t\t\t],\n\t\t\t\t\t\t\"unit\"" + ": \"ms\"\n\t\t\t\t\t},\n\t\t\t\t\t\"Measurement2_#3\": {\n\t\t\t\t\t\t\"type\": \"" + "Measurement\",\n\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t2.000000\n\t\t\t\t\t\t],\n\t\t\t" + "\t\t\t\"unit\": \"us\"\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t},\n\t\t\"" + "loaded_network_measurements_#4\": {\n\t\t\t\"type\": 
\"Event\",\n\t\t\t\"" + "Measurement1_#4\": {\n\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\"raw\": [\n\t\t\t\t" + "\t1.000000\n\t\t\t\t],\n\t\t\t\t\"unit\": \"ms\"\n\t\t\t},\n\t\t\t\"Measurement2_#4\"" + ": {\n\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\"raw\": [\n\t\t\t\t\t2.000000\n" + "\t\t\t\t],\n\t\t\t\t\"unit\": \"us\"\n\t\t\t},\n\t\t\t\"Level 0_#5\": {\n\t\t\t\t\"" + "type\": \"Event\",\n\t\t\t\t\"Measurement1_#5\": {\n\t\t\t\t\t\"type\": \"Measurement\"" + ",\n\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t1.000000\n\t\t\t\t\t],\n\t\t\t\t\t\"unit\": \"" + "ms\"\n\t\t\t\t},\n\t\t\t\t\"Measurement2_#5\": {\n\t\t\t\t\t\"type\": \"Measurement\"" + ",\n\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t2.000000\n\t\t\t\t\t],\n\t\t\t\t\t\"unit\": \"us\"" + "\n\t\t\t\t},\n\t\t\t\t\"Level 1A_#6\": {\n\t\t\t\t\t\"type\": \"Event\",\n\t\t\t\t\t\"" + "Measurement1_#6\": {\n\t\t\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\t\t\"raw\": [\n" + "\t\t\t\t\t\t\t1.000000\n\t\t\t\t\t\t],\n\t\t\t\t\t\t\"unit\": \"ms\"\n\t\t\t\t\t},\n" + "\t\t\t\t\t\"Measurement2_#6\": {\n\t\t\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\t\t\"" + "raw\": [\n\t\t\t\t\t\t\t2.000000\n\t\t\t\t\t\t],\n\t\t\t\t\t\t\"unit\": \"us\"" + "\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t},\n\t\t\"inference_measurements_#7\": {\n" + "\t\t\t\"type\": \"Event\",\n\t\t\t\"Measurement1_#7\": {\n\t\t\t\t\"type\": \"" + "Measurement\",\n\t\t\t\t\"raw\": [\n\t\t\t\t\t1.000000\n\t\t\t\t],\n\t\t\t\t\"unit\": \"" + "ms\"\n\t\t\t},\n\t\t\t\"Measurement2_#7\": {\n\t\t\t\t\"type\": \"Measurement\",\n" + "\t\t\t\t\"raw\": [\n\t\t\t\t\t2.000000\n\t\t\t\t],\n\t\t\t\t\"unit\": \"us\"\n\t\t\t},\n" + "\t\t\t\"Level 0_#8\": {\n\t\t\t\t\"type\": \"Event\",\n\t\t\t\t\"Measurement1_#8\": {\n" + "\t\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t1.000000\n" + "\t\t\t\t\t],\n\t\t\t\t\t\"unit\": \"ms\"\n\t\t\t\t},\n\t\t\t\t\"Measurement2_#8\": {\n" + "\t\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t2.000000\n" + "\t\t\t\t\t],\n\t\t\t\t\t\"unit\": \"us\"\n\t\t\t\t},\n\t\t\t\t\"Level 1A_#9\": {\n" + "\t\t\t\t\t\"type\": \"Event\",\n\t\t\t\t\t\"Measurement1_#9\": {\n\t\t\t\t\t\t\"type\"" + ": \"Measurement\",\n\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t1.000000\n\t\t\t\t\t\t],\n" + "\t\t\t\t\t\t\"unit\": \"ms\"\n\t\t\t\t\t},\n\t\t\t\t\t\"Measurement2_#9\": {\n" + "\t\t\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t2.000000\n" + "\t\t\t\t\t\t],\n\t\t\t\t\t\t\"unit\": \"us\"\n\t\t\t\t\t}\n\t\t\t\t},\n\t\t\t\t\"" + "Level 1B_#10\": {\n\t\t\t\t\t\"type\": \"Event\",\n\t\t\t\t\t\"Measurement1_#10\"" + ": {\n\t\t\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t" + "1.000000\n\t\t\t\t\t\t],\n\t\t\t\t\t\t\"unit\": \"ms\"\n\t\t\t\t\t},\n\t\t\t\t\t\"" + "Measurement2_#10\": {\n\t\t\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\t\t\"raw\"" + ": [\n\t\t\t\t\t\t\t2.000000\n\t\t\t\t\t\t],\n\t\t\t\t\t\t\"unit\": \"us\"\n" + "\t\t\t\t\t},\n\t\t\t\t\t\"Level 2A_#11\": {\n\t\t\t\t\t\t\"type\": \"Event\",\n\t\t\t" + "\t\t\t\"Measurement1_#11\": {\n\t\t\t\t\t\t\t\"type\": \"Measurement\",\n\t\t\t\t\t\t" + "\t\"raw\": [\n\t\t\t\t\t\t\t\t1.000000\n\t\t\t\t\t\t\t],\n\t\t\t\t\t\t\t\"unit\": \"" + "ms\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t\"Measurement2_#11\": {\n\t\t\t\t\t\t\t\"type\": \"" + "Measurement\",\n\t\t\t\t\t\t\t\"raw\": [\n\t\t\t\t\t\t\t\t2.000000\n\t\t\t\t\t\t\t],\n" + "\t\t\t\t\t\t\t\"unit\": \"us\"\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n" + "\t}\n}\n"); CHECK(output == blessedOutput); 
armnn::ProfilerManager::GetInstance().RegisterProfiler(nullptr); diff --git a/src/backends/backendsCommon/test/JsonPrinterTestImpl.cpp b/src/backends/backendsCommon/test/JsonPrinterTestImpl.cpp index a1128cfcb9..94855aa7b2 100644 --- a/src/backends/backendsCommon/test/JsonPrinterTestImpl.cpp +++ b/src/backends/backendsCommon/test/JsonPrinterTestImpl.cpp @@ -132,7 +132,6 @@ std::string GetSoftmaxProfilerJson(const std::vector& backends // build up the structure of the network INetworkPtr net(INetwork::Create()); - IConnectableLayer* input = net->AddInputLayer(0, "input"); SoftmaxDescriptor softmaxDescriptor; // Set Axis to -1 if CL or Neon until further Axes are supported. @@ -158,7 +157,9 @@ std::string GetSoftmaxProfilerJson(const std::vector& backends softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); // optimize the network - IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + armnn::OptimizerOptions optOptions; + optOptions.m_ProfilingEnabled = true; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions); if(!optNet) { FAIL("Error occurred during Optimization, Optimize() returned nullptr."); @@ -215,6 +216,8 @@ inline void ValidateProfilerJson(std::string& result) { if (sectionVector[i].find("\"ArmNN\":") != std::string::npos + || sectionVector[i].find("\"optimize_measurements\":") != std::string::npos + || sectionVector[i].find("\"loaded_network_measurements\":") != std::string::npos || sectionVector[i].find("\"inference_measurements\":") != std::string::npos) { sectionVector.erase(sectionVector.begin() + static_cast(i)); diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp index 12a47dcd94..a5f4c7f254 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp @@ -70,6 +70,7 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip : BaseWorkload(descriptor, info) , m_ConvolutionLayer(memoryManager) { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload"); const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); m_KernelTensor = std::make_unique(); @@ -97,16 +98,19 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); - m_ConvolutionLayer.configure(clCompileContext, - &input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo, - arm_compute::WeightsInfo(), - aclDilationInfo, - activationInfo, - isFastMathEnabled); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload_configure"); + m_ConvolutionLayer.configure(clCompileContext, + &input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo, + arm_compute::WeightsInfo(), + aclDilationInfo, + activationInfo, + isFastMathEnabled); + } m_ConvolutionMethod = m_ConvolutionLayer.get_convolution_method(input.info(), @@ -146,7 +150,10 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip // Force Compute Library to perform the necessary copying and reshaping, after which // delete all the input tensors that will no longer be needed - m_ConvolutionLayer.prepare(); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload_prepare"); + m_ConvolutionLayer.prepare(); + } FreeUnusedTensors(); } -- cgit v1.2.1
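
The core of this change is the generalisation of PopulateInferences() into PopulateParent(), which Print() now calls three times, once per parent event name ("Optimizer", "LoadedNetwork", "EnqueueWorkload"), before mapping out descendants. Below is a minimal standalone sketch of that selection logic for readers without the full tree in front of them. The Event struct, the CalcLevel() helper and the plain std::vector used here are simplified stand-ins for the armnn types and are illustrative only; they are not part of this patch.

    #include <string>
    #include <vector>

    struct Event
    {
        std::string  m_Name;
        const Event* m_Parent = nullptr;

        const std::string& GetName() const { return m_Name; }
    };

    // Depth of an event in the parent/child chain, mirroring what the real
    // CalcLevel() computes by walking GetParentEvent().
    int CalcLevel(const Event* event)
    {
        int level = 0;
        for (const Event* parent = event->m_Parent; parent != nullptr; parent = parent->m_Parent)
        {
            ++level;
        }
        return level;
    }

    // Collect every recorded event whose name matches parentName and remember
    // the nesting level of the first match, so that descendants can later be
    // printed relative to that base level.
    void PopulateParent(const std::vector<Event>& eventSequence,
                        std::vector<const Event*>& outEvents,
                        int& outBaseLevel,
                        const std::string& parentName)
    {
        outEvents.reserve(eventSequence.size());
        for (const Event& event : eventSequence)
        {
            if (event.GetName() == parentName)
            {
                outBaseLevel = (outBaseLevel == -1) ? CalcLevel(&event) : outBaseLevel;
                outEvents.push_back(&event);
            }
        }
    }

As the JsonPrinterTestImpl change above shows, the new "optimize_measurements" section is only populated when the network is optimized with OptimizerOptions::m_ProfilingEnabled set to true; the "loaded_network_measurements" and "inference_measurements" sections come from the scoped profiling events recorded during LoadedNetwork construction and EnqueueWorkload respectively.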